1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #define SB_RA_SCHED_CHECK DEBUG
28 
29 #include "util/os_time.h"
30 #include "r600_pipe.h"
31 #include "r600_shader.h"
32 
33 #include "sb_public.h"
34 
35 #include <stack>
36 #include <map>
37 
38 #include "sb_bc.h"
39 #include "sb_shader.h"
40 #include "sb_pass.h"
41 #include "sb_sched.h"
42 
43 using namespace r600_sb;
44 
45 static sb_hw_class translate_chip_class(enum chip_class cc);
46 static sb_hw_chip translate_chip(enum radeon_family rf);
47 
r600_sb_context_create(struct r600_context * rctx)48 sb_context *r600_sb_context_create(struct r600_context *rctx) {
49 
50 	sb_context *sctx = new sb_context();
51 
52 	if (sctx->init(rctx->isa, translate_chip(rctx->b.family),
53 			translate_chip_class(rctx->b.chip_class))) {
54 		delete sctx;
55 		sctx = NULL;
56 	}
57 
58 	unsigned df = rctx->screen->b.debug_flags;
59 
60 	sb_context::dump_pass = df & DBG_SB_DUMP;
61 	sb_context::dump_stat = df & DBG_SB_STAT;
62 	sb_context::dry_run = df & DBG_SB_DRY_RUN;
63 	sb_context::no_fallback = df & DBG_SB_NO_FALLBACK;
64 	sb_context::safe_math = df & DBG_SB_SAFEMATH;
65 
66 	sb_context::dskip_start = debug_get_num_option("R600_SB_DSKIP_START", 0);
67 	sb_context::dskip_end = debug_get_num_option("R600_SB_DSKIP_END", 0);
68 	sb_context::dskip_mode = debug_get_num_option("R600_SB_DSKIP_MODE", 0);
69 
70 	return sctx;
71 }
72 
r600_sb_context_destroy(void * sctx)73 void r600_sb_context_destroy(void * sctx) {
74 	if (sctx) {
75 		sb_context *ctx = static_cast<sb_context*>(sctx);
76 
77 		if (sb_context::dump_stat) {
78 			sblog << "\ncontext src stats: ";
79 			ctx->src_stats.dump();
80 			sblog << "context opt stats: ";
81 			ctx->opt_stats.dump();
82 			sblog << "context diff: ";
83 			ctx->src_stats.dump_diff(ctx->opt_stats);
84 		}
85 
86 		delete ctx;
87 	}
88 }
89 
r600_sb_bytecode_process(struct r600_context * rctx,struct r600_bytecode * bc,struct r600_shader * pshader,int dump_bytecode,int optimize)90 int r600_sb_bytecode_process(struct r600_context *rctx,
91                              struct r600_bytecode *bc,
92                              struct r600_shader *pshader,
93                              int dump_bytecode,
94                              int optimize) {
95 	int r = 0;
96 	unsigned shader_id = bc->debug_id;
97 
98 	sb_context *ctx = (sb_context *)rctx->sb_context;
99 	if (!ctx) {
100 		rctx->sb_context = ctx = r600_sb_context_create(rctx);
101 	}
102 
103 	int64_t time_start = 0;
104 	if (sb_context::dump_stat) {
105 		time_start = os_time_get_nano();
106 	}
107 
108 	SB_DUMP_STAT( sblog << "\nsb: shader " << shader_id << "\n"; );
109 
110 	bc_parser parser(*ctx, bc, pshader);
111 
112 	if ((r = parser.decode())) {
113 		assert(!"sb: bytecode decoding error");
114 		return r;
115 	}
116 
117 	shader *sh = parser.get_shader();
118 
119 	if (dump_bytecode) {
120 		bc_dump(*sh, bc->bytecode, bc->ndw).run();
121 	}
122 
123 	if (!optimize) {
124 		delete sh;
125 		return 0;
126 	}
127 
128 	if (sh->target != TARGET_FETCH) {
129 		sh->src_stats.ndw = bc->ndw;
130 		sh->collect_stats(false);
131 	}
132 
133 	/* skip some shaders (use shaders from default backend)
134 	 * dskip_start - range start, dskip_end - range_end,
135 	 * e.g. start = 5, end = 6 means shaders 5 & 6
136 	 *
137 	 * dskip_mode == 0 - disabled,
138 	 * dskip_mode == 1 - don't process the shaders from the [start;end] range
139 	 * dskip_mode == 2 - process only the shaders from the range
140 	 */
141 	if (sb_context::dskip_mode) {
142 		if ((sb_context::dskip_start <= shader_id &&
143 				shader_id <= sb_context::dskip_end) ==
144 						(sb_context::dskip_mode == 1)) {
145 			sblog << "sb: skipped shader " << shader_id << " : " << "["
146 					<< sb_context::dskip_start << "; "
147 					<< sb_context::dskip_end << "] mode "
148 					<< sb_context::dskip_mode << "\n";
149 			return 0;
150 		}
151 	}
152 
153 	if ((r = parser.prepare())) {
154 		assert(!"sb: bytecode parsing error");
155 		return r;
156 	}
157 
158 	SB_DUMP_PASS( sblog << "\n\n###### after parse\n"; sh->dump_ir(); );
159 
160 #define SB_RUN_PASS(n, dump) \
161 	do { \
162 		r = n(*sh).run(); \
163 		if (r) { \
164 			sblog << "sb: error (" << r << ") in the " << #n << " pass.\n"; \
165 			if (sb_context::no_fallback) \
166 				return r; \
167 			sblog << "sb: using unoptimized bytecode...\n"; \
168 			delete sh; \
169 			return 0; \
170 		} \
171 		if (dump) { \
172 			SB_DUMP_PASS( sblog << "\n\n###### after " << #n << "\n"; \
173 				sh->dump_ir();); \
174 		} \
175 		assert(!r); \
176 	} while (0)
177 
178 	SB_RUN_PASS(ssa_prepare,		0);
179 	SB_RUN_PASS(ssa_rename,			1);
180 
181 	if (sh->has_alu_predication)
182 		SB_RUN_PASS(psi_ops,		1);
183 
184 	SB_RUN_PASS(liveness,			0);
185 
186 	sh->dce_flags = DF_REMOVE_DEAD | DF_EXPAND;
187 	SB_RUN_PASS(dce_cleanup,		0);
188 	SB_RUN_PASS(def_use,			0);
189 
190 	sh->set_undef(sh->root->live_before);
191 
192 	// if conversion breaks the dependency tracking between CF_EMIT ops when it removes
193 	// the phi nodes for SV_GEOMETRY_EMIT. Just disable it for GS
194 	if ((sh->target != TARGET_GS && sh->target != TARGET_HS) || pshader->needs_scratch_space)
195 		SB_RUN_PASS(if_conversion,		1);
196 
197 	// if_conversion breaks info about uses, but next pass (peephole)
198 	// doesn't need it, so we can skip def/use update here
199 	// until it's really required
200 	//SB_RUN_PASS(def_use,			0);
201 
202 	SB_RUN_PASS(peephole,			1);
203 	SB_RUN_PASS(def_use,			0);
204 
205 	SB_RUN_PASS(gvn,				1);
206 
207 	SB_RUN_PASS(def_use,			1);
208 
209 	sh->dce_flags = DF_REMOVE_DEAD | DF_REMOVE_UNUSED;
210 	SB_RUN_PASS(dce_cleanup,		1);
211 
212 	SB_RUN_PASS(ra_split,			0);
213 	SB_RUN_PASS(def_use,			0);
214 
215 	// create 'basic blocks'. it's not like we build CFG, they are just
216 	// container nodes in the correct locations for code placement
217 	sh->create_bbs();
218 
219 	SB_RUN_PASS(gcm,				1);
220 
221 	sh->compute_interferences = true;
222 	SB_RUN_PASS(liveness,			0);
223 
224 	sh->dce_flags = DF_REMOVE_DEAD;
225 	SB_RUN_PASS(dce_cleanup,		1);
226 
227 	SB_RUN_PASS(ra_coalesce,		1);
228 	SB_RUN_PASS(ra_init,			1);
229 
230 	SB_RUN_PASS(post_scheduler,		1);
231 
232 	sh->expand_bbs();
233 
234 #if SB_RA_SCHED_CHECK
235 	// check code correctness after regalloc/scheduler
236 	SB_RUN_PASS(ra_checker,			0);
237 #endif
238 
239 	SB_RUN_PASS(bc_finalizer,		0);
240 
241 	sh->optimized = true;
242 
243 	bc_builder builder(*sh);
244 
245 	if ((r = builder.build())) {
246 		assert(0);
247 		return r;
248 	}
249 
250 	bytecode &nbc = builder.get_bytecode();
251 
252 	if (dump_bytecode) {
253 		bc_dump(*sh, &nbc).run();
254 	}
255 
256 	if (!sb_context::dry_run) {
257 
258 		free(bc->bytecode);
259 		bc->ndw = nbc.ndw();
260 		bc->bytecode = (uint32_t*) malloc(bc->ndw << 2);
261 		nbc.write_data(bc->bytecode);
262 
263 		bc->ngpr = sh->ngpr;
264 		bc->nstack = sh->nstack;
265 	} else {
266 		SB_DUMP_STAT( sblog << "sb: dry run: optimized bytecode is not used\n"; );
267 	}
268 
269 	if (sb_context::dump_stat) {
270 		int64_t t = os_time_get_nano() - time_start;
271 
272 		sblog << "sb: processing shader " << shader_id << " done ( "
273 				<< ((double)t)/1000000.0 << " ms ).\n";
274 
275 		sh->opt_stats.ndw = bc->ndw;
276 		sh->collect_stats(true);
277 
278 		sblog << "src stats: ";
279 		sh->src_stats.dump();
280 		sblog << "opt stats: ";
281 		sh->opt_stats.dump();
282 		sblog << "diff: ";
283 		sh->src_stats.dump_diff(sh->opt_stats);
284 	}
285 
286 	delete sh;
287 	return 0;
288 }
289 
translate_chip(enum radeon_family rf)290 static sb_hw_chip translate_chip(enum radeon_family rf) {
291 	switch (rf) {
292 
293 #define TRANSLATE_CHIP(c) case CHIP_##c: return HW_CHIP_##c
294 		TRANSLATE_CHIP(R600);
295 		TRANSLATE_CHIP(RV610);
296 		TRANSLATE_CHIP(RV630);
297 		TRANSLATE_CHIP(RV670);
298 		TRANSLATE_CHIP(RV620);
299 		TRANSLATE_CHIP(RV635);
300 		TRANSLATE_CHIP(RS780);
301 		TRANSLATE_CHIP(RS880);
302 		TRANSLATE_CHIP(RV770);
303 		TRANSLATE_CHIP(RV730);
304 		TRANSLATE_CHIP(RV710);
305 		TRANSLATE_CHIP(RV740);
306 		TRANSLATE_CHIP(CEDAR);
307 		TRANSLATE_CHIP(REDWOOD);
308 		TRANSLATE_CHIP(JUNIPER);
309 		TRANSLATE_CHIP(CYPRESS);
310 		TRANSLATE_CHIP(HEMLOCK);
311 		TRANSLATE_CHIP(PALM);
312 		TRANSLATE_CHIP(SUMO);
313 		TRANSLATE_CHIP(SUMO2);
314 		TRANSLATE_CHIP(BARTS);
315 		TRANSLATE_CHIP(TURKS);
316 		TRANSLATE_CHIP(CAICOS);
317 		TRANSLATE_CHIP(CAYMAN);
318 		TRANSLATE_CHIP(ARUBA);
319 #undef TRANSLATE_CHIP
320 
321 		default:
322 			assert(!"unknown chip");
323 			return HW_CHIP_UNKNOWN;
324 	}
325 }
326 
translate_chip_class(enum chip_class cc)327 static sb_hw_class translate_chip_class(enum chip_class cc) {
328 	switch(cc) {
329 		case R600: return HW_CLASS_R600;
330 		case R700: return HW_CLASS_R700;
331 		case EVERGREEN: return HW_CLASS_EVERGREEN;
332 		case CAYMAN: return HW_CLASS_CAYMAN;
333 
334 		default:
335 			assert(!"unknown chip class");
336 			return HW_CLASS_UNKNOWN;
337 	}
338 }
339