1 /*
2  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <robclark@freedesktop.org>
25  */
26 
27 #include "util/u_atomic.h"
28 #include "util/u_string.h"
29 #include "util/u_math.h"
30 #include "util/u_memory.h"
31 #include "util/format/u_format.h"
32 
33 #include "drm/freedreno_drmif.h"
34 
35 #include "ir3_shader.h"
36 #include "ir3_compiler.h"
37 #include "ir3_nir.h"
38 
39 #include "disasm.h"
40 
int
ir3_glsl_type_size(const struct glsl_type *type, bool bindless)
{
	/* NIR type-size callback: number of attribute slots a GLSL type
	 * occupies.  The 'bindless' flag is required by the callback
	 * signature but does not change the slot count here; vec4 slots
	 * are counted (second arg 'false' = don't treat as vertex input
	 * with special packing -- NOTE(review): confirm against
	 * glsl_count_attribute_slots() docs).
	 */
	return glsl_count_attribute_slots(type, false);
}
46 
47 /* for vertex shader, the inputs are loaded into registers before the shader
48  * is executed, so max_regs from the shader instructions might not properly
49  * reflect the # of registers actually used, especially in case passthrough
50  * varyings.
51  *
52  * Likewise, for fragment shader, we can have some regs which are passed
53  * input values but never touched by the resulting shader (ie. as result
54  * of dead code elimination or simply because we don't know how to turn
55  * the reg off.
56  */
57 static void
fixup_regfootprint(struct ir3_shader_variant * v)58 fixup_regfootprint(struct ir3_shader_variant *v)
59 {
60 	unsigned i;
61 
62 	for (i = 0; i < v->inputs_count; i++) {
63 		/* skip frag inputs fetch via bary.f since their reg's are
64 		 * not written by gpu before shader starts (and in fact the
65 		 * regid's might not even be valid)
66 		 */
67 		if (v->inputs[i].bary)
68 			continue;
69 
70 		/* ignore high regs that are global to all threads in a warp
71 		 * (they exist by default) (a5xx+)
72 		 */
73 		if (v->inputs[i].regid >= regid(48,0))
74 			continue;
75 
76 		if (v->inputs[i].compmask) {
77 			unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
78 			int32_t regid = v->inputs[i].regid + n;
79 			if (v->inputs[i].half) {
80 				if (!v->mergedregs) {
81 					v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
82 				} else {
83 					v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
84 				}
85 			} else {
86 				v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
87 			}
88 		}
89 	}
90 
91 	for (i = 0; i < v->outputs_count; i++) {
92 		/* for ex, VS shaders with tess don't have normal varying outs: */
93 		if (!VALIDREG(v->outputs[i].regid))
94 			continue;
95 		int32_t regid = v->outputs[i].regid + 3;
96 		if (v->outputs[i].half) {
97 			if (!v->mergedregs) {
98 				v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
99 			} else {
100 				v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
101 			}
102 		} else {
103 			v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
104 		}
105 	}
106 
107 	for (i = 0; i < v->num_sampler_prefetch; i++) {
108 		unsigned n = util_last_bit(v->sampler_prefetch[i].wrmask) - 1;
109 		int32_t regid = v->sampler_prefetch[i].dst + n;
110 		if (v->sampler_prefetch[i].half_precision) {
111 			if (!v->mergedregs) {
112 				v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
113 			} else {
114 				v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
115 			}
116 		} else {
117 			v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
118 		}
119 	}
120 }
121 
122 /* wrapper for ir3_assemble() which does some info fixup based on
123  * shader state.  Non-static since used by ir3_cmdline too.
124  */
ir3_shader_assemble(struct ir3_shader_variant * v)125 void * ir3_shader_assemble(struct ir3_shader_variant *v)
126 {
127 	const struct ir3_compiler *compiler = v->shader->compiler;
128 	void *bin;
129 
130 	bin = ir3_assemble(v);
131 	if (!bin)
132 		return NULL;
133 
134 	/* NOTE: if relative addressing is used, we set constlen in
135 	 * the compiler (to worst-case value) since we don't know in
136 	 * the assembler what the max addr reg value can be:
137 	 */
138 	v->constlen = MAX2(v->constlen, v->info.max_const + 1);
139 
140 	/* On a4xx and newer, constlen must be a multiple of 16 dwords even though
141 	 * uploads are in units of 4 dwords. Round it up here to make calculations
142 	 * regarding the shared constlen simpler.
143 	 */
144 	if (compiler->gpu_id >= 400)
145 		v->constlen = align(v->constlen, 4);
146 
147 	fixup_regfootprint(v);
148 
149 	return bin;
150 }
151 
152 static void
assemble_variant(struct ir3_shader_variant * v)153 assemble_variant(struct ir3_shader_variant *v)
154 {
155 	v->bin = ir3_shader_assemble(v);
156 
157 	if (shader_debug_enabled(v->shader->type)) {
158 		fprintf(stdout, "Native code for unnamed %s shader %s:\n",
159 			ir3_shader_stage(v), v->shader->nir->info.name);
160 		if (v->shader->type == MESA_SHADER_FRAGMENT)
161 			fprintf(stdout, "SIMD0\n");
162 		ir3_shader_disasm(v, v->bin, stdout);
163 	}
164 
165 	/* no need to keep the ir around beyond this point: */
166 	ir3_destroy(v->ir);
167 	v->ir = NULL;
168 }
169 
170 static bool
compile_variant(struct ir3_shader_variant * v)171 compile_variant(struct ir3_shader_variant *v)
172 {
173 	int ret = ir3_compile_shader_nir(v->shader->compiler, v);
174 	if (ret) {
175 		_debug_printf("compile failed! (%s:%s)", v->shader->nir->info.name,
176 				v->shader->nir->info.label);
177 		return false;
178 	}
179 
180 	assemble_variant(v);
181 	if (!v->bin) {
182 		_debug_printf("assemble failed! (%s:%s)", v->shader->nir->info.name,
183 				v->shader->nir->info.label);
184 		return false;
185 	}
186 
187 	return true;
188 }
189 
190 /*
191  * For creating normal shader variants, 'nonbinning' is NULL.  For
192  * creating binning pass shader, it is link to corresponding normal
193  * (non-binning) variant.
194  */
195 static struct ir3_shader_variant *
alloc_variant(struct ir3_shader * shader,const struct ir3_shader_key * key,struct ir3_shader_variant * nonbinning)196 alloc_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,
197 		struct ir3_shader_variant *nonbinning)
198 {
199 	void *mem_ctx = shader;
200 	/* hang the binning variant off it's non-binning counterpart instead
201 	 * of the shader, to simplify the error cleanup paths
202 	 */
203 	if (nonbinning)
204 		mem_ctx = nonbinning;
205 	struct ir3_shader_variant *v = rzalloc_size(mem_ctx, sizeof(*v));
206 
207 	if (!v)
208 		return NULL;
209 
210 	v->id = ++shader->variant_count;
211 	v->shader = shader;
212 	v->binning_pass = !!nonbinning;
213 	v->nonbinning = nonbinning;
214 	v->key = *key;
215 	v->type = shader->type;
216 	v->mergedregs = shader->compiler->gpu_id >= 600;
217 
218 	if (!v->binning_pass)
219 		v->const_state = rzalloc_size(v, sizeof(*v->const_state));
220 
221 	return v;
222 }
223 
224 static bool
needs_binning_variant(struct ir3_shader_variant * v)225 needs_binning_variant(struct ir3_shader_variant *v)
226 {
227 	if ((v->type == MESA_SHADER_VERTEX) && ir3_has_binning_vs(&v->key))
228 		return true;
229 	return false;
230 }
231 
232 static struct ir3_shader_variant *
create_variant(struct ir3_shader * shader,const struct ir3_shader_key * key)233 create_variant(struct ir3_shader *shader, const struct ir3_shader_key *key)
234 {
235 	struct ir3_shader_variant *v = alloc_variant(shader, key, NULL);
236 
237 	if (!v)
238 		goto fail;
239 
240 	if (needs_binning_variant(v)) {
241 		v->binning = alloc_variant(shader, key, v);
242 		if (!v->binning)
243 			goto fail;
244 	}
245 
246 	if (ir3_disk_cache_retrieve(shader->compiler, v))
247 		return v;
248 
249 	if (!shader->nir_finalized) {
250 		ir3_nir_post_finalize(shader->compiler, shader->nir);
251 
252 		if (ir3_shader_debug & IR3_DBG_DISASM) {
253 			printf("dump nir%d: type=%d", shader->id, shader->type);
254 			nir_print_shader(shader->nir, stdout);
255 		}
256 
257 		shader->nir_finalized = true;
258 	}
259 
260 	if (!compile_variant(v))
261 		goto fail;
262 
263 	if (needs_binning_variant(v) && !compile_variant(v->binning))
264 		goto fail;
265 
266 	ir3_disk_cache_store(shader->compiler, v);
267 
268 	return v;
269 
270 fail:
271 	ralloc_free(v);
272 	return NULL;
273 }
274 
275 static inline struct ir3_shader_variant *
shader_variant(struct ir3_shader * shader,const struct ir3_shader_key * key)276 shader_variant(struct ir3_shader *shader, const struct ir3_shader_key *key)
277 {
278 	struct ir3_shader_variant *v;
279 
280 	for (v = shader->variants; v; v = v->next)
281 		if (ir3_shader_key_equal(key, &v->key))
282 			return v;
283 
284 	return NULL;
285 }
286 
287 struct ir3_shader_variant *
ir3_shader_get_variant(struct ir3_shader * shader,const struct ir3_shader_key * key,bool binning_pass,bool * created)288 ir3_shader_get_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,
289 		bool binning_pass, bool *created)
290 {
291 	mtx_lock(&shader->variants_lock);
292 	struct ir3_shader_variant *v = shader_variant(shader, key);
293 
294 	if (!v) {
295 		/* compile new variant if it doesn't exist already: */
296 		v = create_variant(shader, key);
297 		if (v) {
298 			v->next = shader->variants;
299 			shader->variants = v;
300 			*created = true;
301 		}
302 	}
303 
304 	if (v && binning_pass) {
305 		v = v->binning;
306 		assert(v);
307 	}
308 
309 	mtx_unlock(&shader->variants_lock);
310 
311 	return v;
312 }
313 
void
ir3_shader_destroy(struct ir3_shader *shader)
{
	/* Free the NIR explicitly (ownership was taken in
	 * ir3_shader_from_nir(); it is presumably not a ralloc child of
	 * the shader, or this free would be redundant -- NOTE(review):
	 * confirm NIR's ralloc parent), tear down the lock, then release
	 * the shader, which also frees all variants parented to it.
	 */
	ralloc_free(shader->nir);
	mtx_destroy(&shader->variants_lock);
	ralloc_free(shader);
}
321 
322 /**
323  * Creates a bitmask of the used bits of the shader key by this particular
324  * shader.  Used by the gallium driver to skip state-dependent recompiles when
325  * possible.
326  */
327 static void
ir3_setup_used_key(struct ir3_shader * shader)328 ir3_setup_used_key(struct ir3_shader *shader)
329 {
330 	nir_shader *nir = shader->nir;
331 	struct shader_info *info = &nir->info;
332 	struct ir3_shader_key *key = &shader->key_mask;
333 
334 	/* This key flag is just used to make for a cheaper ir3_shader_key_equal
335 	 * check in the common case.
336 	 */
337 	key->has_per_samp = true;
338 
339 	key->safe_constlen = true;
340 
341 	/* When clip/cull distances are natively supported, we only use
342 	 * ucp_enables to determine whether to lower legacy clip planes to
343 	 * gl_ClipDistance.
344 	 */
345 	if (info->stage != MESA_SHADER_FRAGMENT || !shader->compiler->has_clip_cull)
346 		key->ucp_enables = 0xff;
347 
348 	if (info->stage == MESA_SHADER_FRAGMENT) {
349 		key->fsaturate_s = ~0;
350 		key->fsaturate_t = ~0;
351 		key->fsaturate_r = ~0;
352 		key->fastc_srgb = ~0;
353 		key->fsamples = ~0;
354 
355 		if (info->inputs_read & VARYING_BITS_COLOR) {
356 			key->rasterflat = true;
357 			key->color_two_side = true;
358 		}
359 
360 		if (info->inputs_read & VARYING_BIT_LAYER) {
361 			key->layer_zero = true;
362 		}
363 
364 		if (info->inputs_read & VARYING_BIT_VIEWPORT) {
365 			key->view_zero = true;
366 		}
367 
368 		if ((info->outputs_written & ~(FRAG_RESULT_DEPTH |
369 								FRAG_RESULT_STENCIL |
370 								FRAG_RESULT_SAMPLE_MASK)) != 0) {
371 			key->fclamp_color = true;
372 		}
373 
374 		/* Only used for deciding on behavior of
375 		 * nir_intrinsic_load_barycentric_sample
376 		 */
377 		key->msaa = info->fs.uses_sample_qualifier;
378 	} else {
379 		key->tessellation = ~0;
380 		key->has_gs = true;
381 
382 		if (info->outputs_written & VARYING_BITS_COLOR)
383 			key->vclamp_color = true;
384 
385 		if (info->stage == MESA_SHADER_VERTEX) {
386 			key->vsaturate_s = ~0;
387 			key->vsaturate_t = ~0;
388 			key->vsaturate_r = ~0;
389 			key->vastc_srgb = ~0;
390 			key->vsamples = ~0;
391 		}
392 	}
393 }
394 
395 
/* Given an array of constlen's, decrease some of them so that the sum stays
 * within "combined_limit" while trying to fairly share the reduction. Returns
 * a bitfield of which stages should be trimmed.
 */
static uint32_t
trim_constlens(unsigned *constlens,
			   unsigned first_stage, unsigned last_stage,
			   unsigned combined_limit, unsigned safe_limit)
{
	unsigned cur_total = 0;
	for (unsigned i = first_stage; i <= last_stage; i++) {
		cur_total += constlens[i];
	}

	uint32_t trimmed = 0;

	while (cur_total > combined_limit) {
		/* Find the stage with the largest constlen.  The search state
		 * must be reset on every iteration: carrying the previous
		 * maximum across iterations means that once the winning stage
		 * has been clamped to safe_limit, no stage may reach the stale
		 * maximum again, so the same (already-trimmed) stage would be
		 * picked repeatedly -- double-subtracting its old constlen from
		 * cur_total and exiting with other stages still over the limit.
		 */
		unsigned max_stage = first_stage;
		unsigned max_const = 0;

		for (unsigned i = first_stage; i <= last_stage; i++) {
			if (constlens[i] >= max_const) {
				max_stage = i;
				max_const = constlens[i];
			}
		}

		/* if this fires, the limits are unsatisfiable even with every
		 * stage at safe_limit:
		 */
		assert(max_const > safe_limit);

		trimmed |= 1 << max_stage;
		cur_total = cur_total - max_const + safe_limit;
		constlens[max_stage] = safe_limit;
	}

	return trimmed;
}
430 
431 /* Figures out which stages in the pipeline to use the "safe" constlen for, in
432  * order to satisfy all shared constlen limits.
433  */
434 uint32_t
ir3_trim_constlen(struct ir3_shader_variant ** variants,const struct ir3_compiler * compiler)435 ir3_trim_constlen(struct ir3_shader_variant **variants,
436 				  const struct ir3_compiler *compiler)
437 {
438 	unsigned constlens[MESA_SHADER_STAGES] = {};
439 
440 	for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
441 		if (variants[i])
442 			constlens[i] = variants[i]->constlen;
443 	}
444 
445 	uint32_t trimmed = 0;
446 	STATIC_ASSERT(MESA_SHADER_STAGES <= 8 * sizeof(trimmed));
447 
448 	/* There are two shared limits to take into account, the geometry limit on
449 	 * a6xx and the total limit. The frag limit on a6xx only matters for a
450 	 * single stage, so it's always satisfied with the first variant.
451 	 */
452 	if (compiler->gpu_id >= 600) {
453 		trimmed |=
454 			trim_constlens(constlens, MESA_SHADER_VERTEX, MESA_SHADER_GEOMETRY,
455 						   compiler->max_const_geom, compiler->max_const_safe);
456 	}
457 	trimmed |=
458 		trim_constlens(constlens, MESA_SHADER_VERTEX, MESA_SHADER_FRAGMENT,
459 					   compiler->max_const_pipeline, compiler->max_const_safe);
460 
461 	return trimmed;
462 }
463 
464 struct ir3_shader *
ir3_shader_from_nir(struct ir3_compiler * compiler,nir_shader * nir,unsigned reserved_user_consts,struct ir3_stream_output_info * stream_output)465 ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
466 		unsigned reserved_user_consts, struct ir3_stream_output_info *stream_output)
467 {
468 	struct ir3_shader *shader = rzalloc_size(NULL, sizeof(*shader));
469 
470 	mtx_init(&shader->variants_lock, mtx_plain);
471 	shader->compiler = compiler;
472 	shader->id = p_atomic_inc_return(&shader->compiler->shader_count);
473 	shader->type = nir->info.stage;
474 	if (stream_output)
475 		memcpy(&shader->stream_output, stream_output, sizeof(shader->stream_output));
476 	shader->num_reserved_user_consts = reserved_user_consts;
477 	shader->nir = nir;
478 
479 	ir3_disk_cache_init_shader_key(compiler, shader);
480 
481 	ir3_setup_used_key(shader);
482 
483 	return shader;
484 }
485 
dump_reg(FILE * out,const char * name,uint32_t r)486 static void dump_reg(FILE *out, const char *name, uint32_t r)
487 {
488 	if (r != regid(63,0)) {
489 		const char *reg_type = (r & HALF_REG_ID) ? "hr" : "r";
490 		fprintf(out, "; %s: %s%d.%c\n", name, reg_type,
491 				(r & ~HALF_REG_ID) >> 2, "xyzw"[r & 0x3]);
492 	}
493 }
494 
dump_output(FILE * out,struct ir3_shader_variant * so,unsigned slot,const char * name)495 static void dump_output(FILE *out, struct ir3_shader_variant *so,
496 		unsigned slot, const char *name)
497 {
498 	uint32_t regid;
499 	regid = ir3_find_output_regid(so, slot);
500 	dump_reg(out, name, regid);
501 }
502 
503 static const char *
input_name(struct ir3_shader_variant * so,int i)504 input_name(struct ir3_shader_variant *so, int i)
505 {
506 	if (so->inputs[i].sysval) {
507 		return gl_system_value_name(so->inputs[i].slot);
508 	} else if (so->type == MESA_SHADER_VERTEX) {
509 		return gl_vert_attrib_name(so->inputs[i].slot);
510 	} else {
511 		return gl_varying_slot_name(so->inputs[i].slot);
512 	}
513 }
514 
515 static const char *
output_name(struct ir3_shader_variant * so,int i)516 output_name(struct ir3_shader_variant *so, int i)
517 {
518 	if (so->type == MESA_SHADER_FRAGMENT) {
519 		return gl_frag_result_name(so->outputs[i].slot);
520 	} else {
521 		switch (so->outputs[i].slot) {
522 		case VARYING_SLOT_GS_HEADER_IR3:
523 			return "GS_HEADER";
524 		case VARYING_SLOT_GS_VERTEX_FLAGS_IR3:
525 			return "GS_VERTEX_FLAGS";
526 		case VARYING_SLOT_TCS_HEADER_IR3:
527 			return "TCS_HEADER";
528 		default:
529 			return gl_varying_slot_name(so->outputs[i].slot);
530 		}
531 	}
532 }
533 
/* Print a human-readable dump of variant "so" (whose assembled code is
 * "bin") to "out": input/output register tables, pre-dispatch texture
 * fetches, immediate constants, the disassembly itself, and general
 * shader statistics.  Requires so->ir to still be live (i.e. called
 * before ir3_destroy()).
 */
void
ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
{
	struct ir3 *ir = so->ir;
	struct ir3_register *reg;
	const char *type = ir3_shader_stage(so);
	uint8_t regid;
	unsigned i;

	/* input registers (dst reg of each meta-input instruction): */
	foreach_input_n (instr, i, ir) {
		reg = instr->regs[0];
		regid = reg->num;
		fprintf(out, "@in(%sr%d.%c)\tin%d",
				(reg->flags & IR3_REG_HALF) ? "h" : "",
				(regid >> 2), "xyzw"[regid & 0x3], i);

		/* only mention wrmask when more than one component is written: */
		if (reg->wrmask > 0x1)
			fprintf(out, " (wrmask=0x%x)", reg->wrmask);
		fprintf(out, "\n");
	}

	/* print pre-dispatch texture fetches: */
	for (i = 0; i < so->num_sampler_prefetch; i++) {
		const struct ir3_sampler_prefetch *fetch = &so->sampler_prefetch[i];
		fprintf(out, "@tex(%sr%d.%c)\tsrc=%u, samp=%u, tex=%u, wrmask=0x%x, cmd=%u\n",
				fetch->half_precision ? "h" : "",
				fetch->dst >> 2, "xyzw"[fetch->dst & 0x3],
				fetch->src, fetch->samp_id, fetch->tex_id,
				fetch->wrmask, fetch->cmd);
	}

	/* output registers (dst reg of each meta-output instruction): */
	foreach_output_n (instr, i, ir) {
		reg = instr->regs[0];
		regid = reg->num;
		fprintf(out, "@out(%sr%d.%c)\tout%d",
				(reg->flags & IR3_REG_HALF) ? "h" : "",
				(regid >> 2), "xyzw"[regid & 0x3], i);
		if (reg->wrmask > 0x1)
			fprintf(out, " (wrmask=0x%x)", reg->wrmask);
		fprintf(out, "\n");
	}

	/* immediate constants, stored/uploaded in vec4 groups: */
	const struct ir3_const_state *const_state = ir3_const_state(so);
	for (i = 0; i < DIV_ROUND_UP(const_state->immediates_count, 4); i++) {
		fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i);
		fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
				const_state->immediates[i * 4 + 0],
				const_state->immediates[i * 4 + 1],
				const_state->immediates[i * 4 + 2],
				const_state->immediates[i * 4 + 3]);
	}

	/* the actual instruction disassembly: */
	disasm_a3xx(bin, so->info.sizedwords, 0, out, ir->compiler->gpu_id);

	/* summary tables of output and input register assignments: */
	fprintf(out, "; %s: outputs:", type);
	for (i = 0; i < so->outputs_count; i++) {
		uint8_t regid = so->outputs[i].regid;
		const char *reg_type = so->outputs[i].half ? "hr" : "r";
		fprintf(out, " %s%d.%c (%s)",
				reg_type, (regid >> 2), "xyzw"[regid & 0x3],
				output_name(so, i));
	}
	fprintf(out, "\n");

	fprintf(out, "; %s: inputs:", type);
	for (i = 0; i < so->inputs_count; i++) {
		uint8_t regid = so->inputs[i].regid;
		fprintf(out, " r%d.%c (%s slot=%d cm=%x,il=%u,b=%u)",
				(regid >> 2), "xyzw"[regid & 0x3],
				input_name(so, i),
				so->inputs[i].slot,
				so->inputs[i].compmask,
				so->inputs[i].inloc,
				so->inputs[i].bary);
	}
	fprintf(out, "\n");

	/* print generic shader info: */
	fprintf(out, "; %s prog %d/%d: %u instr, %u nops, %u non-nops, %u mov, %u cov, %u dwords\n",
			type, so->shader->id, so->id,
			so->info.instrs_count,
			so->info.nops_count,
			so->info.instrs_count - so->info.nops_count,
			so->info.mov_count, so->info.cov_count,
			so->info.sizedwords);

	fprintf(out, "; %s prog %d/%d: %u last-baryf, %d half, %d full, %u constlen\n",
			type, so->shader->id, so->id,
			so->info.last_baryf,
			so->info.max_half_reg + 1,
			so->info.max_reg + 1,
			so->constlen);

	/* per-category instruction counts: */
	fprintf(out, "; %s prog %d/%d: %u cat0, %u cat1, %u cat2, %u cat3, %u cat4, %u cat5, %u cat6, %u cat7, \n",
			type, so->shader->id, so->id,
			so->info.instrs_per_cat[0],
			so->info.instrs_per_cat[1],
			so->info.instrs_per_cat[2],
			so->info.instrs_per_cat[3],
			so->info.instrs_per_cat[4],
			so->info.instrs_per_cat[5],
			so->info.instrs_per_cat[6],
			so->info.instrs_per_cat[7]);

	/* stall/sync statistics: */
	fprintf(out, "; %s prog %d/%d: %u sstall, %u (ss), %u (sy), %d max_sun, %d loops\n",
			type, so->shader->id, so->id,
			so->info.sstall,
			so->info.ss,
			so->info.sy,
			so->max_sun,
			so->loops);

	/* print shader type specific info: */
	switch (so->type) {
	case MESA_SHADER_VERTEX:
		dump_output(out, so, VARYING_SLOT_POS, "pos");
		dump_output(out, so, VARYING_SLOT_PSIZ, "psize");
		break;
	case MESA_SHADER_FRAGMENT:
		dump_reg(out, "pos (ij_pixel)",
			ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL));
		dump_reg(out, "pos (ij_centroid)",
			ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID));
		dump_reg(out, "pos (ij_size)",
			ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_PERSP_SIZE));
		dump_output(out, so, FRAG_RESULT_DEPTH, "posz");
		/* either a single combined color output, or per-MRT data outputs: */
		if (so->color0_mrt) {
			dump_output(out, so, FRAG_RESULT_COLOR, "color");
		} else {
			dump_output(out, so, FRAG_RESULT_DATA0, "data0");
			dump_output(out, so, FRAG_RESULT_DATA1, "data1");
			dump_output(out, so, FRAG_RESULT_DATA2, "data2");
			dump_output(out, so, FRAG_RESULT_DATA3, "data3");
			dump_output(out, so, FRAG_RESULT_DATA4, "data4");
			dump_output(out, so, FRAG_RESULT_DATA5, "data5");
			dump_output(out, so, FRAG_RESULT_DATA6, "data6");
			dump_output(out, so, FRAG_RESULT_DATA7, "data7");
		}
		dump_reg(out, "fragcoord",
			ir3_find_sysval_regid(so, SYSTEM_VALUE_FRAG_COORD));
		dump_reg(out, "fragface",
			ir3_find_sysval_regid(so, SYSTEM_VALUE_FRONT_FACE));
		break;
	default:
		/* TODO */
		break;
	}

	fprintf(out, "\n");
}
684 
/* Bitmask of outputs written by the shader, straight from the NIR
 * shader_info (bit positions are gl_varying_slot / gl_frag_result
 * values depending on the stage).
 */
uint64_t
ir3_shader_outputs(const struct ir3_shader *so)
{
	return so->nir->info.outputs_written;
}
690