1 /*
2  * Copyright 2015 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "si_pipe.h"
25 #include "si_compute.h"
26 #include "sid.h"
27 #include "gfx9d.h"
28 #include "sid_tables.h"
29 #include "ddebug/dd_util.h"
30 #include "util/u_dump.h"
31 #include "util/u_log.h"
32 #include "util/u_memory.h"
33 #include "ac_debug.h"
34 
35 static void si_dump_bo_list(struct si_context *sctx,
36 			    const struct radeon_saved_cs *saved, FILE *f);
37 
38 DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
39 
si_dump_shader(struct si_screen * sscreen,enum pipe_shader_type processor,const struct si_shader * shader,FILE * f)40 static void si_dump_shader(struct si_screen *sscreen,
41 			   enum pipe_shader_type processor,
42 			   const struct si_shader *shader, FILE *f)
43 {
44 	if (shader->shader_log)
45 		fwrite(shader->shader_log, shader->shader_log_size, 1, f);
46 	else
47 		si_shader_dump(sscreen, shader, NULL, processor, f, false);
48 }
49 
50 struct si_log_chunk_shader {
51 	/* The shader destroy code assumes a current context for unlinking of
52 	 * PM4 packets etc.
53 	 *
54 	 * While we should be able to destroy shaders without a context, doing
55 	 * so would happen only very rarely and be therefore likely to fail
56 	 * just when you're trying to debug something. Let's just remember the
57 	 * current context in the chunk.
58 	 */
59 	struct si_context *ctx;
60 	struct si_shader *shader;
61 	enum pipe_shader_type processor;
62 
63 	/* For keep-alive reference counts */
64 	struct si_shader_selector *sel;
65 	struct si_compute *program;
66 };
67 
68 static void
si_log_chunk_shader_destroy(void * data)69 si_log_chunk_shader_destroy(void *data)
70 {
71 	struct si_log_chunk_shader *chunk = data;
72 	si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL);
73 	si_compute_reference(&chunk->program, NULL);
74 	FREE(chunk);
75 }
76 
77 static void
si_log_chunk_shader_print(void * data,FILE * f)78 si_log_chunk_shader_print(void *data, FILE *f)
79 {
80 	struct si_log_chunk_shader *chunk = data;
81 	struct si_screen *sscreen = chunk->ctx->screen;
82 	si_dump_shader(sscreen, chunk->processor,
83 		       chunk->shader, f);
84 }
85 
86 static struct u_log_chunk_type si_log_chunk_type_shader = {
87 	.destroy = si_log_chunk_shader_destroy,
88 	.print = si_log_chunk_shader_print,
89 };
90 
si_dump_gfx_shader(struct si_context * ctx,const struct si_shader_ctx_state * state,struct u_log_context * log)91 static void si_dump_gfx_shader(struct si_context *ctx,
92 			       const struct si_shader_ctx_state *state,
93 			       struct u_log_context *log)
94 {
95 	struct si_shader *current = state->current;
96 
97 	if (!state->cso || !current)
98 		return;
99 
100 	struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
101 	chunk->ctx = ctx;
102 	chunk->processor = state->cso->info.processor;
103 	chunk->shader = current;
104 	si_shader_selector_reference(ctx, &chunk->sel, current->selector);
105 	u_log_chunk(log, &si_log_chunk_type_shader, chunk);
106 }
107 
si_dump_compute_shader(struct si_context * ctx,struct u_log_context * log)108 static void si_dump_compute_shader(struct si_context *ctx,
109 				   struct u_log_context *log)
110 {
111 	const struct si_cs_shader_state *state = &ctx->cs_shader_state;
112 
113 	if (!state->program)
114 		return;
115 
116 	struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
117 	chunk->ctx = ctx;
118 	chunk->processor = PIPE_SHADER_COMPUTE;
119 	chunk->shader = &state->program->shader;
120 	si_compute_reference(&chunk->program, state->program);
121 	u_log_chunk(log, &si_log_chunk_type_shader, chunk);
122 }
123 
/**
 * Shader compiles can be overridden with arbitrary ELF objects by setting
 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
 *
 * \param num     number of the shader being compiled; it is matched against
 *                the numN fields of the option string
 * \param binary  destination handed to ac_elf_read() together with the
 *                file contents
 * \return true if an entry for \p num exists and its file was read
 *         completely and passed to ac_elf_read(); false otherwise
 *
 * The process is terminated with exit(1) if the option string is malformed
 * (a number that is not followed by ':').
 */
bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
{
	const char *p = debug_get_option_replace_shaders();
	const char *semicolon;
	char *copy = NULL;
	FILE *f;
	long filesize;
	size_t nread;
	char *buf = NULL;
	bool replaced = false;

	if (!p)
		return false;

	/* Walk the "num:filename" entries until the one for "num" is found. */
	while (*p) {
		unsigned long i;
		char *endp;
		i = strtoul(p, &endp, 0);

		p = endp;
		if (*p != ':') {
			fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");
			exit(1);
		}
		++p;

		if (i == num)
			break;

		p = strchr(p, ';');
		if (!p)
			return false;
		++p;
	}
	if (!*p)
		return false;

	/* "p" now points at the filename. If more entries follow, take a
	 * NUL-terminated copy of just this filename. */
	semicolon = strchr(p, ';');
	if (semicolon) {
		p = copy = strndup(p, semicolon - p);
		if (!copy) {
			fprintf(stderr, "out of memory\n");
			return false;
		}
	}

	fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);

	/* ELF objects are binary data: open in binary mode so that no
	 * newline translation can corrupt the contents or the size. */
	f = fopen(p, "rb");
	if (!f) {
		perror("radeonsi: failed to open file");
		goto out_free;
	}

	/* Determine the file size by seeking to the end. */
	if (fseek(f, 0, SEEK_END) != 0)
		goto file_error;

	filesize = ftell(f);
	if (filesize < 0)
		goto file_error;

	if (fseek(f, 0, SEEK_SET) != 0)
		goto file_error;

	buf = MALLOC(filesize);
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		goto out_close;
	}

	/* fread() returns size_t; compare in the unsigned domain
	 * (filesize is known to be non-negative here). */
	nread = fread(buf, 1, filesize, f);
	if (nread != (size_t)filesize)
		goto file_error;

	ac_elf_read(buf, filesize, binary);
	replaced = true;

out_close:
	fclose(f);
out_free:
	FREE(buf);
	free(copy);
	return replaced;

file_error:
	perror("radeonsi: reading shader");
	goto out_close;
}
215 
/* ANSI escape sequences used to colorize the dumps.
 *
 * Parsed IBs are difficult to read without colors. View the output with
 * "less -R file", or convert it to HTML with "aha -b -f file".
 */
#define COLOR_RESET	"\033[0m"
#define COLOR_RED	"\033[31m"
#define COLOR_GREEN	"\033[1;32m"
#define COLOR_YELLOW	"\033[1;33m"
#define COLOR_CYAN	"\033[1;36m"
224 
si_dump_mmapped_reg(struct si_context * sctx,FILE * f,unsigned offset)225 static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f,
226 				unsigned offset)
227 {
228 	struct radeon_winsys *ws = sctx->b.ws;
229 	uint32_t value;
230 
231 	if (ws->read_registers(ws, offset, 1, &value))
232 		ac_dump_reg(f, sctx->b.chip_class, offset, value, ~0);
233 }
234 
si_dump_debug_registers(struct si_context * sctx,FILE * f)235 static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
236 {
237 	if (sctx->screen->info.drm_major == 2 &&
238 	    sctx->screen->info.drm_minor < 42)
239 		return; /* no radeon support */
240 
241 	fprintf(f, "Memory-mapped registers:\n");
242 	si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);
243 
244 	/* No other registers can be read on DRM < 3.1.0. */
245 	if (sctx->screen->info.drm_major < 3 ||
246 	    sctx->screen->info.drm_minor < 1) {
247 		fprintf(f, "\n");
248 		return;
249 	}
250 
251 	si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
252 	si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
253 	si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
254 	si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
255 	si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
256 	si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
257 	si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
258 	if (sctx->b.chip_class <= VI) {
259 		si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
260 		si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
261 		si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
262 	}
263 	si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
264 	si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
265 	si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
266 	si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
267 	si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
268 	si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
269 	si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
270 	si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
271 	si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
272 	si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
273 	fprintf(f, "\n");
274 }
275 
/* Payload of a log chunk that prints a range of the gfx command stream. */
struct si_log_chunk_cs {
	struct si_context *ctx;
	/* Referenced saved CS; released by the chunk destructor. */
	struct si_saved_cs *cs;
	/* Also print the flush time and the buffer list. */
	bool dump_bo_list;
	/* Dword range [gfx_begin, gfx_end) of the gfx IB covered by this chunk. */
	unsigned gfx_begin, gfx_end;
};
282 
si_log_chunk_type_cs_destroy(void * data)283 static void si_log_chunk_type_cs_destroy(void *data)
284 {
285 	struct si_log_chunk_cs *chunk = data;
286 	si_saved_cs_reference(&chunk->cs, NULL);
287 	free(chunk);
288 }
289 
si_parse_current_ib(FILE * f,struct radeon_winsys_cs * cs,unsigned begin,unsigned end,int * last_trace_id,unsigned trace_id_count,const char * name,enum chip_class chip_class)290 static void si_parse_current_ib(FILE *f, struct radeon_winsys_cs *cs,
291 				unsigned begin, unsigned end,
292 				int *last_trace_id, unsigned trace_id_count,
293 				const char *name, enum chip_class chip_class)
294 {
295 	unsigned orig_end = end;
296 
297 	assert(begin <= end);
298 
299 	fprintf(f, "------------------ %s begin (dw = %u) ------------------\n",
300 		name, begin);
301 
302 	for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) {
303 		struct radeon_winsys_cs_chunk *chunk = &cs->prev[prev_idx];
304 
305 		if (begin < chunk->cdw) {
306 			ac_parse_ib_chunk(f, chunk->buf + begin,
307 					  MIN2(end, chunk->cdw) - begin,
308 					  last_trace_id, trace_id_count,
309 				          chip_class, NULL, NULL);
310 		}
311 
312 		if (end <= chunk->cdw)
313 			return;
314 
315 		if (begin < chunk->cdw)
316 			fprintf(f, "\n---------- Next %s Chunk ----------\n\n",
317 				name);
318 
319 		begin -= MIN2(begin, chunk->cdw);
320 		end -= chunk->cdw;
321 	}
322 
323 	assert(end <= cs->current.cdw);
324 
325 	ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id,
326 			  trace_id_count, chip_class, NULL, NULL);
327 
328 	fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n",
329 		name, orig_end);
330 }
331 
si_log_chunk_type_cs_print(void * data,FILE * f)332 static void si_log_chunk_type_cs_print(void *data, FILE *f)
333 {
334 	struct si_log_chunk_cs *chunk = data;
335 	struct si_context *ctx = chunk->ctx;
336 	struct si_saved_cs *scs = chunk->cs;
337 	int last_trace_id = -1;
338 
339 	/* We are expecting that the ddebug pipe has already
340 	 * waited for the context, so this buffer should be idle.
341 	 * If the GPU is hung, there is no point in waiting for it.
342 	 */
343 	uint32_t *map = ctx->b.ws->buffer_map(scs->trace_buf->buf,
344 					      NULL,
345 					      PIPE_TRANSFER_UNSYNCHRONIZED |
346 					      PIPE_TRANSFER_READ);
347 	if (map)
348 		last_trace_id = map[0];
349 
350 	if (chunk->gfx_end != chunk->gfx_begin) {
351 		if (chunk->gfx_begin == 0) {
352 			if (ctx->init_config)
353 				ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw,
354 					    NULL, 0, "IB2: Init config", ctx->b.chip_class,
355 					    NULL, NULL);
356 
357 			if (ctx->init_config_gs_rings)
358 				ac_parse_ib(f, ctx->init_config_gs_rings->pm4,
359 					    ctx->init_config_gs_rings->ndw,
360 					    NULL, 0, "IB2: Init GS rings", ctx->b.chip_class,
361 					    NULL, NULL);
362 		}
363 
364 		if (scs->flushed) {
365 			ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin,
366 				    chunk->gfx_end - chunk->gfx_begin,
367 				    &last_trace_id, map ? 1 : 0, "IB", ctx->b.chip_class,
368 				    NULL, NULL);
369 		} else {
370 			si_parse_current_ib(f, ctx->b.gfx.cs, chunk->gfx_begin,
371 					    chunk->gfx_end, &last_trace_id, map ? 1 : 0,
372 					    "IB", ctx->b.chip_class);
373 		}
374 	}
375 
376 	if (chunk->dump_bo_list) {
377 		fprintf(f, "Flushing. Time: ");
378 		util_dump_ns(f, scs->time_flush);
379 		fprintf(f, "\n\n");
380 		si_dump_bo_list(ctx, &scs->gfx, f);
381 	}
382 }
383 
384 static const struct u_log_chunk_type si_log_chunk_type_cs = {
385 	.destroy = si_log_chunk_type_cs_destroy,
386 	.print = si_log_chunk_type_cs_print,
387 };
388 
si_log_cs(struct si_context * ctx,struct u_log_context * log,bool dump_bo_list)389 static void si_log_cs(struct si_context *ctx, struct u_log_context *log,
390 		      bool dump_bo_list)
391 {
392 	assert(ctx->current_saved_cs);
393 
394 	struct si_saved_cs *scs = ctx->current_saved_cs;
395 	unsigned gfx_cur = ctx->b.gfx.cs->prev_dw + ctx->b.gfx.cs->current.cdw;
396 
397 	if (!dump_bo_list &&
398 	    gfx_cur == scs->gfx_last_dw)
399 		return;
400 
401 	struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));
402 
403 	chunk->ctx = ctx;
404 	si_saved_cs_reference(&chunk->cs, scs);
405 	chunk->dump_bo_list = dump_bo_list;
406 
407 	chunk->gfx_begin = scs->gfx_last_dw;
408 	chunk->gfx_end = gfx_cur;
409 	scs->gfx_last_dw = gfx_cur;
410 
411 	u_log_chunk(log, &si_log_chunk_type_cs, chunk);
412 }
413 
/* Logger callback taking the context as an opaque pointer: logs any new gfx
 * IB contents, without the buffer list. */
void si_auto_log_cs(void *data, struct u_log_context *log)
{
	struct si_context *sctx = data;

	si_log_cs(sctx, log, false);
}
419 
si_log_hw_flush(struct si_context * sctx)420 void si_log_hw_flush(struct si_context *sctx)
421 {
422 	if (!sctx->b.log)
423 		return;
424 
425 	si_log_cs(sctx, sctx->b.log, true);
426 }
427 
priority_to_string(enum radeon_bo_priority priority)428 static const char *priority_to_string(enum radeon_bo_priority priority)
429 {
430 #define ITEM(x) [RADEON_PRIO_##x] = #x
431 	static const char *table[64] = {
432 		ITEM(FENCE),
433 	        ITEM(TRACE),
434 	        ITEM(SO_FILLED_SIZE),
435 	        ITEM(QUERY),
436 	        ITEM(IB1),
437 	        ITEM(IB2),
438 	        ITEM(DRAW_INDIRECT),
439 	        ITEM(INDEX_BUFFER),
440 	        ITEM(VCE),
441 	        ITEM(UVD),
442 	        ITEM(SDMA_BUFFER),
443 	        ITEM(SDMA_TEXTURE),
444 		ITEM(CP_DMA),
445 	        ITEM(CONST_BUFFER),
446 	        ITEM(DESCRIPTORS),
447 	        ITEM(BORDER_COLORS),
448 	        ITEM(SAMPLER_BUFFER),
449 	        ITEM(VERTEX_BUFFER),
450 	        ITEM(SHADER_RW_BUFFER),
451 	        ITEM(COMPUTE_GLOBAL),
452 	        ITEM(SAMPLER_TEXTURE),
453 	        ITEM(SHADER_RW_IMAGE),
454 	        ITEM(SAMPLER_TEXTURE_MSAA),
455 	        ITEM(COLOR_BUFFER),
456 	        ITEM(DEPTH_BUFFER),
457 	        ITEM(COLOR_BUFFER_MSAA),
458 	        ITEM(DEPTH_BUFFER_MSAA),
459 	        ITEM(CMASK),
460 	        ITEM(DCC),
461 	        ITEM(HTILE),
462 		ITEM(SHADER_BINARY),
463 		ITEM(SHADER_RINGS),
464 		ITEM(SCRATCH_BUFFER),
465 	};
466 #undef ITEM
467 
468 	assert(priority < ARRAY_SIZE(table));
469 	return table[priority];
470 }
471 
bo_list_compare_va(const struct radeon_bo_list_item * a,const struct radeon_bo_list_item * b)472 static int bo_list_compare_va(const struct radeon_bo_list_item *a,
473 				   const struct radeon_bo_list_item *b)
474 {
475 	return a->vm_address < b->vm_address ? -1 :
476 	       a->vm_address > b->vm_address ? 1 : 0;
477 }
478 
si_dump_bo_list(struct si_context * sctx,const struct radeon_saved_cs * saved,FILE * f)479 static void si_dump_bo_list(struct si_context *sctx,
480 			    const struct radeon_saved_cs *saved, FILE *f)
481 {
482 	unsigned i,j;
483 
484 	if (!saved->bo_list)
485 		return;
486 
487 	/* Sort the list according to VM adddresses first. */
488 	qsort(saved->bo_list, saved->bo_count,
489 	      sizeof(saved->bo_list[0]), (void*)bo_list_compare_va);
490 
491 	fprintf(f, "Buffer list (in units of pages = 4kB):\n"
492 		COLOR_YELLOW "        Size    VM start page         "
493 		"VM end page           Usage" COLOR_RESET "\n");
494 
495 	for (i = 0; i < saved->bo_count; i++) {
496 		/* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
497 		const unsigned page_size = sctx->b.screen->info.gart_page_size;
498 		uint64_t va = saved->bo_list[i].vm_address;
499 		uint64_t size = saved->bo_list[i].bo_size;
500 		bool hit = false;
501 
502 		/* If there's unused virtual memory between 2 buffers, print it. */
503 		if (i) {
504 			uint64_t previous_va_end = saved->bo_list[i-1].vm_address +
505 						   saved->bo_list[i-1].bo_size;
506 
507 			if (va > previous_va_end) {
508 				fprintf(f, "  %10"PRIu64"    -- hole --\n",
509 					(va - previous_va_end) / page_size);
510 			}
511 		}
512 
513 		/* Print the buffer. */
514 		fprintf(f, "  %10"PRIu64"    0x%013"PRIX64"       0x%013"PRIX64"       ",
515 			size / page_size, va / page_size, (va + size) / page_size);
516 
517 		/* Print the usage. */
518 		for (j = 0; j < 64; j++) {
519 			if (!(saved->bo_list[i].priority_usage & (1ull << j)))
520 				continue;
521 
522 			fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
523 			hit = true;
524 		}
525 		fprintf(f, "\n");
526 	}
527 	fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
528 		   "      Other buffers can still be allocated there.\n\n");
529 }
530 
si_dump_framebuffer(struct si_context * sctx,struct u_log_context * log)531 static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log)
532 {
533 	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
534 	struct r600_texture *rtex;
535 	int i;
536 
537 	for (i = 0; i < state->nr_cbufs; i++) {
538 		if (!state->cbufs[i])
539 			continue;
540 
541 		rtex = (struct r600_texture*)state->cbufs[i]->texture;
542 		u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
543 		si_print_texture_info(sctx->b.screen, rtex, log);
544 		u_log_printf(log, "\n");
545 	}
546 
547 	if (state->zsbuf) {
548 		rtex = (struct r600_texture*)state->zsbuf->texture;
549 		u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
550 		si_print_texture_info(sctx->b.screen, rtex, log);
551 		u_log_printf(log, "\n");
552 	}
553 }
554 
555 typedef unsigned (*slot_remap_func)(unsigned);
556 
557 struct si_log_chunk_desc_list {
558 	/** Pointer to memory map of buffer where the list is uploader */
559 	uint32_t *gpu_list;
560 	/** Reference of buffer where the list is uploaded, so that gpu_list
561 	 * is kept live. */
562 	struct r600_resource *buf;
563 
564 	const char *shader_name;
565 	const char *elem_name;
566 	slot_remap_func slot_remap;
567 	enum chip_class chip_class;
568 	unsigned element_dw_size;
569 	unsigned num_elements;
570 
571 	uint32_t list[0];
572 };
573 
574 static void
si_log_chunk_desc_list_destroy(void * data)575 si_log_chunk_desc_list_destroy(void *data)
576 {
577 	struct si_log_chunk_desc_list *chunk = data;
578 	r600_resource_reference(&chunk->buf, NULL);
579 	FREE(chunk);
580 }
581 
582 static void
si_log_chunk_desc_list_print(void * data,FILE * f)583 si_log_chunk_desc_list_print(void *data, FILE *f)
584 {
585 	struct si_log_chunk_desc_list *chunk = data;
586 
587 	for (unsigned i = 0; i < chunk->num_elements; i++) {
588 		unsigned cpu_dw_offset = i * chunk->element_dw_size;
589 		unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size;
590 		const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list";
591 		uint32_t *cpu_list = chunk->list + cpu_dw_offset;
592 		uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list;
593 
594 		fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
595 			chunk->shader_name, chunk->elem_name, i, list_note);
596 
597 		switch (chunk->element_dw_size) {
598 		case 4:
599 			for (unsigned j = 0; j < 4; j++)
600 				ac_dump_reg(f, chunk->chip_class,
601 					    R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
602 					    gpu_list[j], 0xffffffff);
603 			break;
604 		case 8:
605 			for (unsigned j = 0; j < 8; j++)
606 				ac_dump_reg(f, chunk->chip_class,
607 					    R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
608 					    gpu_list[j], 0xffffffff);
609 
610 			fprintf(f, COLOR_CYAN "    Buffer:" COLOR_RESET "\n");
611 			for (unsigned j = 0; j < 4; j++)
612 				ac_dump_reg(f, chunk->chip_class,
613 					    R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
614 					    gpu_list[4+j], 0xffffffff);
615 			break;
616 		case 16:
617 			for (unsigned j = 0; j < 8; j++)
618 				ac_dump_reg(f, chunk->chip_class,
619 					    R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
620 					    gpu_list[j], 0xffffffff);
621 
622 			fprintf(f, COLOR_CYAN "    Buffer:" COLOR_RESET "\n");
623 			for (unsigned j = 0; j < 4; j++)
624 				ac_dump_reg(f, chunk->chip_class,
625 					    R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
626 					    gpu_list[4+j], 0xffffffff);
627 
628 			fprintf(f, COLOR_CYAN "    FMASK:" COLOR_RESET "\n");
629 			for (unsigned j = 0; j < 8; j++)
630 				ac_dump_reg(f, chunk->chip_class,
631 					    R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
632 					    gpu_list[8+j], 0xffffffff);
633 
634 			fprintf(f, COLOR_CYAN "    Sampler state:" COLOR_RESET "\n");
635 			for (unsigned j = 0; j < 4; j++)
636 				ac_dump_reg(f, chunk->chip_class,
637 					    R_008F30_SQ_IMG_SAMP_WORD0 + j*4,
638 					    gpu_list[12+j], 0xffffffff);
639 			break;
640 		}
641 
642 		if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) {
643 			fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
644 				COLOR_RESET "\n");
645 		}
646 
647 		fprintf(f, "\n");
648 	}
649 
650 }
651 
652 static const struct u_log_chunk_type si_log_chunk_type_descriptor_list = {
653 	.destroy = si_log_chunk_desc_list_destroy,
654 	.print = si_log_chunk_desc_list_print,
655 };
656 
/* Queue a log chunk that prints the first num_elements slots of a
 * descriptor list.
 *
 * screen           supplies the chip class used for register decoding
 * desc             descriptor list to snapshot; skipped if it has no CPU list
 * shader_name      with elem_name, forms the heading printed per slot
 * element_dw_size  size of one descriptor in dwords
 * num_elements     upper bound on the number of slots to print; it is
 *                  reduced until the last slot lies in the active range
 * slot_remap       maps a logical slot index to its index inside the list
 *
 * The descriptors are copied into the chunk immediately. The chunk also
 * keeps the GPU mapping pointer and a reference on the descriptor buffer so
 * that the printer can compare both copies later. Nothing is logged if the
 * chunk cannot be allocated.
 */
static void si_dump_descriptor_list(struct si_screen *screen,
				    struct si_descriptors *desc,
				    const char *shader_name,
				    const char *elem_name,
				    unsigned element_dw_size,
				    unsigned num_elements,
				    slot_remap_func slot_remap,
				    struct u_log_context *log)
{
	if (!desc->list)
		return;

	/* In some cases, the caller doesn't know how many elements are really
	 * uploaded. Reduce num_elements to fit in the range of active slots. */
	unsigned active_range_dw_begin =
		desc->first_active_slot * desc->element_dw_size;
	unsigned active_range_dw_end =
		active_range_dw_begin + desc->num_active_slots * desc->element_dw_size;

	while (num_elements > 0) {
		int i = slot_remap(num_elements - 1);
		unsigned dw_begin = i * element_dw_size;
		unsigned dw_end = dw_begin + element_dw_size;

		if (dw_begin >= active_range_dw_begin && dw_end <= active_range_dw_end)
			break;

		num_elements--;
	}

	struct si_log_chunk_desc_list *chunk =
		CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list,
					     4 * element_dw_size * num_elements);
	if (!chunk)
		return; /* out of memory: skip this log entry instead of crashing */

	chunk->shader_name = shader_name;
	chunk->elem_name = elem_name;
	chunk->element_dw_size = element_dw_size;
	chunk->num_elements = num_elements;
	chunk->slot_remap = slot_remap;
	chunk->chip_class = screen->info.chip_class;

	r600_resource_reference(&chunk->buf, desc->buffer);
	chunk->gpu_list = desc->gpu_list;

	/* Snapshot the CPU copy, packed by logical slot index. */
	for (unsigned i = 0; i < num_elements; ++i) {
		memcpy(&chunk->list[i * element_dw_size],
		       &desc->list[slot_remap(i) * element_dw_size],
		       4 * element_dw_size);
	}

	u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk);
}
708 
/* Slot remapping function for lists whose slots are not reordered:
 * returns the slot index unchanged. */
static unsigned si_identity(unsigned slot)
{
	const unsigned remapped = slot;

	return remapped;
}
713 
si_dump_descriptors(struct si_context * sctx,enum pipe_shader_type processor,const struct tgsi_shader_info * info,struct u_log_context * log)714 static void si_dump_descriptors(struct si_context *sctx,
715 				enum pipe_shader_type processor,
716 				const struct tgsi_shader_info *info,
717 				struct u_log_context *log)
718 {
719 	struct si_descriptors *descs =
720 		&sctx->descriptors[SI_DESCS_FIRST_SHADER +
721 				   processor * SI_NUM_SHADER_DESCS];
722 	static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
723 	const char *name = shader_name[processor];
724 	unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers;
725 	unsigned enabled_images;
726 
727 	if (info) {
728 		enabled_constbuf = info->const_buffers_declared;
729 		enabled_shaderbuf = info->shader_buffers_declared;
730 		enabled_samplers = info->samplers_declared;
731 		enabled_images = info->images_declared;
732 	} else {
733 		enabled_constbuf = sctx->const_and_shader_buffers[processor].enabled_mask >>
734 				   SI_NUM_SHADER_BUFFERS;
735 		enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask &
736 				    u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
737 		enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >>
738 				    (32 - SI_NUM_SHADER_BUFFERS);
739 		enabled_samplers = sctx->samplers[processor].enabled_mask;
740 		enabled_images = sctx->images[processor].enabled_mask;
741 	}
742 
743 	if (processor == PIPE_SHADER_VERTEX) {
744 		assert(info); /* only CS may not have an info struct */
745 
746 		si_dump_descriptor_list(sctx->screen, &sctx->vertex_buffers, name,
747 					" - Vertex buffer", 4, info->num_inputs,
748 					si_identity, log);
749 	}
750 
751 	si_dump_descriptor_list(sctx->screen,
752 				&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
753 				name, " - Constant buffer", 4,
754 				util_last_bit(enabled_constbuf),
755 				si_get_constbuf_slot, log);
756 	si_dump_descriptor_list(sctx->screen,
757 				&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
758 				name, " - Shader buffer", 4,
759 				util_last_bit(enabled_shaderbuf),
760 				si_get_shaderbuf_slot, log);
761 	si_dump_descriptor_list(sctx->screen,
762 				&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
763 				name, " - Sampler", 16,
764 				util_last_bit(enabled_samplers),
765 				si_get_sampler_slot, log);
766 	si_dump_descriptor_list(sctx->screen,
767 				&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
768 				name, " - Image", 8,
769 				util_last_bit(enabled_images),
770 				si_get_image_slot, log);
771 }
772 
si_dump_gfx_descriptors(struct si_context * sctx,const struct si_shader_ctx_state * state,struct u_log_context * log)773 static void si_dump_gfx_descriptors(struct si_context *sctx,
774 				    const struct si_shader_ctx_state *state,
775 				    struct u_log_context *log)
776 {
777 	if (!state->cso || !state->current)
778 		return;
779 
780 	si_dump_descriptors(sctx, state->cso->type, &state->cso->info, log);
781 }
782 
si_dump_compute_descriptors(struct si_context * sctx,struct u_log_context * log)783 static void si_dump_compute_descriptors(struct si_context *sctx,
784 					struct u_log_context *log)
785 {
786 	if (!sctx->cs_shader_state.program)
787 		return;
788 
789 	si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, log);
790 }
791 
/* One instruction of a shader disassembly. */
struct si_shader_inst {
	/* One disassembly line, NUL-terminated. */
	char text[160];
	/* Byte offset of the instruction from the start of the shader. */
	unsigned offset;
	/* Instruction size in bytes: 4 or 8. */
	unsigned size;
};
797 
798 /* Split a disassembly string into lines and add them to the array pointed
799  * to by "instructions". */
si_add_split_disasm(const char * disasm,uint64_t start_addr,unsigned * num,struct si_shader_inst * instructions)800 static void si_add_split_disasm(const char *disasm,
801 				uint64_t start_addr,
802 				unsigned *num,
803 				struct si_shader_inst *instructions)
804 {
805 	struct si_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
806 	char *next;
807 
808 	while ((next = strchr(disasm, '\n'))) {
809 		struct si_shader_inst *inst = &instructions[*num];
810 		unsigned len = next - disasm;
811 
812 		assert(len < ARRAY_SIZE(inst->text));
813 		memcpy(inst->text, disasm, len);
814 		inst->text[len] = 0;
815 		inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;
816 
817 		const char *semicolon = strchr(disasm, ';');
818 		assert(semicolon);
819 		/* More than 16 chars after ";" means the instruction is 8 bytes long. */
820 		inst->size = next - semicolon > 16 ? 8 : 4;
821 
822 		snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
823 			" [PC=0x%"PRIx64", off=%u, size=%u]",
824 			start_addr + inst->offset, inst->offset, inst->size);
825 
826 		last_inst = inst;
827 		(*num)++;
828 		disasm = next + 1;
829 	}
830 }
831 
832 /* If the shader is being executed, print its asm instructions, and annotate
833  * those that are being executed right now with information about waves that
834  * execute them. This is most useful during a GPU hang.
835  */
si_print_annotated_shader(struct si_shader * shader,struct ac_wave_info * waves,unsigned num_waves,FILE * f)836 static void si_print_annotated_shader(struct si_shader *shader,
837 				      struct ac_wave_info *waves,
838 				      unsigned num_waves,
839 				      FILE *f)
840 {
841 	if (!shader || !shader->binary.disasm_string)
842 		return;
843 
844 	uint64_t start_addr = shader->bo->gpu_address;
845 	uint64_t end_addr = start_addr + shader->bo->b.b.width0;
846 	unsigned i;
847 
848 	/* See if any wave executes the shader. */
849 	for (i = 0; i < num_waves; i++) {
850 		if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
851 			break;
852 	}
853 	if (i == num_waves)
854 		return; /* the shader is not being executed */
855 
856 	/* Remember the first found wave. The waves are sorted according to PC. */
857 	waves = &waves[i];
858 	num_waves -= i;
859 
860 	/* Get the list of instructions.
861 	 * Buffer size / 4 is the upper bound of the instruction count.
862 	 */
863 	unsigned num_inst = 0;
864 	struct si_shader_inst *instructions =
865 		calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
866 
867 	if (shader->prolog) {
868 		si_add_split_disasm(shader->prolog->binary.disasm_string,
869 				    start_addr, &num_inst, instructions);
870 	}
871 	if (shader->previous_stage) {
872 		si_add_split_disasm(shader->previous_stage->binary.disasm_string,
873 				    start_addr, &num_inst, instructions);
874 	}
875 	if (shader->prolog2) {
876 		si_add_split_disasm(shader->prolog2->binary.disasm_string,
877 				    start_addr, &num_inst, instructions);
878 	}
879 	si_add_split_disasm(shader->binary.disasm_string,
880 			    start_addr, &num_inst, instructions);
881 	if (shader->epilog) {
882 		si_add_split_disasm(shader->epilog->binary.disasm_string,
883 				    start_addr, &num_inst, instructions);
884 	}
885 
886 	fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
887 		si_get_shader_name(shader, shader->selector->type));
888 
889 	/* Print instructions with annotations. */
890 	for (i = 0; i < num_inst; i++) {
891 		struct si_shader_inst *inst = &instructions[i];
892 
893 		fprintf(f, "%s\n", inst->text);
894 
895 		/* Print which waves execute the instruction right now. */
896 		while (num_waves && start_addr + inst->offset == waves->pc) {
897 			fprintf(f,
898 				"          " COLOR_GREEN "^ SE%u SH%u CU%u "
899 				"SIMD%u WAVE%u  EXEC=%016"PRIx64 "  ",
900 				waves->se, waves->sh, waves->cu, waves->simd,
901 				waves->wave, waves->exec);
902 
903 			if (inst->size == 4) {
904 				fprintf(f, "INST32=%08X" COLOR_RESET "\n",
905 					waves->inst_dw0);
906 			} else {
907 				fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
908 					waves->inst_dw0, waves->inst_dw1);
909 			}
910 
911 			waves->matched = true;
912 			waves = &waves[1];
913 			num_waves--;
914 		}
915 	}
916 
917 	fprintf(f, "\n\n");
918 	free(instructions);
919 }
920 
si_dump_annotated_shaders(struct si_context * sctx,FILE * f)921 static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
922 {
923 	struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
924 	unsigned num_waves = ac_get_wave_info(waves);
925 
926 	fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
927 		"\n\n", num_waves);
928 
929 	si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f);
930 	si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f);
931 	si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f);
932 	si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f);
933 	si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f);
934 
935 	/* Print waves executing shaders that are not currently bound. */
936 	unsigned i;
937 	bool found = false;
938 	for (i = 0; i < num_waves; i++) {
939 		if (waves[i].matched)
940 			continue;
941 
942 		if (!found) {
943 			fprintf(f, COLOR_CYAN
944 				"Waves not executing currently-bound shaders:"
945 				COLOR_RESET "\n");
946 			found = true;
947 		}
948 		fprintf(f, "    SE%u SH%u CU%u SIMD%u WAVE%u  EXEC=%016"PRIx64
949 			"  INST=%08X %08X  PC=%"PRIx64"\n",
950 			waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
951 			waves[i].wave, waves[i].exec, waves[i].inst_dw0,
952 			waves[i].inst_dw1, waves[i].pc);
953 	}
954 	if (found)
955 		fprintf(f, "\n\n");
956 }
957 
si_dump_command(const char * title,const char * command,FILE * f)958 static void si_dump_command(const char *title, const char *command, FILE *f)
959 {
960 	char line[2000];
961 
962 	FILE *p = popen(command, "r");
963 	if (!p)
964 		return;
965 
966 	fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title);
967 	while (fgets(line, sizeof(line), p))
968 		fputs(line, f);
969 	fprintf(f, "\n\n");
970 	pclose(p);
971 }
972 
si_dump_debug_state(struct pipe_context * ctx,FILE * f,unsigned flags)973 static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
974 				unsigned flags)
975 {
976 	struct si_context *sctx = (struct si_context*)ctx;
977 
978 	if (sctx->b.log)
979 		u_log_flush(sctx->b.log);
980 
981 	if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
982 		si_dump_debug_registers(sctx, f);
983 
984 		si_dump_annotated_shaders(sctx, f);
985 		si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
986 		si_dump_command("Wave information", "umr -O bits -wa", f);
987 	}
988 }
989 
si_log_draw_state(struct si_context * sctx,struct u_log_context * log)990 void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
991 {
992 	if (!log)
993 		return;
994 
995 	si_dump_framebuffer(sctx, log);
996 
997 	si_dump_gfx_shader(sctx, &sctx->vs_shader, log);
998 	si_dump_gfx_shader(sctx, &sctx->tcs_shader, log);
999 	si_dump_gfx_shader(sctx, &sctx->tes_shader, log);
1000 	si_dump_gfx_shader(sctx, &sctx->gs_shader, log);
1001 	si_dump_gfx_shader(sctx, &sctx->ps_shader, log);
1002 
1003 	si_dump_descriptor_list(sctx->screen,
1004 				&sctx->descriptors[SI_DESCS_RW_BUFFERS],
1005 				"", "RW buffers", 4, SI_NUM_RW_BUFFERS,
1006 				si_identity, log);
1007 	si_dump_gfx_descriptors(sctx, &sctx->vs_shader, log);
1008 	si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, log);
1009 	si_dump_gfx_descriptors(sctx, &sctx->tes_shader, log);
1010 	si_dump_gfx_descriptors(sctx, &sctx->gs_shader, log);
1011 	si_dump_gfx_descriptors(sctx, &sctx->ps_shader, log);
1012 }
1013 
/**
 * Record the compute shader and the compute descriptors in a log context.
 *
 * \param sctx  context whose compute state is logged
 * \param log   destination log; the call is a no-op when it is NULL
 */
void si_log_compute_state(struct si_context *sctx, struct u_log_context *log)
{
	if (log) {
		si_dump_compute_shader(sctx, log);
		si_dump_compute_descriptors(sctx, log);
	}
}
1022 
si_dump_dma(struct si_context * sctx,struct radeon_saved_cs * saved,FILE * f)1023 static void si_dump_dma(struct si_context *sctx,
1024 			struct radeon_saved_cs *saved, FILE *f)
1025 {
1026 	static const char ib_name[] = "sDMA IB";
1027 	unsigned i;
1028 
1029 	si_dump_bo_list(sctx, saved, f);
1030 
1031 	fprintf(f, "------------------ %s begin ------------------\n", ib_name);
1032 
1033 	for (i = 0; i < saved->num_dw; ++i) {
1034 		fprintf(f, " %08x\n", saved->ib[i]);
1035 	}
1036 
1037 	fprintf(f, "------------------- %s end -------------------\n", ib_name);
1038 	fprintf(f, "\n");
1039 
1040 	fprintf(f, "SDMA Dump Done.\n");
1041 }
1042 
si_check_vm_faults(struct r600_common_context * ctx,struct radeon_saved_cs * saved,enum ring_type ring)1043 void si_check_vm_faults(struct r600_common_context *ctx,
1044 			struct radeon_saved_cs *saved, enum ring_type ring)
1045 {
1046 	struct si_context *sctx = (struct si_context *)ctx;
1047 	struct pipe_screen *screen = sctx->b.b.screen;
1048 	FILE *f;
1049 	uint64_t addr;
1050 	char cmd_line[4096];
1051 
1052 	if (!ac_vm_fault_occured(sctx->b.chip_class,
1053 				 &sctx->dmesg_timestamp, &addr))
1054 		return;
1055 
1056 	f = dd_get_debug_file(false);
1057 	if (!f)
1058 		return;
1059 
1060 	fprintf(f, "VM fault report.\n\n");
1061 	if (os_get_command_line(cmd_line, sizeof(cmd_line)))
1062 		fprintf(f, "Command: %s\n", cmd_line);
1063 	fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
1064 	fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
1065 	fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
1066 	fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
1067 
1068 	if (sctx->apitrace_call_number)
1069 		fprintf(f, "Last apitrace call: %u\n\n",
1070 			sctx->apitrace_call_number);
1071 
1072 	switch (ring) {
1073 	case RING_GFX: {
1074 		struct u_log_context log;
1075 		u_log_context_init(&log);
1076 
1077 		si_log_draw_state(sctx, &log);
1078 		si_log_compute_state(sctx, &log);
1079 		si_log_cs(sctx, &log, true);
1080 
1081 		u_log_new_page_print(&log, f);
1082 		u_log_context_destroy(&log);
1083 		break;
1084 	}
1085 	case RING_DMA:
1086 		si_dump_dma(sctx, saved, f);
1087 		break;
1088 
1089 	default:
1090 		break;
1091 	}
1092 
1093 	fclose(f);
1094 
1095 	fprintf(stderr, "Detected a VM fault, exiting...\n");
1096 	exit(0);
1097 }
1098 
si_init_debug_functions(struct si_context * sctx)1099 void si_init_debug_functions(struct si_context *sctx)
1100 {
1101 	sctx->b.b.dump_debug_state = si_dump_debug_state;
1102 	sctx->b.check_vm_faults = si_check_vm_faults;
1103 
1104 	/* Set the initial dmesg timestamp for this context, so that
1105 	 * only new messages will be checked for VM faults.
1106 	 */
1107 	if (sctx->screen->debug_flags & DBG(CHECK_VM))
1108 		ac_vm_fault_occured(sctx->b.chip_class,
1109 				    &sctx->dmesg_timestamp, NULL);
1110 }
1111