1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27 
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <sys/utsname.h>
31 #include <sys/stat.h>
32 
33 #include "util/mesa-sha1.h"
34 #include "sid.h"
35 #include "ac_debug.h"
36 #include "radv_debug.h"
37 #include "radv_shader.h"
38 
39 #define TRACE_BO_SIZE 4096
40 #define TMA_BO_SIZE 4096
41 
42 #define COLOR_RESET	"\033[0m"
43 #define COLOR_RED	"\033[31m"
44 #define COLOR_GREEN	"\033[1;32m"
45 #define COLOR_YELLOW	"\033[1;33m"
46 #define COLOR_CYAN	"\033[1;36m"
47 
48 #define RADV_DUMP_DIR "radv_dumps"
49 
50 /* Trace BO layout (offsets are 4 bytes):
51  *
52  * [0]: primary trace ID
53  * [1]: secondary trace ID
54  * [2-3]: 64-bit GFX ring pipeline pointer
55  * [4-5]: 64-bit COMPUTE ring pipeline pointer
56  * [6-7]: 64-bit descriptor set #0 pointer
57  * ...
58  * [68-69]: 64-bit descriptor set #31 pointer
59  */
60 
61 bool
radv_init_trace(struct radv_device * device)62 radv_init_trace(struct radv_device *device)
63 {
64 	struct radeon_winsys *ws = device->ws;
65 
66 	device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8,
67 					     RADEON_DOMAIN_VRAM,
68 					     RADEON_FLAG_CPU_ACCESS|
69 					     RADEON_FLAG_NO_INTERPROCESS_SHARING |
70 					     RADEON_FLAG_ZERO_VRAM,
71 					     RADV_BO_PRIORITY_UPLOAD_BUFFER);
72 	if (!device->trace_bo)
73 		return false;
74 
75 	device->trace_id_ptr = ws->buffer_map(device->trace_bo);
76 	if (!device->trace_id_ptr)
77 		return false;
78 
79 	ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
80 			    &device->dmesg_timestamp, NULL);
81 
82 	return true;
83 }
84 
85 static void
radv_dump_trace(struct radv_device * device,struct radeon_cmdbuf * cs,FILE * f)86 radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
87 {
88 	fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
89 	device->ws->cs_dump(cs, f, (const int*)device->trace_id_ptr, 2);
90 }
91 
92 static void
radv_dump_mmapped_reg(struct radv_device * device,FILE * f,unsigned offset)93 radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
94 {
95 	struct radeon_winsys *ws = device->ws;
96 	uint32_t value;
97 
98 	if (ws->read_registers(ws, offset, 1, &value))
99 		ac_dump_reg(f, device->physical_device->rad_info.chip_class,
100 			    offset, value, ~0);
101 }
102 
103 static void
radv_dump_debug_registers(struct radv_device * device,FILE * f)104 radv_dump_debug_registers(struct radv_device *device, FILE *f)
105 {
106 	struct radeon_info *info = &device->physical_device->rad_info;
107 
108 	fprintf(f, "Memory-mapped registers:\n");
109 	radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
110 
111 	/* No other registers can be read on DRM < 3.1.0. */
112 	if (info->drm_minor < 1) {
113 		fprintf(f, "\n");
114 		return;
115 	}
116 
117 	radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
118 	radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
119 	radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
120 	radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
121 	radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
122 	radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
123 	radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
124 	if (info->chip_class <= GFX8) {
125 		radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
126 		radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
127 		radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
128 	}
129 	radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
130 	radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
131 	radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
132 	radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
133 	radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
134 	radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
135 	radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
136 	radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
137 	radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
138 	radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
139 	fprintf(f, "\n");
140 }
141 
142 static void
radv_dump_buffer_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)143 radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc,
144 			    FILE *f)
145 {
146 	fprintf(f, COLOR_CYAN "    Buffer:" COLOR_RESET "\n");
147 	for (unsigned j = 0; j < 4; j++)
148 		ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4,
149 			    desc[j], 0xffffffff);
150 }
151 
152 static void
radv_dump_image_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)153 radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc,
154 			   FILE *f)
155 {
156 	unsigned sq_img_rsrc_word0 = chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0
157 							 : R_008F10_SQ_IMG_RSRC_WORD0;
158 
159 	fprintf(f, COLOR_CYAN "    Image:" COLOR_RESET "\n");
160 	for (unsigned j = 0; j < 8; j++)
161 		ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4,
162 			    desc[j], 0xffffffff);
163 
164 	fprintf(f, COLOR_CYAN "    FMASK:" COLOR_RESET "\n");
165 	for (unsigned j = 0; j < 8; j++)
166 		ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4,
167 			    desc[8 + j], 0xffffffff);
168 }
169 
170 static void
radv_dump_sampler_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)171 radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,
172 			     FILE *f)
173 {
174 	fprintf(f, COLOR_CYAN "    Sampler state:" COLOR_RESET "\n");
175 	for (unsigned j = 0; j < 4; j++) {
176 		ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4,
177 			    desc[j], 0xffffffff);
178 	}
179 }
180 
181 static void
radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)182 radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class,
183 					    const uint32_t *desc, FILE *f)
184 {
185 	radv_dump_image_descriptor(chip_class, desc, f);
186 	radv_dump_sampler_descriptor(chip_class, desc + 16, f);
187 }
188 
189 static void
radv_dump_descriptor_set(struct radv_device * device,struct radv_descriptor_set * set,unsigned id,FILE * f)190 radv_dump_descriptor_set(struct radv_device *device,
191 			 struct radv_descriptor_set *set, unsigned id, FILE *f)
192 {
193 	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
194 	const struct radv_descriptor_set_layout *layout;
195 	int i;
196 
197 	if (!set)
198 		return;
199 	layout = set->layout;
200 
201 	for (i = 0; i < set->layout->binding_count; i++) {
202 		uint32_t *desc =
203 			set->mapped_ptr + layout->binding[i].offset / 4;
204 
205 		switch (layout->binding[i].type) {
206 		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
207 		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
208 		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
209 		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
210 			radv_dump_buffer_descriptor(chip_class, desc, f);
211 			break;
212 		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
213 		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
214 		case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
215 			radv_dump_image_descriptor(chip_class, desc, f);
216 			break;
217 		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
218 			radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);
219 			break;
220 		case VK_DESCRIPTOR_TYPE_SAMPLER:
221 			radv_dump_sampler_descriptor(chip_class, desc, f);
222 			break;
223 		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
224 		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
225 			/* todo */
226 			break;
227 		default:
228 			assert(!"unknown descriptor type");
229 			break;
230 		}
231 		fprintf(f, "\n");
232 	}
233 	fprintf(f, "\n\n");
234 }
235 
236 static void
radv_dump_descriptors(struct radv_device * device,FILE * f)237 radv_dump_descriptors(struct radv_device *device, FILE *f)
238 {
239 	uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
240 	int i;
241 
242 	fprintf(f, "Descriptors:\n");
243 	for (i = 0; i < MAX_SETS; i++) {
244 		struct radv_descriptor_set *set =
245 			*(struct radv_descriptor_set **)(ptr + i + 3);
246 
247 		radv_dump_descriptor_set(device, set, i, f);
248 	}
249 }
250 
/* One instruction line of a shader disassembly plus its location. */
struct radv_shader_inst {
	char text[160];  /* one disasm line (NUL-terminated, may be truncated) */
	unsigned offset; /* instruction offset */
	unsigned size;   /* instruction size = 4 or 8 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * Only newline-terminated lines that contain a ';' are treated as
 * instructions; everything else is skipped. For each instruction the line is
 * copied into "text" and suffixed with " [PC=..., off=..., size=...]". The
 * offset of an entry is the previous entry's offset plus its size (0 for the
 * very first entry), and the size is 8 when more than 16 characters follow
 * the ';' on the line, else 4.
 *
 * Lines that do not fit into "text" are truncated instead of overflowing the
 * buffer; the annotation suffix is likewise cut to the space that is left.
 *
 * \param disasm        NUL-terminated disassembly text.
 * \param start_addr    address of the first instruction, used for "PC=".
 * \param num           in/out: number of valid entries in "instructions".
 * \param instructions  array receiving the entries. The caller must provide
 *                      room for every instruction line; no capacity is
 *                      passed in, so it cannot be checked here.
 */
static void si_add_split_disasm(const char *disasm,
				uint64_t start_addr,
				unsigned *num,
				struct radv_shader_inst *instructions)
{
	struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
	const size_t text_capacity = sizeof(instructions->text);
	const char *next;

	while ((next = strchr(disasm, '\n'))) {
		struct radv_shader_inst *inst = &instructions[*num];
		const size_t line_len = (size_t)(next - disasm);
		const char *semicolon = memchr(disasm, ';', line_len);

		if (!semicolon) {
			/* Ignore everything that is not an instruction. */
			disasm = next + 1;
			continue;
		}

		/* Clamp the copy so an unexpectedly long line cannot overflow
		 * "text", even in builds where assertions are compiled out.
		 */
		const size_t len = line_len < text_capacity ? line_len
							    : text_capacity - 1;

		memcpy(inst->text, disasm, len);
		inst->text[len] = 0;
		inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

		/* More than 16 chars after ";" means the instruction is 8 bytes long. */
		inst->size = next - semicolon > 16 ? 8 : 4;

		snprintf(inst->text + len, text_capacity - len,
			" [PC=0x%"PRIx64", off=%u, size=%u]",
			start_addr + inst->offset, inst->offset, inst->size);

		last_inst = inst;
		(*num)++;
		disasm = next + 1;
	}
}
296 
297 static void
radv_dump_annotated_shader(struct radv_shader_variant * shader,gl_shader_stage stage,struct ac_wave_info * waves,unsigned num_waves,FILE * f)298 radv_dump_annotated_shader(struct radv_shader_variant *shader,
299 			   gl_shader_stage stage, struct ac_wave_info *waves,
300 			   unsigned num_waves, FILE *f)
301 {
302 	uint64_t start_addr, end_addr;
303 	unsigned i;
304 
305 	if (!shader)
306 		return;
307 
308 	start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;
309 	end_addr = start_addr + shader->code_size;
310 
311 	/* See if any wave executes the shader. */
312 	for (i = 0; i < num_waves; i++) {
313 		if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
314 			break;
315 	}
316 
317 	if (i == num_waves)
318 		return; /* the shader is not being executed */
319 
320 	/* Remember the first found wave. The waves are sorted according to PC. */
321 	waves = &waves[i];
322 	num_waves -= i;
323 
324 	/* Get the list of instructions.
325 	 * Buffer size / 4 is the upper bound of the instruction count.
326 	 */
327 	unsigned num_inst = 0;
328 	struct radv_shader_inst *instructions =
329 		calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
330 
331 	si_add_split_disasm(shader->disasm_string,
332 			    start_addr, &num_inst, instructions);
333 
334 	fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
335 		radv_get_shader_name(&shader->info, stage));
336 
337 	/* Print instructions with annotations. */
338 	for (i = 0; i < num_inst; i++) {
339 		struct radv_shader_inst *inst = &instructions[i];
340 
341 		fprintf(f, "%s\n", inst->text);
342 
343 		/* Print which waves execute the instruction right now. */
344 		while (num_waves && start_addr + inst->offset == waves->pc) {
345 			fprintf(f,
346 				"          " COLOR_GREEN "^ SE%u SH%u CU%u "
347 				"SIMD%u WAVE%u  EXEC=%016"PRIx64 "  ",
348 				waves->se, waves->sh, waves->cu, waves->simd,
349 				waves->wave, waves->exec);
350 
351 			if (inst->size == 4) {
352 				fprintf(f, "INST32=%08X" COLOR_RESET "\n",
353 					waves->inst_dw0);
354 			} else {
355 				fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
356 					waves->inst_dw0, waves->inst_dw1);
357 			}
358 
359 			waves->matched = true;
360 			waves = &waves[1];
361 			num_waves--;
362 		}
363 	}
364 
365 	fprintf(f, "\n\n");
366 	free(instructions);
367 }
368 
369 static void
radv_dump_annotated_shaders(struct radv_pipeline * pipeline,VkShaderStageFlagBits active_stages,FILE * f)370 radv_dump_annotated_shaders(struct radv_pipeline *pipeline,
371 			    VkShaderStageFlagBits active_stages, FILE *f)
372 {
373 	struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
374 	enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class;
375 	unsigned num_waves = ac_get_wave_info(chip_class, waves);
376 
377 	fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
378 		"\n\n", num_waves);
379 
380 	/* Dump annotated active graphics shaders. */
381 	while (active_stages) {
382 		int stage = u_bit_scan(&active_stages);
383 
384 		radv_dump_annotated_shader(pipeline->shaders[stage],
385 					   stage, waves, num_waves, f);
386 	}
387 
388 	/* Print waves executing shaders that are not currently bound. */
389 	unsigned i;
390 	bool found = false;
391 	for (i = 0; i < num_waves; i++) {
392 		if (waves[i].matched)
393 			continue;
394 
395 		if (!found) {
396 			fprintf(f, COLOR_CYAN
397 				"Waves not executing currently-bound shaders:"
398 				COLOR_RESET "\n");
399 			found = true;
400 		}
401 		fprintf(f, "    SE%u SH%u CU%u SIMD%u WAVE%u  EXEC=%016"PRIx64
402 			"  INST=%08X %08X  PC=%"PRIx64"\n",
403 			waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
404 			waves[i].wave, waves[i].exec, waves[i].inst_dw0,
405 			waves[i].inst_dw1, waves[i].pc);
406 	}
407 	if (found)
408 		fprintf(f, "\n\n");
409 }
410 
411 static void
radv_dump_shader(struct radv_pipeline * pipeline,struct radv_shader_variant * shader,gl_shader_stage stage,FILE * f)412 radv_dump_shader(struct radv_pipeline *pipeline,
413 		 struct radv_shader_variant *shader, gl_shader_stage stage,
414 		 FILE *f)
415 {
416 	if (!shader)
417 		return;
418 
419 	fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
420 
421 	if (shader->spirv) {
422 		unsigned char sha1[21];
423 		char sha1buf[41];
424 
425 		_mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
426 		_mesa_sha1_format(sha1buf, sha1);
427 
428 		fprintf(f, "SPIRV (sha1: %s):\n", sha1buf);
429 		radv_print_spirv(shader->spirv, shader->spirv_size, f);
430 	}
431 
432 	if (shader->nir_string) {
433 		fprintf(f, "NIR:\n%s\n", shader->nir_string);
434 	}
435 
436 	fprintf(f, "%s IR:\n%s\n",
437 		pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO",
438 		shader->ir_string);
439 	fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
440 
441 	radv_dump_shader_stats(pipeline->device, pipeline, stage, f);
442 }
443 
444 static void
radv_dump_shaders(struct radv_pipeline * pipeline,VkShaderStageFlagBits active_stages,FILE * f)445 radv_dump_shaders(struct radv_pipeline *pipeline,
446 		  VkShaderStageFlagBits active_stages, FILE *f)
447 {
448 	/* Dump active graphics shaders. */
449 	while (active_stages) {
450 		int stage = u_bit_scan(&active_stages);
451 
452 		radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f);
453 	}
454 }
455 
456 static struct radv_pipeline *
radv_get_saved_pipeline(struct radv_device * device,enum ring_type ring)457 radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring)
458 {
459 	uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
460 	int offset = ring == RING_GFX ? 1 : 2;
461 
462 	return *(struct radv_pipeline **)(ptr + offset);
463 }
464 
465 static void
radv_dump_queue_state(struct radv_queue * queue,FILE * f)466 radv_dump_queue_state(struct radv_queue *queue, FILE *f)
467 {
468 	enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
469 	struct radv_pipeline *pipeline;
470 
471 	fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");
472 
473 	pipeline = radv_get_saved_pipeline(queue->device, ring);
474 	if (pipeline) {
475 		radv_dump_shaders(pipeline, pipeline->active_stages, f);
476 		radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f);
477 		radv_dump_descriptors(queue->device, f);
478 	}
479 }
480 
/* Run "cmd" through popen() and copy its standard output to "f", followed by
 * one extra newline. Nothing is written when the command cannot be started.
 */
static void
radv_dump_cmd(const char *cmd, FILE *f)
{
	char buf[2048];
	FILE *child = popen(cmd, "r");

	if (child == NULL)
		return;

	while (fgets(buf, sizeof(buf), child) != NULL)
		fputs(buf, f);

	fprintf(f, "\n");
	pclose(child);
}
495 
/* Append the last 60 lines of the dmesg output to "f". */
static void
radv_dump_dmesg(FILE *f)
{
	static const char dmesg_tail_cmd[] = "dmesg | tail -n60";

	fprintf(f, "\nLast 60 lines of dmesg:\n\n");
	radv_dump_cmd(dmesg_tail_cmd, f);
}
502 
503 void
radv_dump_enabled_options(struct radv_device * device,FILE * f)504 radv_dump_enabled_options(struct radv_device *device, FILE *f)
505 {
506 	uint64_t mask;
507 
508 	if (device->instance->debug_flags) {
509 		fprintf(f, "Enabled debug options: ");
510 
511 		mask = device->instance->debug_flags;
512 		while (mask) {
513 			int i = u_bit_scan64(&mask);
514 			fprintf(f, "%s, ", radv_get_debug_option_name(i));
515 		}
516 		fprintf(f, "\n");
517 	}
518 
519 	if (device->instance->perftest_flags) {
520 		fprintf(f, "Enabled perftest options: ");
521 
522 		mask = device->instance->perftest_flags;
523 		while (mask) {
524 			int i = u_bit_scan64(&mask);
525 			fprintf(f, "%s, ", radv_get_perftest_option_name(i));
526 		}
527 		fprintf(f, "\n");
528 	}
529 }
530 
531 static void
radv_dump_device_name(struct radv_device * device,FILE * f)532 radv_dump_device_name(struct radv_device *device, FILE *f)
533 {
534 	struct radeon_info *info = &device->physical_device->rad_info;
535 	char kernel_version[128] = {0};
536 	struct utsname uname_data;
537 	const char *chip_name;
538 
539 	chip_name = device->ws->get_chip_name(device->ws);
540 
541 	if (uname(&uname_data) == 0)
542 		snprintf(kernel_version, sizeof(kernel_version),
543 			 " / %s", uname_data.release);
544 
545 	fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n",
546 		chip_name, device->physical_device->name,
547 		info->drm_major, info->drm_minor, info->drm_patchlevel,
548 		kernel_version);
549 }
550 
551 static void
radv_dump_umr_ring(struct radv_queue * queue,FILE * f)552 radv_dump_umr_ring(struct radv_queue *queue, FILE *f)
553 {
554 	enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
555 	struct radv_device *device = queue->device;
556 	char cmd[128];
557 
558 	/* TODO: Dump compute ring. */
559 	if (ring != RING_GFX)
560 		return;
561 
562 	sprintf(cmd, "umr -R %s 2>&1",
563 		device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
564 
565 	fprintf(f, "\nUMR GFX ring:\n\n");
566 	radv_dump_cmd(cmd, f);
567 }
568 
569 static void
radv_dump_umr_waves(struct radv_queue * queue,FILE * f)570 radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
571 {
572 	enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
573 	struct radv_device *device = queue->device;
574 	char cmd[128];
575 
576 	/* TODO: Dump compute ring. */
577 	if (ring != RING_GFX)
578 		return;
579 
580 	sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1",
581 		device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
582 
583 	fprintf(f, "\nUMR GFX waves:\n\n");
584 	radv_dump_cmd(cmd, f);
585 }
586 
587 static bool
radv_gpu_hang_occured(struct radv_queue * queue,enum ring_type ring)588 radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
589 {
590 	struct radeon_winsys *ws = queue->device->ws;
591 
592 	if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx))
593 		return true;
594 
595 	return false;
596 }
597 
598 void
radv_check_gpu_hangs(struct radv_queue * queue,struct radeon_cmdbuf * cs)599 radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
600 {
601 	struct radv_device *device = queue->device;
602 	char dump_dir[256], dump_path[512];
603 	enum ring_type ring;
604 	uint64_t addr;
605 	FILE *f;
606 
607 	ring = radv_queue_family_to_ring(queue->queue_family_index);
608 
609 	bool hang_occurred = radv_gpu_hang_occured(queue, ring);
610 	bool vm_fault_occurred = false;
611 	if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
612 		vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
613 		                                        &device->dmesg_timestamp, &addr);
614 	if (!hang_occurred && !vm_fault_occurred)
615 		return;
616 
617 	fprintf(stderr, "radv: GPU hang detected...\n");
618 
619 	/* Create a directory into $HOME/radv_dumps_<pid> to save various
620 	 * debugging info about that GPU hang.
621 	 */
622 	snprintf(dump_dir, sizeof(dump_dir), "%s/"RADV_DUMP_DIR"_%d",
623 		 debug_get_option("HOME", "."), getpid());
624 	if (mkdir(dump_dir, 0774) && errno != EEXIST) {
625 		fprintf(stderr, "radv: can't create directory '%s' (%i).\n",
626 			dump_dir, errno);
627 		abort();
628 	}
629 
630 	/* Dump trace file. */
631 	snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
632 	f = fopen(dump_path, "w+");
633 	if (f) {
634 		radv_dump_trace(queue->device, cs, f);
635 		fclose(f);
636 	}
637 
638 	/* Dump pipeline state. */
639 	snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
640 	f = fopen(dump_path, "w+");
641 	if (f) {
642 		radv_dump_queue_state(queue, f);
643 		fclose(f);
644 	}
645 
646 	/* Dump UMR ring. */
647 	snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");
648 	f = fopen(dump_path, "w+");
649 	if (f) {
650 		radv_dump_umr_ring(queue, f);
651 		fclose(f);
652 	}
653 
654 	/* Dump UMR waves. */
655 	snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");
656 	f = fopen(dump_path, "w+");
657 	if (f) {
658 		radv_dump_umr_waves(queue, f);
659 		fclose(f);
660 	}
661 
662 	/* Dump debug registers. */
663 	snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");
664 	f = fopen(dump_path, "w+");
665 	if (f) {
666 		radv_dump_debug_registers(device, f);
667 		fclose(f);
668 	}
669 
670 	/* Dump VM fault info. */
671 	if (vm_fault_occurred) {
672 		snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
673 		f = fopen(dump_path, "w+");
674 		if (f) {
675 			fprintf(f, "VM fault report.\n\n");
676 			fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
677 			fclose(f);
678 		}
679 	}
680 
681 	/* Dump enabled debug/perftest options. */
682 	snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "options.log");
683 	f = fopen(dump_path, "w+");
684 	if (f) {
685 		radv_dump_enabled_options(device, f);
686 		fclose(f);
687 	}
688 
689 	/* Dump GPU info. */
690 	snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");
691 	f = fopen(dump_path, "w+");
692 	if (f) {
693 		radv_dump_device_name(device, f);
694 		ac_print_gpu_info(&device->physical_device->rad_info, f);
695 		fclose(f);
696 	}
697 
698 	/* Dump dmesg. */
699 	snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");
700 	f = fopen(dump_path, "w+");
701 	if (f) {
702 		radv_dump_dmesg(f);
703 		fclose(f);
704 	}
705 
706 	fprintf(stderr, "radv: GPU hang report saved to '%s'!\n", dump_dir);
707 	abort();
708 }
709 
/**
 * Disassemble a SPIR-V binary with the external "spirv-dis" tool and write
 * the resulting text to "fp".
 *
 * The binary is first written to a temporary file created with mkstemp()
 * under /tmp; the file is removed again before returning. If the temporary
 * file cannot be created or fully written, nothing is printed.
 *
 * \param data  SPIR-V binary.
 * \param size  size of "data" in bytes.
 * \param fp    output stream for the disassembly.
 */
void
radv_print_spirv(const char *data, uint32_t size, FILE *fp)
{
	char path[] = "/tmp/fileXXXXXX";
	char command[128];
	const char *cursor = data;
	size_t remaining = size;
	int fd;

	/* Dump the binary into a temporary file. */
	fd = mkstemp(path);
	if (fd < 0)
		return;

	/* write() may store fewer bytes than requested, so keep going until
	 * the whole binary is on disk; otherwise spirv-dis would be handed a
	 * truncated module. A zero return for a non-zero request is treated
	 * as a failure so the loop cannot spin forever.
	 */
	while (remaining > 0) {
		ssize_t written = write(fd, cursor, remaining);

		if (written <= 0)
			goto fail;

		cursor += written;
		remaining -= (size_t)written;
	}

	/* Disassemble using spirv-dis if installed. */
	snprintf(command, sizeof(command), "spirv-dis %s", path);
	radv_dump_cmd(command, fp);

fail:
	close(fd);
	unlink(path);
}
733 
734 bool
radv_trap_handler_init(struct radv_device * device)735 radv_trap_handler_init(struct radv_device *device)
736 {
737 	struct radeon_winsys *ws = device->ws;
738 
739 	/* Create the trap handler shader and upload it like other shaders. */
740 	device->trap_handler_shader = radv_create_trap_handler_shader(device);
741 	if (!device->trap_handler_shader) {
742 		fprintf(stderr, "radv: failed to create the trap handler shader.\n");
743 		return false;
744 	}
745 
746 	device->tma_bo = ws->buffer_create(ws, TMA_BO_SIZE, 256,
747 					   RADEON_DOMAIN_VRAM,
748 					   RADEON_FLAG_CPU_ACCESS |
749 					   RADEON_FLAG_NO_INTERPROCESS_SHARING |
750 					   RADEON_FLAG_ZERO_VRAM |
751 					   RADEON_FLAG_32BIT,
752 					   RADV_BO_PRIORITY_SCRATCH);
753 	if (!device->tma_bo)
754 		return false;
755 
756 	device->tma_ptr = ws->buffer_map(device->tma_bo);
757 	if (!device->tma_ptr)
758 		return false;
759 
760 	/* Upload a buffer descriptor to store various info from the trap. */
761 	uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
762 	uint32_t desc[4];
763 
764 	desc[0] = tma_va;
765 	desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
766 	desc[2] = TMA_BO_SIZE;
767 	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
768 		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
769 		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
770 		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
771 		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
772 
773 	memcpy(device->tma_ptr, desc, sizeof(desc));
774 
775 	return true;
776 }
777 
778 void
radv_trap_handler_finish(struct radv_device * device)779 radv_trap_handler_finish(struct radv_device *device)
780 {
781 	struct radeon_winsys *ws = device->ws;
782 
783 	if (unlikely(device->trap_handler_shader))
784 		radv_shader_variant_destroy(device, device->trap_handler_shader);
785 
786 	if (unlikely(device->tma_bo))
787 		ws->buffer_destroy(device->tma_bo);
788 }
789 
790 static struct radv_shader_variant *
radv_get_faulty_shader(struct radv_device * device,uint64_t faulty_pc)791 radv_get_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
792 {
793 	struct radv_shader_variant *shader = NULL;
794 
795 	mtx_lock(&device->shader_slab_mutex);
796 	list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
797 		list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list) {
798 			uint64_t offset = align_u64(s->bo_offset + s->code_size, 256);
799 			uint64_t va = radv_buffer_get_va(s->bo);
800 
801 			if (faulty_pc >= va + s->bo_offset && faulty_pc < va + offset) {
802 				mtx_unlock(&device->shader_slab_mutex);
803 				return s;
804 			}
805 		}
806 	}
807 	mtx_unlock(&device->shader_slab_mutex);
808 
809 	return shader;
810 }
811 
812 static void
radv_dump_faulty_shader(struct radv_device * device,uint64_t faulty_pc)813 radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
814 {
815 	struct radv_shader_variant *shader;
816 	uint64_t start_addr, end_addr;
817 	uint32_t instr_offset;
818 
819 	shader = radv_get_faulty_shader(device, faulty_pc);
820 	if (!shader)
821 		return;
822 
823 	start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;
824 	end_addr = start_addr + shader->code_size;
825 	instr_offset = faulty_pc - start_addr;
826 
827 	fprintf(stderr, "Faulty shader found "
828 			"VA=[0x%"PRIx64"-0x%"PRIx64"], instr_offset=%d\n",
829 		start_addr, end_addr, instr_offset);
830 
831 	/* Get the list of instructions.
832 	 * Buffer size / 4 is the upper bound of the instruction count.
833 	 */
834 	unsigned num_inst = 0;
835 	struct radv_shader_inst *instructions =
836 		calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
837 
838 	/* Split the disassembly string into instructions. */
839 	si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
840 
841 	/* Print instructions with annotations. */
842 	for (unsigned i = 0; i < num_inst; i++) {
843 		struct radv_shader_inst *inst = &instructions[i];
844 
845 		if (start_addr + inst->offset == faulty_pc) {
846 			fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
847 			fprintf(stderr, "%s\n", inst->text);
848 			fprintf(stderr, "\n");
849 		} else {
850 			fprintf(stderr, "%s\n", inst->text);
851 		}
852 	}
853 
854 	free(instructions);
855 }
856 
/* Four consecutive dwords of per-wave SQ register values.
 * radv_dump_sq_hw_regs() overlays this struct on the TMA buffer mapping
 * starting at dword 6 and decodes each field as the register named below.
 */
struct radv_sq_hw_reg {
	uint32_t status;   /* decoded as SQ_WAVE_STATUS */
	uint32_t trap_sts; /* decoded as SQ_WAVE_TRAPSTS */
	uint32_t hw_id;    /* decoded as SQ_WAVE_HW_ID (SQ_WAVE_HW_ID1 on GFX10+) */
	uint32_t ib_sts;   /* decoded as SQ_WAVE_IB_STS */
};
863 
864 static void
radv_dump_sq_hw_regs(struct radv_device * device)865 radv_dump_sq_hw_regs(struct radv_device *device)
866 {
867 	struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
868 
869 	fprintf(stderr, "\nHardware registers:\n");
870 	if (device->physical_device->rad_info.chip_class >= GFX10) {
871 		ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
872 			    R_000408_SQ_WAVE_STATUS, regs->status, ~0);
873 		ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
874 			    R_00040C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
875 		ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
876 			    R_00045C_SQ_WAVE_HW_ID1, regs->hw_id, ~0);
877 		ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
878 			    R_00041C_SQ_WAVE_IB_STS, regs->ib_sts, ~0);
879 	} else {
880 		ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
881 			    R_000048_SQ_WAVE_STATUS, regs->status, ~0);
882 		ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
883 			    R_00004C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
884 		ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
885 			    R_000050_SQ_WAVE_HW_ID, regs->hw_id, ~0);
886 		ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
887 			    R_00005C_SQ_WAVE_IB_STS, regs->ib_sts, ~0);
888 	}
889 	fprintf(stderr, "\n\n");
890 }
891 
892 void
radv_check_trap_handler(struct radv_queue * queue)893 radv_check_trap_handler(struct radv_queue *queue)
894 {
895 	enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
896 	struct radv_device *device = queue->device;
897 	struct radeon_winsys *ws = device->ws;
898 
899 	/* Wait for the context to be idle in a finite time. */
900 	ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx);
901 
902 	/* Try to detect if the trap handler has been reached by the hw by
903 	 * looking at ttmp0 which should be non-zero if a shader exception
904 	 * happened.
905 	 */
906 	if (!device->tma_ptr[4])
907 		return;
908 
909 #if 0
910 	fprintf(stderr, "tma_ptr:\n");
911 	for (unsigned i = 0; i < 10; i++)
912 		fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
913 #endif
914 
915 	radv_dump_sq_hw_regs(device);
916 
917 	uint32_t ttmp0 = device->tma_ptr[4];
918 	uint32_t ttmp1 = device->tma_ptr[5];
919 
920 	/* According to the ISA docs, 3.10 Trap and Exception Registers:
921 	 *
922 	 * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
923 	 *
924 	 * "When the trap handler is entered, the PC of the faulting
925 	 *  instruction is: (PC - PC_rewind * 4)."
926 	 * */
927 	uint8_t trap_id = (ttmp1 >> 16) & 0xff;
928 	uint8_t ht = (ttmp1 >> 24) & 0x1;
929 	uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
930 	uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
931 
932 	fprintf(stderr, "PC=0x%"PRIx64", trapID=%d, HT=%d, PC_rewind=%d\n",
933 		pc, trap_id, ht, pc_rewind);
934 
935 	radv_dump_faulty_shader(device, pc);
936 
937 	abort();
938 }
939