1 /*
2  * Copyright © 2020 Google, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 /*
25  * Decoder for devcoredump traces from drm/msm.  In case of a gpu crash/hang,
26  * the coredump should be found in:
27  *
28  *    /sys/class/devcoredump/devcd<n>/data
29  *
30  * The crashdump will hang around for 5min, it can be cleared by writing to
31  * the file, ie:
32  *
33  *    echo 1 > /sys/class/devcoredump/devcd<n>/data
34  *
35  * (the driver won't log any new crashdumps until the previous one is cleared
36  * or times out after 5min)
37  */
38 
39 
40 #include <assert.h>
41 #include <getopt.h>
42 #include <inttypes.h>
43 #include <stdarg.h>
44 #include <stdbool.h>
45 #include <stdint.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 
51 #include "buffers.h"
52 #include "cffdec.h"
53 #include "disasm.h"
54 #include "pager.h"
55 #include "rnnutil.h"
56 #include "util.h"
57 #include "ir3/instr-a3xx.h"
58 
59 
/* Input stream the crashdump is read from; main() points this at stdin
 * unless -f/--file is given.
 */
static FILE *in;
/* Set by -v/--verbose; enables output of sections that are skipped by
 * default.
 */
static bool verbose;

/* Register databases, loaded in decode() once the "revision:" line has
 * told us which GPU generation the dump came from:
 */
static struct rnn *rnn_gmu;
static struct rnn *rnn_control;
static struct rnn *rnn_pipe;

/* Options handed to cffdec_init(); gpu_id is filled in from the dump's
 * "revision:" line, the rest from the command line.
 */
static struct cffdec_options options = {
	.draw_filter = -1,
};
70 
is_a6xx(void)71 static inline bool is_a6xx(void) { return (600 <= options.gpu_id) && (options.gpu_id < 700); }
is_a5xx(void)72 static inline bool is_a5xx(void) { return (500 <= options.gpu_id) && (options.gpu_id < 600); }
is_64b(void)73 static inline bool is_64b(void)  { return options.gpu_id >= 500; }
74 
75 /*
76  * Helpers to read register values:
77  */
78 
/* Read a register by name that is 64b wide on 64b GPUs (ie. a5xx+).  On
 * other GPUs only the single 32b register is read.  The name must resolve
 * to a non-zero register offset.
 */
static uint64_t
regval64(const char *name)
{
	const unsigned lo_reg = regbase(name);

	assert(lo_reg);

	const uint64_t lo = reg_val(lo_reg);
	if (!is_64b())
		return lo;

	/* the upper half lives in the following register: */
	const uint64_t hi = reg_val(lo_reg + 1);

	return lo | (hi << 32);
}
90 
/* Look up a register by name and return the value reg_val() reports for
 * it.  The name must resolve to a non-zero register offset.
 */
static uint32_t
regval(const char *name)
{
	const unsigned offset = regbase(name);

	assert(offset);

	return reg_val(offset);
}
98 
99 /*
100  * Line reading and string helpers:
101  */
102 
/*
 * Replace the first occurrence of 'find' in 'line' with 'replace'.
 *
 * 'line' must be heap allocated.  If 'find' does not occur, 'line' itself
 * is returned unchanged.  Otherwise 'line' is freed and a newly allocated
 * string is returned.  Either way the caller owns the returned string.
 *
 * Exits with an error if the new string cannot be allocated.
 */
static char *
replacestr(char *line, const char *find, const char *replace)
{
	char *match = strstr(line, find);

	if (!match)
		return line;

	const char *tail = match + strlen(find);
	const size_t headlen = (size_t)(match - line);
	const size_t replen = strlen(replace);
	const size_t taillen = strlen(tail);

	char *newline = malloc(headlen + replen + taillen + 1);
	if (!newline) {
		fprintf(stderr, "out of memory\n");
		exit(1);
	}

	memcpy(newline, line, headlen);
	memcpy(newline + headlen, replace, replen);
	/* this also copies the terminating NUL: */
	memcpy(newline + headlen + replen, tail, taillen + 1);

	free(line);

	return newline;
}
119 
120 static char *lastline;
121 static char *pushedline;
122 
123 static const char *
popline(void)124 popline(void)
125 {
126 	char *r = pushedline;
127 
128 	if (r) {
129 		pushedline = NULL;
130 		return r;
131 	}
132 
133 	free(lastline);
134 
135 	size_t n = 0;
136 	if (getline(&r, &n, in) < 0)
137 		exit(0);
138 
139 	/* Handle section name typo's from earlier kernels: */
140 	r = replacestr(r, "CP_MEMPOOOL", "CP_MEMPOOL");
141 	r = replacestr(r, "CP_SEQ_STAT", "CP_SQE_STAT");
142 
143 	lastline = r;
144 	return r;
145 }
146 
147 static void
pushline(void)148 pushline(void)
149 {
150 	assert(!pushedline);
151 	pushedline = lastline;
152 }
153 
/*
 * Read the next line of input and decode it as ascii85 data into a newly
 * allocated, zero-initialized buffer of 'sizedwords' dwords (at least one
 * dword is always allocated).  The caller owns the returned buffer.
 *
 * Each group of up to five characters decodes to one dword, and 'z' is
 * shorthand for a zero dword.  Decoding stops at the end of the line, or
 * once 'sizedwords' dwords have been produced; any excess data in the
 * line is dropped with a warning rather than written past the buffer.
 *
 * Exits with an error if the buffer cannot be allocated.
 */
static uint32_t *
popline_ascii85(uint32_t sizedwords)
{
	const char *line = popline();

	/* At this point we expect the ascii85 data to be indented *some*
	 * amount, and to terminate at the end of the line.  So just eat
	 * up the leading whitespace.
	 */
	assert(*line == ' ');
	while (*line == ' ')
		line++;

	/* Let calloc() do the (overflow checked) size multiplication, and
	 * never ask it for a zero sized allocation:
	 */
	uint32_t *buf = calloc(sizedwords ? sizedwords : 1, sizeof(*buf));
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		exit(1);
	}

	uint32_t idx = 0;

	/* The last line of a file may lack the trailing newline, so stop
	 * at the NUL terminator as well:
	 */
	while ((*line != '\n') && (*line != '\0')) {
		if (idx >= sizedwords) {
			fprintf(stderr, "ascii85 data exceeds %u dwords, truncating\n",
					sizedwords);
			break;
		}

		if (*line == 'z') {
			buf[idx++] = 0;
			line++;
			continue;
		}

		uint32_t accum = 0;
		for (int i = 0; (i < 5) && (*line != '\n') && (*line != '\0'); i++) {
			accum *= 85;
			accum += *line - '!';
			line++;
		}

		buf[idx++] = accum;
	}

	return buf;
}
189 
/* Returns true if 'line' begins with the prefix 'start' (an empty prefix
 * always matches).
 */
static bool
startswith(const char *line, const char *start)
{
	/* Only compare the prefix itself, rather than searching the whole
	 * line for 'start' the way strstr() does when the prefix doesn't
	 * match:
	 */
	return strncmp(line, start, strlen(start)) == 0;
}
195 
/*
 * sscanf() style helper which treats a partial match as fatal: unless
 * every assigning conversion in 'fmt' was matched against 'line', an
 * error is printed and the process exits with status 1.
 */
static void
parseline(const char *line, const char *fmt, ...)
{
	int expected = 0;

	/* scan fmt string to extract expected # of conversions: */
	for (const char *p = fmt; *p; p++) {
		if (*p != '%')
			continue;

		/* step onto the character following the '%': */
		p++;
		if (*p == '\0')
			break;

		/* "%%" matches a literal '%' and "%*.." suppresses assignment,
		 * neither of which is counted in scanf's return value:
		 */
		if ((*p == '%') || (*p == '*'))
			continue;

		expected++;
	}

	va_list ap;
	va_start(ap, fmt);
	const int matched = vsscanf(line, fmt, ap);
	va_end(ap);

	if (matched != expected) {
		fprintf(stderr, "parse error scanning: '%s'\n", fmt);
		exit(1);
	}
}
224 
/*
 * Iterate over the lines of the current section, ie. the following input
 * lines which start with a space.  The statement after the macro is the
 * loop body, run once per such line with '_line' in scope.  The first
 * line that does not start with a space is pushed back (so that the next
 * popline() returns it again) and ends the loop.
 *
 * The body hangs off the trailing 'else', so 'continue' in the body moves
 * on to the next line as usual.
 */
#define foreach_line_in_section(_line) \
	for (const char *_line = popline(); _line; _line = popline()) \
		/* check for start of next section */                     \
		if (_line[0] != ' ') {                                    \
			pushline();                                           \
			break;                                                \
		} else
232 
233 /*
234  * Decode ringbuffer section:
235  */
236 
/* Per-ring state collected by decode_ringbuffer(), indexed by the ring's
 * "id:" value:
 */
static struct {
	uint64_t iova;   /* address the ring contents are registered at via add_buffer() */
	uint32_t rptr;   /* used as a dword index into buf by dump_cmdstream() */
	uint32_t wptr;   /* likewise a dword index */
	uint32_t size;   /* in bytes */
	uint32_t *buf;   /* decoded ring contents, NULL until a "data:" block is seen */
} ringbuffers[5];
244 
245 static void
decode_ringbuffer(void)246 decode_ringbuffer(void)
247 {
248 	int id = 0;
249 
250 	foreach_line_in_section (line) {
251 		if (startswith(line, "  - id:")) {
252 			parseline(line, "  - id: %d", &id);
253 			assert(id < ARRAY_SIZE(ringbuffers));
254 		} else if (startswith(line, "    iova:")) {
255 			parseline(line, "    iova: %"PRIx64, &ringbuffers[id].iova);
256 		} else if (startswith(line, "    rptr:")) {
257 			parseline(line, "    rptr: %d", &ringbuffers[id].rptr);
258 		} else if (startswith(line, "    wptr:")) {
259 			parseline(line, "    wptr: %d", &ringbuffers[id].wptr);
260 		} else if (startswith(line, "    size:")) {
261 			parseline(line, "    size: %d", &ringbuffers[id].size);
262 		} else if (startswith(line, "    data: !!ascii85 |")) {
263 			ringbuffers[id].buf = popline_ascii85(ringbuffers[id].size / 4);
264 			add_buffer(ringbuffers[id].iova, ringbuffers[id].size, ringbuffers[id].buf);
265 			continue;
266 		}
267 
268 		printf("%s", line);
269 	}
270 }
271 
272 static bool
valid_header(uint32_t pkt)273 valid_header(uint32_t pkt)
274 {
275 	if (options.gpu_id >= 500) {
276 		return pkt_is_type4(pkt) || pkt_is_type7(pkt);
277 	} else {
278 		/* TODO maybe we can check validish looking pkt3 opc or pkt0
279 		 * register offset.. the cmds sent by kernel are usually
280 		 * fairly limited (other than initialization) which confines
281 		 * the search space a bit..
282 		 */
283 		return true;
284 	}
285 }
286 
287 static void
dump_cmdstream(void)288 dump_cmdstream(void)
289 {
290 	uint64_t rb_base = regval64("CP_RB_BASE");
291 
292 	printf("got rb_base=%"PRIx64"\n", rb_base);
293 
294 	options.ibs[1].base = regval64("CP_IB1_BASE");
295 	options.ibs[1].rem  = regval("CP_IB1_REM_SIZE");
296 	options.ibs[2].base = regval64("CP_IB2_BASE");
297 	options.ibs[2].rem  = regval("CP_IB2_REM_SIZE");
298 
299 	/* Adjust remaining size to account for cmdstream slurped into ROQ
300 	 * but not yet consumed by SQE
301 	 *
302 	 * TODO add support for earlier GPUs once we tease out the needed
303 	 * registers.. see crashit.c in msmtest for hints.
304 	 *
305 	 * TODO it would be nice to be able to extract out register bitfields
306 	 * by name rather than hard-coding this.
307 	 */
308 	if (is_a6xx()) {
309 		options.ibs[1].rem += regval("CP_CSQ_IB1_STAT") >> 16;
310 		options.ibs[2].rem += regval("CP_CSQ_IB2_STAT") >> 16;
311 	}
312 
313 	printf("IB1: %"PRIx64", %u\n", options.ibs[1].base, options.ibs[1].rem);
314 	printf("IB2: %"PRIx64", %u\n", options.ibs[2].base, options.ibs[2].rem);
315 
316 	/* now that we've got the regvals we want, reset register state
317 	 * so we aren't seeing values from decode_registers();
318 	 */
319 	reset_regs();
320 
321 	for (int id = 0; id < ARRAY_SIZE(ringbuffers); id++) {
322 		if (ringbuffers[id].iova != rb_base)
323 			continue;
324 		if (!ringbuffers[id].size)
325 			continue;
326 
327 		printf("found ring!\n");
328 
329 		/* The kernel level ringbuffer (RB) wraps around, which
330 		 * cffdec doesn't really deal with.. so figure out how
331 		 * many dwords are unread
332 		 */
333 		unsigned ringszdw = ringbuffers[id].size >> 2;  /* in dwords */
334 
335 /* helper macro to deal with modulo size math: */
336 #define mod_add(b, v)  ((ringszdw + (int)(b) + (int)(v)) % ringszdw)
337 
338 		/* The rptr will (most likely) have moved past the IB to
339 		 * userspace cmdstream, so back up a bit, and then advance
340 		 * until we find a valid start of a packet.. this is going
341 		 * to be less reliable on a4xx and before (pkt0/pkt3),
342 		 * compared to pkt4/pkt7 with parity bits
343 		 */
344 		const int lookback = 12;
345 		unsigned rptr = mod_add(ringbuffers[id].rptr, -lookback);
346 
347 		for (int idx = 0; idx < lookback; idx++) {
348 			if (valid_header(ringbuffers[id].buf[rptr]))
349 				break;
350 			rptr = mod_add(rptr, 1);
351 		}
352 
353 		unsigned cmdszdw = mod_add(ringbuffers[id].wptr, -rptr);
354 
355 		printf("got cmdszdw=%d\n", cmdszdw);
356 		uint32_t *buf = malloc(cmdszdw * 4);
357 
358 		for (int idx = 0; idx < cmdszdw; idx++) {
359 			int p = mod_add(rptr, idx);
360 			buf[idx] = ringbuffers[id].buf[p];
361 		}
362 
363 		dump_commands(buf, cmdszdw, 0);
364 		free(buf);
365 	}
366 }
367 
368 /*
369  * Decode 'bos' (buffers) section:
370  */
371 
372 static void
decode_bos(void)373 decode_bos(void)
374 {
375 	uint32_t size = 0;
376 	uint64_t iova = 0;
377 
378 	foreach_line_in_section (line) {
379 		if (startswith(line, "  - iova:")) {
380 			parseline(line, "  - iova: %"PRIx64, &iova);
381 		} else if (startswith(line, "    size:")) {
382 			parseline(line, "    size: %u", &size);
383 		} else if (startswith(line, "    data: !!ascii85 |")) {
384 			uint32_t *buf = popline_ascii85(size / 4);
385 
386 			if (verbose)
387 				dump_hex_ascii(buf, size, 1);
388 
389 			add_buffer(iova, size, buf);
390 
391 			continue;
392 		}
393 
394 		printf("%s", line);
395 	}
396 }
397 
398 /*
399  * Decode registers section:
400  */
401 
402 static void
dump_register(struct rnn * rnn,uint32_t offset,uint32_t value)403 dump_register(struct rnn *rnn, uint32_t offset, uint32_t value)
404 {
405 	struct rnndecaddrinfo *info = rnn_reginfo(rnn, offset);
406 	if (info && info->typeinfo) {
407 		char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
408 		printf("%s: %s\n", info->name, decoded);
409 	} else if (info) {
410 		printf("%s: %08x\n", info->name, value);
411 	} else {
412 		printf("<%04x>: %08x\n", offset, value);
413 	}
414 }
415 
416 static void
decode_gmu_registers(void)417 decode_gmu_registers(void)
418 {
419 	foreach_line_in_section (line) {
420 		uint32_t offset, value;
421 		parseline(line, "  - { offset: %x, value: %x }", &offset, &value);
422 
423 		printf("\t%08x\t", value);
424 		dump_register(rnn_gmu, offset/4, value);
425 	}
426 }
427 
428 static void
decode_registers(void)429 decode_registers(void)
430 {
431 	foreach_line_in_section (line) {
432 		uint32_t offset, value;
433 		parseline(line, "  - { offset: %x, value: %x }", &offset, &value);
434 
435 		reg_set(offset/4, value);
436 		printf("\t%08x", value);
437 		dump_register_val(offset/4, value, 0);
438 	}
439 }
440 
441 /* similar to registers section, but for banked context regs: */
442 static void
decode_clusters(void)443 decode_clusters(void)
444 {
445 	foreach_line_in_section (line) {
446 		if (startswith(line, "  - cluster-name:") ||
447 				startswith(line, "    - context:")) {
448 			printf("%s", line);
449 			continue;
450 		}
451 
452 		uint32_t offset, value;
453 		parseline(line, "      - { offset: %x, value: %x }", &offset, &value);
454 
455 		printf("\t%08x", value);
456 		dump_register_val(offset/4, value, 0);
457 	}
458 }
459 
460 /*
461  * Decode indexed-registers.. these aren't like normal registers, but a
462  * sort of FIFO where successive reads pop out associated debug state.
463  */
464 
/*
 * Pretty-print the CP_SQE_STAT indexed registers.  The first dword is
 * printed as the PC, and the 32 dwords following it are printed as $01
 * through $20 in two columns.  On a6xx, if $01 holds a valid type7
 * packet header, the name of that packet is printed too.
 *
 * 'stat' must hold at least 33 dwords.
 */
static void
dump_cp_sqe_stat(uint32_t *stat)
{
	const uint32_t *regs = &stat[1];

	printf("\t PC: %04x\n", stat[0]);

	/* Not sure if a valid header that isn't type7 can happen, in which
	 * case nothing extra is printed:
	 */
	const uint32_t hdr = regs[0];
	if (is_a6xx() && valid_header(hdr) && pkt_is_type7(hdr)) {
		const char *name = pktname(cp_type7_opcode(hdr));

		if (name)
			printf("\tPKT: %s\n", name);
	}

	for (int row = 0; row < 16; row++) {
		const int left = row;
		const int right = row + 16;

		printf("\t$%02x: %08x\t\t$%02x: %08x\n",
				left + 1, regs[left], right + 1, regs[right]);
	}
}
487 
488 static void
dump_control_regs(uint32_t * regs)489 dump_control_regs(uint32_t *regs)
490 {
491 	if (!rnn_control)
492 		return;
493 
494 	/* Control regs 0x100-0x17f are a scratch space to be used by the
495 	 * firmware however it wants, unlike lower regs which involve some
496 	 * fixed-function units. Therefore only these registers get dumped
497 	 * directly.
498 	 */
499 	for (uint32_t i = 0; i < 0x80; i++) {
500 		printf("\t%08x\t", regs[i]);
501 		dump_register(rnn_control, i + 0x100, regs[i]);
502 	}
503 }
504 
505 static void
dump_cp_ucode_dbg(uint32_t * dbg)506 dump_cp_ucode_dbg(uint32_t *dbg)
507 {
508 	/* Notes on the data:
509 	 * There seems to be a section every 4096 DWORD's. The sections aren't
510 	 * all the same size, so the rest of the 4096 DWORD's are filled with
511 	 * mirrors of the actual data.
512 	 */
513 
514 	for (int section = 0; section < 6; section++, dbg += 0x1000) {
515 		switch (section) {
516 		case 0:
517 			/* Contains scattered data from a630_sqe.fw: */
518 			printf("\tSQE instruction cache:\n");
519 			dump_hex_ascii(dbg, 4 * 0x400, 1);
520 			break;
521 		case 1:
522 			printf("\tUnknown 1:\n");
523 			dump_hex_ascii(dbg, 4 * 0x80, 1);
524 			break;
525 		case 2:
526 			printf("\tUnknown 2:\n");
527 			dump_hex_ascii(dbg, 4 * 0x200, 1);
528 			break;
529 		case 3:
530 			printf("\tUnknown 3:\n");
531 			dump_hex_ascii(dbg, 4 * 0x80, 1);
532 			break;
533 		case 4:
534 			/* Don't bother printing this normally */
535 			if (verbose) {
536 				printf("\tSQE packet jumptable contents:\n");
537 				dump_hex_ascii(dbg, 4 * 0x80, 1);
538 			}
539 			break;
540 		case 5:
541 			printf("\tSQE scratch control regs:\n");
542 			dump_control_regs(dbg);
543 			break;
544 		}
545 	}
546 }
547 
548 static void
dump_mem_pool_reg_write(unsigned reg,uint32_t data,unsigned context,bool pipe)549 dump_mem_pool_reg_write(unsigned reg, uint32_t data, unsigned context, bool pipe)
550 {
551 	if (pipe) {
552 		struct rnndecaddrinfo *info = rnn_reginfo(rnn_pipe, reg);
553 		printf("\t\twrite %s (%02x) pipe\n", info->name, reg);
554 
555 		if (!strcmp(info->typeinfo->name, "void")) {
556 			/* registers that ignore their payload */
557 		} else {
558 			printf("\t\t\t");
559 			dump_register(rnn_pipe, reg, data);
560 		}
561 	} else {
562 		printf("\t\twrite %s (%05x) context %d\n", regname(reg, 1), reg, context);
563 		dump_register_val(reg, data, 2);
564 	}
565 }
566 
/*
 * Decode and print one 128-bit mem pool chunk (four dwords at 'chunk').
 * A chunk carries up to two register writes; each is only printed if its
 * enable bit is set.
 */
static void
dump_mem_pool_chunk(const uint32_t *chunk)
{
	/* Bit layout of a chunk; the field widths add up to 128 bits.
	 *
	 * NOTE(review): this relies on the compiler packing the bitfields
	 * in declaration order with no padding, so that the struct is
	 * exactly the 16 bytes copied below -- confirm for compilers and
	 * targets other than the ones this has been tested on.
	 */
	struct __attribute__((packed)) {
		bool reg0_enabled : 1;
		bool reg1_enabled : 1;
		uint32_t data0 : 32;
		uint32_t data1 : 32;
		uint32_t reg0 : 18;
		uint32_t reg1 : 18;
		bool reg0_pipe : 1;
		bool reg1_pipe : 1;
		uint32_t reg0_context : 1;
		uint32_t reg1_context : 1;
		uint32_t padding : 22;
	} fields;

	/* copy rather than cast, to reinterpret the raw dwords as fields: */
	memcpy(&fields, chunk, 4 * sizeof(uint32_t));

	if (fields.reg0_enabled) {
		dump_mem_pool_reg_write(fields.reg0, fields.data0, fields.reg0_context, fields.reg0_pipe);
	}

	if (fields.reg1_enabled) {
		dump_mem_pool_reg_write(fields.reg1, fields.data1, fields.reg1_context, fields.reg1_pipe);
	}
}
594 
595 static void
dump_cp_mem_pool(uint32_t * mempool)596 dump_cp_mem_pool(uint32_t *mempool)
597 {
598 	/* The mem pool is a shared pool of memory used for storing in-flight
599 	 * register writes. There are 6 different queues, one for each
600 	 * cluster. Writing to $data (or for some special registers, $addr)
601 	 * pushes data onto the appropriate queue, and each queue is pulled
602 	 * from by the appropriate cluster. The queues are thus written to
603 	 * in-order, but may be read out-of-order.
604 	 *
605 	 * The queues are conceptually divided into 128-bit "chunks", and the
606 	 * read and write pointers are in units of chunks.  These chunks are
607 	 * organized internally into 8-chunk "blocks", and memory is allocated
608 	 * dynamically in terms of blocks. Each queue is represented as a
609 	 * singly-linked list of blocks, as well as 3-bit start/end chunk
610 	 * pointers that point within the first/last block.  The next pointers
611 	 * are located in a separate array, rather than inline.
612 	 */
613 
614 	/* TODO: The firmware CP_MEM_POOL save/restore routines do something
615 	 * like:
616 	 *
617 	 * cread $02, [ $00 + 0 ]
618 	 * and $02, $02, 0x118
619 	 * ...
620 	 * brne $02, 0, #label
621 	 * mov $03, 0x2000
622 	 * mov $03, 0x1000
623 	 * label:
624 	 * ...
625 	 *
626 	 * I think that control register 0 is the GPU version, and some
627 	 * versions have a smaller mem pool. It seems some models have a mem
628 	 * pool that's half the size, and a bunch of offsets are shifted
629 	 * accordingly. Unfortunately the kernel driver's dumping code doesn't
630 	 * seem to take this into account, even the downstream android driver,
631 	 * and we don't know which versions 0x8, 0x10, or 0x100 correspond
632 	 * to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out?
633 	 */
634 	bool small_mem_pool = false;
635 
636 	/* The array of next pointers for each block. */
637 	const uint32_t *next_pointers = small_mem_pool ? &mempool[0x800] : &mempool[0x1000];
638 
639 	/* Maximum number of blocks in the pool, also the size of the pointers
640 	 * array.
641 	 */
642 	const int num_blocks = small_mem_pool ? 0x30 : 0x80;
643 
644 	/* Number of queues */
645 	const unsigned num_queues = 6;
646 
647 	/* Unfortunately the per-queue state is a little more complicated than
648 	 * a simple pair of begin/end pointers. Instead of a single beginning
649 	 * block, there are *two*, with the property that either the two are
650 	 * equal or the second is the "next" of the first. Similarly there are
651 	 * two end blocks. Thus the queue either looks like this:
652 	 *
653 	 * A -> B -> ... -> C -> D
654 	 *
655 	 * Or like this, or some combination:
656 	 *
657 	 * A/B -> ... -> C/D
658 	 *
659 	 * However, there's only one beginning/end chunk offset. Now the
660 	 * question is, which of A or B is the actual start? I.e. is the chunk
661 	 * offset an offset inside A or B? It depends. I'll show a typical read
662 	 * cycle, starting here (read pointer marked with a *) with a chunk
663 	 * offset of 0:
664 	 *
665 	 *	  A                    B
666 	 *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
667 	 * |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_|
668 	 *
669 	 * Once the pointer advances far enough, the hardware decides to free
670 	 * A, after which the read-side state looks like:
671 	 *
672 	 *	(free)                A/B
673 	 *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
674 	 * |_|_|_|_|_|_|_|_|    |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_|
675 	 *
676 	 * Then after advancing the pointer a bit more, the hardware fetches
677 	 * the "next" pointer for A and stores it in B:
678 	 *
679 	 *	(free)                 A                     B
680 	 *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
681 	 * |_|_|_|_|_|_|_|_|    |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_|
682 	 *
683 	 * Then the read pointer advances into B, at which point we've come
684 	 * back to the first state having advanced a whole block:
685 	 *
686 	 *	(free)                 A                     B
687 	 *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
688 	 * |_|_|_|_|_|_|_|_|    |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_|
689 	 *
690 	 *
691 	 * There is a similar cycle for the write pointer. Now, the question
692 	 * is, how do we know which state we're in? We need to know this to
693 	 * know whether the pointer (*) is in A or B if they're different. It
694 	 * seems like there should be some bit somewhere describing this, but
695 	 * after lots of experimentation I've come up empty-handed. For now we
696 	 * assume that if the pointer is in the first half, then we're in
697 	 * either the first or second state and use B, and otherwise we're in
698 	 * the second or third state and use A. So far I haven't seen anything
699 	 * that violates this assumption.
700 	 */
701 
702 	struct {
703 		uint32_t unk0;
704 		uint32_t padding0[7]; /* Mirrors of unk0 */
705 
706 		struct {
707 			uint32_t chunk : 3;
708 			uint32_t first_block : 32 - 3;
709 		} writer[6];
710 		uint32_t padding1[2]; /* Mirrors of writer[4], writer[5] */
711 
712 		uint32_t unk1;
713 		uint32_t padding2[7]; /* Mirrors of unk1 */
714 
715 		uint32_t writer_second_block[6];
716 		uint32_t padding3[2];
717 
718 		uint32_t unk2[6];
719 		uint32_t padding4[2];
720 
721 		struct {
722 			uint32_t chunk : 3;
723 			uint32_t first_block : 32 - 3;
724 		} reader[6];
725 		uint32_t padding5[2]; /* Mirrors of reader[4], reader[5] */
726 
727 		uint32_t unk3;
728 		uint32_t padding6[7]; /* Mirrors of unk3 */
729 
730 		uint32_t reader_second_block[6];
731 		uint32_t padding7[2];
732 
733 		uint32_t block_count[6];
734 		uint32_t padding[2];
735 
736 		uint32_t unk4;
737 		uint32_t padding9[7]; /* Mirrors of unk4 */
738 	} data1;
739 
740 	const uint32_t *data1_ptr = small_mem_pool ? &mempool[0xc00] : &mempool[0x1800];
741 	memcpy(&data1, data1_ptr, sizeof(data1));
742 
743 	/* Based on the kernel, the first dword is the mem pool size (in
744 	 * blocks?) and mirrors CP_MEM_POOL_DBG_SIZE.
745 	 */
746 	const uint32_t *data2_ptr = small_mem_pool ? &mempool[0x1000] : &mempool[0x2000];
747 	const int data2_size = 0x60;
748 
749 	/* This seems to be the size of each queue in chunks. */
750 	const uint32_t *queue_sizes = &data2_ptr[0x18];
751 
752 	printf("\tdata2:\n");
753 	dump_hex_ascii(data2_ptr, 4 * data2_size, 1);
754 
755 	/* These seem to be some kind of counter of allocated/deallocated blocks */
756 	if (verbose) {
757 		printf("\tunk0: %x\n", data1.unk0);
758 		printf("\tunk1: %x\n", data1.unk1);
759 		printf("\tunk3: %x\n", data1.unk3);
760 		printf("\tunk4: %x\n\n", data1.unk4);
761 	}
762 
763 	for (int queue = 0; queue < num_queues; queue++) {
764 		const char *cluster_names[6] = {
765 			"FE", "SP_VS", "PC_VS", "GRAS", "SP_PS", "PS"
766 		};
767 		printf("\tCLUSTER_%s:\n\n", cluster_names[queue]);
768 
769 		if (verbose) {
770 			printf("\t\twriter_first_block: 0x%x\n", data1.writer[queue].first_block);
771 			printf("\t\twriter_second_block: 0x%x\n", data1.writer_second_block[queue]);
772 			printf("\t\twriter_chunk: %d\n", data1.writer[queue].chunk);
773 			printf("\t\treader_first_block: 0x%x\n", data1.reader[queue].first_block);
774 			printf("\t\treader_second_block: 0x%x\n", data1.reader_second_block[queue]);
775 			printf("\t\treader_chunk: %d\n", data1.reader[queue].chunk);
776 			printf("\t\tblock_count: %d\n", data1.block_count[queue]);
777 			printf("\t\tunk2: 0x%x\n", data1.unk2[queue]);
778 			printf("\t\tqueue_size: %d\n\n", queue_sizes[queue]);
779 		}
780 
781 		uint32_t cur_chunk = data1.reader[queue].chunk;
782 		uint32_t cur_block = cur_chunk > 3 ?
783 			data1.reader[queue].first_block :
784 			data1.reader_second_block[queue];
785 		uint32_t last_chunk = data1.writer[queue].chunk;
786 		uint32_t last_block = last_chunk > 3 ?
787 			data1.writer[queue].first_block :
788 			data1.writer_second_block[queue];
789 
790 		if (verbose)
791 			printf("\tblock %x\n", cur_block);
792 		if (cur_block >= num_blocks) {
793 			fprintf(stderr, "block %x too large\n", cur_block);
794 			exit(1);
795 		}
796 		unsigned calculated_queue_size = 0;
797 		while (cur_block != last_block || cur_chunk != last_chunk) {
798 			calculated_queue_size++;
799 			uint32_t *chunk_ptr = &mempool[cur_block * 0x20 + cur_chunk * 4];
800 
801 			dump_mem_pool_chunk(chunk_ptr);
802 
803 			printf("\t%05x: %08x %08x %08x %08x\n",
804 			       4 * (cur_block * 0x20 + cur_chunk + 4),
805 			       chunk_ptr[0], chunk_ptr[1], chunk_ptr[2], chunk_ptr[3]);
806 
807 			cur_chunk++;
808 			if (cur_chunk == 8) {
809 				cur_block = next_pointers[cur_block];
810 				if (verbose)
811 					printf("\tblock %x\n", cur_block);
812 				if (cur_block >= num_blocks) {
813 					fprintf(stderr, "block %x too large\n", cur_block);
814 					exit(1);
815 				}
816 				cur_chunk = 0;
817 			}
818 		}
819 		if (calculated_queue_size != queue_sizes[queue]) {
820 			printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n", calculated_queue_size);
821 		}
822 		printf("\n");
823 	}
824 }
825 
826 static void
decode_indexed_registers(void)827 decode_indexed_registers(void)
828 {
829 	char *name = NULL;
830 	uint32_t sizedwords = 0;
831 
832 	foreach_line_in_section (line) {
833 		if (startswith(line, "  - regs-name:")) {
834 			free(name);
835 			parseline(line, "  - regs-name: %ms", &name);
836 		} else if (startswith(line, "    dwords:")) {
837 			parseline(line, "    dwords: %u", &sizedwords);
838 		} else if (startswith(line, "    data: !!ascii85 |")) {
839 			uint32_t *buf = popline_ascii85(sizedwords);
840 
841 			/* some of the sections are pretty large, and are (at least
842 			 * so far) not useful, so skip them if not in verbose mode:
843 			 */
844 			bool dump = verbose ||
845 				!strcmp(name, "CP_SQE_STAT") ||
846 				!strcmp(name, "CP_DRAW_STATE") ||
847 				!strcmp(name, "CP_ROQ") ||
848 				0;
849 
850 			if (!strcmp(name, "CP_SQE_STAT"))
851 				dump_cp_sqe_stat(buf);
852 
853 			if (!strcmp(name, "CP_UCODE_DBG_DATA"))
854 				dump_cp_ucode_dbg(buf);
855 
856 			if (!strcmp(name, "CP_MEMPOOL"))
857 				dump_cp_mem_pool(buf);
858 
859 			if (dump)
860 				dump_hex_ascii(buf, 4 * sizedwords, 1);
861 
862 			free(buf);
863 
864 			continue;
865 		}
866 
867 		printf("%s", line);
868 	}
869 }
870 
871 /*
872  * Decode shader-blocks:
873  */
874 
875 static void
decode_shader_blocks(void)876 decode_shader_blocks(void)
877 {
878 	char *type = NULL;
879 	uint32_t sizedwords = 0;
880 
881 	foreach_line_in_section (line) {
882 		if (startswith(line, "  - type:")) {
883 			free(type);
884 			parseline(line, "  - type: %ms", &type);
885 		} else if (startswith(line, "      size:")) {
886 			parseline(line, "      size: %u", &sizedwords);
887 		} else if (startswith(line, "    data: !!ascii85 |")) {
888 			uint32_t *buf = popline_ascii85(sizedwords);
889 
890 			/* some of the sections are pretty large, and are (at least
891 			 * so far) not useful, so skip them if not in verbose mode:
892 			 */
893 			bool dump = verbose ||
894 				!strcmp(type, "A6XX_SP_INST_DATA") ||
895 				!strcmp(type, "A6XX_HLSQ_INST_RAM") ||
896 				0;
897 
898 			if (!strcmp(type, "A6XX_SP_INST_DATA") ||
899 					!strcmp(type, "A6XX_HLSQ_INST_RAM")) {
900 				/* TODO this section actually contains multiple shaders
901 				 * (or parts of shaders?), so perhaps we should search
902 				 * for ends of shaders and decode each?
903 				 */
904 				try_disasm_a3xx(buf, sizedwords, 1, stdout, options.gpu_id);
905 			}
906 
907 			if (dump)
908 				dump_hex_ascii(buf, 4 * sizedwords, 1);
909 
910 			free(buf);
911 
912 			continue;
913 		}
914 
915 		printf("%s", line);
916 	}
917 
918 	free(type);
919 }
920 
921 /*
922  * Decode debugbus section:
923  */
924 
925 static void
decode_debugbus(void)926 decode_debugbus(void)
927 {
928 	char *block = NULL;
929 	uint32_t sizedwords = 0;
930 
931 	foreach_line_in_section (line) {
932 		if (startswith(line, "  - debugbus-block:")) {
933 			free(block);
934 			parseline(line, "  - debugbus-block: %ms", &block);
935 		} else if (startswith(line, "    count:")) {
936 			parseline(line, "    count: %u", &sizedwords);
937 		} else if (startswith(line, "    data: !!ascii85 |")) {
938 			uint32_t *buf = popline_ascii85(sizedwords);
939 
940 			/* some of the sections are pretty large, and are (at least
941 			 * so far) not useful, so skip them if not in verbose mode:
942 			 */
943 			bool dump = verbose ||
944 				0;
945 
946 			if (dump)
947 				dump_hex_ascii(buf, 4 * sizedwords, 1);
948 
949 			free(buf);
950 
951 			continue;
952 		}
953 
954 		printf("%s", line);
955 	}
956 }
957 
958 /*
959  * Main crashdump decode loop:
960  */
961 
962 static void
decode(void)963 decode(void)
964 {
965 	const char *line;
966 
967 	while ((line = popline())) {
968 		printf("%s", line);
969 		if (startswith(line, "revision:")) {
970 			parseline(line, "revision: %u", &options.gpu_id);
971 			printf("Got gpu_id=%u\n", options.gpu_id);
972 
973 			cffdec_init(&options);
974 
975 			if (is_a6xx()) {
976 				rnn_gmu = rnn_new(!options.color);
977 				rnn_load_file(rnn_gmu, "adreno/a6xx_gmu.xml", "A6XX");
978 				rnn_control = rnn_new(!options.color);
979 				rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml", "A6XX_CONTROL_REG");
980 				rnn_pipe = rnn_new(!options.color);
981 				rnn_load_file(rnn_pipe, "adreno/adreno_pipe_regs.xml", "A6XX_PIPE_REG");
982 			} else if (is_a5xx()) {
983 				rnn_control = rnn_new(!options.color);
984 				rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml", "A5XX_CONTROL_REG");
985 			} else {
986 				rnn_control = NULL;
987 			}
988 		} else if (startswith(line, "bos:")) {
989 			decode_bos();
990 		} else if (startswith(line, "ringbuffer:")) {
991 			decode_ringbuffer();
992 		} else if (startswith(line, "registers:")) {
993 			decode_registers();
994 
995 			/* after we've recorded buffer contents, and CP register values,
996 			 * we can take a stab at decoding the cmdstream:
997 			 */
998 			dump_cmdstream();
999 		} else if (startswith(line, "registers-gmu:")) {
1000 			decode_gmu_registers();
1001 		} else if (startswith(line, "indexed-registers:")) {
1002 			decode_indexed_registers();
1003 		} else if (startswith(line, "shader-blocks:")) {
1004 			decode_shader_blocks();
1005 		} else if (startswith(line, "clusters:")) {
1006 			decode_clusters();
1007 		} else if (startswith(line, "debugbus:")) {
1008 			decode_debugbus();
1009 		}
1010 	}
1011 }
1012 
1013 /*
1014  * Usage and argument parsing:
1015  */
1016 
/* Print the usage message to stderr and exit with status 2. */
static void
usage(void)
{
	static const char text[] =
		"Usage:\n\n"
		"\tcrashdec [-achmsv] [-f FILE]\n\n"
		"Options:\n"
		"\t-a, --allregs   - show all registers (including ones not written since\n"
		"\t                  previous draw) at each draw\n"
		"\t-c, --color     - use colors\n"
		"\t-f, --file=FILE - read input from specified file (rather than stdin)\n"
		"\t-h, --help      - this usage message\n"
		"\t-m, --markers   - try to decode CP_NOP string markers\n"
		"\t-s, --summary   - don't show individual register writes, but just show\n"
		"\t                  register values on draws\n"
		"\t-v, --verbose   - dump more verbose output, including contents of\n"
		"\t                  less interesting buffers\n"
		"\n";

	fputs(text, stderr);
	exit(2);
}
1037 
/* Long option table for getopt_long().  Each long option maps onto the
 * short option character that main() handles; the table is terminated
 * by an empty entry.
 */
static const struct option opts[] = {
	{ .name = "allregs", .has_arg = 0, NULL, 'a' },
	{ .name = "color",   .has_arg = 0, NULL, 'c' },
	{ .name = "file",    .has_arg = 1, NULL, 'f' },
	{ .name = "help",    .has_arg = 0, NULL, 'h' },
	{ .name = "markers", .has_arg = 0, NULL, 'm' },
	{ .name = "summary", .has_arg = 0, NULL, 's' },
	{ .name = "verbose", .has_arg = 0, NULL, 'v' },
	{}
};
1048 
/* True if stdout is a tty, in which case output goes through the pager. */
static bool interactive;

/* Flush any pending output, and close the pager if one is in use. */
static void
cleanup(void)
{
	fflush(stdout);

	if (!interactive)
		return;

	pager_close();
}
1060 
1061 int
main(int argc,char ** argv)1062 main(int argc, char **argv)
1063 {
1064 	int c;
1065 
1066 	interactive = isatty(STDOUT_FILENO);
1067 	options.color = interactive;
1068 
1069 	/* default to read from stdin: */
1070 	in = stdin;
1071 
1072 	while ((c = getopt_long(argc, argv, "acf:hmsv", opts, NULL)) != -1) {
1073 		switch (c) {
1074 		case 'a':
1075 			options.allregs = true;
1076 			break;
1077 		case 'c':
1078 			options.color = true;
1079 			break;
1080 		case 'f':
1081 			in = fopen(optarg, "r");
1082 			break;
1083 		case 'm':
1084 			options.decode_markers = true;
1085 			break;
1086 		case 's':
1087 			options.summary = true;
1088 			break;
1089 		case 'v':
1090 			verbose = true;
1091 			break;
1092 		case 'h':
1093 		default:
1094 			usage();
1095 		}
1096 	}
1097 
1098 	disasm_a3xx_set_debug(PRINT_RAW);
1099 
1100 	if (interactive) {
1101 		pager_open();
1102 	}
1103 
1104 	atexit(cleanup);
1105 
1106 	decode();
1107 	cleanup();
1108 }
1109