1 /*
2 * Copyright © 2020 Google, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 /*
25 * Decoder for devcoredump traces from drm/msm. In case of a gpu crash/hang,
26 * the coredump should be found in:
27 *
28 * /sys/class/devcoredump/devcd<n>/data
29 *
30 * The crashdump will hang around for 5min, it can be cleared by writing to
31 * the file, ie:
32 *
33 * echo 1 > /sys/class/devcoredump/devcd<n>/data
34 *
35 * (the driver won't log any new crashdumps until the previous one is cleared
36 * or times out after 5min)
37 */
38
39
40 #include <assert.h>
41 #include <getopt.h>
42 #include <inttypes.h>
43 #include <stdarg.h>
44 #include <stdbool.h>
45 #include <stdint.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50
51 #include "buffers.h"
52 #include "cffdec.h"
53 #include "disasm.h"
54 #include "pager.h"
55 #include "rnnutil.h"
56 #include "util.h"
57 #include "ir3/instr-a3xx.h"
58
59
60 static FILE *in;
61 static bool verbose;
62
63 static struct rnn *rnn_gmu;
64 static struct rnn *rnn_control;
65 static struct rnn *rnn_pipe;
66
67 static struct cffdec_options options = {
68 .draw_filter = -1,
69 };
70
is_a6xx(void)71 static inline bool is_a6xx(void) { return (600 <= options.gpu_id) && (options.gpu_id < 700); }
is_a5xx(void)72 static inline bool is_a5xx(void) { return (500 <= options.gpu_id) && (options.gpu_id < 600); }
is_64b(void)73 static inline bool is_64b(void) { return options.gpu_id >= 500; }
74
75 /*
76 * Helpers to read register values:
77 */
78
/*
 * Return the recorded value of register `name`.  On 64b GPUs (ie. a5xx+)
 * the register at the following offset supplies the upper 32 bits; on
 * older GPUs only the single register is read.
 *
 * The name must resolve to a non-zero register offset (asserted).
 */
static uint64_t
regval64(const char *name)
{
	const unsigned lo_reg = regbase(name);
	assert(lo_reg);

	uint64_t result = reg_val(lo_reg);
	if (!is_64b())
		return result;

	const uint64_t hi = reg_val(lo_reg + 1);
	return result | (hi << 32);
}
90
/*
 * Return the recorded 32b value of register `name`.  The name must
 * resolve to a non-zero register offset (asserted).
 */
static uint32_t
regval(const char *name)
{
	const unsigned offset = regbase(name);

	assert(offset);

	return reg_val(offset);
}
98
99 /*
100 * Line reading and string helpers:
101 */
102
/*
 * Replace the first occurrence of `find` in `line` with `replace`.
 *
 * `line` must be heap allocated.  If `find` does not occur, `line` itself
 * is returned untouched.  Otherwise a newly allocated string is returned
 * and `line` is freed, so the caller must always continue with the
 * returned pointer.  Exits with an error message if memory runs out.
 */
static char *
replacestr(char *line, const char *find, const char *replace)
{
	char *match = strstr(line, find);
	if (!match)
		return line;

	const char *tail = match + strlen(find);
	const size_t head_len = (size_t)(match - line);
	const size_t repl_len = strlen(replace);
	const size_t tail_len = strlen(tail);

	char *newline = malloc(head_len + repl_len + tail_len + 1);
	if (!newline) {
		fprintf(stderr, "out of memory\n");
		exit(1);
	}

	memcpy(newline, line, head_len);
	memcpy(newline + head_len, replace, repl_len);
	/* +1 copies the terminating NUL along with the tail */
	memcpy(newline + head_len + repl_len, tail, tail_len + 1);

	free(line);

	return newline;
}
119
120 static char *lastline;
121 static char *pushedline;
122
123 static const char *
popline(void)124 popline(void)
125 {
126 char *r = pushedline;
127
128 if (r) {
129 pushedline = NULL;
130 return r;
131 }
132
133 free(lastline);
134
135 size_t n = 0;
136 if (getline(&r, &n, in) < 0)
137 exit(0);
138
139 /* Handle section name typo's from earlier kernels: */
140 r = replacestr(r, "CP_MEMPOOOL", "CP_MEMPOOL");
141 r = replacestr(r, "CP_SEQ_STAT", "CP_SQE_STAT");
142
143 lastline = r;
144 return r;
145 }
146
147 static void
pushline(void)148 pushline(void)
149 {
150 assert(!pushedline);
151 pushedline = lastline;
152 }
153
/*
 * Read the next input line and decode it as ascii85 data.
 *
 * Each group of up to five characters decodes to one dword, and a 'z'
 * stands for an all-zero dword.  At most `sizedwords` dwords are decoded;
 * if the line holds more, the excess is dropped with a warning instead of
 * being written past the end of the buffer.  Dwords not covered by the
 * line are left zeroed.
 *
 * Returns a heap buffer of `sizedwords` dwords (at least one dword is
 * always allocated) which the caller owns.  Exits if memory runs out.
 */
static uint32_t *
popline_ascii85(uint32_t sizedwords)
{
	const char *line = popline();

	/* At this point we expect the ascii85 data to be indented *some*
	 * amount, and to terminate at the end of the line.  So just eat
	 * up the leading whitespace.
	 */
	assert(*line == ' ');
	while (*line == ' ')
		line++;

	/* calloc(n, size) checks the multiplication for overflow; ask for
	 * at least one element so a zero-sized section still yields a
	 * valid, non-NULL buffer.
	 */
	uint32_t *buf = calloc(sizedwords ? sizedwords : 1, sizeof(*buf));
	if (!buf) {
		fprintf(stderr, "out of memory decoding ascii85 data\n");
		exit(1);
	}

	uint32_t idx = 0;

	/* Stop at NUL as well as newline: the final line of the input may
	 * lack a trailing newline.
	 */
	while (*line != '\n' && *line != '\0') {
		if (idx >= sizedwords) {
			fprintf(stderr, "ascii85 data exceeds expected %u dwords, truncating\n",
					sizedwords);
			break;
		}

		if (*line == 'z') {
			buf[idx++] = 0;
			line++;
			continue;
		}

		uint32_t accum = 0;
		for (int i = 0; i < 5 && *line != '\n' && *line != '\0'; i++) {
			accum *= 85;
			accum += (uint32_t)(*line - '!');
			line++;
		}

		buf[idx++] = accum;
	}

	return buf;
}
189
/*
 * Return true if `line` begins with the string `start`.  An empty
 * `start` matches every line.
 *
 * Only the first strlen(start) characters are compared, rather than
 * searching the whole line for the prefix.
 */
static bool
startswith(const char *line, const char *start)
{
	return strncmp(line, start, strlen(start)) == 0;
}
195
/*
 * sscanf() wrapper which insists that every conversion in `fmt` matched.
 *
 * The expected number of assignments is derived from `fmt`: each '%'
 * counts as one conversion, except for a literal "%%" and for
 * assignment-suppressed conversions ("%*..."), neither of which
 * contributes to the sscanf() return value.
 *
 * On a mismatch an error naming `fmt` is printed to stderr and the
 * process exits with status 1.
 */
static void
parseline(const char *line, const char *fmt, ...)
{
	int expected = 0;

	/* scan fmt string to extract expected # of conversions: */
	for (const char *p = fmt; *p; p++) {
		if (*p != '%')
			continue;

		if (p[1] == '%') {
			/* "%%" matches a literal percent; skip both chars */
			p++;
			continue;
		}

		if (p[1] != '*')
			expected++;
	}

	va_list ap;
	va_start(ap, fmt);
	const int matched = vsscanf(line, fmt, ap);
	va_end(ap);

	if (matched != expected) {
		fprintf(stderr, "parse error scanning: '%s'\n", fmt);
		exit(1);
	}
}
224
/*
 * Loop over the remaining lines of the current section, binding each one
 * to `_line`.  The statement following the macro is the loop body (it
 * becomes the `else` branch), so `continue` inside it moves on to the
 * next line.
 *
 * A line which does not start with a space marks the start of the next
 * section: it is handed back with pushline() and the loop terminates.
 */
#define foreach_line_in_section(_line)                                   \
	for (const char *_line = popline(); _line; _line = popline())        \
		if (_line[0] != ' ') {                                           \
			/* not indented: this line belongs to the next section */    \
			pushline();                                                  \
			break;                                                       \
		} else
232
233 /*
234 * Decode ringbuffer section:
235 */
236
/* Per-ring state collected by decode_ringbuffer(), indexed by ring id. */
static struct {
	uint64_t iova;    /* value of the "iova:" field */
	uint32_t rptr;    /* read pointer; used as a dword index into buf */
	uint32_t wptr;    /* write pointer; used as a dword index into buf */
	uint32_t size;    /* ring size in bytes (size / 4 dwords are decoded) */
	uint32_t *buf;    /* decoded ring contents, NULL until data is seen */
} ringbuffers[5];
244
245 static void
decode_ringbuffer(void)246 decode_ringbuffer(void)
247 {
248 int id = 0;
249
250 foreach_line_in_section (line) {
251 if (startswith(line, " - id:")) {
252 parseline(line, " - id: %d", &id);
253 assert(id < ARRAY_SIZE(ringbuffers));
254 } else if (startswith(line, " iova:")) {
255 parseline(line, " iova: %"PRIx64, &ringbuffers[id].iova);
256 } else if (startswith(line, " rptr:")) {
257 parseline(line, " rptr: %d", &ringbuffers[id].rptr);
258 } else if (startswith(line, " wptr:")) {
259 parseline(line, " wptr: %d", &ringbuffers[id].wptr);
260 } else if (startswith(line, " size:")) {
261 parseline(line, " size: %d", &ringbuffers[id].size);
262 } else if (startswith(line, " data: !!ascii85 |")) {
263 ringbuffers[id].buf = popline_ascii85(ringbuffers[id].size / 4);
264 add_buffer(ringbuffers[id].iova, ringbuffers[id].size, ringbuffers[id].buf);
265 continue;
266 }
267
268 printf("%s", line);
269 }
270 }
271
272 static bool
valid_header(uint32_t pkt)273 valid_header(uint32_t pkt)
274 {
275 if (options.gpu_id >= 500) {
276 return pkt_is_type4(pkt) || pkt_is_type7(pkt);
277 } else {
278 /* TODO maybe we can check validish looking pkt3 opc or pkt0
279 * register offset.. the cmds sent by kernel are usually
280 * fairly limited (other than initialization) which confines
281 * the search space a bit..
282 */
283 return true;
284 }
285 }
286
287 static void
dump_cmdstream(void)288 dump_cmdstream(void)
289 {
290 uint64_t rb_base = regval64("CP_RB_BASE");
291
292 printf("got rb_base=%"PRIx64"\n", rb_base);
293
294 options.ibs[1].base = regval64("CP_IB1_BASE");
295 options.ibs[1].rem = regval("CP_IB1_REM_SIZE");
296 options.ibs[2].base = regval64("CP_IB2_BASE");
297 options.ibs[2].rem = regval("CP_IB2_REM_SIZE");
298
299 /* Adjust remaining size to account for cmdstream slurped into ROQ
300 * but not yet consumed by SQE
301 *
302 * TODO add support for earlier GPUs once we tease out the needed
303 * registers.. see crashit.c in msmtest for hints.
304 *
305 * TODO it would be nice to be able to extract out register bitfields
306 * by name rather than hard-coding this.
307 */
308 if (is_a6xx()) {
309 options.ibs[1].rem += regval("CP_CSQ_IB1_STAT") >> 16;
310 options.ibs[2].rem += regval("CP_CSQ_IB2_STAT") >> 16;
311 }
312
313 printf("IB1: %"PRIx64", %u\n", options.ibs[1].base, options.ibs[1].rem);
314 printf("IB2: %"PRIx64", %u\n", options.ibs[2].base, options.ibs[2].rem);
315
316 /* now that we've got the regvals we want, reset register state
317 * so we aren't seeing values from decode_registers();
318 */
319 reset_regs();
320
321 for (int id = 0; id < ARRAY_SIZE(ringbuffers); id++) {
322 if (ringbuffers[id].iova != rb_base)
323 continue;
324 if (!ringbuffers[id].size)
325 continue;
326
327 printf("found ring!\n");
328
329 /* The kernel level ringbuffer (RB) wraps around, which
330 * cffdec doesn't really deal with.. so figure out how
331 * many dwords are unread
332 */
333 unsigned ringszdw = ringbuffers[id].size >> 2; /* in dwords */
334
335 /* helper macro to deal with modulo size math: */
336 #define mod_add(b, v) ((ringszdw + (int)(b) + (int)(v)) % ringszdw)
337
338 /* The rptr will (most likely) have moved past the IB to
339 * userspace cmdstream, so back up a bit, and then advance
340 * until we find a valid start of a packet.. this is going
341 * to be less reliable on a4xx and before (pkt0/pkt3),
342 * compared to pkt4/pkt7 with parity bits
343 */
344 const int lookback = 12;
345 unsigned rptr = mod_add(ringbuffers[id].rptr, -lookback);
346
347 for (int idx = 0; idx < lookback; idx++) {
348 if (valid_header(ringbuffers[id].buf[rptr]))
349 break;
350 rptr = mod_add(rptr, 1);
351 }
352
353 unsigned cmdszdw = mod_add(ringbuffers[id].wptr, -rptr);
354
355 printf("got cmdszdw=%d\n", cmdszdw);
356 uint32_t *buf = malloc(cmdszdw * 4);
357
358 for (int idx = 0; idx < cmdszdw; idx++) {
359 int p = mod_add(rptr, idx);
360 buf[idx] = ringbuffers[id].buf[p];
361 }
362
363 dump_commands(buf, cmdszdw, 0);
364 free(buf);
365 }
366 }
367
368 /*
369 * Decode 'bos' (buffers) section:
370 */
371
372 static void
decode_bos(void)373 decode_bos(void)
374 {
375 uint32_t size = 0;
376 uint64_t iova = 0;
377
378 foreach_line_in_section (line) {
379 if (startswith(line, " - iova:")) {
380 parseline(line, " - iova: %"PRIx64, &iova);
381 } else if (startswith(line, " size:")) {
382 parseline(line, " size: %u", &size);
383 } else if (startswith(line, " data: !!ascii85 |")) {
384 uint32_t *buf = popline_ascii85(size / 4);
385
386 if (verbose)
387 dump_hex_ascii(buf, size, 1);
388
389 add_buffer(iova, size, buf);
390
391 continue;
392 }
393
394 printf("%s", line);
395 }
396 }
397
398 /*
399 * Decode registers section:
400 */
401
402 static void
dump_register(struct rnn * rnn,uint32_t offset,uint32_t value)403 dump_register(struct rnn *rnn, uint32_t offset, uint32_t value)
404 {
405 struct rnndecaddrinfo *info = rnn_reginfo(rnn, offset);
406 if (info && info->typeinfo) {
407 char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
408 printf("%s: %s\n", info->name, decoded);
409 } else if (info) {
410 printf("%s: %08x\n", info->name, value);
411 } else {
412 printf("<%04x>: %08x\n", offset, value);
413 }
414 }
415
416 static void
decode_gmu_registers(void)417 decode_gmu_registers(void)
418 {
419 foreach_line_in_section (line) {
420 uint32_t offset, value;
421 parseline(line, " - { offset: %x, value: %x }", &offset, &value);
422
423 printf("\t%08x\t", value);
424 dump_register(rnn_gmu, offset/4, value);
425 }
426 }
427
428 static void
decode_registers(void)429 decode_registers(void)
430 {
431 foreach_line_in_section (line) {
432 uint32_t offset, value;
433 parseline(line, " - { offset: %x, value: %x }", &offset, &value);
434
435 reg_set(offset/4, value);
436 printf("\t%08x", value);
437 dump_register_val(offset/4, value, 0);
438 }
439 }
440
441 /* similar to registers section, but for banked context regs: */
442 static void
decode_clusters(void)443 decode_clusters(void)
444 {
445 foreach_line_in_section (line) {
446 if (startswith(line, " - cluster-name:") ||
447 startswith(line, " - context:")) {
448 printf("%s", line);
449 continue;
450 }
451
452 uint32_t offset, value;
453 parseline(line, " - { offset: %x, value: %x }", &offset, &value);
454
455 printf("\t%08x", value);
456 dump_register_val(offset/4, value, 0);
457 }
458 }
459
460 /*
461 * Decode indexed-registers.. these aren't like normal registers, but a
462 * sort of FIFO where successive reads pop out associated debug state.
463 */
464
/*
 * Pretty-print the CP_SQE_STAT indexed registers.
 *
 * stat[0] is printed as the PC, and the following 32 dwords are printed
 * as $01..$20 in two columns.  On a6xx, if the first of those dwords
 * looks like a type7 packet header, the packet name is printed as well.
 *
 * `stat` must hold at least 33 dwords; this is not checked here.
 */
static void
dump_cp_sqe_stat(uint32_t *stat)
{
	printf("\t PC: %04x\n", stat[0]);

	const uint32_t *regs = &stat[1];

	/* Not sure if a valid non-type7 header can show up here; if it
	 * does, it is silently ignored.
	 */
	if (is_a6xx() && valid_header(regs[0]) && pkt_is_type7(regs[0])) {
		const char *name = pktname(cp_type7_opcode(regs[0]));
		if (name)
			printf("\tPKT: %s\n", name);
	}

	for (int left = 1; left <= 16; left++) {
		const int right = left + 16;

		printf("\t$%02x: %08x\t\t$%02x: %08x\n",
				left, regs[left - 1], right, regs[right - 1]);
	}
}
487
488 static void
dump_control_regs(uint32_t * regs)489 dump_control_regs(uint32_t *regs)
490 {
491 if (!rnn_control)
492 return;
493
494 /* Control regs 0x100-0x17f are a scratch space to be used by the
495 * firmware however it wants, unlike lower regs which involve some
496 * fixed-function units. Therefore only these registers get dumped
497 * directly.
498 */
499 for (uint32_t i = 0; i < 0x80; i++) {
500 printf("\t%08x\t", regs[i]);
501 dump_register(rnn_control, i + 0x100, regs[i]);
502 }
503 }
504
505 static void
dump_cp_ucode_dbg(uint32_t * dbg)506 dump_cp_ucode_dbg(uint32_t *dbg)
507 {
508 /* Notes on the data:
509 * There seems to be a section every 4096 DWORD's. The sections aren't
510 * all the same size, so the rest of the 4096 DWORD's are filled with
511 * mirrors of the actual data.
512 */
513
514 for (int section = 0; section < 6; section++, dbg += 0x1000) {
515 switch (section) {
516 case 0:
517 /* Contains scattered data from a630_sqe.fw: */
518 printf("\tSQE instruction cache:\n");
519 dump_hex_ascii(dbg, 4 * 0x400, 1);
520 break;
521 case 1:
522 printf("\tUnknown 1:\n");
523 dump_hex_ascii(dbg, 4 * 0x80, 1);
524 break;
525 case 2:
526 printf("\tUnknown 2:\n");
527 dump_hex_ascii(dbg, 4 * 0x200, 1);
528 break;
529 case 3:
530 printf("\tUnknown 3:\n");
531 dump_hex_ascii(dbg, 4 * 0x80, 1);
532 break;
533 case 4:
534 /* Don't bother printing this normally */
535 if (verbose) {
536 printf("\tSQE packet jumptable contents:\n");
537 dump_hex_ascii(dbg, 4 * 0x80, 1);
538 }
539 break;
540 case 5:
541 printf("\tSQE scratch control regs:\n");
542 dump_control_regs(dbg);
543 break;
544 }
545 }
546 }
547
548 static void
dump_mem_pool_reg_write(unsigned reg,uint32_t data,unsigned context,bool pipe)549 dump_mem_pool_reg_write(unsigned reg, uint32_t data, unsigned context, bool pipe)
550 {
551 if (pipe) {
552 struct rnndecaddrinfo *info = rnn_reginfo(rnn_pipe, reg);
553 printf("\t\twrite %s (%02x) pipe\n", info->name, reg);
554
555 if (!strcmp(info->typeinfo->name, "void")) {
556 /* registers that ignore their payload */
557 } else {
558 printf("\t\t\t");
559 dump_register(rnn_pipe, reg, data);
560 }
561 } else {
562 printf("\t\twrite %s (%05x) context %d\n", regname(reg, 1), reg, context);
563 dump_register_val(reg, data, 2);
564 }
565 }
566
/*
 * Decode one mem pool chunk (four dwords) and print the register writes
 * it holds.  A chunk carries up to two writes, each with its own enable
 * bit, register offset, data, pipe flag and context bit.
 *
 * The chunk is unpacked by copying it over a packed bitfield struct, so
 * the result relies on the compiler's bitfield layout.
 */
static void
dump_mem_pool_chunk(const uint32_t *chunk)
{
	struct __attribute__((packed)) {
		bool reg0_enabled : 1;
		bool reg1_enabled : 1;
		uint32_t data0 : 32;
		uint32_t data1 : 32;
		uint32_t reg0 : 18;
		uint32_t reg1 : 18;
		bool reg0_pipe : 1;
		bool reg1_pipe : 1;
		uint32_t reg0_context : 1;
		uint32_t reg1_context : 1;
		uint32_t padding : 22;
	} writes;

	memcpy(&writes, chunk, 4 * sizeof(uint32_t));

	if (writes.reg0_enabled)
		dump_mem_pool_reg_write(writes.reg0, writes.data0,
				writes.reg0_context, writes.reg0_pipe);

	if (writes.reg1_enabled)
		dump_mem_pool_reg_write(writes.reg1, writes.data1,
				writes.reg1_context, writes.reg1_pipe);
}
594
595 static void
dump_cp_mem_pool(uint32_t * mempool)596 dump_cp_mem_pool(uint32_t *mempool)
597 {
598 /* The mem pool is a shared pool of memory used for storing in-flight
599 * register writes. There are 6 different queues, one for each
600 * cluster. Writing to $data (or for some special registers, $addr)
601 * pushes data onto the appropriate queue, and each queue is pulled
602 * from by the appropriate cluster. The queues are thus written to
603 * in-order, but may be read out-of-order.
604 *
605 * The queues are conceptually divided into 128-bit "chunks", and the
606 * read and write pointers are in units of chunks. These chunks are
607 * organized internally into 8-chunk "blocks", and memory is allocated
608 * dynamically in terms of blocks. Each queue is represented as a
609 * singly-linked list of blocks, as well as 3-bit start/end chunk
610 * pointers that point within the first/last block. The next pointers
611 * are located in a separate array, rather than inline.
612 */
613
614 /* TODO: The firmware CP_MEM_POOL save/restore routines do something
615 * like:
616 *
617 * cread $02, [ $00 + 0 ]
618 * and $02, $02, 0x118
619 * ...
620 * brne $02, 0, #label
621 * mov $03, 0x2000
622 * mov $03, 0x1000
623 * label:
624 * ...
625 *
626 * I think that control register 0 is the GPU version, and some
627 * versions have a smaller mem pool. It seems some models have a mem
628 * pool that's half the size, and a bunch of offsets are shifted
629 * accordingly. Unfortunately the kernel driver's dumping code doesn't
630 * seem to take this into account, even the downstream android driver,
631 * and we don't know which versions 0x8, 0x10, or 0x100 correspond
632 * to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out?
633 */
634 bool small_mem_pool = false;
635
636 /* The array of next pointers for each block. */
637 const uint32_t *next_pointers = small_mem_pool ? &mempool[0x800] : &mempool[0x1000];
638
639 /* Maximum number of blocks in the pool, also the size of the pointers
640 * array.
641 */
642 const int num_blocks = small_mem_pool ? 0x30 : 0x80;
643
644 /* Number of queues */
645 const unsigned num_queues = 6;
646
647 /* Unfortunately the per-queue state is a little more complicated than
648 * a simple pair of begin/end pointers. Instead of a single beginning
649 * block, there are *two*, with the property that either the two are
650 * equal or the second is the "next" of the first. Similarly there are
651 * two end blocks. Thus the queue either looks like this:
652 *
653 * A -> B -> ... -> C -> D
654 *
655 * Or like this, or some combination:
656 *
657 * A/B -> ... -> C/D
658 *
659 * However, there's only one beginning/end chunk offset. Now the
660 * question is, which of A or B is the actual start? I.e. is the chunk
661 * offset an offset inside A or B? It depends. I'll show a typical read
662 * cycle, starting here (read pointer marked with a *) with a chunk
663 * offset of 0:
664 *
665 * A B
666 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
667 * |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_|
668 *
669 * Once the pointer advances far enough, the hardware decides to free
670 * A, after which the read-side state looks like:
671 *
672 * (free) A/B
673 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
674 * |_|_|_|_|_|_|_|_| |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_|
675 *
676 * Then after advancing the pointer a bit more, the hardware fetches
677 * the "next" pointer for A and stores it in B:
678 *
679 * (free) A B
680 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
681 * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_|
682 *
683 * Then the read pointer advances into B, at which point we've come
684 * back to the first state having advanced a whole block:
685 *
686 * (free) A B
687 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
688 * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_|
689 *
690 *
691 * There is a similar cycle for the write pointer. Now, the question
692 * is, how do we know which state we're in? We need to know this to
693 * know whether the pointer (*) is in A or B if they're different. It
694 * seems like there should be some bit somewhere describing this, but
695 * after lots of experimentation I've come up empty-handed. For now we
696 * assume that if the pointer is in the first half, then we're in
697 * either the first or second state and use B, and otherwise we're in
698 * the second or third state and use A. So far I haven't seen anything
699 * that violates this assumption.
700 */
701
702 struct {
703 uint32_t unk0;
704 uint32_t padding0[7]; /* Mirrors of unk0 */
705
706 struct {
707 uint32_t chunk : 3;
708 uint32_t first_block : 32 - 3;
709 } writer[6];
710 uint32_t padding1[2]; /* Mirrors of writer[4], writer[5] */
711
712 uint32_t unk1;
713 uint32_t padding2[7]; /* Mirrors of unk1 */
714
715 uint32_t writer_second_block[6];
716 uint32_t padding3[2];
717
718 uint32_t unk2[6];
719 uint32_t padding4[2];
720
721 struct {
722 uint32_t chunk : 3;
723 uint32_t first_block : 32 - 3;
724 } reader[6];
725 uint32_t padding5[2]; /* Mirrors of reader[4], reader[5] */
726
727 uint32_t unk3;
728 uint32_t padding6[7]; /* Mirrors of unk3 */
729
730 uint32_t reader_second_block[6];
731 uint32_t padding7[2];
732
733 uint32_t block_count[6];
734 uint32_t padding[2];
735
736 uint32_t unk4;
737 uint32_t padding9[7]; /* Mirrors of unk4 */
738 } data1;
739
740 const uint32_t *data1_ptr = small_mem_pool ? &mempool[0xc00] : &mempool[0x1800];
741 memcpy(&data1, data1_ptr, sizeof(data1));
742
743 /* Based on the kernel, the first dword is the mem pool size (in
744 * blocks?) and mirrors CP_MEM_POOL_DBG_SIZE.
745 */
746 const uint32_t *data2_ptr = small_mem_pool ? &mempool[0x1000] : &mempool[0x2000];
747 const int data2_size = 0x60;
748
749 /* This seems to be the size of each queue in chunks. */
750 const uint32_t *queue_sizes = &data2_ptr[0x18];
751
752 printf("\tdata2:\n");
753 dump_hex_ascii(data2_ptr, 4 * data2_size, 1);
754
755 /* These seem to be some kind of counter of allocated/deallocated blocks */
756 if (verbose) {
757 printf("\tunk0: %x\n", data1.unk0);
758 printf("\tunk1: %x\n", data1.unk1);
759 printf("\tunk3: %x\n", data1.unk3);
760 printf("\tunk4: %x\n\n", data1.unk4);
761 }
762
763 for (int queue = 0; queue < num_queues; queue++) {
764 const char *cluster_names[6] = {
765 "FE", "SP_VS", "PC_VS", "GRAS", "SP_PS", "PS"
766 };
767 printf("\tCLUSTER_%s:\n\n", cluster_names[queue]);
768
769 if (verbose) {
770 printf("\t\twriter_first_block: 0x%x\n", data1.writer[queue].first_block);
771 printf("\t\twriter_second_block: 0x%x\n", data1.writer_second_block[queue]);
772 printf("\t\twriter_chunk: %d\n", data1.writer[queue].chunk);
773 printf("\t\treader_first_block: 0x%x\n", data1.reader[queue].first_block);
774 printf("\t\treader_second_block: 0x%x\n", data1.reader_second_block[queue]);
775 printf("\t\treader_chunk: %d\n", data1.reader[queue].chunk);
776 printf("\t\tblock_count: %d\n", data1.block_count[queue]);
777 printf("\t\tunk2: 0x%x\n", data1.unk2[queue]);
778 printf("\t\tqueue_size: %d\n\n", queue_sizes[queue]);
779 }
780
781 uint32_t cur_chunk = data1.reader[queue].chunk;
782 uint32_t cur_block = cur_chunk > 3 ?
783 data1.reader[queue].first_block :
784 data1.reader_second_block[queue];
785 uint32_t last_chunk = data1.writer[queue].chunk;
786 uint32_t last_block = last_chunk > 3 ?
787 data1.writer[queue].first_block :
788 data1.writer_second_block[queue];
789
790 if (verbose)
791 printf("\tblock %x\n", cur_block);
792 if (cur_block >= num_blocks) {
793 fprintf(stderr, "block %x too large\n", cur_block);
794 exit(1);
795 }
796 unsigned calculated_queue_size = 0;
797 while (cur_block != last_block || cur_chunk != last_chunk) {
798 calculated_queue_size++;
799 uint32_t *chunk_ptr = &mempool[cur_block * 0x20 + cur_chunk * 4];
800
801 dump_mem_pool_chunk(chunk_ptr);
802
803 printf("\t%05x: %08x %08x %08x %08x\n",
804 4 * (cur_block * 0x20 + cur_chunk + 4),
805 chunk_ptr[0], chunk_ptr[1], chunk_ptr[2], chunk_ptr[3]);
806
807 cur_chunk++;
808 if (cur_chunk == 8) {
809 cur_block = next_pointers[cur_block];
810 if (verbose)
811 printf("\tblock %x\n", cur_block);
812 if (cur_block >= num_blocks) {
813 fprintf(stderr, "block %x too large\n", cur_block);
814 exit(1);
815 }
816 cur_chunk = 0;
817 }
818 }
819 if (calculated_queue_size != queue_sizes[queue]) {
820 printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n", calculated_queue_size);
821 }
822 printf("\n");
823 }
824 }
825
826 static void
decode_indexed_registers(void)827 decode_indexed_registers(void)
828 {
829 char *name = NULL;
830 uint32_t sizedwords = 0;
831
832 foreach_line_in_section (line) {
833 if (startswith(line, " - regs-name:")) {
834 free(name);
835 parseline(line, " - regs-name: %ms", &name);
836 } else if (startswith(line, " dwords:")) {
837 parseline(line, " dwords: %u", &sizedwords);
838 } else if (startswith(line, " data: !!ascii85 |")) {
839 uint32_t *buf = popline_ascii85(sizedwords);
840
841 /* some of the sections are pretty large, and are (at least
842 * so far) not useful, so skip them if not in verbose mode:
843 */
844 bool dump = verbose ||
845 !strcmp(name, "CP_SQE_STAT") ||
846 !strcmp(name, "CP_DRAW_STATE") ||
847 !strcmp(name, "CP_ROQ") ||
848 0;
849
850 if (!strcmp(name, "CP_SQE_STAT"))
851 dump_cp_sqe_stat(buf);
852
853 if (!strcmp(name, "CP_UCODE_DBG_DATA"))
854 dump_cp_ucode_dbg(buf);
855
856 if (!strcmp(name, "CP_MEMPOOL"))
857 dump_cp_mem_pool(buf);
858
859 if (dump)
860 dump_hex_ascii(buf, 4 * sizedwords, 1);
861
862 free(buf);
863
864 continue;
865 }
866
867 printf("%s", line);
868 }
869 }
870
871 /*
872 * Decode shader-blocks:
873 */
874
875 static void
decode_shader_blocks(void)876 decode_shader_blocks(void)
877 {
878 char *type = NULL;
879 uint32_t sizedwords = 0;
880
881 foreach_line_in_section (line) {
882 if (startswith(line, " - type:")) {
883 free(type);
884 parseline(line, " - type: %ms", &type);
885 } else if (startswith(line, " size:")) {
886 parseline(line, " size: %u", &sizedwords);
887 } else if (startswith(line, " data: !!ascii85 |")) {
888 uint32_t *buf = popline_ascii85(sizedwords);
889
890 /* some of the sections are pretty large, and are (at least
891 * so far) not useful, so skip them if not in verbose mode:
892 */
893 bool dump = verbose ||
894 !strcmp(type, "A6XX_SP_INST_DATA") ||
895 !strcmp(type, "A6XX_HLSQ_INST_RAM") ||
896 0;
897
898 if (!strcmp(type, "A6XX_SP_INST_DATA") ||
899 !strcmp(type, "A6XX_HLSQ_INST_RAM")) {
900 /* TODO this section actually contains multiple shaders
901 * (or parts of shaders?), so perhaps we should search
902 * for ends of shaders and decode each?
903 */
904 try_disasm_a3xx(buf, sizedwords, 1, stdout, options.gpu_id);
905 }
906
907 if (dump)
908 dump_hex_ascii(buf, 4 * sizedwords, 1);
909
910 free(buf);
911
912 continue;
913 }
914
915 printf("%s", line);
916 }
917
918 free(type);
919 }
920
921 /*
922 * Decode debugbus section:
923 */
924
925 static void
decode_debugbus(void)926 decode_debugbus(void)
927 {
928 char *block = NULL;
929 uint32_t sizedwords = 0;
930
931 foreach_line_in_section (line) {
932 if (startswith(line, " - debugbus-block:")) {
933 free(block);
934 parseline(line, " - debugbus-block: %ms", &block);
935 } else if (startswith(line, " count:")) {
936 parseline(line, " count: %u", &sizedwords);
937 } else if (startswith(line, " data: !!ascii85 |")) {
938 uint32_t *buf = popline_ascii85(sizedwords);
939
940 /* some of the sections are pretty large, and are (at least
941 * so far) not useful, so skip them if not in verbose mode:
942 */
943 bool dump = verbose ||
944 0;
945
946 if (dump)
947 dump_hex_ascii(buf, 4 * sizedwords, 1);
948
949 free(buf);
950
951 continue;
952 }
953
954 printf("%s", line);
955 }
956 }
957
958 /*
959 * Main crashdump decode loop:
960 */
961
962 static void
decode(void)963 decode(void)
964 {
965 const char *line;
966
967 while ((line = popline())) {
968 printf("%s", line);
969 if (startswith(line, "revision:")) {
970 parseline(line, "revision: %u", &options.gpu_id);
971 printf("Got gpu_id=%u\n", options.gpu_id);
972
973 cffdec_init(&options);
974
975 if (is_a6xx()) {
976 rnn_gmu = rnn_new(!options.color);
977 rnn_load_file(rnn_gmu, "adreno/a6xx_gmu.xml", "A6XX");
978 rnn_control = rnn_new(!options.color);
979 rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml", "A6XX_CONTROL_REG");
980 rnn_pipe = rnn_new(!options.color);
981 rnn_load_file(rnn_pipe, "adreno/adreno_pipe_regs.xml", "A6XX_PIPE_REG");
982 } else if (is_a5xx()) {
983 rnn_control = rnn_new(!options.color);
984 rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml", "A5XX_CONTROL_REG");
985 } else {
986 rnn_control = NULL;
987 }
988 } else if (startswith(line, "bos:")) {
989 decode_bos();
990 } else if (startswith(line, "ringbuffer:")) {
991 decode_ringbuffer();
992 } else if (startswith(line, "registers:")) {
993 decode_registers();
994
995 /* after we've recorded buffer contents, and CP register values,
996 * we can take a stab at decoding the cmdstream:
997 */
998 dump_cmdstream();
999 } else if (startswith(line, "registers-gmu:")) {
1000 decode_gmu_registers();
1001 } else if (startswith(line, "indexed-registers:")) {
1002 decode_indexed_registers();
1003 } else if (startswith(line, "shader-blocks:")) {
1004 decode_shader_blocks();
1005 } else if (startswith(line, "clusters:")) {
1006 decode_clusters();
1007 } else if (startswith(line, "debugbus:")) {
1008 decode_debugbus();
1009 }
1010 }
1011 }
1012
1013 /*
1014 * Usage and argument parsing:
1015 */
1016
/* Print the command line help to stderr and exit with status 2. */
static void
usage(void)
{
	static const char help[] =
		"Usage:\n\n"
		"\tcrashdec [-achmsv] [-f FILE]\n\n"
		"Options:\n"
		"\t-a, --allregs - show all registers (including ones not written since\n"
		"\t previous draw) at each draw\n"
		"\t-c, --color - use colors\n"
		"\t-f, --file=FILE - read input from specified file (rather than stdin)\n"
		"\t-h, --help - this usage message\n"
		"\t-m, --markers - try to decode CP_NOP string markers\n"
		"\t-s, --summary - don't show individual register writes, but just show\n"
		"\t register values on draws\n"
		"\t-v, --verbose - dump more verbose output, including contents of\n"
		"\t less interesting buffers\n"
		"\n";

	fputs(help, stderr);
	exit(2);
}
1037
/* Long options for getopt_long(); each maps onto its short option letter. */
static const struct option opts[] = {
	{ .name = "allregs", .has_arg = no_argument,       .flag = NULL, .val = 'a' },
	{ .name = "color",   .has_arg = no_argument,       .flag = NULL, .val = 'c' },
	{ .name = "file",    .has_arg = required_argument, .flag = NULL, .val = 'f' },
	{ .name = "help",    .has_arg = no_argument,       .flag = NULL, .val = 'h' },
	{ .name = "markers", .has_arg = no_argument,       .flag = NULL, .val = 'm' },
	{ .name = "summary", .has_arg = no_argument,       .flag = NULL, .val = 's' },
	{ .name = "verbose", .has_arg = no_argument,       .flag = NULL, .val = 'v' },
	{ 0 }   /* all-zero terminator entry */
};
1048
/* True when stdout is a terminal; main() then routes output through the pager. */
static bool interactive;

/* Exit handler: flush stdout and, in interactive mode, close the pager. */
static void
cleanup(void)
{
	fflush(stdout);

	if (!interactive)
		return;

	pager_close();
}
1060
1061 int
main(int argc,char ** argv)1062 main(int argc, char **argv)
1063 {
1064 int c;
1065
1066 interactive = isatty(STDOUT_FILENO);
1067 options.color = interactive;
1068
1069 /* default to read from stdin: */
1070 in = stdin;
1071
1072 while ((c = getopt_long(argc, argv, "acf:hmsv", opts, NULL)) != -1) {
1073 switch (c) {
1074 case 'a':
1075 options.allregs = true;
1076 break;
1077 case 'c':
1078 options.color = true;
1079 break;
1080 case 'f':
1081 in = fopen(optarg, "r");
1082 break;
1083 case 'm':
1084 options.decode_markers = true;
1085 break;
1086 case 's':
1087 options.summary = true;
1088 break;
1089 case 'v':
1090 verbose = true;
1091 break;
1092 case 'h':
1093 default:
1094 usage();
1095 }
1096 }
1097
1098 disasm_a3xx_set_debug(PRINT_RAW);
1099
1100 if (interactive) {
1101 pager_open();
1102 }
1103
1104 atexit(cleanup);
1105
1106 decode();
1107 cleanup();
1108 }
1109