1 /*
2  * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <err.h>
27 #include <inttypes.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <stdint.h>
31 #include <stdarg.h>
32 #include <stdbool.h>
33 #include <unistd.h>
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/wait.h>
37 #include <fcntl.h>
38 #include <string.h>
39 #include <assert.h>
40 #include <signal.h>
41 #include <errno.h>
42 
43 #include "redump.h"
44 #include "disasm.h"
45 #include "script.h"
46 #include "rnnutil.h"
47 #include "buffers.h"
48 #include "cffdec.h"
49 
50 /* ************************************************************************* */
51 /* originally based on kernel recovery dump code: */
52 
/* Decoder configuration; assigned by cffdec_init() and only read afterwards. */
static const struct cffdec_options *options;

/* Decoder-wide flags (file-scope statics start out zero/false): */
static bool needs_wfi;    /* when set, dump_registers() flags non-banked writes */
static bool summary;      /* copy of options->summary taken in cffdec_init() */
static bool in_summary;   /* gates the TEX_SAMP/TEX_CONST register handlers */
static int vertices = 0;
59 
regcnt(void)60 static inline unsigned regcnt(void)
61 {
62 	if (options->gpu_id >= 500)
63 		return 0xffff;
64 	else
65 		return 0x7fff;
66 }
67 
is_64b(void)68 static int is_64b(void)
69 {
70 	return options->gpu_id >= 500;
71 }
72 
73 
/* NOTE(review): not referenced in the visible part of this file. */
static int draws[3];
/* Per-IB-level tracking state, indexed by the current level `ib`. */
static struct {
	uint64_t base;   /* compared against GPU addresses in highlight_gpuaddr() */
	uint32_t size;   /* in dwords */
	/* Generally cmdstream consists of multiple IB calls to different
	 * buffers, which are themselves often re-used for each tile.  The
	 * triggered flag serves two purposes to help make it more clear
	 * what part of the cmdstream is before vs after the GPU hang:
	 *
	 * 1) if in IB2 we are past the point within the IB2 buffer where
	 *    the GPU hung, but IB1 is not past the point within its
	 *    buffer where the GPU had hung, then we know the GPU hang
	 *    happens on a future use of that IB2 buffer.
	 *
	 * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
	 *    hung, but we've already passed the trigger point at the same
	 *    IB level, we know that we are past the point where the GPU
	 *    had hung.
	 *
	 * So this is a one way switch, false->true.  And a higher #'d
	 * IB level isn't considered triggered unless the lower #'d IB
	 * level is.
	 */
	bool triggered;
} ibs[4];
/* Current IB level; used as the index into ibs[] and options->ibs[]. */
static int ib;
100 
/* draw_count is reset by cffdec_init() and printed by __do_query();
 * current_draw_count is what options->draw_filter is matched against
 * in quiet().
 */
static int draw_count;
static int current_draw_count;

/* query mode.. to handle symbolic register name queries, we need to
 * defer parsing the query strings until after gpu_id is known and the
 * rnn db is loaded:
 */
static int *queryvals;
109 
110 static bool
quiet(int lvl)111 quiet(int lvl)
112 {
113 	if ((options->draw_filter != -1) && (options->draw_filter != current_draw_count))
114 		return true;
115 	if ((lvl >= 3) && (summary || options->querystrs || options->script))
116 		return true;
117 	if ((lvl >= 2) && (options->querystrs || options->script))
118 		return true;
119 	return false;
120 }
121 
/* printf() to stdout unless quiet(lvl) says this level is suppressed. */
void
printl(int lvl, const char *fmt, ...)
{
	if (!quiet(lvl)) {
		va_list ap;

		va_start(ap, fmt);
		vprintf(fmt, ap);
		va_end(ap);
	}
}
132 
/*
 * Indentation prefix per nesting level: level N maps to N+1 tabs for
 * levels 0..8; levels 9..14 map to the literal "x".
 */
static const char *levels[] = {
	[0]  = "\t",
	[1]  = "\t\t",
	[2]  = "\t\t\t",
	[3]  = "\t\t\t\t",
	[4]  = "\t\t\t\t\t",
	[5]  = "\t\t\t\t\t\t",
	[6]  = "\t\t\t\t\t\t\t",
	[7]  = "\t\t\t\t\t\t\t\t",
	[8]  = "\t\t\t\t\t\t\t\t\t",
	[9]  = "x",
	[10] = "x",
	[11] = "x",
	[12] = "x",
	[13] = "x",
	[14] = "x",
};
150 
/*
 * Source kinds passed to dump_tex_samp().  The values are the implicit
 * 0/1/2 numbering, spelled out.
 */
enum state_src_t {
	STATE_SRC_DIRECT   = 0,
	STATE_SRC_INDIRECT = 1,
	STATE_SRC_BINDLESS = 2,
};
156 
157 /* SDS (CP_SET_DRAW_STATE) helpers: */
158 static void load_all_groups(int level);
159 static void disable_all_groups(void);
160 
161 static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level);
162 static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
163 
164 static bool
highlight_gpuaddr(uint64_t gpuaddr)165 highlight_gpuaddr(uint64_t gpuaddr)
166 {
167 	if (!options->color)
168 		return false;
169 
170 	if (!options->ibs[ib].base)
171 		return false;
172 
173 	if ((ib > 0) && options->ibs[ib-1].base && !ibs[ib-1].triggered)
174 		return false;
175 
176 	if (ibs[ib].triggered)
177 		return true;
178 
179 	if (options->ibs[ib].base != ibs[ib].base)
180 		return false;
181 
182 	uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
183 	uint64_t end   = ibs[ib].base + 4 * ibs[ib].size;
184 
185 	bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
186 
187 	ibs[ib].triggered |= triggered;
188 
189 	if (triggered)
190 		printf("ESTIMATED CRASH LOCATION!\n");
191 
192 	return triggered;
193 }
194 
195 static void
dump_hex(uint32_t * dwords,uint32_t sizedwords,int level)196 dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
197 {
198 	int i, j;
199 	int lastzero = 1;
200 
201 	if (quiet(2))
202 		return;
203 
204 	for (i = 0; i < sizedwords; i += 8) {
205 		int zero = 1;
206 
207 		/* always show first row: */
208 		if (i == 0)
209 			zero = 0;
210 
211 		for (j = 0; (j < 8) && (i+j < sizedwords) && zero; j++)
212 			if (dwords[i+j])
213 				zero = 0;
214 
215 		if (zero && !lastzero)
216 			printf("*\n");
217 
218 		lastzero = zero;
219 
220 		if (zero)
221 			continue;
222 
223 		uint64_t addr = gpuaddr(&dwords[i]);
224 		bool highlight = highlight_gpuaddr(addr);
225 
226 		if (highlight)
227 			printf("\x1b[0;1;31m");
228 
229 		if (is_64b()) {
230 			printf("%016"PRIx64":%s", addr, levels[level]);
231 		} else {
232 			printf("%08x:%s", (uint32_t)addr, levels[level]);
233 		}
234 
235 		if (highlight)
236 			printf("\x1b[0m");
237 
238 		printf("%04x:", i * 4);
239 
240 		for (j = 0; (j < 8) && (i+j < sizedwords); j++) {
241 			printf(" %08x", dwords[i+j]);
242 		}
243 
244 		printf("\n");
245 	}
246 }
247 
248 static void
dump_float(float * dwords,uint32_t sizedwords,int level)249 dump_float(float *dwords, uint32_t sizedwords, int level)
250 {
251 	int i;
252 	for (i = 0; i < sizedwords; i++) {
253 		if ((i % 8) == 0) {
254 			if (is_64b()) {
255 				printf("%016"PRIx64":%s", gpuaddr(dwords), levels[level]);
256 			} else {
257 				printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
258 			}
259 		} else {
260 			printf(" ");
261 		}
262 		printf("%8f", *(dwords++));
263 		if ((i % 8) == 7)
264 			printf("\n");
265 	}
266 	if (i % 8)
267 		printf("\n");
268 }
269 
/* I believe the surface format is in the low bits:
 *   #define RB_COLOR_INFO__COLOR_FORMAT_MASK                   0x0000000fL
 * Comments in sys2gmem_tex_const indicate that the address is [31:12], but
 * it looks like at least some of the bits above the format have a different
 * meaning..
 *
 * Splits `dword` into the bits selected by `mask` (*flags) and the
 * remaining bits (*gpuaddr).
 */
static void parse_dword_addr(uint32_t dword, uint32_t *gpuaddr,
		uint32_t *flags, uint32_t mask)
{
	const uint32_t flag_bits = dword & mask;
	const uint32_t addr_bits = dword ^ flag_bits;   /* == dword & ~mask */

	assert(!is_64b());  /* this is only used on a2xx */

	*gpuaddr = addr_bits;
	*flags   = flag_bits;
}
282 
/*
 * Shadow register state, indexed by register offset (regbase):
 *  - type0_reg_vals: last value stored by reg_set()
 *  - type0_reg_written / type0_reg_rewritten: bitmaps with one bit per
 *    register (byte regbase/8, bit regbase%8), both set by reg_set().
 *    clear_rewritten() clears only the "rewritten" map; clear_written()
 *    clears both.
 *  - lastvals: values returned by reg_lastval() and compared against the
 *    current values in query mode.
 *
 * NOTE(review): the bitmaps are sized from sizeof(type0_reg_vals), i.e. the
 * byte size rather than the element count, so they are larger than one bit
 * per register strictly needs.
 */
static uint32_t type0_reg_vals[0xffff + 1];
static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals)/8];  /* written since last draw */
static uint8_t type0_reg_written[sizeof(type0_reg_vals)/8];
static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
287 
reg_rewritten(uint32_t regbase)288 static bool reg_rewritten(uint32_t regbase)
289 {
290 	return !!(type0_reg_rewritten[regbase/8] & (1 << (regbase % 8)));
291 }
292 
reg_written(uint32_t regbase)293 bool reg_written(uint32_t regbase)
294 {
295 	return !!(type0_reg_written[regbase/8] & (1 << (regbase % 8)));
296 }
297 
clear_rewritten(void)298 static void clear_rewritten(void)
299 {
300 	memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
301 }
302 
clear_written(void)303 static void clear_written(void)
304 {
305 	memset(type0_reg_written, 0, sizeof(type0_reg_written));
306 	clear_rewritten();
307 }
308 
reg_lastval(uint32_t regbase)309 uint32_t reg_lastval(uint32_t regbase)
310 {
311 	return lastvals[regbase];
312 }
313 
314 static void
clear_lastvals(void)315 clear_lastvals(void)
316 {
317 	memset(lastvals, 0, sizeof(lastvals));
318 }
319 
320 uint32_t
reg_val(uint32_t regbase)321 reg_val(uint32_t regbase)
322 {
323 	return type0_reg_vals[regbase];
324 }
325 
326 void
reg_set(uint32_t regbase,uint32_t val)327 reg_set(uint32_t regbase, uint32_t val)
328 {
329 	assert(regbase < regcnt());
330 	type0_reg_vals[regbase] = val;
331 	type0_reg_written[regbase/8] |= (1 << (regbase % 8));
332 	type0_reg_rewritten[regbase/8] |= (1 << (regbase % 8));
333 }
334 
335 static void
reg_dump_scratch(const char * name,uint32_t dword,int level)336 reg_dump_scratch(const char *name, uint32_t dword, int level)
337 {
338 	unsigned r;
339 
340 	if (quiet(3))
341 		return;
342 
343 	r = regbase("CP_SCRATCH[0].REG");
344 
345 	// if not, try old a2xx/a3xx version:
346 	if (!r)
347 		r = regbase("CP_SCRATCH_REG0");
348 
349 	if (!r)
350 		return;
351 
352 	printf("%s:%u,%u,%u,%u\n", levels[level],
353 			reg_val(r + 4), reg_val(r + 5),
354 			reg_val(r + 6), reg_val(r + 7));
355 }
356 
/*
 * Hex-dump `sizedwords` dwords at `gpuaddr`, one indentation level deeper
 * than `level`.  Skipped when quiet(quietlvl) is true or hostptr() has no
 * mapping for the address.
 */
static void
dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
{
	if (quiet(quietlvl))
		return;

	void *host = hostptr(gpuaddr);
	if (!host)
		return;

	dump_hex(host, sizedwords, level + 1);
}
370 
/* Hex-dump the first 64 dwords at `gpuaddr`, at quiet level 3. */
static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
	enum { DEFAULT_DWORDS = 64, QUIET_LEVEL = 3 };

	dump_gpuaddr_size(gpuaddr, level, DEFAULT_DWORDS, QUIET_LEVEL);
}
376 
/* Register handler: treat the 32-bit register value as a GPU address and dump it. */
static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
	const uint64_t addr = dword;

	dump_gpuaddr(addr, level);
}
382 
/* Low dword of a 64-bit address, latched here until the matching _HI handler runs. */
uint32_t gpuaddr_lo;

/* Register handler for *_LO registers: remember the value in gpuaddr_lo. */
static void
reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
{
	(void)name;
	(void)level;

	gpuaddr_lo = dword;
}
389 
390 static void
reg_dump_gpuaddr_hi(const char * name,uint32_t dword,int level)391 reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
392 {
393 	dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
394 }
395 
396 
397 static void
dump_shader(const char * ext,void * buf,int bufsz)398 dump_shader(const char *ext, void *buf, int bufsz)
399 {
400 	if (options->dump_shaders) {
401 		static int n = 0;
402 		char filename[16];
403 		int fd;
404 		sprintf(filename, "%04d.%s", n++, ext);
405 		fd = open(filename, O_WRONLY| O_TRUNC | O_CREAT, 0644);
406 		if (fd != -1) {
407 			write(fd, buf, bufsz);
408 			close(fd);
409 		}
410 	}
411 }
412 
413 static void
disasm_gpuaddr(const char * name,uint64_t gpuaddr,int level)414 disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
415 {
416 	void *buf;
417 
418 	gpuaddr &= 0xfffffffffffffff0;
419 
420 	if (quiet(3))
421 		return;
422 
423 	buf = hostptr(gpuaddr);
424 	if (buf) {
425 		uint32_t sizedwords = hostlen(gpuaddr) / 4;
426 		const char *ext;
427 
428 		dump_hex(buf, min(64, sizedwords), level+1);
429 		try_disasm_a3xx(buf, sizedwords, level+2, stdout, options->gpu_id);
430 
431 		/* this is a bit ugly way, but oh well.. */
432 		if (strstr(name, "SP_VS_OBJ")) {
433 			ext = "vo3";
434 		} else if (strstr(name, "SP_FS_OBJ")) {
435 			ext = "fo3";
436 		} else if (strstr(name, "SP_GS_OBJ")) {
437 			ext = "go3";
438 		} else if (strstr(name, "SP_CS_OBJ")) {
439 			ext = "co3";
440 		} else {
441 			ext = NULL;
442 		}
443 
444 		if (ext)
445 			dump_shader(ext, buf, sizedwords * 4);
446 	}
447 }
448 
/* Register handler: treat the 32-bit register value as a shader address and disassemble it. */
static void
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
{
	const uint64_t addr = dword;

	disasm_gpuaddr(name, addr, level);
}
454 
455 static void
reg_disasm_gpuaddr_hi(const char * name,uint32_t dword,int level)456 reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
457 {
458 	disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
459 }
460 
/* Look up the value of the TEX_COUNT register that belongs to the named
 * TEX_SAMP/TEX_CONST register: everything from "CONST" (or, failing that,
 * "SAMP") onwards in `name` is replaced with "COUNT" and the resulting
 * register's shadow value is returned.  Returns 0 if `name` contains
 * neither substring.
 *
 * Note, this kinda assumes an equal # of samplers and textures, but not
 * really sure if there is a much better option.  I suppose on a6xx we
 * could instead decode the bitfields in SP_xS_CONFIG
 */
static int
get_tex_count(const char *name)
{
	char count_reg[strlen(name) + 5];
	const char *kind = strstr(name, "CONST");

	if (!kind) {
		kind = strstr(name, "SAMP");
		if (!kind)
			return 0;
	}

	/* keep the prefix before CONST/SAMP, then append "COUNT" plus NUL */
	const size_t prefix_len = kind - name;
	memcpy(count_reg, name, prefix_len);
	memcpy(count_reg + prefix_len, "COUNT", sizeof("COUNT"));

	return reg_val(regbase(count_reg));
}
486 
487 static void
reg_dump_tex_samp_hi(const char * name,uint32_t dword,int level)488 reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
489 {
490 	if (!in_summary)
491 		return;
492 
493 	int num_unit = get_tex_count(name);
494 	uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
495 	void *buf = hostptr(gpuaddr);
496 
497 	if (!buf)
498 		return;
499 
500 	dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level+1);
501 }
502 
503 static void
reg_dump_tex_const_hi(const char * name,uint32_t dword,int level)504 reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
505 {
506 	if (!in_summary)
507 		return;
508 
509 	int num_unit = get_tex_count(name);
510 	uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
511 	void *buf = hostptr(gpuaddr);
512 
513 	if (!buf)
514 		return;
515 
516 	dump_tex_const(buf, num_unit, level+1);
517 }
518 
/*
 * Registers with special handling (rnndec_decode() handles rest):
 *
 * One NULL-terminated table per GPU generation.  cffdec_init() points
 * type0_reg at the table for the current gpu_id, init_rnn() resolves each
 * regname to its offset (and exits if a name is unknown), and
 * dump_register() calls fxn for the first entry whose offset matches the
 * register being written.
 *
 * For the *_LO/*_HI pairs, the _LO handler only latches the low dword and
 * the _HI handler acts on the combined 64-bit address.
 */
#define REG(x, fxn) { #x, fxn }
static struct {
	/* register name as looked up in the rnn database */
	const char *regname;
	/* handler, called with the entry's regname, the value written, and the indent level */
	void (*fxn)(const char *name, uint32_t dword, int level);
	/* register offset; zero-initialized here and filled in by init_rnn() */
	uint32_t regbase;
} reg_a2xx[] = {
		REG(CP_SCRATCH_REG0, reg_dump_scratch),
		REG(CP_SCRATCH_REG1, reg_dump_scratch),
		REG(CP_SCRATCH_REG2, reg_dump_scratch),
		REG(CP_SCRATCH_REG3, reg_dump_scratch),
		REG(CP_SCRATCH_REG4, reg_dump_scratch),
		REG(CP_SCRATCH_REG5, reg_dump_scratch),
		REG(CP_SCRATCH_REG6, reg_dump_scratch),
		REG(CP_SCRATCH_REG7, reg_dump_scratch),
		{NULL},
}, reg_a3xx[] = {
		REG(CP_SCRATCH_REG0, reg_dump_scratch),
		REG(CP_SCRATCH_REG1, reg_dump_scratch),
		REG(CP_SCRATCH_REG2, reg_dump_scratch),
		REG(CP_SCRATCH_REG3, reg_dump_scratch),
		REG(CP_SCRATCH_REG4, reg_dump_scratch),
		REG(CP_SCRATCH_REG5, reg_dump_scratch),
		REG(CP_SCRATCH_REG6, reg_dump_scratch),
		REG(CP_SCRATCH_REG7, reg_dump_scratch),
		REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
		REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
		REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
		REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
		REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
		REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
		{NULL},
}, reg_a4xx[] = {
		REG(CP_SCRATCH[0].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
		REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
		REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
		REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
		REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
		REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
		REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
		REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
		REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
		REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
		REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
		REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
		REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
		REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
		REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
		REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
		REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
		REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
		{NULL},
}, reg_a5xx[] = {
		REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
		REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(TPL1_VS_TEX_SAMP_LO,  reg_gpuaddr_lo),
		REG(TPL1_VS_TEX_SAMP_HI,  reg_dump_tex_samp_hi),
		REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(TPL1_HS_TEX_SAMP_LO,  reg_gpuaddr_lo),
		REG(TPL1_HS_TEX_SAMP_HI,  reg_dump_tex_samp_hi),
		REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(TPL1_DS_TEX_SAMP_LO,  reg_gpuaddr_lo),
		REG(TPL1_DS_TEX_SAMP_HI,  reg_dump_tex_samp_hi),
		REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(TPL1_GS_TEX_SAMP_LO,  reg_gpuaddr_lo),
		REG(TPL1_GS_TEX_SAMP_HI,  reg_dump_tex_samp_hi),
		REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(TPL1_FS_TEX_SAMP_LO,  reg_gpuaddr_lo),
		REG(TPL1_FS_TEX_SAMP_HI,  reg_dump_tex_samp_hi),
		REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(TPL1_CS_TEX_SAMP_LO,  reg_gpuaddr_lo),
		REG(TPL1_CS_TEX_SAMP_HI,  reg_dump_tex_samp_hi),
		REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO,  reg_gpuaddr_lo),
		REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI,  reg_dump_gpuaddr_hi),
		/* The entries below are disabled (commented out) in the original: */
//		REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
//		REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
//		REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
//		REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
//		REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
//		REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
//		REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
//		REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
//		REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),

//		REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
//		REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
//		REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
//		REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
//		REG(RB_2D_DST_LO, reg_gpuaddr_lo),
//		REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
//		REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
//		REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),

		{NULL},
}, reg_a6xx[] = {
		REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),

		REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),

		REG(SP_VS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(SP_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(SP_VS_TEX_SAMP_LO,  reg_gpuaddr_lo),
		REG(SP_VS_TEX_SAMP_HI,  reg_dump_tex_samp_hi),
		REG(SP_HS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(SP_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(SP_HS_TEX_SAMP_LO,  reg_gpuaddr_lo),
		REG(SP_HS_TEX_SAMP_HI,  reg_dump_tex_samp_hi),
		REG(SP_DS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(SP_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(SP_DS_TEX_SAMP_LO,  reg_gpuaddr_lo),
		REG(SP_DS_TEX_SAMP_HI,  reg_dump_tex_samp_hi),
		REG(SP_GS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(SP_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(SP_GS_TEX_SAMP_LO,  reg_gpuaddr_lo),
		REG(SP_GS_TEX_SAMP_HI,  reg_dump_tex_samp_hi),
		REG(SP_FS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(SP_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(SP_FS_TEX_SAMP_LO,  reg_gpuaddr_lo),
		REG(SP_FS_TEX_SAMP_HI,  reg_dump_tex_samp_hi),
		REG(SP_CS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(SP_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(SP_CS_TEX_SAMP_LO,  reg_gpuaddr_lo),
		REG(SP_CS_TEX_SAMP_HI,  reg_dump_tex_samp_hi),

		{NULL},
}, *type0_reg;
706 
707 static struct rnn *rnn;
708 
709 static void
init_rnn(const char * gpuname)710 init_rnn(const char *gpuname)
711 {
712 	rnn = rnn_new(!options->color);
713 
714 	rnn_load(rnn, gpuname);
715 
716 	if (options->querystrs) {
717 		int i;
718 		queryvals = calloc(options->nquery, sizeof(queryvals[0]));
719 
720 		for (i = 0; i < options->nquery; i++) {
721 			int val = strtol(options->querystrs[i], NULL, 0);
722 
723 			if (val == 0)
724 				val = regbase(options->querystrs[i]);
725 
726 			queryvals[i] = val;
727 			printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
728 		}
729 	}
730 
731 	for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
732 		type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
733 		if (!type0_reg[idx].regbase) {
734 			printf("invalid register name: %s\n", type0_reg[idx].regname);
735 			exit(1);
736 		}
737 	}
738 }
739 
740 void
reset_regs(void)741 reset_regs(void)
742 {
743 	clear_written();
744 	clear_lastvals();
745 	memset(&ibs, 0, sizeof(ibs));
746 }
747 
748 void
cffdec_init(const struct cffdec_options * _options)749 cffdec_init(const struct cffdec_options *_options)
750 {
751 	options = _options;
752 	summary = options->summary;
753 
754 	/* in case we're decoding multiple files: */
755 	free(queryvals);
756 	reset_regs();
757 	draw_count = 0;
758 
759 	/* TODO we need an API to free/cleanup any previous rnn */
760 
761 	switch (options->gpu_id) {
762 	case 200 ... 299:
763 		type0_reg = reg_a2xx;
764 		init_rnn("a2xx");
765 		break;
766 	case 300 ... 399:
767 		type0_reg = reg_a3xx;
768 		init_rnn("a3xx");
769 		break;
770 	case 400 ... 499:
771 		type0_reg = reg_a4xx;
772 		init_rnn("a4xx");
773 		break;
774 	case 500 ... 599:
775 		type0_reg = reg_a5xx;
776 		init_rnn("a5xx");
777 		break;
778 	case 600 ... 699:
779 		type0_reg = reg_a6xx;
780 		init_rnn("a6xx");
781 		break;
782 	default:
783 		errx(-1, "unsupported gpu");
784 	}
785 }
786 
787 const char *
pktname(unsigned opc)788 pktname(unsigned opc)
789 {
790 	return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
791 }
792 
793 const char *
regname(uint32_t regbase,int color)794 regname(uint32_t regbase, int color)
795 {
796 	return rnn_regname(rnn, regbase, color);
797 }
798 
799 uint32_t
regbase(const char * name)800 regbase(const char *name)
801 {
802 	return rnn_regbase(rnn, name);
803 }
804 
/*
 * Return 1 if the name of register `regbase` ends with `suffix`, else 0.
 *
 * The tail of the name is compared directly, so a name that also contains
 * the suffix earlier on (which a first-match strstr() would stop at) is
 * still recognized, and no pointer is formed before the start of the
 * string.  A register without a name never matches.
 */
static int
endswith(uint32_t regbase, const char *suffix)
{
	const char *name = regname(regbase, 0);

	if (!name)
		return 0;

	const size_t name_len = strlen(name);
	const size_t suffix_len = strlen(suffix);

	if (suffix_len > name_len)
		return 0;

	return strcmp(name + (name_len - suffix_len), suffix) == 0;
}
814 
815 void
dump_register_val(uint32_t regbase,uint32_t dword,int level)816 dump_register_val(uint32_t regbase, uint32_t dword, int level)
817 {
818 	struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);
819 
820 	if (info && info->typeinfo) {
821 		uint64_t gpuaddr = 0;
822 		char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);
823 		printf("%s%s: %s", levels[level], info->name, decoded);
824 
825 		/* Try and figure out if we are looking at a gpuaddr.. this
826 		 * might be useful for other gen's too, but at least a5xx has
827 		 * the _HI/_LO suffix we can look for.  Maybe a better approach
828 		 * would be some special annotation in the xml..
829 		 */
830 		if (options->gpu_id >= 500) {
831 			if (endswith(regbase, "_HI") && endswith(regbase-1, "_LO")) {
832 				gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase-1);
833 			} else if (endswith(regbase, "_LO") && endswith(regbase+1, "_HI")) {
834 				gpuaddr = (((uint64_t)reg_val(regbase+1)) << 32) | dword;
835 			}
836 		}
837 
838 		if (gpuaddr && hostptr(gpuaddr)) {
839 			printf("\t\tbase=%"PRIx64", offset=%"PRIu64", size=%u",
840 					gpubaseaddr(gpuaddr),
841 					gpuaddr - gpubaseaddr(gpuaddr),
842 					hostlen(gpubaseaddr(gpuaddr)));
843 		}
844 
845 		printf("\n");
846 
847 		free(decoded);
848 	} else if (info) {
849 		printf("%s%s: %08x\n", levels[level], info->name, dword);
850 	} else {
851 		printf("%s<%04x>: %08x\n", levels[level], regbase, dword);
852 	}
853 
854 	if (info) {
855 		free(info->name);
856 		free(info);
857 	}
858 }
859 
860 static void
dump_register(uint32_t regbase,uint32_t dword,int level)861 dump_register(uint32_t regbase, uint32_t dword, int level)
862 {
863 	if (!quiet(3)) {
864 		dump_register_val(regbase, dword, level);
865 	}
866 
867 	for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
868 		if (type0_reg[idx].regbase == regbase) {
869 			type0_reg[idx].fxn(type0_reg[idx].regname, dword, level);
870 			break;
871 		}
872 	}
873 }
874 
/* True for register offsets in the range [0x2000, 0x2400). */
static bool
is_banked_reg(uint32_t regbase)
{
	if (regbase < 0x2000)
		return false;

	return regbase < 0x2400;
}
880 
881 static void
dump_registers(uint32_t regbase,uint32_t * dwords,uint32_t sizedwords,int level)882 dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords, int level)
883 {
884 	while (sizedwords--) {
885 		int last_summary = summary;
886 
887 		/* access to non-banked registers needs a WFI:
888 		 * TODO banked register range for a2xx??
889 		 */
890 		if (needs_wfi && !is_banked_reg(regbase))
891 			printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
892 
893 		reg_set(regbase, *dwords);
894 		dump_register(regbase, *dwords, level);
895 		regbase++;
896 		dwords++;
897 		summary = last_summary;
898 	}
899 }
900 
901 static void
dump_domain(uint32_t * dwords,uint32_t sizedwords,int level,const char * name)902 dump_domain(uint32_t *dwords, uint32_t sizedwords, int level,
903 		const char *name)
904 {
905 	struct rnndomain *dom;
906 	int i;
907 
908 	dom = rnn_finddomain(rnn->db, name);
909 
910 	if (!dom)
911 		return;
912 
913 	if (script_packet)
914 		script_packet(dwords, sizedwords, rnn, dom);
915 
916 	if (quiet(2))
917 		return;
918 
919 	for (i = 0; i < sizedwords; i++) {
920 		struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
921 		char *decoded;
922 		if (!(info && info->typeinfo))
923 			break;
924 		uint64_t value = dwords[i];
925 		if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
926 			value |= (uint64_t) dwords[i + 1] << 32;
927 			i++; /* skip the next dword since we're printing it now */
928 		}
929 		decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
930 		/* Unlike the register printing path, we don't print the name
931 		 * of the register, so if it doesn't contain other named
932 		 * things (i.e. it isn't a bitset) then print the register
933 		 * name as if it's a bitset with a single entry. This avoids
934 		 * having to create a dummy register with a single entry to
935 		 * get a name in the decoding.
936 		 */
937 		if (info->typeinfo->type == RNN_TTYPE_BITSET ||
938 		    info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
939 			printf("%s%s\n", levels[level], decoded);
940 		} else {
941 			printf("%s{ %s%s%s = %s }\n", levels[level],
942 					rnn->vc->colors->rname, info->name,
943 					rnn->vc->colors->reset, decoded);
944 		}
945 		free(decoded);
946 		free(info->name);
947 		free(info);
948 	}
949 }
950 
951 
/* Bin rectangle printed by __do_query(); on a5xx/a6xx it is refreshed
 * there from the window scissor registers.
 */
static uint32_t bin_x1, bin_x2, bin_y1, bin_y2;
static unsigned mode;
/* Mode name printed by print_mode() and __do_query(). */
static const char *render_mode;
/* Bitmask of render modes; all modes are enabled by default. */
static enum {
	MODE_BINNING = 1 << 0,
	MODE_GMEM    = 1 << 1,
	MODE_BYPASS  = 1 << 2,
	MODE_ALL     = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
} enable_mask = MODE_ALL;
/* Flags printed by print_mode() as "skip_ib2: g=.., l=..". */
static bool skip_ib2_enable_global;
static bool skip_ib2_enable_local;
963 
964 static void
print_mode(int level)965 print_mode(int level)
966 {
967 	if ((options->gpu_id >= 500) && !quiet(2)) {
968 		printf("%smode: %s\n", levels[level], render_mode);
969 		printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global, skip_ib2_enable_local);
970 	}
971 }
972 
973 static bool
skip_query(void)974 skip_query(void)
975 {
976 	switch (options->query_mode) {
977 	case QUERY_ALL:
978 		/* never skip: */
979 		return false;
980 	case QUERY_WRITTEN:
981 		for (int i = 0; i < options->nquery; i++) {
982 			uint32_t regbase = queryvals[i];
983 			if (!reg_written(regbase)) {
984 				continue;
985 			}
986 			if (reg_rewritten(regbase)) {
987 				return false;
988 			}
989 		}
990 		return true;
991 	case QUERY_DELTA:
992 		for (int i = 0; i < options->nquery; i++) {
993 			uint32_t regbase = queryvals[i];
994 			if (!reg_written(regbase)) {
995 				continue;
996 			}
997 			uint32_t lastval = reg_val(regbase);
998 			if (lastval != lastvals[regbase]) {
999 				return false;
1000 			}
1001 		}
1002 		return true;
1003 	}
1004 	return true;
1005 }
1006 
1007 static void
__do_query(const char * primtype,uint32_t num_indices)1008 __do_query(const char *primtype, uint32_t num_indices)
1009 {
1010 	int n = 0;
1011 
1012 	if ((500 <= options->gpu_id) && (options->gpu_id < 700)) {
1013 		uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1014 		uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1015 
1016 		bin_x1 = scissor_tl & 0xffff;
1017 		bin_y1 = scissor_tl >> 16;
1018 		bin_x2 = scissor_br & 0xffff;
1019 		bin_y2 = scissor_br >> 16;
1020 	}
1021 
1022 	for (int i = 0; i < options->nquery; i++) {
1023 		uint32_t regbase = queryvals[i];
1024 		if (reg_written(regbase)) {
1025 			uint32_t lastval = reg_val(regbase);
1026 			printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype,
1027 					bin_x1, bin_y1, bin_x2, bin_y2, num_indices);
1028 			if (options->gpu_id >= 500)
1029 				printf("%s:", render_mode);
1030 			printf("\t%08x", lastval);
1031 			if (lastval != lastvals[regbase]) {
1032 				printf("!");
1033 			} else {
1034 				printf(" ");
1035 			}
1036 			if (reg_rewritten(regbase)) {
1037 				printf("+");
1038 			} else {
1039 				printf(" ");
1040 			}
1041 			dump_register_val(regbase, lastval, 0);
1042 			n++;
1043 		}
1044 	}
1045 
1046 	if (n > 1)
1047 		printf("\n");
1048 }
1049 
1050 static void
do_query_compare(const char * primtype,uint32_t num_indices)1051 do_query_compare(const char *primtype, uint32_t num_indices)
1052 {
1053 	unsigned saved_enable_mask = enable_mask;
1054 	const char *saved_render_mode = render_mode;
1055 
1056 	/* in 'query-compare' mode, we want to see if the register is writtten
1057 	 * or changed in any mode:
1058 	 *
1059 	 * (NOTE: this could cause false-positive for 'query-delta' if the reg
1060 	 * is written with different values in binning vs sysmem/gmem mode, as
1061 	 * we don't track previous values per-mode, but I think we can live with
1062 	 * that)
1063 	 */
1064 	enable_mask = MODE_ALL;
1065 
1066 	clear_rewritten();
1067 	load_all_groups(0);
1068 
1069 	if (!skip_query()) {
1070 		/* dump binning pass values: */
1071 		enable_mask = MODE_BINNING;
1072 		render_mode = "BINNING";
1073 		clear_rewritten();
1074 		load_all_groups(0);
1075 		__do_query(primtype, num_indices);
1076 
1077 		/* dump draw pass values: */
1078 		enable_mask = MODE_GMEM | MODE_BYPASS;
1079 		render_mode = "DRAW";
1080 		clear_rewritten();
1081 		load_all_groups(0);
1082 		__do_query(primtype, num_indices);
1083 
1084 		printf("\n");
1085 	}
1086 
1087 	enable_mask = saved_enable_mask;
1088 	render_mode = saved_render_mode;
1089 
1090 	disable_all_groups();
1091 }
1092 
1093 /* well, actually query and script..
1094  * NOTE: call this before dump_register_summary()
1095  */
1096 static void
do_query(const char * primtype,uint32_t num_indices)1097 do_query(const char *primtype, uint32_t num_indices)
1098 {
1099 	if (script_draw)
1100 		script_draw(primtype, num_indices);
1101 
1102 	if (options->query_compare) {
1103 		do_query_compare(primtype, num_indices);
1104 		return;
1105 	}
1106 
1107 	if (skip_query())
1108 		return;
1109 
1110 	__do_query(primtype, num_indices);
1111 }
1112 
1113 static void
cp_im_loadi(uint32_t * dwords,uint32_t sizedwords,int level)1114 cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
1115 {
1116 	uint32_t start = dwords[1] >> 16;
1117 	uint32_t size  = dwords[1] & 0xffff;
1118 	const char *type = NULL, *ext = NULL;
1119 	gl_shader_stage disasm_type;
1120 
1121 	switch (dwords[0]) {
1122 	case 0:
1123 		type = "vertex";
1124 		ext = "vo";
1125 		disasm_type = MESA_SHADER_VERTEX;
1126 		break;
1127 	case 1:
1128 		type = "fragment";
1129 		ext = "fo";
1130 		disasm_type = MESA_SHADER_FRAGMENT;
1131 		break;
1132 	default:
1133 		type = "<unknown>";
1134 		disasm_type = 0;
1135 		break;
1136 	}
1137 
1138 	printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start, size);
1139 	disasm_a2xx(dwords + 2, sizedwords - 2, level+2, disasm_type);
1140 
1141 	/* dump raw shader: */
1142 	if (ext)
1143 		dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
1144 }
1145 
/* Decode a wide register write: the low 16 bits of dwords[0] give the first
 * register offset and each following dword is the value for the next
 * consecutive register.  Every value is printed and recorded via reg_set().
 */
static void
cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
	uint32_t reg = dwords[0] & 0xffff;

	/* unsigned index: sizedwords is uint32_t, so avoid a signed/unsigned
	 * comparison (and signed overflow for very large packets)
	 */
	for (uint32_t i = 1; i < sizedwords; i++, reg++) {
		dump_register(reg, dwords[i], level+1);
		reg_set(reg, dwords[i]);
	}
}
1157 
/* Kind of state carried by a load-state packet; selects how cp_load_state()
 * decodes and dumps the payload.  Values start at 1, so a zero-initialized
 * lookup-table entry matches none of them and ends up in the default
 * (plain hexdump) case of cp_load_state().
 */
enum state_t {
	TEX_SAMP = 1,
	TEX_CONST,
	TEX_MIPADDR,  /* a3xx only */
	SHADER_PROG,
	SHADER_CONST,

	// image/ssbo state:
	SSBO_0,
	SSBO_1,
	SSBO_2,

	UBO,

	// unknown things, just hexdumped (1, 2 or 4 dwords per unit):
	UNKNOWN_DWORDS,
	UNKNOWN_2DWORDS,
	UNKNOWN_4DWORDS,
};
1177 
/* State block ids as decoded from the 3-bit field in dwords[0] by
 * a3xx_get_state_type(), where they index the first dimension of its lookup
 * table.
 */
enum adreno_state_block {
	SB_VERT_TEX = 0,
	SB_VERT_MIPADDR = 1,
	SB_FRAG_TEX = 2,
	SB_FRAG_MIPADDR = 3,
	SB_VERT_SHADER = 4,
	SB_GEOM_SHADER = 5,
	SB_FRAG_SHADER = 6,
	SB_COMPUTE_SHADER = 7,
};
1188 
1189 /* TODO there is probably a clever way to let rnndec parse things so
1190  * we don't have to care about packet format differences across gens
1191  */
1192 
1193 static void
a3xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1194 a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state,
1195 		    enum state_src_t *src)
1196 {
1197 	unsigned state_block_id = (dwords[0] >> 19) & 0x7;
1198 	unsigned state_type = dwords[1] & 0x3;
1199 	static const struct {
1200 		gl_shader_stage stage;
1201 		enum state_t state;
1202 	} lookup[0xf][0x3] = {
1203 		[SB_VERT_TEX][0]    = { MESA_SHADER_VERTEX,    TEX_SAMP },
1204 		[SB_VERT_TEX][1]    = { MESA_SHADER_VERTEX,    TEX_CONST },
1205 		[SB_FRAG_TEX][0]    = { MESA_SHADER_FRAGMENT,  TEX_SAMP },
1206 		[SB_FRAG_TEX][1]    = { MESA_SHADER_FRAGMENT,  TEX_CONST },
1207 		[SB_VERT_SHADER][0] = { MESA_SHADER_VERTEX,    SHADER_PROG },
1208 		[SB_VERT_SHADER][1] = { MESA_SHADER_VERTEX,    SHADER_CONST },
1209 		[SB_FRAG_SHADER][0] = { MESA_SHADER_FRAGMENT,  SHADER_PROG },
1210 		[SB_FRAG_SHADER][1] = { MESA_SHADER_FRAGMENT,  SHADER_CONST },
1211 	};
1212 
1213 	*stage = lookup[state_block_id][state_type].stage;
1214 	*state = lookup[state_block_id][state_type].state;
1215 	unsigned state_src = (dwords[0] >> 16) & 0x7;
1216 	if (state_src == 0 /* SS_DIRECT */)
1217 		*src = STATE_SRC_DIRECT;
1218 	else
1219 		*src = STATE_SRC_INDIRECT;
1220 }
1221 
1222 static enum state_src_t
_get_state_src(unsigned dword0)1223 _get_state_src(unsigned dword0)
1224 {
1225 	switch ((dword0 >> 16) & 0x3) {
1226 	case 0: /* SS4_DIRECT / SS6_DIRECT */
1227 		return STATE_SRC_DIRECT;
1228 	case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1229 		return STATE_SRC_INDIRECT;
1230 	case 1: /* SS6_BINDLESS */
1231 		return STATE_SRC_BINDLESS;
1232 	default:
1233 		return STATE_SRC_DIRECT;
1234 	}
1235 }
1236 
/* Shared a4xx+ lookup from (state block id, state type) to shader stage and
 * kind of state.
 *
 * state_block_id: row index; callers in this file mask it to 4 bits
 * state_type:     column index; callers in this file mask it to 2 bits
 * stage:          out, shader stage (0 for the blocks shared by all stages)
 * state:          out, kind of state; 0 (no enum state_t value) for
 *                 combinations that are not listed below
 *
 * The table is sized 0x10 x 0x4 so every masked index pair is in range; no
 * range checking is done here.
 */
static void
_get_state_type(unsigned state_block_id, unsigned state_type,
		gl_shader_stage *stage, enum state_t *state)
{
	static const struct {
		gl_shader_stage stage;
		enum state_t  state;
	} lookup[0x10][0x4] = {
		// SB4_VS_TEX:
		[0x0][0] = { MESA_SHADER_VERTEX,    TEX_SAMP },
		[0x0][1] = { MESA_SHADER_VERTEX,    TEX_CONST },
		[0x0][2] = { MESA_SHADER_VERTEX,    UBO },
		// SB4_HS_TEX:
		[0x1][0] = { MESA_SHADER_TESS_CTRL, TEX_SAMP },
		[0x1][1] = { MESA_SHADER_TESS_CTRL, TEX_CONST },
		[0x1][2] = { MESA_SHADER_TESS_CTRL, UBO },
		// SB4_DS_TEX:
		[0x2][0] = { MESA_SHADER_TESS_EVAL, TEX_SAMP },
		[0x2][1] = { MESA_SHADER_TESS_EVAL, TEX_CONST },
		[0x2][2] = { MESA_SHADER_TESS_EVAL, UBO },
		// SB4_GS_TEX:
		[0x3][0] = { MESA_SHADER_GEOMETRY,  TEX_SAMP },
		[0x3][1] = { MESA_SHADER_GEOMETRY,  TEX_CONST },
		[0x3][2] = { MESA_SHADER_GEOMETRY,  UBO },
		// SB4_FS_TEX:
		[0x4][0] = { MESA_SHADER_FRAGMENT,  TEX_SAMP },
		[0x4][1] = { MESA_SHADER_FRAGMENT,  TEX_CONST },
		[0x4][2] = { MESA_SHADER_FRAGMENT,  UBO },
		// SB4_CS_TEX:
		[0x5][0] = { MESA_SHADER_COMPUTE,   TEX_SAMP },
		[0x5][1] = { MESA_SHADER_COMPUTE,   TEX_CONST },
		[0x5][2] = { MESA_SHADER_COMPUTE,   UBO },
		// SB4_VS_SHADER:
		[0x8][0] = { MESA_SHADER_VERTEX,    SHADER_PROG },
		[0x8][1] = { MESA_SHADER_VERTEX,    SHADER_CONST },
		[0x8][2] = { MESA_SHADER_VERTEX,    UBO },
		// SB4_HS_SHADER
		[0x9][0] = { MESA_SHADER_TESS_CTRL, SHADER_PROG },
		[0x9][1] = { MESA_SHADER_TESS_CTRL, SHADER_CONST },
		[0x9][2] = { MESA_SHADER_TESS_CTRL, UBO },
		// SB4_DS_SHADER
		[0xa][0] = { MESA_SHADER_TESS_EVAL, SHADER_PROG },
		[0xa][1] = { MESA_SHADER_TESS_EVAL, SHADER_CONST },
		[0xa][2] = { MESA_SHADER_TESS_EVAL, UBO },
		// SB4_GS_SHADER
		[0xb][0] = { MESA_SHADER_GEOMETRY,  SHADER_PROG },
		[0xb][1] = { MESA_SHADER_GEOMETRY,  SHADER_CONST },
		[0xb][2] = { MESA_SHADER_GEOMETRY,  UBO },
		// SB4_FS_SHADER:
		[0xc][0] = { MESA_SHADER_FRAGMENT,  SHADER_PROG },
		[0xc][1] = { MESA_SHADER_FRAGMENT,  SHADER_CONST },
		[0xc][2] = { MESA_SHADER_FRAGMENT,  UBO },
		// SB4_CS_SHADER:
		[0xd][0] = { MESA_SHADER_COMPUTE,   SHADER_PROG },
		[0xd][1] = { MESA_SHADER_COMPUTE,   SHADER_CONST },
		[0xd][2] = { MESA_SHADER_COMPUTE,   UBO },
		[0xd][3] = { MESA_SHADER_COMPUTE,   SSBO_0 },      /* a6xx location */
		// SB4_SSBO (shared across all stages)
		[0xe][0] = { 0, SSBO_0 },                     /* a5xx (and a4xx?) location */
		[0xe][1] = { 0, SSBO_1 },
		[0xe][2] = { 0, SSBO_2 },
		// SB4_CS_SSBO
		[0xf][0] = { MESA_SHADER_COMPUTE, SSBO_0 },
		[0xf][1] = { MESA_SHADER_COMPUTE, SSBO_1 },
		[0xf][2] = { MESA_SHADER_COMPUTE, SSBO_2 },
		// unknown things
		/* This looks like combined UBO state for 3d stages (a5xx and
		 * before??).  I think a6xx has UBO state per shader stage.
		 */
		[0x6][2] = { 0, UBO },
		[0x7][1] = { 0, UNKNOWN_2DWORDS },
	};

	*stage = lookup[state_block_id][state_type].stage;
	*state = lookup[state_block_id][state_type].state;
}
1313 
1314 static void
a4xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1315 a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state,
1316 		    enum state_src_t *src)
1317 {
1318 	unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1319 	unsigned state_type = dwords[1] & 0x3;
1320 	_get_state_type(state_block_id, state_type, stage, state);
1321 	*src = _get_state_src(dwords[0]);
1322 }
1323 
1324 static void
a6xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1325 a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state,
1326 		    enum state_src_t *src)
1327 {
1328 	unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1329 	unsigned state_type = (dwords[0] >> 14) & 0x3;
1330 	_get_state_type(state_block_id, state_type, stage, state);
1331 	*src = _get_state_src(dwords[0]);
1332 }
1333 
1334 static void
dump_tex_samp(uint32_t * texsamp,enum state_src_t src,int num_unit,int level)1335 dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
1336 {
1337 	for (int i = 0; i < num_unit; i++) {
1338 		/* work-around to reduce noise for opencl blob which always
1339 		 * writes the max # regardless of # of textures used
1340 		 */
1341 		if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
1342 			break;
1343 
1344 		if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1345 			dump_domain(texsamp, 2, level+2, "A3XX_TEX_SAMP");
1346 			dump_hex(texsamp, 2, level+1);
1347 			texsamp += 2;
1348 		} else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1349 			dump_domain(texsamp, 2, level+2, "A4XX_TEX_SAMP");
1350 			dump_hex(texsamp, 2, level+1);
1351 			texsamp += 2;
1352 		} else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1353 			dump_domain(texsamp, 4, level+2, "A5XX_TEX_SAMP");
1354 			dump_hex(texsamp, 4, level+1);
1355 			texsamp += 4;
1356 		} else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1357 			dump_domain(texsamp, 4, level+2, "A6XX_TEX_SAMP");
1358 			dump_hex(texsamp, 4, level+1);
1359 			texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
1360 		}
1361 	}
1362 }
1363 
1364 static void
dump_tex_const(uint32_t * texconst,int num_unit,int level)1365 dump_tex_const(uint32_t *texconst, int num_unit, int level)
1366 {
1367 	for (int i = 0; i < num_unit; i++) {
1368 		/* work-around to reduce noise for opencl blob which always
1369 		 * writes the max # regardless of # of textures used
1370 		 */
1371 		if ((num_unit == 16) &&
1372 			(texconst[0] == 0) && (texconst[1] == 0) &&
1373 			(texconst[2] == 0) && (texconst[3] == 0))
1374 			break;
1375 
1376 		if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1377 			dump_domain(texconst, 4, level+2, "A3XX_TEX_CONST");
1378 			dump_hex(texconst, 4, level+1);
1379 			texconst += 4;
1380 		} else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1381 			dump_domain(texconst, 8, level+2, "A4XX_TEX_CONST");
1382 			if (options->dump_textures) {
1383 				uint32_t addr = texconst[4] & ~0x1f;
1384 				dump_gpuaddr(addr, level-2);
1385 			}
1386 			dump_hex(texconst, 8, level+1);
1387 			texconst += 8;
1388 		} else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1389 			dump_domain(texconst, 12, level+2, "A5XX_TEX_CONST");
1390 			if (options->dump_textures) {
1391 				uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1392 				dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
1393 			}
1394 			dump_hex(texconst, 12, level+1);
1395 			texconst += 12;
1396 		} else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1397 			dump_domain(texconst, 16, level+2, "A6XX_TEX_CONST");
1398 			if (options->dump_textures) {
1399 				uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1400 				dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
1401 			}
1402 			dump_hex(texconst, 16, level+1);
1403 			texconst += 16;
1404 		}
1405 	}
1406 }
1407 
1408 static void
cp_load_state(uint32_t * dwords,uint32_t sizedwords,int level)1409 cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
1410 {
1411 	gl_shader_stage stage;
1412 	enum state_t state;
1413 	enum state_src_t src;
1414 	uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
1415 	uint64_t ext_src_addr;
1416 	void *contents;
1417 	int i;
1418 
1419 	if (quiet(2) && !options->script)
1420 		return;
1421 
1422 	if (options->gpu_id >= 600)
1423 		a6xx_get_state_type(dwords, &stage, &state, &src);
1424 	else if (options->gpu_id >= 400)
1425 		a4xx_get_state_type(dwords, &stage, &state, &src);
1426 	else
1427 		a3xx_get_state_type(dwords, &stage, &state, &src);
1428 
1429 	switch (src) {
1430 	case STATE_SRC_DIRECT: ext_src_addr = 0; break;
1431 	case STATE_SRC_INDIRECT:
1432 		if (is_64b()) {
1433 			ext_src_addr = dwords[1] & 0xfffffffc;
1434 			ext_src_addr |= ((uint64_t)dwords[2]) << 32;
1435 		} else {
1436 			ext_src_addr = dwords[1] & 0xfffffffc;
1437 		}
1438 
1439 		break;
1440 	case STATE_SRC_BINDLESS: {
1441 		const unsigned base_reg =
1442 			stage == MESA_SHADER_COMPUTE ?
1443 				regbase("HLSQ_CS_BINDLESS_BASE[0].ADDR") :
1444 				regbase("HLSQ_BINDLESS_BASE[0].ADDR");
1445 
1446 		if (is_64b()) {
1447 			const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
1448 			ext_src_addr = reg_val(reg) & 0xfffffffc;
1449 			ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
1450 		} else {
1451 			const unsigned reg = base_reg + (dwords[1] >> 28);
1452 			ext_src_addr = reg_val(reg) & 0xfffffffc;
1453 		}
1454 
1455 		ext_src_addr += 4 * (dwords[1] & 0xffffff);
1456 		break;
1457 	}
1458 	}
1459 
1460 	if (ext_src_addr)
1461 		contents = hostptr(ext_src_addr);
1462 	else
1463 		contents = is_64b() ? dwords + 3 : dwords + 2;
1464 
1465 	if (!contents)
1466 		return;
1467 
1468 	switch (state) {
1469 	case SHADER_PROG: {
1470 		const char *ext = NULL;
1471 
1472 		if (quiet(2))
1473 			return;
1474 
1475 		if (options->gpu_id >= 400)
1476 			num_unit *= 16;
1477 		else if (options->gpu_id >= 300)
1478 			num_unit *= 4;
1479 
1480 		/* shaders:
1481 		 *
1482 		 * note: num_unit seems to be # of instruction groups, where
1483 		 * an instruction group has 4 64bit instructions.
1484 		 */
1485 		if (stage == MESA_SHADER_VERTEX) {
1486 			ext = "vo3";
1487 		} else if (stage == MESA_SHADER_GEOMETRY) {
1488 			ext = "go3";
1489 		} else if (stage == MESA_SHADER_COMPUTE) {
1490 			ext = "co3";
1491 		} else if (stage == MESA_SHADER_FRAGMENT){
1492 			ext = "fo3";
1493 		}
1494 
1495 		if (contents)
1496 			try_disasm_a3xx(contents, num_unit * 2, level+2, stdout, options->gpu_id);
1497 
1498 		/* dump raw shader: */
1499 		if (ext)
1500 			dump_shader(ext, contents, num_unit * 2 * 4);
1501 
1502 		break;
1503 	}
1504 	case SHADER_CONST: {
1505 		if (quiet(2))
1506 			return;
1507 
1508 		/* uniforms/consts:
1509 		 *
1510 		 * note: num_unit seems to be # of pairs of dwords??
1511 		 */
1512 
1513 		if (options->gpu_id >= 400)
1514 			num_unit *= 2;
1515 
1516 		dump_float(contents, num_unit*2, level+1);
1517 		dump_hex(contents, num_unit*2, level+1);
1518 
1519 		break;
1520 	}
1521 	case TEX_MIPADDR: {
1522 		uint32_t *addrs = contents;
1523 
1524 		if (quiet(2))
1525 			return;
1526 
1527 		/* mipmap consts block just appears to be array of num_unit gpu addr's: */
1528 		for (i = 0; i < num_unit; i++) {
1529 			void *ptr = hostptr(addrs[i]);
1530 			printf("%s%2d: %08x\n", levels[level+1], i, addrs[i]);
1531 			if (options->dump_textures) {
1532 				printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
1533 				dump_hex(ptr, hostlen(addrs[i])/4, level+1);
1534 			}
1535 		}
1536 		break;
1537 	}
1538 	case TEX_SAMP: {
1539 		dump_tex_samp(contents, src, num_unit, level);
1540 		break;
1541 	}
1542 	case TEX_CONST: {
1543 		dump_tex_const(contents, num_unit, level);
1544 		break;
1545 	}
1546 	case SSBO_0: {
1547 		uint32_t *ssboconst = (uint32_t *)contents;
1548 
1549 		for (i = 0; i < num_unit; i++) {
1550 			int sz = 4;
1551 			if (400 <= options->gpu_id && options->gpu_id < 500) {
1552 				dump_domain(ssboconst, 4, level+2, "A4XX_SSBO_0");
1553 			} else if (500 <= options->gpu_id && options->gpu_id < 600) {
1554 				dump_domain(ssboconst, 4, level+2, "A5XX_SSBO_0");
1555 			} else if (600 <= options->gpu_id && options->gpu_id < 700) {
1556 				sz = 16;
1557 				dump_domain(ssboconst, 16, level+2, "A6XX_IBO");
1558 			}
1559 			dump_hex(ssboconst, sz, level+1);
1560 			ssboconst += sz;
1561 		}
1562 		break;
1563 	}
1564 	case SSBO_1: {
1565 		uint32_t *ssboconst = (uint32_t *)contents;
1566 
1567 		for (i = 0; i < num_unit; i++) {
1568 			if (400 <= options->gpu_id && options->gpu_id < 500)
1569 				dump_domain(ssboconst, 2, level+2, "A4XX_SSBO_1");
1570 			else if (500 <= options->gpu_id && options->gpu_id < 600)
1571 				dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_1");
1572 			dump_hex(ssboconst, 2, level+1);
1573 			ssboconst += 2;
1574 		}
1575 		break;
1576 	}
1577 	case SSBO_2: {
1578 		uint32_t *ssboconst = (uint32_t *)contents;
1579 
1580 		for (i = 0; i < num_unit; i++) {
1581 			/* TODO a4xx and a5xx might be same: */
1582 			if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1583 				dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_2");
1584 				dump_hex(ssboconst, 2, level+1);
1585 			}
1586 			if (options->dump_textures) {
1587 				uint64_t addr = (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
1588 				dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
1589 			}
1590 			ssboconst += 2;
1591 		}
1592 		break;
1593 	}
1594 	case UBO: {
1595 		uint32_t *uboconst = (uint32_t *)contents;
1596 
1597 		for (i = 0; i < num_unit; i++) {
1598 			// TODO probably similar on a4xx..
1599 			if (500 <= options->gpu_id && options->gpu_id < 600)
1600 				dump_domain(uboconst, 2, level+2, "A5XX_UBO");
1601 			else if (600 <= options->gpu_id && options->gpu_id < 700)
1602 				dump_domain(uboconst, 2, level+2, "A6XX_UBO");
1603 			dump_hex(uboconst, 2, level+1);
1604 			uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
1605 		}
1606 		break;
1607 	}
1608 	case UNKNOWN_DWORDS: {
1609 		if (quiet(2))
1610 			return;
1611 		dump_hex(contents, num_unit, level+1);
1612 		break;
1613 	}
1614 	case UNKNOWN_2DWORDS: {
1615 		if (quiet(2))
1616 			return;
1617 		dump_hex(contents, num_unit * 2, level+1);
1618 		break;
1619 	}
1620 	case UNKNOWN_4DWORDS: {
1621 		if (quiet(2))
1622 			return;
1623 		dump_hex(contents, num_unit * 4, level+1);
1624 		break;
1625 	}
1626 	default:
1627 		if (quiet(2))
1628 			return;
1629 		/* hmm.. */
1630 		dump_hex(contents, num_unit, level+1);
1631 		break;
1632 	}
1633 }
1634 
1635 static void
cp_set_bin(uint32_t * dwords,uint32_t sizedwords,int level)1636 cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1637 {
1638 	bin_x1 = dwords[1] & 0xffff;
1639 	bin_y1 = dwords[1] >> 16;
1640 	bin_x2 = dwords[2] & 0xffff;
1641 	bin_y2 = dwords[2] >> 16;
1642 }
1643 
1644 static void
dump_a2xx_tex_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1645 dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level)
1646 {
1647 	uint32_t w, h, p;
1648 	uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1649 	uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1650 	static const char *filter[] = {
1651 			"point", "bilinear", "bicubic",
1652 	};
1653 	static const char *clamp[] = {
1654 			"wrap", "mirror", "clamp-last-texel",
1655 	};
1656 	static const char swiznames[] = "xyzw01??";
1657 
1658 	/* see sys2gmem_tex_const[] in adreno_a2xxx.c */
1659 
1660 	/* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1661 	 * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1662 	 */
1663 	p = (dwords[0] >> 22) << 5;
1664 	clamp_x = (dwords[0] >> 10) & 0x3;
1665 	clamp_y = (dwords[0] >> 13) & 0x3;
1666 	clamp_z = (dwords[0] >> 16) & 0x3;
1667 
1668 	/* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1669 	 * NearestClamp=1:OGL Mode
1670 	 */
1671 	parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1672 
1673 	/* Width, Height, EndianSwap=0:None */
1674 	w = (dwords[2] & 0x1fff) + 1;
1675 	h = ((dwords[2] >> 13) & 0x1fff) + 1;
1676 
1677 	/* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1678 	 * Mip=2:BaseMap
1679 	 */
1680 	mag = (dwords[3] >> 19) & 0x3;
1681 	min = (dwords[3] >> 21) & 0x3;
1682 	swiz = (dwords[3] >> 1) & 0xfff;
1683 
1684 	/* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1685 	 * Dim3d=0
1686 	 */
1687 	// XXX
1688 
1689 	/* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,
1690 	 * Dim=1:2d, MipPacking=0
1691 	 */
1692 	parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1693 
1694 	printf("%sset texture const %04x\n", levels[level], val);
1695 	printf("%sclamp x/y/z: %s/%s/%s\n", levels[level+1],
1696 			clamp[clamp_x], clamp[clamp_y], clamp[clamp_z]);
1697 	printf("%sfilter min/mag: %s/%s\n", levels[level+1], filter[min], filter[mag]);
1698 	printf("%sswizzle: %c%c%c%c\n", levels[level+1],
1699 			swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1700 			swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1701 	printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1702 			levels[level+1], gpuaddr, flags, w, h, p,
1703 			rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1704 	printf("%smipaddr=%08x (flags=%03x)\n", levels[level+1],
1705 			mip_gpuaddr, mip_flags);
1706 }
1707 
1708 static void
dump_a2xx_shader_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1709 dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level)
1710 {
1711 	int i;
1712 	printf("%sset shader const %04x\n", levels[level], val);
1713 	for (i = 0; i < sizedwords; ) {
1714 		uint32_t gpuaddr, flags;
1715 		parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1716 		void *addr = hostptr(gpuaddr);
1717 		if (addr) {
1718 			const char * fmt =
1719 				rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1720 			uint32_t size = dwords[i++];
1721 			printf("%saddr=%08x, size=%d, format=%s\n", levels[level+1],
1722 					gpuaddr, size, fmt);
1723 			// TODO maybe dump these as bytes instead of dwords?
1724 			size = (size + 3) / 4; // for now convert to dwords
1725 			dump_hex(addr, min(size, 64), level + 1);
1726 			if (size > min(size, 64))
1727 				printf("%s\t\t...\n", levels[level+1]);
1728 			dump_float(addr, min(size, 64), level + 1);
1729 			if (size > min(size, 64))
1730 				printf("%s\t\t...\n", levels[level+1]);
1731 		}
1732 	}
1733 }
1734 
1735 static void
cp_set_const(uint32_t * dwords,uint32_t sizedwords,int level)1736 cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1737 {
1738 	uint32_t val = dwords[0] & 0xffff;
1739 	switch((dwords[0] >> 16) & 0xf) {
1740 	case 0x0:
1741 		dump_float((float *)(dwords+1), sizedwords-1, level+1);
1742 		break;
1743 	case 0x1:
1744 		/* need to figure out how const space is partitioned between
1745 		 * attributes, textures, etc..
1746 		 */
1747 		if (val < 0x78) {
1748 			dump_a2xx_tex_const(dwords+1, sizedwords-1, val, level);
1749 		} else {
1750 			dump_a2xx_shader_const(dwords+1, sizedwords-1, val, level);
1751 		}
1752 		break;
1753 	case 0x2:
1754 		printf("%sset bool const %04x\n", levels[level], val);
1755 		break;
1756 	case 0x3:
1757 		printf("%sset loop const %04x\n", levels[level], val);
1758 		break;
1759 	case 0x4:
1760 		val += 0x2000;
1761 		if (dwords[0] & 0x80000000) {
1762 			uint32_t srcreg = dwords[1];
1763 			uint32_t dstval = dwords[2];
1764 
1765 			/* TODO: not sure what happens w/ payload != 2.. */
1766 			assert(sizedwords == 3);
1767 			assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1768 
1769 			/* note: rnn_regname uses a static buf so we can't do
1770 			 * two regname() calls for one printf..
1771 			 */
1772 			printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1773 			printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1774 
1775 			dstval += type0_reg_vals[srcreg];
1776 
1777 			dump_registers(val, &dstval, 1, level+1);
1778 		} else {
1779 			dump_registers(val, dwords+1, sizedwords-1, level+1);
1780 		}
1781 		break;
1782 	}
1783 }
1784 
1785 static void dump_register_summary(int level);
1786 
1787 static void
cp_event_write(uint32_t * dwords,uint32_t sizedwords,int level)1788 cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1789 {
1790 	const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);
1791 	printl(2, "%sevent %s\n", levels[level], name);
1792 
1793 	if (name && (options->gpu_id > 500)) {
1794 		char eventname[64];
1795 		snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1796 		if (!strcmp(name, "BLIT")) {
1797 			do_query(eventname, 0);
1798 			print_mode(level);
1799 			dump_register_summary(level);
1800 		}
1801 	}
1802 }
1803 
1804 static void
dump_register_summary(int level)1805 dump_register_summary(int level)
1806 {
1807 	uint32_t i;
1808 	bool saved_summary = summary;
1809 	summary = false;
1810 
1811 	in_summary = true;
1812 
1813 	/* dump current state of registers: */
1814 	printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
1815 	for (i = 0; i < regcnt(); i++) {
1816 		uint32_t regbase = i;
1817 		uint32_t lastval = reg_val(regbase);
1818 		/* skip registers that haven't been updated since last draw/blit: */
1819 		if (!(options->allregs || reg_rewritten(regbase)))
1820 			continue;
1821 		if (!reg_written(regbase))
1822 			continue;
1823 		if (lastval != lastvals[regbase]) {
1824 			printl(2, "!");
1825 			lastvals[regbase] = lastval;
1826 		} else {
1827 			printl(2, " ");
1828 		}
1829 		if (reg_rewritten(regbase)) {
1830 			printl(2, "+");
1831 		} else {
1832 			printl(2, " ");
1833 		}
1834 		printl(2, "\t%08x", lastval);
1835 		if (!quiet(2)) {
1836 			dump_register(regbase, lastval, level);
1837 		}
1838 	}
1839 
1840 	clear_rewritten();
1841 
1842 	in_summary = false;
1843 
1844 	draw_count++;
1845 	summary = saved_summary;
1846 }
1847 
1848 static uint32_t
draw_indx_common(uint32_t * dwords,int level)1849 draw_indx_common(uint32_t *dwords, int level)
1850 {
1851 	uint32_t prim_type     = dwords[1] & 0x1f;
1852 	uint32_t source_select = (dwords[1] >> 6) & 0x3;
1853 	uint32_t num_indices   = dwords[2];
1854 	const char *primtype;
1855 
1856 	primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
1857 
1858 	do_query(primtype, num_indices);
1859 
1860 	printl(2, "%sdraw:          %d\n", levels[level], draws[ib]);
1861 	printl(2, "%sprim_type:     %s (%d)\n", levels[level], primtype,
1862 			prim_type);
1863 	printl(2, "%ssource_select: %s (%d)\n", levels[level],
1864 			rnn_enumname(rnn, "pc_di_src_sel", source_select),
1865 			source_select);
1866 	printl(2, "%snum_indices:   %d\n", levels[level], num_indices);
1867 
1868 	vertices += num_indices;
1869 
1870 	draws[ib]++;
1871 
1872 	return num_indices;
1873 }
1874 
/* Index size encoding used by the draw-index packets; cp_draw_indx() and
 * cp_draw_indx_2() assemble it from bit 11 and bit 13 of dwords[1].
 *
 * Note that IGN, 16_BIT and INVALID all share the value 0, so only 16_BIT,
 * 32_BIT and 8_BIT are distinguishable.
 */
enum pc_di_index_size {
	INDEX_SIZE_IGN = 0,
	INDEX_SIZE_16_BIT = 0,
	INDEX_SIZE_32_BIT = 1,
	INDEX_SIZE_8_BIT = 2,
	INDEX_SIZE_INVALID = 0,
};
1882 
1883 static void
cp_draw_indx(uint32_t * dwords,uint32_t sizedwords,int level)1884 cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
1885 {
1886 	uint32_t num_indices = draw_indx_common(dwords, level);
1887 
1888 	assert(!is_64b());
1889 
1890 	/* if we have an index buffer, dump that: */
1891 	if (sizedwords == 5) {
1892 		void *ptr = hostptr(dwords[3]);
1893 		printl(2, "%sgpuaddr:       %08x\n", levels[level], dwords[3]);
1894 		printl(2, "%sidx_size:      %d\n", levels[level], dwords[4]);
1895 		if (ptr) {
1896 			enum pc_di_index_size size =
1897 					((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1898 			if (!quiet(2)) {
1899 				int i;
1900 				printf("%sidxs:         ", levels[level]);
1901 				if (size == INDEX_SIZE_8_BIT) {
1902 					uint8_t *idx = ptr;
1903 					for (i = 0; i < dwords[4]; i++)
1904 						printf(" %u", idx[i]);
1905 				} else if (size == INDEX_SIZE_16_BIT) {
1906 					uint16_t *idx = ptr;
1907 					for (i = 0; i < dwords[4]/2; i++)
1908 						printf(" %u", idx[i]);
1909 				} else if (size == INDEX_SIZE_32_BIT) {
1910 					uint32_t *idx = ptr;
1911 					for (i = 0; i < dwords[4]/4; i++)
1912 						printf(" %u", idx[i]);
1913 				}
1914 				printf("\n");
1915 				dump_hex(ptr, dwords[4]/4, level+1);
1916 			}
1917 		}
1918 	}
1919 
1920 	/* don't bother dumping registers for the dummy draw_indx's.. */
1921 	if (num_indices > 0)
1922 		dump_register_summary(level);
1923 
1924 	needs_wfi = true;
1925 }
1926 
1927 static void
cp_draw_indx_2(uint32_t * dwords,uint32_t sizedwords,int level)1928 cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
1929 {
1930 	uint32_t num_indices = draw_indx_common(dwords, level);
1931 	enum pc_di_index_size size =
1932 			((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1933 	void *ptr = &dwords[3];
1934 	int sz = 0;
1935 
1936 	assert(!is_64b());
1937 
1938 	/* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
1939 	if (!quiet(2)) {
1940 		int i;
1941 		printf("%sidxs:         ", levels[level]);
1942 		if (size == INDEX_SIZE_8_BIT) {
1943 			uint8_t *idx = ptr;
1944 			for (i = 0; i < num_indices; i++)
1945 				printf(" %u", idx[i]);
1946 			sz = num_indices;
1947 		} else if (size == INDEX_SIZE_16_BIT) {
1948 			uint16_t *idx = ptr;
1949 			for (i = 0; i < num_indices; i++)
1950 				printf(" %u", idx[i]);
1951 			sz = num_indices * 2;
1952 		} else if (size == INDEX_SIZE_32_BIT) {
1953 			uint32_t *idx = ptr;
1954 			for (i = 0; i < num_indices; i++)
1955 				printf(" %u", idx[i]);
1956 			sz = num_indices * 4;
1957 		}
1958 		printf("\n");
1959 		dump_hex(ptr, sz / 4, level+1);
1960 	}
1961 
1962 	/* don't bother dumping registers for the dummy draw_indx's.. */
1963 	if (num_indices > 0)
1964 		dump_register_summary(level);
1965 }
1966 
1967 static void
cp_draw_indx_offset(uint32_t * dwords,uint32_t sizedwords,int level)1968 cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
1969 {
1970 	uint32_t num_indices = dwords[2];
1971 	uint32_t prim_type = dwords[0] & 0x1f;
1972 
1973 	do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
1974 	print_mode(level);
1975 
1976 	/* don't bother dumping registers for the dummy draw_indx's.. */
1977 	if (num_indices > 0)
1978 		dump_register_summary(level);
1979 }
1980 
1981 static void
cp_draw_indx_indirect(uint32_t * dwords,uint32_t sizedwords,int level)1982 cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
1983 {
1984 	uint32_t prim_type = dwords[0] & 0x1f;
1985 	uint64_t addr;
1986 
1987 	do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
1988 	print_mode(level);
1989 
1990 	if (is_64b())
1991 		addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
1992 	else
1993 		addr = dwords[1];
1994 	dump_gpuaddr_size(addr, level, 0x10, 2);
1995 
1996 	if (is_64b())
1997 		addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
1998 	else
1999 		addr = dwords[3];
2000 	dump_gpuaddr_size(addr, level, 0x10, 2);
2001 
2002 	dump_register_summary(level);
2003 }
2004 
2005 static void
cp_draw_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2006 cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2007 {
2008 	uint32_t prim_type = dwords[0] & 0x1f;
2009 	uint64_t addr;
2010 
2011 	do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2012 	print_mode(level);
2013 
2014 	addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2015 	dump_gpuaddr_size(addr, level, 0x10, 2);
2016 
2017 	dump_register_summary(level);
2018 }
2019 
2020 static void
cp_draw_indirect_multi(uint32_t * dwords,uint32_t sizedwords,int level)2021 cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
2022 {
2023 	uint32_t prim_type = dwords[0] & 0x1f;
2024 	uint32_t count = dwords[2];
2025 
2026 	do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2027 	print_mode(level);
2028 
2029 	struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
2030 	uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
2031 	uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
2032 	uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
2033 
2034 	if (count_dword) {
2035 		uint64_t count_addr = ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
2036 		uint32_t *buf = hostptr(count_addr);
2037 
2038 		/* Don't print more draws than this if we don't know the indirect
2039 		 * count. It's possible the user will give ~0 or some other large
2040 		 * value, expecting the GPU to fill in the draw count, and we don't
2041 		 * want to print a gazillion draws in that case:
2042 		 */
2043 		const uint32_t max_draw_count = 0x100;
2044 
2045 		/* Assume the indirect count is garbage if it's larger than this
2046 		 * (quite large) value or 0. Hopefully this catches most cases.
2047 		 */
2048 		const uint32_t max_indirect_draw_count = 0x10000;
2049 
2050 		if (buf) {
2051 			printf("%sindirect count: %u\n", levels[level], *buf);
2052 			if (*buf == 0 || *buf > max_indirect_draw_count) {
2053 				/* garbage value */
2054 				count = min(count, max_draw_count);
2055 			} else {
2056 				/* not garbage */
2057 				count = min(count, *buf);
2058 			}
2059 		} else {
2060 			count = min(count, max_draw_count);
2061 		}
2062 	}
2063 
2064 	if (addr_dword && stride_dword) {
2065 		uint64_t addr = ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
2066 		uint32_t stride = dwords[stride_dword];
2067 
2068 		for (unsigned i = 0; i < count; i++, addr += stride) {
2069 			printf("%sdraw %d:\n", levels[level], i);
2070 			dump_gpuaddr_size(addr, level, 0x10, 2);
2071 		}
2072 	}
2073 
2074 	dump_register_summary(level);
2075 }
2076 
/*
 * Handler for CP_RUN_OPENCL: records a "COMPUTE" query with a count of 1
 * and prints the register summary.  The packet payload is not examined.
 */
static void
cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
{
	(void)dwords;
	(void)sizedwords;

	do_query("COMPUTE", 1);
	dump_register_summary(level);
}
2083 
2084 static void
cp_nop(uint32_t * dwords,uint32_t sizedwords,int level)2085 cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2086 {
2087 	const char *buf = (void *)dwords;
2088 	int i;
2089 
2090 	if (quiet(3))
2091 		return;
2092 
2093 	// blob doesn't use CP_NOP for string_marker but it does
2094 	// use it for things that end up looking like, but aren't
2095 	// ascii chars:
2096 	if (!options->decode_markers)
2097 		return;
2098 
2099 	for (i = 0; i < 4 * sizedwords; i++) {
2100 		if (buf[i] == '\0')
2101 			break;
2102 		if (isascii(buf[i]))
2103 			printf("%c", buf[i]);
2104 	}
2105 	printf("\n");
2106 }
2107 
2108 static void
cp_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2109 cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2110 {
2111 	/* traverse indirect buffers */
2112 	uint64_t ibaddr;
2113 	uint32_t ibsize;
2114 	uint32_t *ptr = NULL;
2115 
2116 	if (is_64b()) {
2117 		/* a5xx+.. high 32b of gpu addr, then size: */
2118 		ibaddr = dwords[0];
2119 		ibaddr |= ((uint64_t)dwords[1]) << 32;
2120 		ibsize = dwords[2];
2121 	} else {
2122 		ibaddr = dwords[0];
2123 		ibsize = dwords[1];
2124 	}
2125 
2126 	if (!quiet(3)) {
2127 		if (is_64b()) {
2128 			printf("%sibaddr:%016"PRIx64"\n", levels[level], ibaddr);
2129 		} else {
2130 			printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2131 		}
2132 		printf("%sibsize:%08x\n", levels[level], ibsize);
2133 	}
2134 
2135 	if (options->once && has_dumped(ibaddr, enable_mask))
2136 		return;
2137 
2138 	/* 'query-compare' mode implies 'once' mode, although we need only to
2139 	 * process the cmdstream for *any* enable_mask mode, since we are
2140 	 * comparing binning vs draw reg values at the same time, ie. it is
2141 	 * not useful to process the same draw in both binning and draw pass.
2142 	 */
2143 	if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2144 		return;
2145 
2146 	/* map gpuaddr back to hostptr: */
2147 	ptr = hostptr(ibaddr);
2148 
2149 	if (ptr) {
2150 		/* If the GPU hung within the target IB, the trigger point will be
2151 		 * just after the current CP_INDIRECT_BUFFER.  Because the IB is
2152 		 * executed but never returns.  Account for this by checking if
2153 		 * the IB returned:
2154 		 */
2155 		highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2]));
2156 
2157 		ib++;
2158 		ibs[ib].base = ibaddr;
2159 		ibs[ib].size = ibsize;
2160 
2161 		dump_commands(ptr, ibsize, level);
2162 		ib--;
2163 	} else {
2164 		fprintf(stderr, "could not find: %016"PRIx64" (%d)\n", ibaddr, ibsize);
2165 	}
2166 }
2167 
2168 static void
cp_wfi(uint32_t * dwords,uint32_t sizedwords,int level)2169 cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
2170 {
2171 	needs_wfi = false;
2172 }
2173 
2174 static void
cp_mem_write(uint32_t * dwords,uint32_t sizedwords,int level)2175 cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2176 {
2177 	if (quiet(2))
2178 		return;
2179 
2180 	if (is_64b()) {
2181 		uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2182 		printf("%sgpuaddr:%016"PRIx64"\n", levels[level], gpuaddr);
2183 		dump_hex(&dwords[2], sizedwords-2, level+1);
2184 
2185 		if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2186 			dump_commands(&dwords[2], sizedwords-2, level+1);
2187 	} else {
2188 		uint32_t gpuaddr = dwords[0];
2189 		printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2190 		dump_float((float *)&dwords[1], sizedwords-1, level+1);
2191 	}
2192 }
2193 
2194 static void
cp_rmw(uint32_t * dwords,uint32_t sizedwords,int level)2195 cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2196 {
2197 	uint32_t val = dwords[0] & 0xffff;
2198 	uint32_t and = dwords[1];
2199 	uint32_t or  = dwords[2];
2200 	printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1), and, or);
2201 	if (needs_wfi)
2202 		printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1), and, or);
2203 	reg_set(val, (reg_val(val) & and) | or);
2204 }
2205 
2206 static void
cp_reg_mem(uint32_t * dwords,uint32_t sizedwords,int level)2207 cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2208 {
2209 	uint32_t val = dwords[0] & 0xffff;
2210 	printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2211 
2212 	if (quiet(2))
2213 		return;
2214 
2215 	uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2216 	printf("%sgpuaddr:%016"PRIx64"\n", levels[level], gpuaddr);
2217 	void *ptr = hostptr(gpuaddr);
2218 	if (ptr) {
2219 		uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2220 		dump_hex(ptr, cnt, level + 1);
2221 	}
2222 }
2223 
/*
 * One draw-state group as recorded by cp_set_draw_state().  All fields are
 * unpacked from a single entry of a CP_SET_DRAW_STATE packet.
 */
struct draw_state {
	uint16_t enable_mask;   /* bits 20..23 of the entry's first dword */
	uint16_t flags;         /* bits 16..19 of the entry's first dword (FLAG_* below) */
	uint32_t count;         /* bits 0..15: length in dwords of the group's cmdstream */
	uint64_t addr;          /* GPU address of the group's cmdstream */
};

/* Indexed by group id, which is a 5-bit field (hence 32 slots). */
struct draw_state state[32];

/* Bits of the per-entry flags field decoded in cp_set_draw_state(): */
#define FLAG_DIRTY              0x1
#define FLAG_DISABLE            0x2   /* clear just this entry's group */
#define FLAG_DISABLE_ALL_GROUPS 0x4   /* clear every group */
#define FLAG_LOAD_IMMED         0x8   /* decode the group right away, then clear it */

/* Raw dwords[0] of the most recent CP_SET_MODE packet (see cp_set_mode()). */
static int draw_mode;
2239 
2240 static void
disable_group(unsigned group_id)2241 disable_group(unsigned group_id)
2242 {
2243 	struct draw_state *ds = &state[group_id];
2244 	memset(ds, 0, sizeof(*ds));
2245 }
2246 
2247 static void
disable_all_groups(void)2248 disable_all_groups(void)
2249 {
2250 	for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2251 		disable_group(i);
2252 }
2253 
2254 static void
load_group(unsigned group_id,int level)2255 load_group(unsigned group_id, int level)
2256 {
2257 	struct draw_state *ds = &state[group_id];
2258 
2259 	if (!ds->count)
2260 		return;
2261 
2262 	printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2263 	printl(2, "%scount: %d\n", levels[level], ds->count);
2264 	printl(2, "%saddr: %016llx\n", levels[level], ds->addr);
2265 	printl(2, "%sflags: %x\n", levels[level], ds->flags);
2266 
2267 	if (options->gpu_id >= 600) {
2268 		printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2269 
2270 		if (!(ds->enable_mask & enable_mask)) {
2271 			printl(2, "%s\tskipped!\n\n", levels[level]);
2272 			return;
2273 		}
2274 	}
2275 
2276 	void *ptr = hostptr(ds->addr);
2277 	if (ptr) {
2278 		if (!quiet(2))
2279 			dump_hex(ptr, ds->count, level+1);
2280 
2281 		ib++;
2282 		dump_commands(ptr, ds->count, level+1);
2283 		ib--;
2284 	}
2285 }
2286 
2287 static void
load_all_groups(int level)2288 load_all_groups(int level)
2289 {
2290 	/* sanity check, we should never recursively hit recursion here, and if
2291 	 * we do bad things happen:
2292 	 */
2293 	static bool loading_groups = false;
2294 	if (loading_groups) {
2295 		printf("ERROR: nothing in draw state should trigger recursively loading groups!\n");
2296 		return;
2297 	}
2298 	loading_groups = true;
2299 	for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2300 		load_group(i, level);
2301 	loading_groups = false;
2302 
2303 	/* in 'query-compare' mode, defer disabling all groups until we have a
2304 	 * chance to process the query:
2305 	 */
2306 	if (!options->query_compare)
2307 		disable_all_groups();
2308 }
2309 
2310 static void
cp_set_draw_state(uint32_t * dwords,uint32_t sizedwords,int level)2311 cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2312 {
2313 	uint32_t i;
2314 
2315 	for (i = 0; i < sizedwords; ) {
2316 		struct draw_state *ds;
2317 		uint32_t count = dwords[i] & 0xffff;
2318 		uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2319 		uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2320 		uint32_t flags = (dwords[i] >> 16) & 0xf;
2321 		uint64_t addr;
2322 
2323 		if (is_64b()) {
2324 			addr = dwords[i + 1];
2325 			addr |= ((uint64_t)dwords[i + 2]) << 32;
2326 			i += 3;
2327 		} else {
2328 			addr = dwords[i + 1];
2329 			i += 2;
2330 		}
2331 
2332 		if (flags & FLAG_DISABLE_ALL_GROUPS) {
2333 			disable_all_groups();
2334 			continue;
2335 		}
2336 
2337 		if (flags & FLAG_DISABLE) {
2338 			disable_group(group_id);
2339 			continue;
2340 		}
2341 
2342 		assert(group_id < ARRAY_SIZE(state));
2343 		disable_group(group_id);
2344 
2345 		ds = &state[group_id];
2346 
2347 		ds->enable_mask = enable_mask;
2348 		ds->flags = flags;
2349 		ds->count = count;
2350 		ds->addr  = addr;
2351 
2352 		if (flags & FLAG_LOAD_IMMED) {
2353 			load_group(group_id, level);
2354 			disable_group(group_id);
2355 		}
2356 	}
2357 }
2358 
2359 static void
cp_set_mode(uint32_t * dwords,uint32_t sizedwords,int level)2360 cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2361 {
2362 	draw_mode = dwords[0];
2363 }
2364 
/*
 * Handler for CP_EXEC_CS (execute compute shader): records a "compute"
 * query with a count of 0 and prints the register summary.  The packet
 * payload is not examined.
 */
static void
cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
{
	(void)dwords;
	(void)sizedwords;

	do_query("compute", 0);
	dump_register_summary(level);
}
2372 
2373 static void
cp_exec_cs_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2374 cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2375 {
2376 	uint64_t addr;
2377 
2378 	if (is_64b()) {
2379 		addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2380 	} else {
2381 		addr = dwords[1];
2382 	}
2383 
2384 	printl(3, "%saddr: %016llx\n", levels[level], addr);
2385 	dump_gpuaddr_size(addr, level, 0x10, 2);
2386 
2387 	do_query("compute", 0);
2388 	dump_register_summary(level);
2389 }
2390 
2391 static void
cp_set_marker(uint32_t * dwords,uint32_t sizedwords,int level)2392 cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2393 {
2394 	render_mode = rnn_enumname(rnn, "a6xx_render_mode", dwords[0] & 0xf);
2395 
2396 	if (!strcmp(render_mode, "RM6_BINNING")) {
2397 		enable_mask = MODE_BINNING;
2398 	} else if (!strcmp(render_mode, "RM6_GMEM")) {
2399 		enable_mask = MODE_GMEM;
2400 	} else if (!strcmp(render_mode, "RM6_BYPASS")) {
2401 		enable_mask = MODE_BYPASS;
2402 	}
2403 }
2404 
2405 static void
cp_set_render_mode(uint32_t * dwords,uint32_t sizedwords,int level)2406 cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2407 {
2408 	uint64_t addr;
2409 	uint32_t *ptr, len;
2410 
2411 	assert(is_64b());
2412 
2413 	/* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2414 	 * not sure if this can come in different sizes.
2415 	 *
2416 	 * First ptr doesn't seem to be cmdstream, second one does.
2417 	 *
2418 	 * Comment from downstream kernel:
2419 	 *
2420 	 * SRM -- set render mode (ex binning, direct render etc)
2421 	 * SRM is set by UMD usually at start of IB to tell CP the type of
2422 	 * preemption.
2423 	 * KMD needs to set SRM to NULL to indicate CP that rendering is
2424 	 * done by IB.
2425 	 * ------------------------------------------------------------------
2426 	 *
2427 	 * Seems to always be one of these two:
2428 	 * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000 00000000
2429 	 * 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d 001c2000 00000000
2430 	 *
2431 	 */
2432 
2433 	assert(options->gpu_id >= 500);
2434 
2435 	render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2436 
2437 	if (sizedwords == 1)
2438 		return;
2439 
2440 	addr = dwords[1];
2441 	addr |= ((uint64_t)dwords[2]) << 32;
2442 
2443 	mode = dwords[3];
2444 
2445 	dump_gpuaddr(addr, level+1);
2446 
2447 	if (sizedwords == 5)
2448 		return;
2449 
2450 	assert(sizedwords == 8);
2451 
2452 	len = dwords[5];
2453 	addr = dwords[6];
2454 	addr |= ((uint64_t)dwords[7]) << 32;
2455 
2456 	printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
2457 	printl(3, "%slen:  0x%x\n", levels[level], len);
2458 
2459 	ptr = hostptr(addr);
2460 
2461 	if (ptr) {
2462 		if (!quiet(2)) {
2463 			ib++;
2464 			dump_commands(ptr, len, level+1);
2465 			ib--;
2466 			dump_hex(ptr, len, level+1);
2467 		}
2468 	}
2469 }
2470 
2471 static void
cp_compute_checkpoint(uint32_t * dwords,uint32_t sizedwords,int level)2472 cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2473 {
2474 	uint64_t addr;
2475 	uint32_t *ptr, len;
2476 
2477 	assert(is_64b());
2478 	assert(options->gpu_id >= 500);
2479 
2480 	assert(sizedwords == 8);
2481 
2482 	addr = dwords[5];
2483 	addr |= ((uint64_t)dwords[6]) << 32;
2484 	len = dwords[7];
2485 
2486 	printl(3, "%saddr: 0x%016"PRIx64"\n", levels[level], addr);
2487 	printl(3, "%slen:  0x%x\n", levels[level], len);
2488 
2489 	ptr = hostptr(addr);
2490 
2491 	if (ptr) {
2492 		if (!quiet(2)) {
2493 			ib++;
2494 			dump_commands(ptr, len, level+1);
2495 			ib--;
2496 			dump_hex(ptr, len, level+1);
2497 		}
2498 	}
2499 }
2500 
2501 static void
cp_blit(uint32_t * dwords,uint32_t sizedwords,int level)2502 cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2503 {
2504 	do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2505 	print_mode(level);
2506 	dump_register_summary(level);
2507 }
2508 
2509 static void
cp_context_reg_bunch(uint32_t * dwords,uint32_t sizedwords,int level)2510 cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2511 {
2512 	int i;
2513 
2514 	/* NOTE: seems to write same reg multiple times.. not sure if different parts of
2515 	 * these are triggered by the FLUSH_SO_n events?? (if that is what they actually
2516 	 * are?)
2517 	 */
2518 	bool saved_summary = summary;
2519 	summary = false;
2520 
2521 	for (i = 0; i < sizedwords; i += 2) {
2522 		dump_register(dwords[i+0], dwords[i+1], level+1);
2523 		reg_set(dwords[i+0], dwords[i+1]);
2524 	}
2525 
2526 	summary = saved_summary;
2527 }
2528 
/*
 * Handler for CP_REG_WRITE: a single register write.  The register offset
 * is the low 16 bits of dwords[1] and the value is dwords[2]; the write
 * is printed and then applied to the tracked register state.
 */
static void
cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
	const uint32_t offset = dwords[1] & 0xffff;
	const uint32_t value = dwords[2];

	(void)sizedwords;

	dump_register(offset, value, level + 1);
	reg_set(offset, value);
}
2537 
/*
 * Handler for CP_SET_CTXSWITCH_IB.
 *
 * Prints the 64-bit GPU address from dwords[0] (low) / dwords[1] (high)
 * and, if it maps to host memory, decodes the buffer as cmdstream using
 * the low 16 bits of dwords[2] as its length in dwords.
 */
static void
cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
{
	const uint64_t ib_addr = (((uint64_t)dwords[1]) << 32) | dwords[0];
	const uint32_t ib_dwords = dwords[2] & 0xffff;

	(void)sizedwords;

	printf("addr=%"PRIx64"\n", ib_addr);

	void *host = hostptr(ib_addr);
	if (!host)
		return;

	dump_commands(host, ib_dwords, level + 1);
}
2553 
2554 static void
cp_skip_ib2_enable_global(uint32_t * dwords,uint32_t sizedwords,int level)2555 cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
2556 {
2557 	skip_ib2_enable_global = dwords[0];
2558 }
2559 
2560 static void
cp_skip_ib2_enable_local(uint32_t * dwords,uint32_t sizedwords,int level)2561 cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
2562 {
2563 	skip_ib2_enable_local = dwords[0];
2564 }
2565 
/*
 * Builds one table entry: the packet name is "CP_" followed by x, fxn is
 * its handler, and any further arguments initialise the options member.
 */
#define CP(x, fxn, ...)   { "CP_" #x, fxn, ##__VA_ARGS__ }

/*
 * Packet-name -> handler table, searched by name in get_type3_op() and
 * used by dump_commands() for both type-3 and type-7 packets.
 */
static const struct type3_op {
	const char *name;
	/* called with the payload (header skipped) and its size in dwords */
	void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
	struct {
		/* when set, dump_commands() calls load_all_groups() before
		 * decoding the packet
		 */
		bool load_all_groups;
	} options;
} type3_op[] = {
		CP(NOP, cp_nop),
		CP(INDIRECT_BUFFER, cp_indirect),
		CP(INDIRECT_BUFFER_PFD, cp_indirect),
		CP(WAIT_FOR_IDLE, cp_wfi),
		CP(REG_RMW, cp_rmw),
		CP(REG_TO_MEM, cp_reg_mem),
		CP(MEM_TO_REG, cp_reg_mem),  /* same layout as CP_REG_TO_MEM */
		CP(MEM_WRITE, cp_mem_write),
		CP(EVENT_WRITE, cp_event_write),
		CP(RUN_OPENCL, cp_run_cl),
		CP(DRAW_INDX, cp_draw_indx, {.load_all_groups=true}),
		CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups=true}),
		CP(SET_CONSTANT, cp_set_const),
		CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
		CP(WIDE_REG_WRITE, cp_wide_reg_write),

		/* for a3xx */
		CP(LOAD_STATE, cp_load_state),
		CP(SET_BIN, cp_set_bin),

		/* for a4xx */
		CP(LOAD_STATE4, cp_load_state),
		CP(SET_DRAW_STATE, cp_set_draw_state),
		CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups=true}),
		CP(EXEC_CS, cp_exec_cs, {.load_all_groups=true}),
		CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups=true}),

		/* for a5xx */
		CP(SET_RENDER_MODE, cp_set_render_mode),
		CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
		CP(BLIT, cp_blit),
		CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
		CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups=true}),
		CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups=true}),
		CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups=true}),
		CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
		CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),

		/* for a6xx */
		CP(LOAD_STATE6_GEOM, cp_load_state),
		CP(LOAD_STATE6_FRAG, cp_load_state),
		CP(LOAD_STATE6, cp_load_state),
		CP(SET_MODE, cp_set_mode),
		CP(SET_MARKER, cp_set_marker),
		CP(REG_WRITE, cp_reg_write),

		CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
};
2622 
/*
 * Do-nothing packet handler, used by get_type3_op() for opcodes that have
 * no entry in the type3_op table.
 */
static void
noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
{
	(void)dwords;
	(void)sizedwords;
	(void)level;
}
2627 
2628 static const struct type3_op *
get_type3_op(unsigned opc)2629 get_type3_op(unsigned opc)
2630 {
2631 	static const struct type3_op dummy_op = {
2632 		.fxn = noop_fxn,
2633 	};
2634 	const char *name = pktname(opc);
2635 
2636 	if (!name)
2637 		return &dummy_op;
2638 
2639 	for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
2640 		if (!strcmp(name, type3_op[i].name))
2641 			return &type3_op[i];
2642 
2643 	return &dummy_op;
2644 }
2645 
2646 void
dump_commands(uint32_t * dwords,uint32_t sizedwords,int level)2647 dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
2648 {
2649 	int dwords_left = sizedwords;
2650 	uint32_t count = 0; /* dword count including packet header */
2651 	uint32_t val;
2652 
2653 //	assert(dwords);
2654 	if (!dwords) {
2655 		printf("NULL cmd buffer!\n");
2656 		return;
2657 	}
2658 
2659 	draws[ib] = 0;
2660 
2661 	while (dwords_left > 0) {
2662 
2663 		current_draw_count = draw_count;
2664 
2665 		/* hack, this looks like a -1 underflow, in some versions
2666 		 * when it tries to write zero registers via pkt0
2667 		 */
2668 //		if ((dwords[0] >> 16) == 0xffff)
2669 //			goto skip;
2670 
2671 		if (pkt_is_type0(dwords[0])) {
2672 			printl(3, "t0");
2673 			count = type0_pkt_size(dwords[0]) + 1;
2674 			val = type0_pkt_offset(dwords[0]);
2675 			assert(val < regcnt());
2676 			printl(3, "%swrite %s%s (%04x)\n", levels[level+1], regname(val, 1),
2677 					(dwords[0] & 0x8000) ? " (same register)" : "", val);
2678 			dump_registers(val, dwords+1, count-1, level+2);
2679 			if (!quiet(3))
2680 				dump_hex(dwords, count, level+1);
2681 		} else if (pkt_is_type4(dwords[0])) {
2682 			/* basically the same(ish) as type0 prior to a5xx */
2683 			printl(3, "t4");
2684 			count = type4_pkt_size(dwords[0]) + 1;
2685 			val = type4_pkt_offset(dwords[0]);
2686 			assert(val < regcnt());
2687 			printl(3, "%swrite %s (%04x)\n", levels[level+1], regname(val, 1), val);
2688 			dump_registers(val, dwords+1, count-1, level+2);
2689 			if (!quiet(3))
2690 				dump_hex(dwords, count, level+1);
2691 #if 0
2692 		} else if (pkt_is_type1(dwords[0])) {
2693 			printl(3, "t1");
2694 			count = 3;
2695 			val = dwords[0] & 0xfff;
2696 			printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2697 			dump_registers(val, dwords+1, 1, level+2);
2698 			val = (dwords[0] >> 12) & 0xfff;
2699 			printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2700 			dump_registers(val, dwords+2, 1, level+2);
2701 			if (!quiet(3))
2702 				dump_hex(dwords, count, level+1);
2703 		} else if (pkt_is_type2(dwords[0])) {
2704 			printl(3, "t2");
2705 			printf("%sNOP\n", levels[level+1]);
2706 			count = 1;
2707 			if (!quiet(3))
2708 				dump_hex(dwords, count, level+1);
2709 #endif
2710 		} else if (pkt_is_type3(dwords[0])) {
2711 			count = type3_pkt_size(dwords[0]) + 1;
2712 			val = cp_type3_opcode(dwords[0]);
2713 			const struct type3_op *op = get_type3_op(val);
2714 			if (op->options.load_all_groups)
2715 				load_all_groups(level+1);
2716 			printl(3, "t3");
2717 			const char *name = pktname(val);
2718 			if (!quiet(2)) {
2719 				printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level],
2720 						rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
2721 						val, count, (dwords[0] & 0x1) ? " (predicated)" : "");
2722 			}
2723 			if (name)
2724 				dump_domain(dwords+1, count-1, level+2, name);
2725 			op->fxn(dwords+1, count-1, level+1);
2726 			if (!quiet(2))
2727 				dump_hex(dwords, count, level+1);
2728 		} else if (pkt_is_type7(dwords[0])) {
2729 			count = type7_pkt_size(dwords[0]) + 1;
2730 			val = cp_type7_opcode(dwords[0]);
2731 			const struct type3_op *op = get_type3_op(val);
2732 			if (op->options.load_all_groups)
2733 				load_all_groups(level+1);
2734 			printl(3, "t7");
2735 			const char *name = pktname(val);
2736 			if (!quiet(2)) {
2737 				printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
2738 						rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
2739 						val, count);
2740 			}
2741 			if (name) {
2742 				/* special hack for two packets that decode the same way
2743 				 * on a6xx:
2744 				 */
2745 				if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
2746 						!strcmp(name, "CP_LOAD_STATE6_GEOM"))
2747 					name = "CP_LOAD_STATE6";
2748 				dump_domain(dwords+1, count-1, level+2, name);
2749 			}
2750 			op->fxn(dwords+1, count-1, level+1);
2751 			if (!quiet(2))
2752 				dump_hex(dwords, count, level+1);
2753 		} else if (pkt_is_type2(dwords[0])) {
2754 			printl(3, "t2");
2755 			printl(3, "%snop\n", levels[level+1]);
2756 		} else {
2757 			/* for 5xx+ we can do a passable job of looking for start of next valid packet: */
2758 			if (options->gpu_id >= 500) {
2759 				while (dwords_left > 0) {
2760 					if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0]))
2761 						break;
2762 					printf("bad type! %08x\n", dwords[0]);
2763 					dwords++;
2764 					dwords_left--;
2765 				}
2766 			} else {
2767 				printf("bad type! %08x\n", dwords[0]);
2768 				return;
2769 			}
2770 		}
2771 
2772 		dwords += count;
2773 		dwords_left -= count;
2774 
2775 	}
2776 
2777 	if (dwords_left < 0)
2778 		printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
2779 }
2780