1 /*
2  * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #include <arpa/inet.h>
26 #include <assert.h>
27 #include <ctype.h>
28 #include <err.h>
29 #include <fcntl.h>
30 #include <ftw.h>
31 #include <locale.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <stdint.h>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <sys/mman.h>
39 #include <time.h>
40 #include <unistd.h>
41 #include <curses.h>
42 #include <libconfig.h>
43 #include <inttypes.h>
44 #include <xf86drm.h>
45 
46 #include "drm/freedreno_drmif.h"
47 #include "drm/freedreno_ringbuffer.h"
48 
49 #include "freedreno_perfcntr.h"
50 
/* upper bound on the number of counters tracked per counter group: */
#define MAX_CNTR_PER_GROUP 24

/* NOTE the first counter group should always be CP, since we
 * unconditionally use a CP counter to measure the gpu freq.
 */

/* Runtime state kept for one group of performance counters. */
struct counter_group {
	/* description of the group (its counters and countables): */
	const struct fd_perfcntr_group *group;

	/* per-counter bookkeeping: */
	struct {
		const struct fd_perfcntr_counter *counter;
		/* value most recently handed to select_counter(): */
		uint16_t select_val;
		/* where the hi and lo halves of the counter value are read: */
		volatile uint32_t *val_hi;
		volatile uint32_t *val_lo;
	} counter[MAX_CNTR_PER_GROUP];

	/* time (in usec) at which each counter was last sampled: */
	uint32_t stime[MAX_CNTR_PER_GROUP];

	/* counter value at the last sample.  Only the low 32b are tracked
	 * for now, which means we do not have to care that the hi and lo
	 * regs cannot be sampled at the same time:
	 */
	uint32_t last[MAX_CNTR_PER_GROUP];

	/* current value, ie. the increase of the counter over the last
	 * sampling period divided by the length of that period:
	 */
	float current[MAX_CNTR_PER_GROUP];

	/* name of the countable currently selected per counter (for UI): */
	const char *label[MAX_CNTR_PER_GROUP];
};
81 
/* Global state describing the single GPU device being monitored. */
static struct {
	/* path of the GPU's node under /proc/device-tree: */
	char *dtnode;
	/* #address-cells and #size-cells read from the parent node: */
	int address_cells;
	int size_cells;
	/* location and size of the register region, and its mapping: */
	uint64_t base;
	uint32_t size;
	void *io;
	/* chip id as reported by the drm driver: */
	uint32_t chipid;
	/* GPU frequency range: */
	uint32_t min_freq;
	uint32_t max_freq;
	/* per-generation table of counters: */
	unsigned ngroups;
	struct counter_group *groups;
	/* drm device (for writing select regs via ring): */
	struct fd_device *dev;
	struct fd_pipe *pipe;
	struct fd_submit *submit;
	struct fd_ringbuffer *ring;
} dev;
100 
101 static void config_save(void);
102 static void config_restore(void);
103 static void restore_counter_groups(void);
104 
105 /*
106  * helpers
107  */
108 
/* granularity (in bytes) by which readfile() grows its buffer: */
#define CHUNKSIZE 32

/*
 * Read the whole contents of the file at 'path' into a freshly allocated
 * buffer.
 *
 * On success returns the buffer, which the caller must release with
 * free(), and stores the number of bytes read in *sz.  One extra NUL byte
 * (not counted in *sz) is always appended, so string-like contents can be
 * handed to the str*() functions without running off the end.
 *
 * Returns NULL and sets *sz to 0 if the file cannot be opened or read, or
 * if memory cannot be allocated.
 */
static void *
readfile(const char *path, int *sz)
{
	char *buf = NULL;
	int fd, n = 0;

	*sz = 0;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return NULL;

	while (1) {
		/* room for the next chunk plus the trailing NUL; keep the old
		 * pointer so it can still be freed if realloc() fails:
		 */
		char *tmp = realloc(buf, n + CHUNKSIZE + 1);
		int ret;

		if (!tmp)
			goto fail;
		buf = tmp;

		ret = read(fd, buf + n, CHUNKSIZE);
		if (ret < 0)
			goto fail;

		/* a short read does not mean end-of-file, only a read that
		 * returns zero bytes does:
		 */
		if (ret == 0)
			break;

		n += ret;
	}

	buf[n] = '\0';
	*sz = n;
	close(fd);
	return buf;

fail:
	free(buf);
	close(fd);
	return NULL;
}
141 
142 static uint32_t
gettime_us(void)143 gettime_us(void)
144 {
145 	struct timespec ts;
146 	clock_gettime(CLOCK_MONOTONIC, &ts);
147 	return (ts.tv_sec * 1000000) + (ts.tv_nsec / 1000);
148 }
149 
/*
 * Distance from 'a' to 'b' for two samples of a free-running 32b counter
 * (or timestamp), where 'a' was taken before 'b'.
 *
 * Unsigned subtraction is performed modulo 2^32, which gives the correct
 * distance even if the counter rolled over between the two samples.
 */
static uint32_t
delta(uint32_t a, uint32_t b)
{
	return b - a;
}
159 
160 /*
161  * code to find stuff in /proc/device-tree:
162  *
163  * NOTE: if we sampled the counters from the cmdstream, we could avoid needing
164  * /dev/mem and /proc/device-tree crawling.  OTOH when the GPU is heavily loaded
165  * we would be competing with whatever else is using the GPU.
166  */
167 
168 static void *
readdt(const char * node)169 readdt(const char *node)
170 {
171 	char *path;
172 	void *buf;
173 	int sz;
174 
175 	(void) asprintf(&path, "%s/%s", dev.dtnode, node);
176 	buf = readfile(path, &sz);
177 	free(path);
178 
179 	return buf;
180 }
181 
182 static int
find_freqs_fn(const char * fpath,const struct stat * sb,int typeflag,struct FTW * ftwbuf)183 find_freqs_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
184 {
185 	const char *fname = fpath + ftwbuf->base;
186 	int sz;
187 
188 	if (strcmp(fname, "qcom,gpu-freq") == 0) {
189 		uint32_t *buf = readfile(fpath, &sz);
190 		uint32_t freq = ntohl(buf[0]);
191 		free(buf);
192 		dev.max_freq = MAX2(dev.max_freq, freq);
193 		dev.min_freq = MIN2(dev.min_freq, freq);
194 	}
195 
196 	return 0;
197 }
198 
199 static void
find_freqs(void)200 find_freqs(void)
201 {
202 	char *path;
203 	int ret;
204 
205 	dev.min_freq = ~0;
206 	dev.max_freq = 0;
207 
208 	(void) asprintf(&path, "%s/%s", dev.dtnode, "qcom,gpu-pwrlevels");
209 
210 	ret = nftw(path, find_freqs_fn, 64, 0);
211 	if (ret < 0)
212 		err(1, "could not find power levels");
213 
214 	free(path);
215 }
216 
/* device-tree "compatible" strings that identify an adreno GPU node: */
static const char *const compatibles[] = {
		"qcom,adreno-3xx",
		"qcom,kgsl-3d0",
		"amd,imageon",
		"qcom,adreno",
};

/**
 * Check whether any entry of a device-tree "compatible" property is one
 * of the strings in compatibles[].
 *
 * compatstrs is a list of compatible strings separated by null, ie.
 *
 *       compatible = "qcom,adreno-630.2", "qcom,adreno";
 *
 * would result in "qcom,adreno-630.2\0qcom,adreno\0"
 *
 * sz is the total size of that list in bytes.  No byte beyond
 * compatstrs[sz - 1] is ever read, so a final entry that lacks its
 * terminating null is still handled safely (it is compared using the
 * bytes that are present).
 *
 * Returns true on the first match, false if nothing matches or the list
 * is NULL or empty.
 */
static bool match_compatible(char *compatstrs, int sz)
{
	const size_t count = sizeof(compatibles) / sizeof(compatibles[0]);

	if (!compatstrs)
		return false;

	while (sz > 0) {
		/* bounded search for the end of the current entry: */
		char *nul = memchr(compatstrs, '\0', (size_t)sz);
		size_t len = nul ? (size_t)(nul - compatstrs) : (size_t)sz;

		for (size_t i = 0; i < count; i++) {
			if (strlen(compatibles[i]) == len &&
					memcmp(compatstrs, compatibles[i], len) == 0) {
				return true;
			}
		}

		/* unterminated final entry, nothing left to look at: */
		if (!nul)
			break;

		compatstrs = nul + 1;
		sz -= (int)(len + 1);
	}
	return false;
}
247 
248 static int
find_device_fn(const char * fpath,const struct stat * sb,int typeflag,struct FTW * ftwbuf)249 find_device_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
250 {
251 	const char *fname = fpath + ftwbuf->base;
252 	int sz;
253 
254 	if (strcmp(fname, "compatible") == 0) {
255 		char *str = readfile(fpath, &sz);
256 		if (match_compatible(str, sz)) {
257 			int dlen = strlen(fpath) - strlen("/compatible");
258 			dev.dtnode = malloc(dlen + 1);
259 			memcpy(dev.dtnode, fpath, dlen);
260 			printf("found dt node: %s\n", dev.dtnode);
261 
262 			char buf[dlen + sizeof("/../#address-cells") + 1];
263 			int sz, *val;
264 
265 			sprintf(buf, "%s/../#address-cells", dev.dtnode);
266 			val = readfile(buf, &sz);
267 			dev.address_cells = ntohl(*val);
268 			free(val);
269 
270 			sprintf(buf, "%s/../#size-cells", dev.dtnode);
271 			val = readfile(buf, &sz);
272 			dev.size_cells = ntohl(*val);
273 			free(val);
274 
275 			printf("#address-cells=%d, #size-cells=%d\n",
276 					dev.address_cells, dev.size_cells);
277 		}
278 		free(str);
279 	}
280 	if (dev.dtnode) {
281 		/* we found it! */
282 		return 1;
283 	}
284 	return 0;
285 }
286 
287 static void
find_device(void)288 find_device(void)
289 {
290 	int ret, fd;
291 	uint32_t *buf, *b;
292 
293 	ret = nftw("/proc/device-tree/", find_device_fn, 64, 0);
294 	if (ret < 0)
295 		err(1, "could not find adreno gpu");
296 
297 	if (!dev.dtnode)
298 		errx(1, "could not find qcom,adreno-3xx node");
299 
300 	fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
301 	if (fd < 0)
302 		err(1, "could not open drm device");
303 
304 	dev.dev  = fd_device_new(fd);
305 	dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
306 
307 	uint64_t val;
308 	ret = fd_pipe_get_param(dev.pipe, FD_CHIP_ID, &val);
309 	if (ret) {
310 		err(1, "could not get gpu-id");
311 	}
312 	dev.chipid = val;
313 
314 #define CHIP_FMT "d%d%d.%d"
315 #define CHIP_ARGS(chipid) \
316 		((chipid) >> 24) & 0xff, \
317 		((chipid) >> 16) & 0xff, \
318 		((chipid) >> 8) & 0xff, \
319 		((chipid) >> 0) & 0xff
320 	printf("device: a%"CHIP_FMT"\n", CHIP_ARGS(dev.chipid));
321 
322 	b = buf = readdt("reg");
323 
324 	if (dev.address_cells == 2) {
325 		uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
326 		dev.base = (((uint64_t)u[0]) << 32) | u[1];
327 		buf += 2;
328 	} else {
329 		dev.base = ntohl(buf[0]);
330 		buf += 1;
331 	}
332 
333 	if (dev.size_cells == 2) {
334 		uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
335 		dev.size = (((uint64_t)u[0]) << 32) | u[1];
336 		buf += 2;
337 	} else {
338 		dev.size = ntohl(buf[0]);
339 		buf += 1;
340 	}
341 
342 	free(b);
343 
344 	printf("i/o region at %08"PRIx64" (size: %x)\n", dev.base, dev.size);
345 
346 	/* try MAX_FREQ first as that will work regardless of old dt
347 	 * dt bindings vs upstream bindings:
348 	 */
349 	ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
350 	if (ret) {
351 		printf("falling back to parsing DT bindings for freq\n");
352 		find_freqs();
353 	} else {
354 		dev.min_freq = 0;
355 		dev.max_freq = val;
356 	}
357 
358 	printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
359 
360 	fd = open("/dev/mem", O_RDWR | O_SYNC);
361 	if (fd < 0)
362 		err(1, "could not open /dev/mem");
363 
364 	dev.io = mmap(0, dev.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, dev.base);
365 	if (dev.io == MAP_FAILED) {
366 		close(fd);
367 		err(1, "could not map device");
368 	}
369 }
370 
371 /*
372  * perf-monitor
373  */
374 
375 static void
flush_ring(void)376 flush_ring(void)
377 {
378 	int ret;
379 
380 	if (!dev.submit)
381 		return;
382 
383 	ret = fd_submit_flush(dev.submit, -1, NULL, NULL);
384 	if (ret)
385 		errx(1, "submit failed: %d", ret);
386 	fd_ringbuffer_del(dev.ring);
387 	fd_submit_del(dev.submit);
388 
389 	dev.ring = NULL;
390 	dev.submit = NULL;
391 }
392 
393 static void
select_counter(struct counter_group * group,int ctr,int n)394 select_counter(struct counter_group *group, int ctr, int n)
395 {
396 	assert(n < group->group->num_countables);
397 	assert(ctr < group->group->num_counters);
398 
399 	group->label[ctr] = group->group->countables[n].name;
400 	group->counter[ctr].select_val = n;
401 
402 	if (!dev.submit) {
403 		dev.submit = fd_submit_new(dev.pipe);
404 		dev.ring = fd_submit_new_ringbuffer(dev.submit, 0x1000,
405 				FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
406 	}
407 
408 	/* bashing select register directly while gpu is active will end
409 	 * in tears.. so we need to write it via the ring:
410 	 *
411 	 * TODO it would help startup time, if gpu is loaded, to batch
412 	 * all the initial writes and do a single flush.. although that
413 	 * makes things more complicated for capturing inital sample value
414 	 */
415 	struct fd_ringbuffer *ring = dev.ring;
416 	switch (dev.chipid >> 24) {
417 	case 2:
418 	case 3:
419 	case 4:
420 		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
421 		OUT_RING(ring, 0x00000000);
422 
423 		if (group->group->counters[ctr].enable) {
424 			OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
425 			OUT_RING(ring, 0);
426 		}
427 
428 		if (group->group->counters[ctr].clear) {
429 			OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
430 			OUT_RING(ring, 1);
431 
432 			OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
433 			OUT_RING(ring, 0);
434 		}
435 
436 		OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
437 		OUT_RING(ring, n);
438 
439 		if (group->group->counters[ctr].enable) {
440 			OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
441 			OUT_RING(ring, 1);
442 		}
443 
444 		break;
445 	case 5:
446 	case 6:
447 		OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
448 
449 		if (group->group->counters[ctr].enable) {
450 			OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
451 			OUT_RING(ring, 0);
452 		}
453 
454 		if (group->group->counters[ctr].clear) {
455 			OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
456 			OUT_RING(ring, 1);
457 
458 			OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
459 			OUT_RING(ring, 0);
460 		}
461 
462 		OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
463 		OUT_RING(ring, n);
464 
465 		if (group->group->counters[ctr].enable) {
466 			OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
467 			OUT_RING(ring, 1);
468 		}
469 
470 		break;
471 	}
472 
473 	group->last[ctr] = *group->counter[ctr].val_lo;
474 	group->stime[ctr] = gettime_us();
475 }
476 
477 static void
resample_counter(struct counter_group * group,int ctr)478 resample_counter(struct counter_group *group, int ctr)
479 {
480 	uint32_t val = *group->counter[ctr].val_lo;
481 	uint32_t t = gettime_us();
482 	uint32_t dt = delta(group->stime[ctr], t);
483 	uint32_t dval = delta(group->last[ctr], val);
484 	group->current[ctr] = (float)dval * 1000000.0 / (float)dt;
485 	group->last[ctr] = val;
486 	group->stime[ctr] = t;
487 }
488 
489 #define REFRESH_MS 500
490 
491 /* sample all the counters: */
492 static void
resample(void)493 resample(void)
494 {
495 	static uint64_t last_time;
496 	uint64_t current_time = gettime_us();
497 
498 	if ((current_time - last_time) < (REFRESH_MS * 1000 / 2))
499 		return;
500 
501 	last_time = current_time;
502 
503 	for (unsigned i = 0; i < dev.ngroups; i++) {
504 		struct counter_group *group = &dev.groups[i];
505 		for (unsigned j = 0; j < group->group->num_counters; j++) {
506 			resample_counter(group, j);
507 		}
508 	}
509 }
510 
511 /*
512  * The UI
513  */
514 
/* curses color pair numbers used by the UI: */
#define COLOR_GROUP_HEADER 1
#define COLOR_FOOTER       2
#define COLOR_INVERSE      3

/* size of the main window, refreshed on every redraw(): */
static int w;
static int h;
/* width of the label column: */
static int ctr_width;
/* total number of rows (group headers plus visible counters): */
static int max_rows;
/* row currently selected in the main view: */
static int current_cntr = 1;
522 
523 static void
redraw_footer(WINDOW * win)524 redraw_footer(WINDOW *win)
525 {
526 	char *footer;
527 	int n;
528 
529 	n = asprintf(&footer, " fdperf: a%"CHIP_FMT" (%.2fMHz..%.2fMHz)",
530 			CHIP_ARGS(dev.chipid),
531 			((float)dev.min_freq) / 1000000.0,
532 			((float)dev.max_freq) / 1000000.0);
533 
534 	wmove(win, h - 1, 0);
535 	wattron(win, COLOR_PAIR(COLOR_FOOTER));
536 	waddstr(win, footer);
537 	whline(win, ' ', w - n);
538 	wattroff(win, COLOR_PAIR(COLOR_FOOTER));
539 
540 	free(footer);
541 }
542 
543 static void
redraw_group_header(WINDOW * win,int row,const char * name)544 redraw_group_header(WINDOW *win, int row, const char *name)
545 {
546 	wmove(win, row, 0);
547 	wattron(win, A_BOLD);
548 	wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
549 	waddstr(win, name);
550 	whline(win, ' ', w - strlen(name));
551 	wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
552 	wattroff(win, A_BOLD);
553 }
554 
555 static void
redraw_counter_label(WINDOW * win,int row,const char * name,bool selected)556 redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
557 {
558 	int n = strlen(name);
559 	assert(n <= ctr_width);
560 	wmove(win, row, 0);
561 	whline(win, ' ', ctr_width - n);
562 	wmove(win, row, ctr_width - n);
563 	if (selected)
564 		wattron(win, COLOR_PAIR(COLOR_INVERSE));
565 	waddstr(win, name);
566 	if (selected)
567 		wattroff(win, COLOR_PAIR(COLOR_INVERSE));
568 	waddstr(win, ": ");
569 }
570 
571 static void
redraw_counter_value_cycles(WINDOW * win,float val)572 redraw_counter_value_cycles(WINDOW *win, float val)
573 {
574 	char *str;
575 	int x = getcurx(win);
576 	int valwidth = w - x;
577 	int barwidth, n;
578 
579 	/* convert to fraction of max freq: */
580 	val = val / (float)dev.max_freq;
581 
582 	/* figure out percentage-bar width: */
583 	barwidth = (int)(val * valwidth);
584 
585 	/* sometimes things go over 100%.. idk why, could be
586 	 * things running faster than base clock, or counter
587 	 * summing up cycles in multiple cores?
588 	 */
589 	barwidth = MIN2(barwidth, valwidth - 1);
590 
591 	n = asprintf(&str, "%.2f%%", 100.0 * val);
592 	wattron(win, COLOR_PAIR(COLOR_INVERSE));
593 	waddnstr(win, str, barwidth);
594 	if (barwidth > n) {
595 		whline(win, ' ', barwidth - n);
596 		wmove(win, getcury(win), x + barwidth);
597 	}
598 	wattroff(win, COLOR_PAIR(COLOR_INVERSE));
599 	if (barwidth < n)
600 		waddstr(win, str + barwidth);
601 	whline(win, ' ', w - getcurx(win));
602 
603 	free(str);
604 }
605 
606 static void
redraw_counter_value_raw(WINDOW * win,float val)607 redraw_counter_value_raw(WINDOW *win, float val)
608 {
609 	char *str;
610 	(void) asprintf(&str, "%'.2f", val);
611 	waddstr(win, str);
612 	whline(win, ' ', w - getcurx(win));
613 	free(str);
614 }
615 
616 static void
redraw_counter(WINDOW * win,int row,struct counter_group * group,int ctr,bool selected)617 redraw_counter(WINDOW *win, int row, struct counter_group *group,
618 		int ctr, bool selected)
619 {
620 	redraw_counter_label(win, row, group->label[ctr], selected);
621 
622 	/* quick hack, if the label has "CYCLE" in the name, it is
623 	 * probably a cycle counter ;-)
624 	 * Perhaps add more info in rnndb schema to know how to
625 	 * treat individual counters (ie. which are cycles, and
626 	 * for those we want to present as a percentage do we
627 	 * need to scale the result.. ie. is it running at some
628 	 * multiple or divisor of core clk, etc)
629 	 *
630 	 * TODO it would be much more clever to get this from xml
631 	 * Also.. in some cases I think we want to know how many
632 	 * units the counter is counting for, ie. if a320 has 2x
633 	 * shader as a306 we might need to scale the result..
634 	 */
635 	if (strstr(group->label[ctr], "CYCLE") ||
636 			strstr(group->label[ctr], "BUSY") ||
637 			strstr(group->label[ctr], "IDLE"))
638 		redraw_counter_value_cycles(win, group->current[ctr]);
639 	else
640 		redraw_counter_value_raw(win, group->current[ctr]);
641 }
642 
643 static void
redraw(WINDOW * win)644 redraw(WINDOW *win)
645 {
646 	static int scroll = 0;
647 	int max, row = 0;
648 
649 	w = getmaxx(win);
650 	h = getmaxy(win);
651 
652 	max = h - 3;
653 
654 	if ((current_cntr - scroll) > (max - 1)) {
655 		scroll = current_cntr - (max - 1);
656 	} else if ((current_cntr - 1) < scroll) {
657 		scroll = current_cntr - 1;
658 	}
659 
660 	for (unsigned i = 0; i < dev.ngroups; i++) {
661 		struct counter_group *group = &dev.groups[i];
662 		unsigned j = 0;
663 
664 		/* NOTE skip CP the first CP counter */
665 		if (i == 0)
666 			j++;
667 
668 		if (j < group->group->num_counters) {
669 			if ((scroll <= row) && ((row - scroll) < max))
670 				redraw_group_header(win, row - scroll, group->group->name);
671 			row++;
672 		}
673 
674 		for (; j < group->group->num_counters; j++) {
675 			if ((scroll <= row) && ((row - scroll) < max))
676 				redraw_counter(win, row - scroll, group, j, row == current_cntr);
677 			row++;
678 		}
679 	}
680 
681 	/* convert back to physical (unscrolled) offset: */
682 	row = max;
683 
684 	redraw_group_header(win, row, "Status");
685 	row++;
686 
687 	/* Draw GPU freq row: */
688 	redraw_counter_label(win, row, "Freq (MHz)", false);
689 	redraw_counter_value_raw(win, dev.groups[0].current[0] / 1000000.0);
690 	row++;
691 
692 	redraw_footer(win);
693 
694 	refresh();
695 }
696 
697 static struct counter_group *
current_counter(int * ctr)698 current_counter(int *ctr)
699 {
700 	int n = 0;
701 
702 	for (unsigned i = 0; i < dev.ngroups; i++) {
703 		struct counter_group *group = &dev.groups[i];
704 		unsigned j = 0;
705 
706 		/* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
707 		if (i == 0)
708 			j++;
709 
710 		/* account for group header: */
711 		if (j < group->group->num_counters) {
712 			/* cannot select group header.. return null to indicate this
713 			 * main_ui():
714 			 */
715 			if (n == current_cntr)
716 				return NULL;
717 			n++;
718 		}
719 
720 
721 		for (; j < group->group->num_counters; j++) {
722 			if (n == current_cntr) {
723 				if (ctr)
724 					*ctr = j;
725 				return group;
726 			}
727 			n++;
728 		}
729 	}
730 
731 	assert(0);
732 	return NULL;
733 }
734 
735 static void
counter_dialog(void)736 counter_dialog(void)
737 {
738 	WINDOW *dialog;
739 	struct counter_group *group;
740 	int cnt = 0, current = 0, scroll;
741 
742 	/* figure out dialog size: */
743 	int dh = h/2;
744 	int dw = ctr_width + 2;
745 
746 	group = current_counter(&cnt);
747 
748 	/* find currently selected idx (note there can be discontinuities
749 	 * so the selected value does not map 1:1 to current idx)
750 	 */
751 	uint32_t selected = group->counter[cnt].select_val;
752 	for (int i = 0; i < group->group->num_countables; i++) {
753 		if (group->group->countables[i].selector == selected) {
754 			current = i;
755 			break;
756 		}
757 	}
758 
759 	/* scrolling offset, if dialog is too small for all the choices: */
760 	scroll = 0;
761 
762 	dialog = newwin(dh, dw, (h-dh)/2, (w-dw)/2);
763 	box(dialog, 0, 0);
764 	wrefresh(dialog);
765 	keypad(dialog, TRUE);
766 
767 	while (true) {
768 		int max = MIN2(dh - 2, group->group->num_countables);
769 		int selector = -1;
770 
771 		if ((current - scroll) >= (dh - 3)) {
772 			scroll = current - (dh - 3);
773 		} else if (current < scroll) {
774 			scroll = current;
775 		}
776 
777 		for (int i = 0; i < max; i++) {
778 			int n = scroll + i;
779 			wmove(dialog, i+1, 1);
780 			if (n == current) {
781 				assert (n < group->group->num_countables);
782 				selector = group->group->countables[n].selector;
783 				wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
784 			}
785 			if (n < group->group->num_countables)
786 				waddstr(dialog, group->group->countables[n].name);
787 			whline(dialog, ' ', dw - getcurx(dialog) - 1);
788 			if (n == current)
789 				wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
790 		}
791 
792 		assert (selector >= 0);
793 
794 		switch (wgetch(dialog)) {
795 		case KEY_UP:
796 			current = MAX2(0, current - 1);
797 			break;
798 		case KEY_DOWN:
799 			current = MIN2(group->group->num_countables - 1, current + 1);
800 			break;
801 		case KEY_LEFT:
802 		case KEY_ENTER:
803 			/* select new sampler */
804 			select_counter(group, cnt, selector);
805 			flush_ring();
806 			config_save();
807 			goto out;
808 		case 'q':
809 			goto out;
810 		default:
811 			/* ignore */
812 			break;
813 		}
814 
815 		resample();
816 	}
817 
818 out:
819 	wborder(dialog, ' ', ' ', ' ',' ',' ',' ',' ',' ');
820 	delwin(dialog);
821 }
822 
823 static void
scroll_cntr(int amount)824 scroll_cntr(int amount)
825 {
826 	if (amount < 0) {
827 		current_cntr = MAX2(1, current_cntr + amount);
828 		if (current_counter(NULL) == NULL) {
829 			current_cntr = MAX2(1, current_cntr - 1);
830 		}
831 	} else {
832 		current_cntr = MIN2(max_rows - 1, current_cntr + amount);
833 		if (current_counter(NULL) == NULL)
834 			current_cntr = MIN2(max_rows - 1, current_cntr + 1);
835 	}
836 }
837 
838 static void
main_ui(void)839 main_ui(void)
840 {
841 	WINDOW *mainwin;
842 	uint32_t last_time = gettime_us();
843 
844 	/* curses setup: */
845 	mainwin = initscr();
846 	if (!mainwin)
847 		goto out;
848 
849 	cbreak();
850 	wtimeout(mainwin, REFRESH_MS);
851 	noecho();
852 	keypad(mainwin, TRUE);
853 	curs_set(0);
854 	start_color();
855 	init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
856 	init_pair(COLOR_FOOTER,       COLOR_WHITE, COLOR_BLUE);
857 	init_pair(COLOR_INVERSE,      COLOR_BLACK, COLOR_WHITE);
858 
859 	while (true) {
860 		switch (wgetch(mainwin)) {
861 		case KEY_UP:
862 			scroll_cntr(-1);
863 			break;
864 		case KEY_DOWN:
865 			scroll_cntr(+1);
866 			break;
867 		case KEY_NPAGE:  /* page-down */
868 			/* TODO figure out # of rows visible? */
869 			scroll_cntr(+15);
870 			break;
871 		case KEY_PPAGE:  /* page-up */
872 			/* TODO figure out # of rows visible? */
873 			scroll_cntr(-15);
874 			break;
875 		case KEY_RIGHT:
876 			counter_dialog();
877 			break;
878 		case 'q':
879 			goto out;
880 			break;
881 		default:
882 			/* ignore */
883 			break;
884 		}
885 		resample();
886 		redraw(mainwin);
887 
888 		/* restore the counters every 0.5s in case the GPU has suspended,
889 		 * in which case the current selected countables will have reset:
890 		 */
891 		uint32_t t = gettime_us();
892 		if (delta(last_time, t) > 500000) {
893 			restore_counter_groups();
894 			flush_ring();
895 			last_time = t;
896 		}
897 	}
898 
899 	/* restore settings.. maybe we need an atexit()??*/
900 out:
901 	delwin(mainwin);
902 	endwin();
903 	refresh();
904 }
905 
906 static void
restore_counter_groups(void)907 restore_counter_groups(void)
908 {
909 	for (unsigned i = 0; i < dev.ngroups; i++) {
910 		struct counter_group *group = &dev.groups[i];
911 		unsigned j = 0;
912 
913 		/* NOTE skip CP the first CP counter */
914 		if (i == 0)
915 			j++;
916 
917 		for (; j < group->group->num_counters; j++) {
918 			select_counter(group, j, group->counter[j].select_val);
919 		}
920 	}
921 }
922 
923 static void
setup_counter_groups(const struct fd_perfcntr_group * groups)924 setup_counter_groups(const struct fd_perfcntr_group *groups)
925 {
926 	for (unsigned i = 0; i < dev.ngroups; i++) {
927 		struct counter_group *group = &dev.groups[i];
928 
929 		group->group = &groups[i];
930 
931 		max_rows += group->group->num_counters + 1;
932 
933 		/* the first CP counter is hidden: */
934 		if (i == 0) {
935 			max_rows--;
936 			if (group->group->num_counters <= 1)
937 				max_rows--;
938 		}
939 
940 		for (unsigned j = 0; j < group->group->num_counters; j++) {
941 			group->counter[j].counter = &group->group->counters[j];
942 
943 			group->counter[j].val_hi = dev.io + (group->counter[j].counter->counter_reg_hi * 4);
944 			group->counter[j].val_lo = dev.io + (group->counter[j].counter->counter_reg_lo * 4);
945 
946 			group->counter[j].select_val = j;
947 		}
948 
949 		for (unsigned j = 0; j < group->group->num_countables; j++) {
950 			ctr_width = MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
951 		}
952 	}
953 }
954 
955 /*
956  * configuration / persistence
957  */
958 
959 static config_t cfg;
960 static config_setting_t *setting;
961 
962 static void
config_save(void)963 config_save(void)
964 {
965 	for (unsigned i = 0; i < dev.ngroups; i++) {
966 		struct counter_group *group = &dev.groups[i];
967 		unsigned j = 0;
968 
969 		/* NOTE skip CP the first CP counter */
970 		if (i == 0)
971 			j++;
972 
973 		config_setting_t *sect =
974 			config_setting_get_member(setting, group->group->name);
975 
976 		for (; j < group->group->num_counters; j++) {
977 			char name[] = "counter0000";
978 			sprintf(name, "counter%d", j);
979 			config_setting_t *s =
980 				config_setting_lookup(sect, name);
981 			config_setting_set_int(s, group->counter[j].select_val);
982 		}
983 	}
984 
985 	config_write_file(&cfg, "fdperf.cfg");
986 }
987 
988 static void
config_restore(void)989 config_restore(void)
990 {
991 	char *str;
992 
993 	config_init(&cfg);
994 
995 	/* Read the file. If there is an error, report it and exit. */
996 	if(!config_read_file(&cfg, "fdperf.cfg")) {
997 		warn("could not restore settings");
998 	}
999 
1000 	config_setting_t *root = config_root_setting(&cfg);
1001 
1002 	/* per device settings: */
1003 	(void) asprintf(&str, "a%dxx", dev.chipid >> 24);
1004 	setting = config_setting_get_member(root, str);
1005 	if (!setting)
1006 		setting = config_setting_add(root, str, CONFIG_TYPE_GROUP);
1007 	free(str);
1008 
1009 	for (unsigned i = 0; i < dev.ngroups; i++) {
1010 		struct counter_group *group = &dev.groups[i];
1011 		unsigned j = 0;
1012 
1013 		/* NOTE skip CP the first CP counter */
1014 		if (i == 0)
1015 			j++;
1016 
1017 		config_setting_t *sect =
1018 			config_setting_get_member(setting, group->group->name);
1019 
1020 		if (!sect) {
1021 			sect = config_setting_add(setting, group->group->name,
1022 					CONFIG_TYPE_GROUP);
1023 		}
1024 
1025 		for (; j < group->group->num_counters; j++) {
1026 			char name[] = "counter0000";
1027 			sprintf(name, "counter%d", j);
1028 			config_setting_t *s = config_setting_lookup(sect, name);
1029 			if (!s) {
1030 				config_setting_add(sect, name, CONFIG_TYPE_INT);
1031 				continue;
1032 			}
1033 			select_counter(group, j, config_setting_get_int(s));
1034 		}
1035 	}
1036 }
1037 
1038 /*
1039  * main
1040  */
1041 
1042 int
main(int argc,char ** argv)1043 main(int argc, char **argv)
1044 {
1045 	find_device();
1046 
1047 	const struct fd_perfcntr_group *groups;
1048 	groups = fd_perfcntrs((dev.chipid >> 24) * 100, &dev.ngroups);
1049 	if (!groups) {
1050 		errx(1, "no perfcntr support");
1051 	}
1052 
1053 	dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
1054 
1055 	setlocale(LC_NUMERIC, "en_US.UTF-8");
1056 
1057 	setup_counter_groups(groups);
1058 	restore_counter_groups();
1059 	config_restore();
1060 	flush_ring();
1061 
1062 	main_ui();
1063 
1064 	return 0;
1065 }
1066