1 /*
2  * Copyright © 2013 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <fcntl.h>
31 #include <errno.h>
32 #include <assert.h>
33 
34 #include "igt_perf.h"
35 
36 #include "gpu-top.h"
37 
38 #define RING_TAIL      0x00
39 #define RING_HEAD      0x04
40 #define ADDR_MASK      0x001FFFFC
41 #define RING_CTL       0x0C
42 #define   RING_WAIT		(1<<11)
43 #define   RING_WAIT_SEMAPHORE	(1<<10)
44 
perf_init(struct gpu_top * gt)45 static int perf_init(struct gpu_top *gt)
46 {
47 	struct engine_desc {
48 		unsigned class, inst;
49 		const char *name;
50 	} *d, engines[] = {
51 		{ I915_ENGINE_CLASS_RENDER, 0, "rcs0" },
52 		{ I915_ENGINE_CLASS_COPY, 0, "bcs0" },
53 		{ I915_ENGINE_CLASS_VIDEO, 0, "vcs0" },
54 		{ I915_ENGINE_CLASS_VIDEO, 1, "vcs1" },
55 		{ I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, "vecs0" },
56 		{ 0, 0, NULL }
57 	};
58 
59 	d = &engines[0];
60 
61 	gt->fd = perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class, d->inst),
62 				      -1);
63 	if (gt->fd < 0)
64 		return -1;
65 
66 	if (perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class, d->inst),
67 				 gt->fd) >= 0)
68 		gt->have_wait = 1;
69 
70 	if (perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class, d->inst),
71 				 gt->fd) >= 0)
72 		gt->have_sema = 1;
73 
74 	gt->ring[0].name = d->name;
75 	gt->num_rings = 1;
76 
77 	for (d++; d->name; d++) {
78 		if (perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class,
79 							      d->inst),
80 					gt->fd) < 0)
81 			continue;
82 
83 		if (gt->have_wait &&
84 		    perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class,
85 							      d->inst),
86 					 gt->fd) < 0)
87 			return -1;
88 
89 		if (gt->have_sema &&
90 		    perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class,
91 							      d->inst),
92 				   gt->fd) < 0)
93 			return -1;
94 
95 		gt->ring[gt->num_rings++].name = d->name;
96 	}
97 
98 	return 0;
99 }
100 
gpu_top_init(struct gpu_top * gt)101 void gpu_top_init(struct gpu_top *gt)
102 {
103 	memset(gt, 0, sizeof(*gt));
104 	gt->fd = -1;
105 
106 	perf_init(gt);
107 }
108 
gpu_top_update(struct gpu_top * gt)109 int gpu_top_update(struct gpu_top *gt)
110 {
111 	uint32_t data[1024];
112 	int update, len;
113 
114 	if (gt->fd < 0)
115 		return 0;
116 
117 	if (gt->type == PERF) {
118 		struct gpu_top_stat *s = &gt->stat[gt->count++&1];
119 		struct gpu_top_stat *d = &gt->stat[gt->count&1];
120 		uint64_t *sample, d_time;
121 		int n, m;
122 
123 		len = read(gt->fd, data, sizeof(data));
124 		if (len < 0)
125 			return 0;
126 
127 		sample = (uint64_t *)data + 1;
128 
129 		s->time = *sample++;
130 		for (n = m = 0; n < gt->num_rings; n++) {
131 			s->busy[n] = sample[m++];
132 			if (gt->have_wait)
133 				s->wait[n] = sample[m++];
134 			if (gt->have_sema)
135 				s->sema[n] = sample[m++];
136 		}
137 
138 		if (gt->count == 1)
139 			return 0;
140 
141 		d_time = s->time - d->time;
142 		for (n = 0; n < gt->num_rings; n++) {
143 			gt->ring[n].u.u.busy = (100 * (s->busy[n] - d->busy[n]) + d_time/2) / d_time;
144 			if (gt->have_wait)
145 				gt->ring[n].u.u.wait = (100 * (s->wait[n] - d->wait[n]) + d_time/2) / d_time;
146 			if (gt->have_sema)
147 				gt->ring[n].u.u.sema = (100 * (s->sema[n] - d->sema[n]) + d_time/2) / d_time;
148 
149 			/* in case of rounding + sampling errors, fudge */
150 			if (gt->ring[n].u.u.busy > 100)
151 				gt->ring[n].u.u.busy = 100;
152 			if (gt->ring[n].u.u.wait > 100)
153 				gt->ring[n].u.u.wait = 100;
154 			if (gt->ring[n].u.u.sema > 100)
155 				gt->ring[n].u.u.sema = 100;
156 		}
157 
158 		update = 1;
159 	} else {
160 		while ((len = read(gt->fd, data, sizeof(data))) > 0) {
161 			uint32_t *ptr = &data[len/sizeof(uint32_t) - MAX_RINGS];
162 			gt->ring[0].u.payload = ptr[0];
163 			gt->ring[1].u.payload = ptr[1];
164 			gt->ring[2].u.payload = ptr[2];
165 			gt->ring[3].u.payload = ptr[3];
166 			update = 1;
167 		}
168 	}
169 
170 	return update;
171 }
172