1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <fcntl.h>
31 #include <errno.h>
32 #include <assert.h>
33
34 #include "igt_perf.h"
35
36 #include "gpu-top.h"
37
/*
 * Ring constants.
 *
 * NOTE(review): nothing in the visible part of this file references these.
 * The names suggest ring register offsets (TAIL/HEAD/CTL), an address mask
 * and two wait flags -- confirm against the hardware documentation before
 * relying on them or removing them.
 */
#define RING_TAIL		0x00
#define RING_HEAD		0x04
#define ADDR_MASK		0x001FFFFC
#define RING_CTL		0x0C
#define RING_WAIT		(1 << 11)
#define RING_WAIT_SEMAPHORE	(1 << 10)
44
perf_init(struct gpu_top * gt)45 static int perf_init(struct gpu_top *gt)
46 {
47 struct engine_desc {
48 unsigned class, inst;
49 const char *name;
50 } *d, engines[] = {
51 { I915_ENGINE_CLASS_RENDER, 0, "rcs0" },
52 { I915_ENGINE_CLASS_COPY, 0, "bcs0" },
53 { I915_ENGINE_CLASS_VIDEO, 0, "vcs0" },
54 { I915_ENGINE_CLASS_VIDEO, 1, "vcs1" },
55 { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, "vecs0" },
56 { 0, 0, NULL }
57 };
58
59 d = &engines[0];
60
61 gt->fd = perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class, d->inst),
62 -1);
63 if (gt->fd < 0)
64 return -1;
65
66 if (perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class, d->inst),
67 gt->fd) >= 0)
68 gt->have_wait = 1;
69
70 if (perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class, d->inst),
71 gt->fd) >= 0)
72 gt->have_sema = 1;
73
74 gt->ring[0].name = d->name;
75 gt->num_rings = 1;
76
77 for (d++; d->name; d++) {
78 if (perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class,
79 d->inst),
80 gt->fd) < 0)
81 continue;
82
83 if (gt->have_wait &&
84 perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class,
85 d->inst),
86 gt->fd) < 0)
87 return -1;
88
89 if (gt->have_sema &&
90 perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class,
91 d->inst),
92 gt->fd) < 0)
93 return -1;
94
95 gt->ring[gt->num_rings++].name = d->name;
96 }
97
98 return 0;
99 }
100
gpu_top_init(struct gpu_top * gt)101 void gpu_top_init(struct gpu_top *gt)
102 {
103 memset(gt, 0, sizeof(*gt));
104 gt->fd = -1;
105
106 perf_init(gt);
107 }
108
gpu_top_update(struct gpu_top * gt)109 int gpu_top_update(struct gpu_top *gt)
110 {
111 uint32_t data[1024];
112 int update, len;
113
114 if (gt->fd < 0)
115 return 0;
116
117 if (gt->type == PERF) {
118 struct gpu_top_stat *s = >->stat[gt->count++&1];
119 struct gpu_top_stat *d = >->stat[gt->count&1];
120 uint64_t *sample, d_time;
121 int n, m;
122
123 len = read(gt->fd, data, sizeof(data));
124 if (len < 0)
125 return 0;
126
127 sample = (uint64_t *)data + 1;
128
129 s->time = *sample++;
130 for (n = m = 0; n < gt->num_rings; n++) {
131 s->busy[n] = sample[m++];
132 if (gt->have_wait)
133 s->wait[n] = sample[m++];
134 if (gt->have_sema)
135 s->sema[n] = sample[m++];
136 }
137
138 if (gt->count == 1)
139 return 0;
140
141 d_time = s->time - d->time;
142 for (n = 0; n < gt->num_rings; n++) {
143 gt->ring[n].u.u.busy = (100 * (s->busy[n] - d->busy[n]) + d_time/2) / d_time;
144 if (gt->have_wait)
145 gt->ring[n].u.u.wait = (100 * (s->wait[n] - d->wait[n]) + d_time/2) / d_time;
146 if (gt->have_sema)
147 gt->ring[n].u.u.sema = (100 * (s->sema[n] - d->sema[n]) + d_time/2) / d_time;
148
149 /* in case of rounding + sampling errors, fudge */
150 if (gt->ring[n].u.u.busy > 100)
151 gt->ring[n].u.u.busy = 100;
152 if (gt->ring[n].u.u.wait > 100)
153 gt->ring[n].u.u.wait = 100;
154 if (gt->ring[n].u.u.sema > 100)
155 gt->ring[n].u.u.sema = 100;
156 }
157
158 update = 1;
159 } else {
160 while ((len = read(gt->fd, data, sizeof(data))) > 0) {
161 uint32_t *ptr = &data[len/sizeof(uint32_t) - MAX_RINGS];
162 gt->ring[0].u.payload = ptr[0];
163 gt->ring[1].u.payload = ptr[1];
164 gt->ring[2].u.payload = ptr[2];
165 gt->ring[3].u.payload = ptr[3];
166 update = 1;
167 }
168 }
169
170 return update;
171 }
172