1 /*
2  * Copyright (C) 2013 Christoph Bumiller
3  * Copyright (C) 2015 Samuel Pitoiset
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * Performance monitoring counters interface to gallium.
26  */
27 
28 #include "st_debug.h"
29 #include "st_context.h"
30 #include "st_cb_bitmap.h"
31 #include "st_cb_perfmon.h"
32 
33 #include "util/bitset.h"
34 
35 #include "pipe/p_context.h"
36 #include "pipe/p_screen.h"
37 #include "util/u_memory.h"
38 
39 static bool
init_perf_monitor(struct gl_context * ctx,struct gl_perf_monitor_object * m)40 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
41 {
42    struct st_context *st = st_context(ctx);
43    struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
44    struct pipe_context *pipe = st->pipe;
45    unsigned *batch = NULL;
46    unsigned num_active_counters = 0;
47    unsigned max_batch_counters = 0;
48    unsigned num_batch_counters = 0;
49    int gid, cid;
50 
51    st_flush_bitmap_cache(st);
52 
53    /* Determine the number of active counters. */
54    for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
55       const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
56       const struct st_perf_monitor_group *stg = &st->perfmon[gid];
57 
58       if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
59          /* Maximum number of counters reached. Cannot start the session. */
60          if (ST_DEBUG & DEBUG_MESA) {
61             debug_printf("Maximum number of counters reached. "
62                          "Cannot start the session!\n");
63          }
64          return false;
65       }
66 
67       num_active_counters += m->ActiveGroups[gid];
68       if (stg->has_batch)
69          max_batch_counters += m->ActiveGroups[gid];
70    }
71 
72    if (!num_active_counters)
73       return true;
74 
75    stm->active_counters = CALLOC(num_active_counters,
76                                  sizeof(*stm->active_counters));
77    if (!stm->active_counters)
78       return false;
79 
80    if (max_batch_counters) {
81       batch = CALLOC(max_batch_counters, sizeof(*batch));
82       if (!batch)
83          return false;
84    }
85 
86    /* Create a query for each active counter. */
87    for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
88       const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
89       const struct st_perf_monitor_group *stg = &st->perfmon[gid];
90       BITSET_WORD tmp;
91 
92       BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
93          const struct st_perf_monitor_counter *stc = &stg->counters[cid];
94          struct st_perf_counter_object *cntr =
95             &stm->active_counters[stm->num_active_counters];
96 
97          cntr->id       = cid;
98          cntr->group_id = gid;
99          if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) {
100             cntr->batch_index = num_batch_counters;
101             batch[num_batch_counters++] = stc->query_type;
102          } else {
103             cntr->query = pipe->create_query(pipe, stc->query_type, 0);
104             if (!cntr->query)
105                goto fail;
106          }
107          ++stm->num_active_counters;
108       }
109    }
110 
111    /* Create the batch query. */
112    if (num_batch_counters) {
113       stm->batch_query = pipe->create_batch_query(pipe, num_batch_counters,
114                                                   batch);
115       stm->batch_result = CALLOC(num_batch_counters, sizeof(stm->batch_result->batch[0]));
116       if (!stm->batch_query || !stm->batch_result)
117          goto fail;
118    }
119 
120    FREE(batch);
121    return true;
122 
123 fail:
124    FREE(batch);
125    return false;
126 }
127 
128 static void
reset_perf_monitor(struct st_perf_monitor_object * stm,struct pipe_context * pipe)129 reset_perf_monitor(struct st_perf_monitor_object *stm,
130                    struct pipe_context *pipe)
131 {
132    unsigned i;
133 
134    for (i = 0; i < stm->num_active_counters; ++i) {
135       struct pipe_query *query = stm->active_counters[i].query;
136       if (query)
137          pipe->destroy_query(pipe, query);
138    }
139    FREE(stm->active_counters);
140    stm->active_counters = NULL;
141    stm->num_active_counters = 0;
142 
143    if (stm->batch_query) {
144       pipe->destroy_query(pipe, stm->batch_query);
145       stm->batch_query = NULL;
146    }
147    FREE(stm->batch_result);
148    stm->batch_result = NULL;
149 }
150 
151 static struct gl_perf_monitor_object *
st_NewPerfMonitor(struct gl_context * ctx)152 st_NewPerfMonitor(struct gl_context *ctx)
153 {
154    struct st_perf_monitor_object *stq = ST_CALLOC_STRUCT(st_perf_monitor_object);
155    if (stq)
156       return &stq->base;
157    return NULL;
158 }
159 
160 static void
st_DeletePerfMonitor(struct gl_context * ctx,struct gl_perf_monitor_object * m)161 st_DeletePerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
162 {
163    struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
164    struct pipe_context *pipe = st_context(ctx)->pipe;
165 
166    reset_perf_monitor(stm, pipe);
167    FREE(stm);
168 }
169 
170 static GLboolean
st_BeginPerfMonitor(struct gl_context * ctx,struct gl_perf_monitor_object * m)171 st_BeginPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
172 {
173    struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
174    struct pipe_context *pipe = st_context(ctx)->pipe;
175    unsigned i;
176 
177    if (!stm->num_active_counters) {
178       /* Create a query for each active counter before starting
179        * a new monitoring session. */
180       if (!init_perf_monitor(ctx, m))
181          goto fail;
182    }
183 
184    /* Start the query for each active counter. */
185    for (i = 0; i < stm->num_active_counters; ++i) {
186       struct pipe_query *query = stm->active_counters[i].query;
187       if (query && !pipe->begin_query(pipe, query))
188           goto fail;
189    }
190 
191    if (stm->batch_query && !pipe->begin_query(pipe, stm->batch_query))
192       goto fail;
193 
194    return true;
195 
196 fail:
197    /* Failed to start the monitoring session. */
198    reset_perf_monitor(stm, pipe);
199    return false;
200 }
201 
202 static void
st_EndPerfMonitor(struct gl_context * ctx,struct gl_perf_monitor_object * m)203 st_EndPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
204 {
205    struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
206    struct pipe_context *pipe = st_context(ctx)->pipe;
207    unsigned i;
208 
209    /* Stop the query for each active counter. */
210    for (i = 0; i < stm->num_active_counters; ++i) {
211       struct pipe_query *query = stm->active_counters[i].query;
212       if (query)
213          pipe->end_query(pipe, query);
214    }
215 
216    if (stm->batch_query)
217       pipe->end_query(pipe, stm->batch_query);
218 }
219 
220 static void
st_ResetPerfMonitor(struct gl_context * ctx,struct gl_perf_monitor_object * m)221 st_ResetPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
222 {
223    struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
224    struct pipe_context *pipe = st_context(ctx)->pipe;
225 
226    if (!m->Ended)
227       st_EndPerfMonitor(ctx, m);
228 
229    reset_perf_monitor(stm, pipe);
230 
231    if (m->Active)
232       st_BeginPerfMonitor(ctx, m);
233 }
234 
235 static GLboolean
st_IsPerfMonitorResultAvailable(struct gl_context * ctx,struct gl_perf_monitor_object * m)236 st_IsPerfMonitorResultAvailable(struct gl_context *ctx,
237                                 struct gl_perf_monitor_object *m)
238 {
239    struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
240    struct pipe_context *pipe = st_context(ctx)->pipe;
241    unsigned i;
242 
243    if (!stm->num_active_counters)
244       return false;
245 
246    /* The result of a monitoring session is only available if the query of
247     * each active counter is idle. */
248    for (i = 0; i < stm->num_active_counters; ++i) {
249       struct pipe_query *query = stm->active_counters[i].query;
250       union pipe_query_result result;
251       if (query && !pipe->get_query_result(pipe, query, FALSE, &result)) {
252          /* The query is busy. */
253          return false;
254       }
255    }
256 
257    if (stm->batch_query &&
258        !pipe->get_query_result(pipe, stm->batch_query, FALSE, stm->batch_result))
259       return false;
260 
261    return true;
262 }
263 
264 static void
st_GetPerfMonitorResult(struct gl_context * ctx,struct gl_perf_monitor_object * m,GLsizei dataSize,GLuint * data,GLint * bytesWritten)265 st_GetPerfMonitorResult(struct gl_context *ctx,
266                         struct gl_perf_monitor_object *m,
267                         GLsizei dataSize,
268                         GLuint *data,
269                         GLint *bytesWritten)
270 {
271    struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
272    struct pipe_context *pipe = st_context(ctx)->pipe;
273    unsigned i;
274 
275    /* Copy data to the supplied array (data).
276     *
277     * The output data format is: <group ID, counter ID, value> for each
278     * active counter. The API allows counters to appear in any order.
279     */
280    GLsizei offset = 0;
281    bool have_batch_query = false;
282 
283    if (stm->batch_query)
284       have_batch_query = pipe->get_query_result(pipe, stm->batch_query, TRUE,
285                                                 stm->batch_result);
286 
287    /* Read query results for each active counter. */
288    for (i = 0; i < stm->num_active_counters; ++i) {
289       struct st_perf_counter_object *cntr = &stm->active_counters[i];
290       union pipe_query_result result = { 0 };
291       int gid, cid;
292       GLenum type;
293 
294       cid  = cntr->id;
295       gid  = cntr->group_id;
296       type = ctx->PerfMonitor.Groups[gid].Counters[cid].Type;
297 
298       if (cntr->query) {
299          if (!pipe->get_query_result(pipe, cntr->query, TRUE, &result))
300             continue;
301       } else {
302          if (!have_batch_query)
303             continue;
304          result.batch[0] = stm->batch_result->batch[cntr->batch_index];
305       }
306 
307       data[offset++] = gid;
308       data[offset++] = cid;
309       switch (type) {
310       case GL_UNSIGNED_INT64_AMD:
311          memcpy(&data[offset], &result.u64, sizeof(uint64_t));
312          offset += sizeof(uint64_t) / sizeof(GLuint);
313          break;
314       case GL_UNSIGNED_INT:
315          memcpy(&data[offset], &result.u32, sizeof(uint32_t));
316          offset += sizeof(uint32_t) / sizeof(GLuint);
317          break;
318       case GL_FLOAT:
319       case GL_PERCENTAGE_AMD:
320          memcpy(&data[offset], &result.f, sizeof(GLfloat));
321          offset += sizeof(GLfloat) / sizeof(GLuint);
322          break;
323       }
324    }
325 
326    if (bytesWritten)
327       *bytesWritten = offset * sizeof(GLuint);
328 }
329 
330 
331 bool
st_have_perfmon(struct st_context * st)332 st_have_perfmon(struct st_context *st)
333 {
334    struct pipe_screen *screen = st->pipe->screen;
335 
336    if (!screen->get_driver_query_info || !screen->get_driver_query_group_info)
337       return false;
338 
339    return screen->get_driver_query_group_info(screen, 0, NULL) != 0;
340 }
341 
342 static void
st_InitPerfMonitorGroups(struct gl_context * ctx)343 st_InitPerfMonitorGroups(struct gl_context *ctx)
344 {
345    struct st_context *st = st_context(ctx);
346    struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor;
347    struct pipe_screen *screen = st->pipe->screen;
348    struct gl_perf_monitor_group *groups = NULL;
349    struct st_perf_monitor_group *stgroups = NULL;
350    int num_counters, num_groups;
351    int gid, cid;
352 
353    /* Get the number of available queries. */
354    num_counters = screen->get_driver_query_info(screen, 0, NULL);
355 
356    /* Get the number of available groups. */
357    num_groups = screen->get_driver_query_group_info(screen, 0, NULL);
358    groups = CALLOC(num_groups, sizeof(*groups));
359    if (!groups)
360       return;
361 
362    stgroups = CALLOC(num_groups, sizeof(*stgroups));
363    if (!stgroups)
364       goto fail_only_groups;
365 
366    for (gid = 0; gid < num_groups; gid++) {
367       struct gl_perf_monitor_group *g = &groups[perfmon->NumGroups];
368       struct st_perf_monitor_group *stg = &stgroups[perfmon->NumGroups];
369       struct pipe_driver_query_group_info group_info;
370       struct gl_perf_monitor_counter *counters = NULL;
371       struct st_perf_monitor_counter *stcounters = NULL;
372 
373       if (!screen->get_driver_query_group_info(screen, gid, &group_info))
374          continue;
375 
376       g->Name = group_info.name;
377       g->MaxActiveCounters = group_info.max_active_queries;
378 
379       if (group_info.num_queries)
380          counters = CALLOC(group_info.num_queries, sizeof(*counters));
381       if (!counters)
382          goto fail;
383       g->Counters = counters;
384 
385       stcounters = CALLOC(group_info.num_queries, sizeof(*stcounters));
386       if (!stcounters)
387          goto fail;
388       stg->counters = stcounters;
389 
390       for (cid = 0; cid < num_counters; cid++) {
391          struct gl_perf_monitor_counter *c = &counters[g->NumCounters];
392          struct st_perf_monitor_counter *stc = &stcounters[g->NumCounters];
393          struct pipe_driver_query_info info;
394 
395          if (!screen->get_driver_query_info(screen, cid, &info))
396             continue;
397          if (info.group_id != gid)
398             continue;
399 
400          c->Name = info.name;
401          switch (info.type) {
402             case PIPE_DRIVER_QUERY_TYPE_UINT64:
403             case PIPE_DRIVER_QUERY_TYPE_BYTES:
404             case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
405             case PIPE_DRIVER_QUERY_TYPE_HZ:
406                c->Minimum.u64 = 0;
407                c->Maximum.u64 = info.max_value.u64 ? info.max_value.u64 : -1;
408                c->Type = GL_UNSIGNED_INT64_AMD;
409                break;
410             case PIPE_DRIVER_QUERY_TYPE_UINT:
411                c->Minimum.u32 = 0;
412                c->Maximum.u32 = info.max_value.u32 ? info.max_value.u32 : -1;
413                c->Type = GL_UNSIGNED_INT;
414                break;
415             case PIPE_DRIVER_QUERY_TYPE_FLOAT:
416                c->Minimum.f = 0.0;
417                c->Maximum.f = info.max_value.f ? info.max_value.f : -1;
418                c->Type = GL_FLOAT;
419                break;
420             case PIPE_DRIVER_QUERY_TYPE_PERCENTAGE:
421                c->Minimum.f = 0.0f;
422                c->Maximum.f = 100.0f;
423                c->Type = GL_PERCENTAGE_AMD;
424                break;
425             default:
426                unreachable("Invalid driver query type!");
427          }
428 
429          stc->query_type = info.query_type;
430          stc->flags = info.flags;
431          if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH)
432             stg->has_batch = true;
433 
434          g->NumCounters++;
435       }
436       perfmon->NumGroups++;
437    }
438    perfmon->Groups = groups;
439    st->perfmon = stgroups;
440 
441    return;
442 
443 fail:
444    for (gid = 0; gid < num_groups; gid++) {
445       FREE(stgroups[gid].counters);
446       FREE((void *)groups[gid].Counters);
447    }
448    FREE(stgroups);
449 fail_only_groups:
450    FREE(groups);
451 }
452 
453 void
st_destroy_perfmon(struct st_context * st)454 st_destroy_perfmon(struct st_context *st)
455 {
456    struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor;
457    int gid;
458 
459    for (gid = 0; gid < perfmon->NumGroups; gid++) {
460       FREE(st->perfmon[gid].counters);
461       FREE((void *)perfmon->Groups[gid].Counters);
462    }
463    FREE(st->perfmon);
464    FREE((void *)perfmon->Groups);
465 }
466 
st_init_perfmon_functions(struct dd_function_table * functions)467 void st_init_perfmon_functions(struct dd_function_table *functions)
468 {
469    functions->InitPerfMonitorGroups = st_InitPerfMonitorGroups;
470    functions->NewPerfMonitor = st_NewPerfMonitor;
471    functions->DeletePerfMonitor = st_DeletePerfMonitor;
472    functions->BeginPerfMonitor = st_BeginPerfMonitor;
473    functions->EndPerfMonitor = st_EndPerfMonitor;
474    functions->ResetPerfMonitor = st_ResetPerfMonitor;
475    functions->IsPerfMonitorResultAvailable = st_IsPerfMonitorResultAvailable;
476    functions->GetPerfMonitorResult = st_GetPerfMonitorResult;
477 }
478