1 /*
2  * Copyright © 2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "gen_perf.h"
25 #include "gen_perf_mdapi.h"
26 #include "gen_perf_private.h"
27 #include "gen_perf_regs.h"
28 
29 #include "dev/gen_device_info.h"
30 
31 #include <drm-uapi/i915_drm.h>
32 
33 
34 int
gen_perf_query_result_write_mdapi(void * data,uint32_t data_size,const struct gen_device_info * devinfo,const struct gen_perf_query_result * result,uint64_t freq_start,uint64_t freq_end)35 gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
36                                   const struct gen_device_info *devinfo,
37                                   const struct gen_perf_query_result *result,
38                                   uint64_t freq_start, uint64_t freq_end)
39 {
40    switch (devinfo->gen) {
41    case 7: {
42       struct gen7_mdapi_metrics *mdapi_data = (struct gen7_mdapi_metrics *) data;
43 
44       if (data_size < sizeof(*mdapi_data))
45          return 0;
46 
47       assert(devinfo->is_haswell);
48 
49       for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++)
50          mdapi_data->ACounters[i] = result->accumulator[1 + i];
51 
52       for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) {
53          mdapi_data->NOACounters[i] =
54             result->accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i];
55       }
56 
57       mdapi_data->ReportsCount = result->reports_accumulated;
58       mdapi_data->TotalTime =
59          gen_device_info_timebase_scale(devinfo, result->accumulator[0]);
60       mdapi_data->CoreFrequency = freq_end;
61       mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
62       mdapi_data->SplitOccured = result->query_disjoint;
63       return sizeof(*mdapi_data);
64    }
65    case 8: {
66       struct gen8_mdapi_metrics *mdapi_data = (struct gen8_mdapi_metrics *) data;
67 
68       if (data_size < sizeof(*mdapi_data))
69          return 0;
70 
71       for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
72          mdapi_data->OaCntr[i] = result->accumulator[2 + i];
73       for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
74          mdapi_data->NoaCntr[i] =
75             result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
76       }
77 
78       mdapi_data->ReportId = result->hw_id;
79       mdapi_data->ReportsCount = result->reports_accumulated;
80       mdapi_data->TotalTime =
81          gen_device_info_timebase_scale(devinfo, result->accumulator[0]);
82       mdapi_data->BeginTimestamp =
83          gen_device_info_timebase_scale(devinfo, result->begin_timestamp);
84       mdapi_data->GPUTicks = result->accumulator[1];
85       mdapi_data->CoreFrequency = freq_end;
86       mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
87       mdapi_data->SliceFrequency =
88          (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
89       mdapi_data->UnsliceFrequency =
90          (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
91       mdapi_data->SplitOccured = result->query_disjoint;
92       return sizeof(*mdapi_data);
93    }
94    case 9:
95    case 11:
96    case 12:{
97       struct gen9_mdapi_metrics *mdapi_data = (struct gen9_mdapi_metrics *) data;
98 
99       if (data_size < sizeof(*mdapi_data))
100          return 0;
101 
102       for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
103          mdapi_data->OaCntr[i] = result->accumulator[2 + i];
104       for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
105          mdapi_data->NoaCntr[i] =
106             result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
107       }
108 
109       mdapi_data->ReportId = result->hw_id;
110       mdapi_data->ReportsCount = result->reports_accumulated;
111       mdapi_data->TotalTime =
112          gen_device_info_timebase_scale(devinfo, result->accumulator[0]);
113       mdapi_data->BeginTimestamp =
114          gen_device_info_timebase_scale(devinfo, result->begin_timestamp);
115       mdapi_data->GPUTicks = result->accumulator[1];
116       mdapi_data->CoreFrequency = freq_end;
117       mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
118       mdapi_data->SliceFrequency =
119          (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
120       mdapi_data->UnsliceFrequency =
121          (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
122       mdapi_data->SplitOccured = result->query_disjoint;
123       return sizeof(*mdapi_data);
124    }
125    default:
126       unreachable("unexpected gen");
127    }
128 }
129 
130 void
gen_perf_register_mdapi_statistic_query(struct gen_perf_config * perf_cfg,const struct gen_device_info * devinfo)131 gen_perf_register_mdapi_statistic_query(struct gen_perf_config *perf_cfg,
132                                         const struct gen_device_info *devinfo)
133 {
134    if (!(devinfo->gen >= 7 && devinfo->gen <= 12))
135       return;
136 
137    struct gen_perf_query_info *query =
138       gen_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
139 
140    query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
141    query->name = "Intel_Raw_Pipeline_Statistics_Query";
142 
143    /* The order has to match mdapi_pipeline_metrics. */
144    gen_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
145                                      "N vertices submitted");
146    gen_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
147                                      "N primitives submitted");
148    gen_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
149                                      "N vertex shader invocations");
150    gen_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
151                                      "N geometry shader invocations");
152    gen_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
153                                      "N geometry shader primitives emitted");
154    gen_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
155                                      "N primitives entering clipping");
156    gen_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
157                                      "N primitives leaving clipping");
158    if (devinfo->is_haswell || devinfo->gen == 8) {
159       gen_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
160                                   "N fragment shader invocations",
161                                   "N fragment shader invocations");
162    } else {
163       gen_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
164                                         "N fragment shader invocations");
165    }
166    gen_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
167                                      "N TCS shader invocations");
168    gen_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
169                                      "N TES shader invocations");
170    if (devinfo->gen >= 7) {
171       gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
172                                         "N compute shader invocations");
173    }
174 
175    if (devinfo->gen >= 10) {
176       /* Reuse existing CS invocation register until we can expose this new
177        * one.
178        */
179       gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
180                                         "Reserved1");
181    }
182 
183    query->data_size = sizeof(uint64_t) * query->n_counters;
184 }
185 
186 static void
fill_mdapi_perf_query_counter(struct gen_perf_query_info * query,const char * name,uint32_t data_offset,uint32_t data_size,enum gen_perf_counter_data_type data_type)187 fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
188                               const char *name,
189                               uint32_t data_offset,
190                               uint32_t data_size,
191                               enum gen_perf_counter_data_type data_type)
192 {
193    struct gen_perf_query_counter *counter = &query->counters[query->n_counters];
194 
195    assert(query->n_counters <= query->max_counters);
196 
197    counter->name = name;
198    counter->desc = "Raw counter value";
199    counter->type = GEN_PERF_COUNTER_TYPE_RAW;
200    counter->data_type = data_type;
201    counter->offset = data_offset;
202 
203    query->n_counters++;
204 
205    assert(counter->offset + gen_perf_query_counter_get_size(counter) <= query->data_size);
206 }
207 
/* Register member `field` of the metrics structure instance `layout` as a
 * raw counter of query `q`.  The counter is named after the member and its
 * offset is the member's byte distance from the start of `layout`; `layout`
 * is only used for address arithmetic and sizeof, never read.
 */
#define MDAPI_QUERY_ADD_COUNTER(q, layout, field, dtype)                   \
   fill_mdapi_perf_query_counter(q,                                        \
                                 #field,                                   \
                                 (uint8_t *) &layout.field -               \
                                 (uint8_t *) &layout,                      \
                                 sizeof(layout.field),                     \
                                 GEN_PERF_COUNTER_DATA_TYPE_##dtype)

/* Same as MDAPI_QUERY_ADD_COUNTER for element `index` of the array member
 * `field`.  The counter name is the member name followed by the index,
 * allocated with ralloc_asprintf() on `mem_ctx`.
 */
#define MDAPI_QUERY_ADD_ARRAY_COUNTER(mem_ctx, q, layout, field, index, dtype) \
   fill_mdapi_perf_query_counter(q,                                        \
                                 ralloc_asprintf(mem_ctx, "%s%i",          \
                                                 #field, index),           \
                                 (uint8_t *) &layout.field[index] -        \
                                 (uint8_t *) &layout,                      \
                                 sizeof(layout.field[0]),                  \
                                 GEN_PERF_COUNTER_DATA_TYPE_##dtype)
221 
222 void
gen_perf_register_mdapi_oa_query(struct gen_perf_config * perf,const struct gen_device_info * devinfo)223 gen_perf_register_mdapi_oa_query(struct gen_perf_config *perf,
224                                  const struct gen_device_info *devinfo)
225 {
226    struct gen_perf_query_info *query = NULL;
227 
228    /* MDAPI requires different structures for pretty much every generation
229     * (right now we have definitions for gen 7 to 12).
230     */
231    if (!(devinfo->gen >= 7 && devinfo->gen <= 12))
232       return;
233 
234    switch (devinfo->gen) {
235    case 7: {
236       query = gen_perf_append_query_info(perf, 1 + 45 + 16 + 7);
237       query->oa_format = I915_OA_FORMAT_A45_B8_C8;
238 
239       struct gen7_mdapi_metrics metric_data;
240       query->data_size = sizeof(metric_data);
241 
242       MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
243       for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
244          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
245                                        metric_data, ACounters, i, UINT64);
246       }
247       for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
248          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
249                                        metric_data, NOACounters, i, UINT64);
250       }
251       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
252       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
253       MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
254       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
255       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
256       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
257       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
258       break;
259    }
260    case 8: {
261       query = gen_perf_append_query_info(perf, 2 + 36 + 16 + 16);
262       query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
263 
264       struct gen8_mdapi_metrics metric_data;
265       query->data_size = sizeof(metric_data);
266 
267       MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
268       MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
269       for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
270          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
271                                        metric_data, OaCntr, i, UINT64);
272       }
273       for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
274          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
275                                        metric_data, NoaCntr, i, UINT64);
276       }
277       MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
278       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
279       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
280       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
281       MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
282       MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
283       MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
284       MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
285       MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
286       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
287       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
288       MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
289       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
290       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
291       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
292       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
293       break;
294    }
295    case 9:
296    case 11:
297    case 12: {
298       query = gen_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
299       query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
300 
301       struct gen9_mdapi_metrics metric_data;
302       query->data_size = sizeof(metric_data);
303 
304       MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
305       MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
306       for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
307          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
308                                        metric_data, OaCntr, i, UINT64);
309       }
310       for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
311          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
312                                        metric_data, NoaCntr, i, UINT64);
313       }
314       MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
315       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
316       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
317       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
318       MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
319       MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
320       MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
321       MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
322       MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
323       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
324       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
325       MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
326       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
327       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
328       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
329       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
330       for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
331          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
332                                        metric_data, UserCntr, i, UINT64);
333       }
334       MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
335       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
336       break;
337    }
338    default:
339       unreachable("Unsupported gen");
340       break;
341    }
342 
343    query->kind = GEN_PERF_QUERY_TYPE_RAW;
344    query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
345    query->guid = GEN_PERF_QUERY_GUID_MDAPI;
346 
347    {
348       /* Accumulation buffer offsets copied from an actual query... */
349       const struct gen_perf_query_info *copy_query =
350          &perf->queries[0];
351 
352       query->gpu_time_offset = copy_query->gpu_time_offset;
353       query->gpu_clock_offset = copy_query->gpu_clock_offset;
354       query->a_offset = copy_query->a_offset;
355       query->b_offset = copy_query->b_offset;
356       query->c_offset = copy_query->c_offset;
357    }
358 }
359