/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#include "anv_private.h"
#include "vk_util.h"

#include "perf/gen_perf.h"
#include "perf/gen_perf_mdapi.h"

#include "util/mesa-sha1.h"

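/* Probe the kernel for i915-perf support and build the table of metric sets
 * available on this device. Returns NULL when performance queries cannot be
 * supported (GPU or kernel too old, or perf access restricted).
 */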
struct gen_perf_config *
anv_get_perf(const struct gen_device_info *devinfo, int fd)
{
   /* We need self modifying batches. The i915 parser prevents it on
    * Gen7.5 :( maybe one day.
    */
   if (devinfo->gen < 8)
      return NULL;

   struct gen_perf_config *perf = gen_perf_new(NULL);

   gen_perf_init_metrics(perf, devinfo, fd, false /* pipeline statistics */);

   if (!perf->n_queries) {
      if (perf->platform_supported)
         mesa_logw("Performance support disabled, "
                   "consider sysctl dev.i915.perf_stream_paranoid=0\n");
      goto err;
   }
   /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available from
    * i915 perf revision 3 onwards.
    */
   if (perf->i915_perf_version < 3)
      goto err;

   return perf;

 err:
   ralloc_free(perf);
   return NULL;
}

void
anv_device_perf_init(struct anv_device *device)
{
   device->perf_fd = -1;
}

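/* Open an i915 perf stream on the device's GEM context, sampling OA reports
 * for the given metric set, with preemption held off while queries run.
 * Returns the stream fd, or a negative value on failure.
 */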
static int
anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
{
   uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
   struct drm_i915_perf_open_param param;
   int p = 0, stream_fd;

   properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
   properties[p++] = true;

   properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
   properties[p++] = metric_id;

   properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
   properties[p++] = device->info.gen >= 8 ?
      I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
      I915_OA_FORMAT_A45_B8_C8;

   properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
   properties[p++] = 31; /* slowest sampling period */

   properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
   properties[p++] = device->context_id;

   properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
   properties[p++] = true;

   /* If global SSEU is available, pin it to the default. This will ensure on
    * Gen11 for instance we use the full EU array. Initially when perf was
    * enabled we would use only half on Gen11 because of functional
    * requirements.
    */
   if (device->physical->perf->i915_perf_version >= 4) {
      properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
      properties[p++] = (uintptr_t) &device->physical->perf->sseu;
   }

   memset(&param, 0, sizeof(param));
   param.flags = 0;
   param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
   param.properties_ptr = (uintptr_t)properties;
   param.num_properties = p / 2;

   stream_fd = gen_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param);
   return stream_fd;
}

/* VK_INTEL_performance_query */
VkResult anv_InitializePerformanceApiINTEL(
    VkDevice                                    _device,
    const VkInitializePerformanceApiInfoINTEL*  pInitializeInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->perf)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   /* Not much to do here */
   return VK_SUCCESS;
}

VkResult anv_GetPerformanceParameterINTEL(
    VkDevice                                    _device,
    VkPerformanceParameterTypeINTEL             parameter,
    VkPerformanceValueINTEL*                    pValue)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->perf)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   VkResult result = VK_SUCCESS;
   switch (parameter) {
   case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
      pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
      pValue->data.valueBool = VK_TRUE;
      break;

   case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
      pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
      pValue->data.value32 = 25;
      break;

   default:
      result = VK_ERROR_FEATURE_NOT_PRESENT;
      break;
   }

   return result;
}

VkResult anv_CmdSetPerformanceMarkerINTEL(
    VkCommandBuffer                             commandBuffer,
    const VkPerformanceMarkerInfoINTEL*         pMarkerInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   cmd_buffer->intel_perf_marker = pMarkerInfo->marker;

   return VK_SUCCESS;
}

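/* Load the MDAPI register configuration and register it with i915 (unless
 * OA configuration is disabled for debugging), then wrap the resulting
 * kernel config id in a VkPerformanceConfigurationINTEL handle.
 */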
VkResult anv_AcquirePerformanceConfigurationINTEL(
    VkDevice                                    _device,
    const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
    VkPerformanceConfigurationINTEL*            pConfiguration)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_performance_configuration_intel *config;

   config = vk_alloc(&device->vk.alloc, sizeof(*config), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!config)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
      config->register_config =
         gen_perf_load_configuration(device->physical->perf, device->fd,
                                     GEN_PERF_QUERY_GUID_MDAPI);
      if (!config->register_config) {
         vk_free(&device->vk.alloc, config);
         return VK_INCOMPLETE;
      }

      int ret =
         gen_perf_store_configuration(device->physical->perf, device->fd,
                                      config->register_config, NULL /* guid */);
      if (ret < 0) {
         ralloc_free(config->register_config);
         vk_free(&device->vk.alloc, config);
         return VK_INCOMPLETE;
      }

      config->config_id = ret;
   }

   vk_object_base_init(&device->vk, &config->base,
                       VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);

   *pConfiguration = anv_performance_configuration_intel_to_handle(config);

   return VK_SUCCESS;
}

VkResult anv_ReleasePerformanceConfigurationINTEL(
    VkDevice                                    _device,
    VkPerformanceConfigurationINTEL             _configuration)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);

   if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG))
      gen_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config->config_id);

   ralloc_free(config->register_config);
   vk_object_base_finish(&config->base);
   vk_free(&device->vk.alloc, config);

   return VK_SUCCESS;
}

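/* Make the given configuration active for the queue's device: open the i915
 * perf stream with it on first use, otherwise switch the already open stream
 * over to the new configuration.
 */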
VkResult anv_QueueSetPerformanceConfigurationINTEL(
    VkQueue                                     _queue,
    VkPerformanceConfigurationINTEL             _configuration)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);
   ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
   struct anv_device *device = queue->device;

   if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
      if (device->perf_fd < 0) {
         device->perf_fd = anv_device_perf_open(device, config->config_id);
         if (device->perf_fd < 0)
            return VK_ERROR_INITIALIZATION_FAILED;
      } else {
         int ret = gen_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
                             (void *)(uintptr_t) config->config_id);
         if (ret < 0)
            return anv_device_set_lost(device, "i915-perf config failed: %m");
      }
   }

   return VK_SUCCESS;
}

void anv_UninitializePerformanceApiINTEL(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (device->perf_fd >= 0) {
      close(device->perf_fd);
      device->perf_fd = -1;
   }
}

/* VK_KHR_performance_query */
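/* Translation tables from gen_perf counter units and data types to their
 * Vulkan equivalents. Units without a direct Vulkan counterpart fall back to
 * VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR.
 */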
static const VkPerformanceCounterUnitKHR
gen_perf_counter_unit_to_vk_unit[] = {
   [GEN_PERF_COUNTER_UNITS_BYTES]                                = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
   [GEN_PERF_COUNTER_UNITS_HZ]                                   = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
   [GEN_PERF_COUNTER_UNITS_NS]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
   [GEN_PERF_COUNTER_UNITS_US]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
   [GEN_PERF_COUNTER_UNITS_PIXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_TEXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_THREADS]                              = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_PERCENT]                              = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
   [GEN_PERF_COUNTER_UNITS_MESSAGES]                             = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_NUMBER]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_CYCLES]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_EVENTS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_UTILIZATION]                          = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES]        = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
};

static const VkPerformanceCounterStorageKHR
gen_perf_counter_data_type_to_vk_storage[] = {
   [GEN_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
   [GEN_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
   [GEN_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
   [GEN_PERF_COUNTER_DATA_TYPE_FLOAT]  = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
   [GEN_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
};

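/* Expose every counter of every metric set through VK_KHR_performance_query,
 * using a SHA1 of the counter's symbol name as its UUID so the identity is
 * stable across runs.
 */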
VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t                                    queueFamilyIndex,
    uint32_t*                                   pCounterCount,
    VkPerformanceCounterKHR*                    pCounters,
    VkPerformanceCounterDescriptionKHR*         pCounterDescriptions)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct gen_perf_config *perf = pdevice->perf;

   uint32_t desc_count = *pCounterCount;

   VK_OUTARRAY_MAKE(out, pCounters, pCounterCount);
   VK_OUTARRAY_MAKE(out_desc, pCounterDescriptions, &desc_count);

   for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
      const struct gen_perf_query_counter *gen_counter = perf->counter_infos[c].counter;

      vk_outarray_append(&out, counter) {
         counter->unit = gen_perf_counter_unit_to_vk_unit[gen_counter->units];
         counter->scope = VK_QUERY_SCOPE_COMMAND_KHR;
         counter->storage = gen_perf_counter_data_type_to_vk_storage[gen_counter->data_type];

         unsigned char sha1_result[20];
         _mesa_sha1_compute(gen_counter->symbol_name,
                            strlen(gen_counter->symbol_name),
                            sha1_result);
         memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
      }

      vk_outarray_append(&out_desc, desc) {
         desc->flags = 0; /* None so far. */
         snprintf(desc->name, sizeof(desc->name), "%s", gen_counter->name);
         snprintf(desc->category, sizeof(desc->category), "%s", gen_counter->category);
         snprintf(desc->description, sizeof(desc->description), "%s", gen_counter->desc);
      }
   }

   return vk_outarray_status(&out);
}

void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
    VkPhysicalDevice                            physicalDevice,
    const VkQueryPoolPerformanceCreateInfoKHR*  pPerformanceQueryCreateInfo,
    uint32_t*                                   pNumPasses)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct gen_perf_config *perf = pdevice->perf;

   if (!perf) {
      *pNumPasses = 0;
      return;
   }

   *pNumPasses = gen_perf_get_n_passes(perf,
                                       pPerformanceQueryCreateInfo->pCounterIndices,
                                       pPerformanceQueryCreateInfo->counterIndexCount,
                                       NULL);
}

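/* Opening the i915 perf stream with the first metric set keeps the OA unit
 * configured while the profiling lock is held; the stream is closed again
 * when the lock is released.
 */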
VkResult anv_AcquireProfilingLockKHR(
    VkDevice                                    _device,
    const VkAcquireProfilingLockInfoKHR*        pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct gen_perf_config *perf = device->physical->perf;
   struct gen_perf_query_info *first_metric_set = &perf->queries[0];
   int fd = -1;

   assert(device->perf_fd == -1);

   if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
      fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
      if (fd < 0)
         return VK_TIMEOUT;
   }

   device->perf_fd = fd;
   return VK_SUCCESS;
}

void anv_ReleaseProfilingLockKHR(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
      assert(device->perf_fd >= 0);
      close(device->perf_fd);
   }
   device->perf_fd = -1;
}

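/* Convert the accumulated HW counter values of one query pass into the
 * VkPerformanceCounterResultKHR layout expected by the application, applying
 * the microsecond-to-nanosecond conversion required by Vulkan.
 */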
void
anv_perf_write_pass_results(struct gen_perf_config *perf,
                            struct anv_query_pool *pool, uint32_t pass,
                            const struct gen_perf_query_result *accumulated_results,
                            union VkPerformanceCounterResultKHR *results)
{
   for (uint32_t c = 0; c < pool->n_counters; c++) {
      const struct gen_perf_counter_pass *counter_pass = &pool->counter_pass[c];

      if (counter_pass->pass != pass)
         continue;

      switch (pool->pass_query[pass]->kind) {
      case GEN_PERF_QUERY_TYPE_PIPELINE: {
         assert(counter_pass->counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64);
         uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
         results[c].uint64 = accumulated_results->accumulator[accu_offset];
         break;
      }

      case GEN_PERF_QUERY_TYPE_OA:
      case GEN_PERF_QUERY_TYPE_RAW:
         switch (counter_pass->counter->data_type) {
         case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
            results[c].uint64 =
               counter_pass->counter->oa_counter_read_uint64(perf,
                                                             counter_pass->query,
                                                             accumulated_results->accumulator);
            break;
         case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
            results[c].float32 =
               counter_pass->counter->oa_counter_read_float(perf,
                                                            counter_pass->query,
                                                            accumulated_results->accumulator);
            break;
         default:
            /* So far we aren't using uint32, double or bool32... */
            unreachable("unexpected counter data type");
         }
         break;

      default:
         unreachable("invalid query type");
      }

      /* The Vulkan extension only has nanoseconds as a unit */
      if (counter_pass->counter->units == GEN_PERF_COUNTER_UNITS_US) {
         assert(counter_pass->counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64);
         results[c].uint64 *= 1000;
      }
   }
}