1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdint.h>
27
28 #include "anv_private.h"
29 #include "vk_util.h"
30
31 #include "perf/gen_perf.h"
32 #include "perf/gen_perf_mdapi.h"
33
34 #include "util/mesa-sha1.h"
35
36 struct gen_perf_config *
anv_get_perf(const struct gen_device_info * devinfo,int fd)37 anv_get_perf(const struct gen_device_info *devinfo, int fd)
38 {
39 /* We need self modifying batches. The i915 parser prevents it on
40 * Gen7.5 :( maybe one day.
41 */
42 if (devinfo->gen < 8)
43 return NULL;
44
45 struct gen_perf_config *perf = gen_perf_new(NULL);
46
47 gen_perf_init_metrics(perf, devinfo, fd, false /* pipeline statistics */);
48
49 if (!perf->n_queries) {
50 if (perf->platform_supported)
51 mesa_logw("Performance support disabled, "
52 "consider sysctl dev.i915.perf_stream_paranoid=0\n");
53 goto err;
54 }
55
56 /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
57 * perf revision 2.
58 */
59 if (perf->i915_perf_version < 3)
60 goto err;
61
62 return perf;
63
64 err:
65 ralloc_free(perf);
66 return NULL;
67 }
68
69 void
anv_device_perf_init(struct anv_device * device)70 anv_device_perf_init(struct anv_device *device)
71 {
72 device->perf_fd = -1;
73 }
74
75 static int
anv_device_perf_open(struct anv_device * device,uint64_t metric_id)76 anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
77 {
78 uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
79 struct drm_i915_perf_open_param param;
80 int p = 0, stream_fd;
81
82 properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
83 properties[p++] = true;
84
85 properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
86 properties[p++] = metric_id;
87
88 properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
89 properties[p++] = device->info.gen >= 8 ?
90 I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
91 I915_OA_FORMAT_A45_B8_C8;
92
93 properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
94 properties[p++] = 31; /* slowest sampling period */
95
96 properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
97 properties[p++] = device->context_id;
98
99 properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
100 properties[p++] = true;
101
102 /* If global SSEU is available, pin it to the default. This will ensure on
103 * Gen11 for instance we use the full EU array. Initially when perf was
104 * enabled we would use only half on Gen11 because of functional
105 * requirements.
106 */
107 if (device->physical->perf->i915_perf_version >= 4) {
108 properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
109 properties[p++] = (uintptr_t) &device->physical->perf->sseu;
110 }
111
112 memset(¶m, 0, sizeof(param));
113 param.flags = 0;
114 param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
115 param.properties_ptr = (uintptr_t)properties;
116 param.num_properties = p / 2;
117
118 stream_fd = gen_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, ¶m);
119 return stream_fd;
120 }
121
122 /* VK_INTEL_performance_query */
anv_InitializePerformanceApiINTEL(VkDevice _device,const VkInitializePerformanceApiInfoINTEL * pInitializeInfo)123 VkResult anv_InitializePerformanceApiINTEL(
124 VkDevice _device,
125 const VkInitializePerformanceApiInfoINTEL* pInitializeInfo)
126 {
127 ANV_FROM_HANDLE(anv_device, device, _device);
128
129 if (!device->physical->perf)
130 return VK_ERROR_EXTENSION_NOT_PRESENT;
131
132 /* Not much to do here */
133 return VK_SUCCESS;
134 }
135
anv_GetPerformanceParameterINTEL(VkDevice _device,VkPerformanceParameterTypeINTEL parameter,VkPerformanceValueINTEL * pValue)136 VkResult anv_GetPerformanceParameterINTEL(
137 VkDevice _device,
138 VkPerformanceParameterTypeINTEL parameter,
139 VkPerformanceValueINTEL* pValue)
140 {
141 ANV_FROM_HANDLE(anv_device, device, _device);
142
143 if (!device->physical->perf)
144 return VK_ERROR_EXTENSION_NOT_PRESENT;
145
146 VkResult result = VK_SUCCESS;
147 switch (parameter) {
148 case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
149 pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
150 pValue->data.valueBool = VK_TRUE;
151 break;
152
153 case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
154 pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
155 pValue->data.value32 = 25;
156 break;
157
158 default:
159 result = VK_ERROR_FEATURE_NOT_PRESENT;
160 break;
161 }
162
163 return result;
164 }
165
anv_CmdSetPerformanceMarkerINTEL(VkCommandBuffer commandBuffer,const VkPerformanceMarkerInfoINTEL * pMarkerInfo)166 VkResult anv_CmdSetPerformanceMarkerINTEL(
167 VkCommandBuffer commandBuffer,
168 const VkPerformanceMarkerInfoINTEL* pMarkerInfo)
169 {
170 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
171
172 cmd_buffer->intel_perf_marker = pMarkerInfo->marker;
173
174 return VK_SUCCESS;
175 }
176
anv_AcquirePerformanceConfigurationINTEL(VkDevice _device,const VkPerformanceConfigurationAcquireInfoINTEL * pAcquireInfo,VkPerformanceConfigurationINTEL * pConfiguration)177 VkResult anv_AcquirePerformanceConfigurationINTEL(
178 VkDevice _device,
179 const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
180 VkPerformanceConfigurationINTEL* pConfiguration)
181 {
182 ANV_FROM_HANDLE(anv_device, device, _device);
183 struct anv_performance_configuration_intel *config;
184
185 config = vk_alloc(&device->vk.alloc, sizeof(*config), 8,
186 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
187 if (!config)
188 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
189
190 if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
191 config->register_config =
192 gen_perf_load_configuration(device->physical->perf, device->fd,
193 GEN_PERF_QUERY_GUID_MDAPI);
194 if (!config->register_config) {
195 vk_free(&device->vk.alloc, config);
196 return VK_INCOMPLETE;
197 }
198
199 int ret =
200 gen_perf_store_configuration(device->physical->perf, device->fd,
201 config->register_config, NULL /* guid */);
202 if (ret < 0) {
203 ralloc_free(config->register_config);
204 vk_free(&device->vk.alloc, config);
205 return VK_INCOMPLETE;
206 }
207
208 config->config_id = ret;
209 }
210
211 vk_object_base_init(&device->vk, &config->base,
212 VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);
213
214 *pConfiguration = anv_performance_configuration_intel_to_handle(config);
215
216 return VK_SUCCESS;
217 }
218
anv_ReleasePerformanceConfigurationINTEL(VkDevice _device,VkPerformanceConfigurationINTEL _configuration)219 VkResult anv_ReleasePerformanceConfigurationINTEL(
220 VkDevice _device,
221 VkPerformanceConfigurationINTEL _configuration)
222 {
223 ANV_FROM_HANDLE(anv_device, device, _device);
224 ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
225
226 if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG))
227 gen_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config->config_id);
228
229 ralloc_free(config->register_config);
230 vk_object_base_finish(&config->base);
231 vk_free(&device->vk.alloc, config);
232
233 return VK_SUCCESS;
234 }
235
anv_QueueSetPerformanceConfigurationINTEL(VkQueue _queue,VkPerformanceConfigurationINTEL _configuration)236 VkResult anv_QueueSetPerformanceConfigurationINTEL(
237 VkQueue _queue,
238 VkPerformanceConfigurationINTEL _configuration)
239 {
240 ANV_FROM_HANDLE(anv_queue, queue, _queue);
241 ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
242 struct anv_device *device = queue->device;
243
244 if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
245 if (device->perf_fd < 0) {
246 device->perf_fd = anv_device_perf_open(device, config->config_id);
247 if (device->perf_fd < 0)
248 return VK_ERROR_INITIALIZATION_FAILED;
249 } else {
250 int ret = gen_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
251 (void *)(uintptr_t) config->config_id);
252 if (ret < 0)
253 return anv_device_set_lost(device, "i915-perf config failed: %m");
254 }
255 }
256
257 return VK_SUCCESS;
258 }
259
/**
 * vkUninitializePerformanceApiINTEL entry point.
 *
 * Closes the device's perf stream if one is open and marks it closed.
 */
void anv_UninitializePerformanceApiINTEL(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   /* Nothing to do when no stream was ever opened. */
   if (device->perf_fd < 0)
      return;

   close(device->perf_fd);
   device->perf_fd = -1;
}
270
271 /* VK_KHR_performance_query */
272 static const VkPerformanceCounterUnitKHR
273 gen_perf_counter_unit_to_vk_unit[] = {
274 [GEN_PERF_COUNTER_UNITS_BYTES] = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
275 [GEN_PERF_COUNTER_UNITS_HZ] = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
276 [GEN_PERF_COUNTER_UNITS_NS] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
277 [GEN_PERF_COUNTER_UNITS_US] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
278 [GEN_PERF_COUNTER_UNITS_PIXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
279 [GEN_PERF_COUNTER_UNITS_TEXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
280 [GEN_PERF_COUNTER_UNITS_THREADS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
281 [GEN_PERF_COUNTER_UNITS_PERCENT] = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
282 [GEN_PERF_COUNTER_UNITS_MESSAGES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
283 [GEN_PERF_COUNTER_UNITS_NUMBER] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
284 [GEN_PERF_COUNTER_UNITS_CYCLES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
285 [GEN_PERF_COUNTER_UNITS_EVENTS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
286 [GEN_PERF_COUNTER_UNITS_UTILIZATION] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
287 [GEN_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
288 [GEN_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
289 [GEN_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
290 [GEN_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
291 };
292
293 static const VkPerformanceCounterStorageKHR
294 gen_perf_counter_data_type_to_vk_storage[] = {
295 [GEN_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
296 [GEN_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
297 [GEN_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
298 [GEN_PERF_COUNTER_DATA_TYPE_FLOAT] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
299 [GEN_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
300 };
301
anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(VkPhysicalDevice physicalDevice,uint32_t queueFamilyIndex,uint32_t * pCounterCount,VkPerformanceCounterKHR * pCounters,VkPerformanceCounterDescriptionKHR * pCounterDescriptions)302 VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
303 VkPhysicalDevice physicalDevice,
304 uint32_t queueFamilyIndex,
305 uint32_t* pCounterCount,
306 VkPerformanceCounterKHR* pCounters,
307 VkPerformanceCounterDescriptionKHR* pCounterDescriptions)
308 {
309 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
310 struct gen_perf_config *perf = pdevice->perf;
311
312 uint32_t desc_count = *pCounterCount;
313
314 VK_OUTARRAY_MAKE(out, pCounters, pCounterCount);
315 VK_OUTARRAY_MAKE(out_desc, pCounterDescriptions, &desc_count);
316
317 for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
318 const struct gen_perf_query_counter *gen_counter = perf->counter_infos[c].counter;
319
320 vk_outarray_append(&out, counter) {
321 counter->unit = gen_perf_counter_unit_to_vk_unit[gen_counter->units];
322 counter->scope = VK_QUERY_SCOPE_COMMAND_KHR;
323 counter->storage = gen_perf_counter_data_type_to_vk_storage[gen_counter->data_type];
324
325 unsigned char sha1_result[20];
326 _mesa_sha1_compute(gen_counter->symbol_name,
327 strlen(gen_counter->symbol_name),
328 sha1_result);
329 memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
330 }
331
332 vk_outarray_append(&out_desc, desc) {
333 desc->flags = 0; /* None so far. */
334 snprintf(desc->name, sizeof(desc->name), "%s", gen_counter->name);
335 snprintf(desc->category, sizeof(desc->category), "%s", gen_counter->category);
336 snprintf(desc->description, sizeof(desc->description), "%s", gen_counter->desc);
337 }
338 }
339
340 return vk_outarray_status(&out);
341 }
342
/**
 * vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR entry point.
 *
 * Writes to *pNumPasses the value gen_perf_get_n_passes() returns for the
 * requested counter indices, or 0 when the physical device has no perf
 * configuration.
 */
void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
    VkPhysicalDevice                            physicalDevice,
    const VkQueryPoolPerformanceCreateInfoKHR*  pPerformanceQueryCreateInfo,
    uint32_t*                                   pNumPasses)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct gen_perf_config *perf = pdevice->perf;

   if (perf) {
      *pNumPasses =
         gen_perf_get_n_passes(perf,
                               pPerformanceQueryCreateInfo->pCounterIndices,
                               pPerformanceQueryCreateInfo->counterIndexCount,
                               NULL);
   } else {
      /* Without perf support there is nothing to run. */
      *pNumPasses = 0;
   }
}
361
anv_AcquireProfilingLockKHR(VkDevice _device,const VkAcquireProfilingLockInfoKHR * pInfo)362 VkResult anv_AcquireProfilingLockKHR(
363 VkDevice _device,
364 const VkAcquireProfilingLockInfoKHR* pInfo)
365 {
366 ANV_FROM_HANDLE(anv_device, device, _device);
367 struct gen_perf_config *perf = device->physical->perf;
368 struct gen_perf_query_info *first_metric_set = &perf->queries[0];
369 int fd = -1;
370
371 assert(device->perf_fd == -1);
372
373 if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
374 fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
375 if (fd < 0)
376 return VK_TIMEOUT;
377 }
378
379 device->perf_fd = fd;
380 return VK_SUCCESS;
381 }
382
/**
 * vkReleaseProfilingLockKHR entry point.
 *
 * Closes the perf stream (unless DEBUG_NO_OACONFIG is set, in which case
 * none was opened by the acquire call) and resets the descriptor to -1.
 */
void anv_ReleaseProfilingLockKHR(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   const bool stream_expected = !(INTEL_DEBUG & DEBUG_NO_OACONFIG);

   if (stream_expected) {
      assert(device->perf_fd >= 0);
      close(device->perf_fd);
   }

   device->perf_fd = -1;
}
394
395 void
anv_perf_write_pass_results(struct gen_perf_config * perf,struct anv_query_pool * pool,uint32_t pass,const struct gen_perf_query_result * accumulated_results,union VkPerformanceCounterResultKHR * results)396 anv_perf_write_pass_results(struct gen_perf_config *perf,
397 struct anv_query_pool *pool, uint32_t pass,
398 const struct gen_perf_query_result *accumulated_results,
399 union VkPerformanceCounterResultKHR *results)
400 {
401 for (uint32_t c = 0; c < pool->n_counters; c++) {
402 const struct gen_perf_counter_pass *counter_pass = &pool->counter_pass[c];
403
404 if (counter_pass->pass != pass)
405 continue;
406
407 switch (pool->pass_query[pass]->kind) {
408 case GEN_PERF_QUERY_TYPE_PIPELINE: {
409 assert(counter_pass->counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64);
410 uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
411 results[c].uint64 = accumulated_results->accumulator[accu_offset];
412 break;
413 }
414
415 case GEN_PERF_QUERY_TYPE_OA:
416 case GEN_PERF_QUERY_TYPE_RAW:
417 switch (counter_pass->counter->data_type) {
418 case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
419 results[c].uint64 =
420 counter_pass->counter->oa_counter_read_uint64(perf,
421 counter_pass->query,
422 accumulated_results->accumulator);
423 break;
424 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
425 results[c].float32 =
426 counter_pass->counter->oa_counter_read_float(perf,
427 counter_pass->query,
428 accumulated_results->accumulator);
429 break;
430 default:
431 /* So far we aren't using uint32, double or bool32... */
432 unreachable("unexpected counter data type");
433 }
434 break;
435
436 default:
437 unreachable("invalid query type");
438 }
439
440 /* The Vulkan extension only has nanoseconds as a unit */
441 if (counter_pass->counter->units == GEN_PERF_COUNTER_UNITS_US) {
442 assert(counter_pass->counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64);
443 results[c].uint64 *= 1000;
444 }
445 }
446 }
447