1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27 
28 #include "dirent.h"
29 
30 #include <stdatomic.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <unistd.h>
34 #include <fcntl.h>
35 
36 #include "radv_debug.h"
37 #include "radv_private.h"
38 #include "radv_shader.h"
39 #include "radv_cs.h"
40 #include "util/disk_cache.h"
41 #include "vk_util.h"
42 #include <xf86drm.h>
43 #include <amdgpu.h>
44 #include "drm-uapi/amdgpu_drm.h"
45 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
46 #include "winsys/null/radv_null_winsys_public.h"
47 #include "ac_llvm_util.h"
48 #include "vk_format.h"
49 #include "sid.h"
50 #include "git_sha1.h"
51 #include "util/build_id.h"
52 #include "util/debug.h"
53 #include "util/mesa-sha1.h"
54 #include "util/timespec.h"
55 #include "util/u_atomic.h"
56 #include "compiler/glsl_types.h"
57 #include "util/driconf.h"
58 
/* The number of IBs per submit isn't infinite, it depends on the ring type
 * (i.e. some initial setup needed for a submit) and the number of IBs (4 DW).
 * This limit is arbitrary but should be safe for now.  Ideally, we should get
 * this limit from the KMD.
 */
64 #define RADV_MAX_IBS_PER_SUBMIT 192
65 
66 /* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
67 #if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
68 #define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
69 #endif
70 
/* Forward declarations of static helpers so that they can be referenced
 * ahead of their definitions.
 */

/* NOTE(review): the "_locked" suffix presumably means the caller must already
 * hold the timeline's lock -- confirm against the definition.
 */
static struct radv_timeline_point *
radv_timeline_find_point_at_least_locked(struct radv_device *device,
                                         struct radv_timeline *timeline,
                                         uint64_t p);

static struct radv_timeline_point *
radv_timeline_add_point_locked(struct radv_device *device,
                               struct radv_timeline *timeline,
                               uint64_t p);

static void
radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
                                     struct list_head *processing_list);

static
void radv_destroy_semaphore_part(struct radv_device *device,
                                 struct radv_semaphore_part *part);

static VkResult
radv_create_pthread_cond(pthread_cond_t *cond);
91 
/* Sample CLOCK_MONOTONIC and return the reading folded into a single
 * nanosecond count (seconds * 10^9 + nanoseconds).
 */
uint64_t radv_get_current_time(void)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);

	const uint64_t whole_seconds_ns = now.tv_sec * 1000000000ull;

	return whole_seconds_ns + now.tv_nsec;
}
98 
/* Turn a relative timeout (in the nanosecond units returned by
 * radv_get_current_time()) into an absolute deadline.
 *
 * The addition saturates: a timeout that would overflow past UINT64_MAX is
 * clamped so that the returned deadline is exactly UINT64_MAX.
 */
static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
	const uint64_t now = radv_get_current_time();
	const uint64_t headroom = UINT64_MAX - now;

	/* Never add more than what still fits in 64 bits. */
	if (headroom < timeout)
		timeout = headroom;

	return now + timeout;
}
107 
108 static int
radv_device_get_cache_uuid(enum radeon_family family,void * uuid)109 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
110 {
111 	struct mesa_sha1 ctx;
112 	unsigned char sha1[20];
113 	unsigned ptr_size = sizeof(void*);
114 
115 	memset(uuid, 0, VK_UUID_SIZE);
116 	_mesa_sha1_init(&ctx);
117 
118 	if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) ||
119 	    !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
120 		return -1;
121 
122 	_mesa_sha1_update(&ctx, &family, sizeof(family));
123 	_mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
124 	_mesa_sha1_final(&ctx, sha1);
125 
126 	memcpy(uuid, sha1, VK_UUID_SIZE);
127 	return 0;
128 }
129 
130 static void
radv_get_driver_uuid(void * uuid)131 radv_get_driver_uuid(void *uuid)
132 {
133 	ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
134 }
135 
136 static void
radv_get_device_uuid(struct radeon_info * info,void * uuid)137 radv_get_device_uuid(struct radeon_info *info, void *uuid)
138 {
139 	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
140 }
141 
142 static uint64_t
radv_get_adjusted_vram_size(struct radv_physical_device * device)143 radv_get_adjusted_vram_size(struct radv_physical_device *device)
144 {
145 	int ov = driQueryOptioni(&device->instance->dri_options,
146 	                         "override_vram_size");
147 	if (ov >= 0)
148 		return MIN2(device->rad_info.vram_size, (uint64_t)ov << 20);
149 	return device->rad_info.vram_size;
150 }
151 
152 static uint64_t
radv_get_visible_vram_size(struct radv_physical_device * device)153 radv_get_visible_vram_size(struct radv_physical_device *device)
154 {
155 	return MIN2(radv_get_adjusted_vram_size(device) , device->rad_info.vram_vis_size);
156 }
157 
158 static uint64_t
radv_get_vram_size(struct radv_physical_device * device)159 radv_get_vram_size(struct radv_physical_device *device)
160 {
161 	return radv_get_adjusted_vram_size(device) - device->rad_info.vram_vis_size;
162 }
163 
/* Bit flags naming the memory heaps; each heap has its own bit so that
 * several can be OR'ed together into one mask.
 */
enum radv_heap {
	RADV_HEAP_VRAM     = 0x1, /* bit 0 */
	RADV_HEAP_GTT      = 0x2, /* bit 1 */
	RADV_HEAP_VRAM_VIS = 0x4, /* bit 2 */
	RADV_HEAP_MAX      = 0x8, /* bit 3: first bit above the heap flags */
};
170 
171 static void
radv_physical_device_init_mem_types(struct radv_physical_device * device)172 radv_physical_device_init_mem_types(struct radv_physical_device *device)
173 {
174 	uint64_t visible_vram_size = radv_get_visible_vram_size(device);
175 	uint64_t vram_size = radv_get_vram_size(device);
176 	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
177 	device->memory_properties.memoryHeapCount = 0;
178 	device->heaps = 0;
179 
180 	/* Only get a VRAM heap if it is significant, not if it is a 16 MiB
181 	 * remainder above visible VRAM. */
182 	if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
183 		vram_index = device->memory_properties.memoryHeapCount++;
184 		device->heaps |= RADV_HEAP_VRAM;
185 		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
186 			.size = vram_size,
187 			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
188 		};
189 	}
190 
191 	if (device->rad_info.gart_size > 0) {
192 		gart_index = device->memory_properties.memoryHeapCount++;
193 		device->heaps |= RADV_HEAP_GTT;
194 		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
195 			.size = device->rad_info.gart_size,
196 			.flags = 0,
197 		};
198 	}
199 
200 	if (visible_vram_size) {
201 		visible_vram_index = device->memory_properties.memoryHeapCount++;
202 		device->heaps |= RADV_HEAP_VRAM_VIS;
203 		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
204 			.size = visible_vram_size,
205 			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
206 		};
207 	}
208 
209 	unsigned type_count = 0;
210 
211 	if (vram_index >= 0 || visible_vram_index >= 0) {
212 		device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
213 		device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
214 		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
215 			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
216 			.heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
217 		};
218 	}
219 
220 	if (gart_index >= 0) {
221 		device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
222 		device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
223 		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
224 			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
225 			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
226 			.heapIndex = gart_index,
227 		};
228 	}
229 	if (visible_vram_index >= 0) {
230 		device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
231 		device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
232 		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
233 			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
234 			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
235 			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
236 			.heapIndex = visible_vram_index,
237 		};
238 	}
239 
240 	if (gart_index >= 0) {
241 		device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
242 		device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
243 		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
244 			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
245 			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
246 			VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
247 			.heapIndex = gart_index,
248 		};
249 	}
250 	device->memory_properties.memoryTypeCount = type_count;
251 
252 	if (device->rad_info.has_l2_uncached) {
253 		for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
254 			VkMemoryType mem_type = device->memory_properties.memoryTypes[i];
255 
256 			if ((mem_type.propertyFlags & (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
257 						       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
258 			    mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
259 
260 				VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
261 					VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
262 					VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;
263 
264 				device->memory_domains[type_count] = device->memory_domains[i];
265 				device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
266 				device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
267 					.propertyFlags = property_flags,
268 					.heapIndex = mem_type.heapIndex,
269 				};
270 			}
271 		}
272 		device->memory_properties.memoryTypeCount = type_count;
273 	}
274 }
275 
276 static const char *
radv_get_compiler_string(struct radv_physical_device * pdevice)277 radv_get_compiler_string(struct radv_physical_device *pdevice)
278 {
279 	if (!pdevice->use_llvm) {
280 		/* Some games like SotTR apply shader workarounds if the LLVM
281 		 * version is too old or if the LLVM version string is
282 		 * missing. This gives 2-5% performance with SotTR and ACO.
283 		 */
284 		if (driQueryOptionb(&pdevice->instance->dri_options,
285 				    "radv_report_llvm9_version_string")) {
286 			return "ACO/LLVM 9.0.1";
287 		}
288 
289 		return "ACO";
290 	}
291 
292 	return "LLVM " MESA_LLVM_VERSION_STRING;
293 }
294 
295 static VkResult
radv_physical_device_try_create(struct radv_instance * instance,drmDevicePtr drm_device,struct radv_physical_device ** device_out)296 radv_physical_device_try_create(struct radv_instance *instance,
297 				drmDevicePtr drm_device,
298 				struct radv_physical_device **device_out)
299 {
300 	VkResult result;
301 	int fd = -1;
302 	int master_fd = -1;
303 
304 	if (drm_device) {
305 		const char *path = drm_device->nodes[DRM_NODE_RENDER];
306 		drmVersionPtr version;
307 
308 		fd = open(path, O_RDWR | O_CLOEXEC);
309 		if (fd < 0) {
310 			if (instance->debug_flags & RADV_DEBUG_STARTUP)
311 				radv_logi("Could not open device '%s'", path);
312 
313 			return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
314 		}
315 
316 		version = drmGetVersion(fd);
317 		if (!version) {
318 			close(fd);
319 
320 			if (instance->debug_flags & RADV_DEBUG_STARTUP)
321 				radv_logi("Could not get the kernel driver version for device '%s'", path);
322 
323 			return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
324 					 "failed to get version %s: %m", path);
325 		}
326 
327 		if (strcmp(version->name, "amdgpu")) {
328 			drmFreeVersion(version);
329 			close(fd);
330 
331 			if (instance->debug_flags & RADV_DEBUG_STARTUP)
332 				radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);
333 
334 			return VK_ERROR_INCOMPATIBLE_DRIVER;
335 		}
336 		drmFreeVersion(version);
337 
338 		if (instance->debug_flags & RADV_DEBUG_STARTUP)
339 				radv_logi("Found compatible device '%s'.", path);
340 	}
341 
342 	struct radv_physical_device *device =
343 		vk_zalloc2(&instance->alloc, NULL, sizeof(*device), 8,
344 			   VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
345 	if (!device) {
346 		result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
347 		goto fail_fd;
348 	}
349 
350 	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
351 	device->instance = instance;
352 
353 	if (drm_device) {
354 		device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
355 						       instance->perftest_flags);
356 	} else {
357 		device->ws = radv_null_winsys_create();
358 	}
359 
360 	if (!device->ws) {
361 		result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
362 				   "failed to initialize winsys");
363 		goto fail_alloc;
364 	}
365 
366 	if (drm_device && instance->enabled_extensions.KHR_display) {
367 		master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
368 		if (master_fd >= 0) {
369 			uint32_t accel_working = 0;
370 			struct drm_amdgpu_info request = {
371 				.return_pointer = (uintptr_t)&accel_working,
372 				.return_size = sizeof(accel_working),
373 				.query = AMDGPU_INFO_ACCEL_WORKING
374 			};
375 
376 			if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof (struct drm_amdgpu_info)) < 0 || !accel_working) {
377 				close(master_fd);
378 				master_fd = -1;
379 			}
380 		}
381 	}
382 
383 	device->master_fd = master_fd;
384 	device->local_fd = fd;
385 	device->ws->query_info(device->ws, &device->rad_info);
386 
387 	device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;
388 
389 	snprintf(device->name, sizeof(device->name),
390 		 "AMD RADV %s (%s)",
391 		 device->rad_info.name, radv_get_compiler_string(device));
392 
393 	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
394 		result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
395 				   "cannot generate UUID");
396 		goto fail_wsi;
397 	}
398 
399 	/* These flags affect shader compilation. */
400 	uint64_t shader_env_flags = (device->use_llvm ? 0 : 0x2);
401 
402 	/* The gpu id is already embedded in the uuid so we just pass "radv"
403 	 * when creating the cache.
404 	 */
405 	char buf[VK_UUID_SIZE * 2 + 1];
406 	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
407 	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
408 
409 	if (device->rad_info.chip_class < GFX8 ||
410 	    device->rad_info.chip_class > GFX10)
411 		fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
412 
413 	radv_get_driver_uuid(&device->driver_uuid);
414 	radv_get_device_uuid(&device->rad_info, &device->device_uuid);
415 
416 	device->out_of_order_rast_allowed = device->rad_info.has_out_of_order_rast &&
417 					    !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);
418 
419 	device->dcc_msaa_allowed =
420 		(device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
421 
422 	device->use_ngg = device->rad_info.chip_class >= GFX10 &&
423 			  device->rad_info.family != CHIP_NAVI14 &&
424 			  device->rad_info.has_dedicated_vram &&
425 			  !(device->instance->debug_flags & RADV_DEBUG_NO_NGG);
426 
427 	device->use_ngg_streamout = false;
428 
429 	/* Determine the number of threads per wave for all stages. */
430 	device->cs_wave_size = 64;
431 	device->ps_wave_size = 64;
432 	device->ge_wave_size = 64;
433 
434 	if (device->rad_info.chip_class >= GFX10) {
435 		if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
436 			device->cs_wave_size = 32;
437 
438 		/* For pixel shaders, wave64 is recommanded. */
439 		if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
440 			device->ps_wave_size = 32;
441 
442 		if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
443 			device->ge_wave_size = 32;
444 	}
445 
446 	radv_physical_device_init_mem_types(device);
447 
448 	radv_physical_device_get_supported_extensions(device,
449 						      &device->supported_extensions);
450 
451 	if (drm_device)
452 		device->bus_info = *drm_device->businfo.pci;
453 
454 	if ((device->instance->debug_flags & RADV_DEBUG_INFO))
455 		ac_print_gpu_info(&device->rad_info, stdout);
456 
457 	/* The WSI is structured as a layer on top of the driver, so this has
458 	 * to be the last part of initialization (at least until we get other
459 	 * semi-layers).
460 	 */
461 	result = radv_init_wsi(device);
462 	if (result != VK_SUCCESS) {
463 		vk_error(instance, result);
464 		goto fail_disk_cache;
465 	}
466 
467 	*device_out = device;
468 
469 	return VK_SUCCESS;
470 
471 fail_disk_cache:
472 	disk_cache_destroy(device->disk_cache);
473 fail_wsi:
474 	device->ws->destroy(device->ws);
475 fail_alloc:
476 	vk_free(&instance->alloc, device);
477 fail_fd:
478 	if (fd != -1)
479 		close(fd);
480 	if (master_fd != -1)
481 		close(master_fd);
482 	return result;
483 }
484 
485 static void
radv_physical_device_destroy(struct radv_physical_device * device)486 radv_physical_device_destroy(struct radv_physical_device *device)
487 {
488 	radv_finish_wsi(device);
489 	device->ws->destroy(device->ws);
490 	disk_cache_destroy(device->disk_cache);
491 	if (device->local_fd != -1)
492 		close(device->local_fd);
493 	if (device->master_fd != -1)
494 		close(device->master_fd);
495 	vk_free(&device->instance->alloc, device);
496 }
497 
498 static void *
default_alloc_func(void * pUserData,size_t size,size_t align,VkSystemAllocationScope allocationScope)499 default_alloc_func(void *pUserData, size_t size, size_t align,
500                    VkSystemAllocationScope allocationScope)
501 {
502 	return malloc(size);
503 }
504 
505 static void *
default_realloc_func(void * pUserData,void * pOriginal,size_t size,size_t align,VkSystemAllocationScope allocationScope)506 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
507                      size_t align, VkSystemAllocationScope allocationScope)
508 {
509 	return realloc(pOriginal, size);
510 }
511 
/* Free callback of the default allocator: a plain free(). The user data
 * pointer is ignored.
 */
static void
default_free_func(void *pUserData, void *pMemory)
{
	(void)pUserData;

	free(pMemory);
}
517 
/* Allocator used when the application does not supply its own
 * VkAllocationCallbacks: thin wrappers around malloc/realloc/free with no
 * user data.
 */
static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};
524 
/* Name -> flag table for the RADV_DEBUG environment variable.
 *
 * The table is terminated by a {NULL, 0} sentinel.
 * radv_get_debug_option_name() indexes this array directly by id, so the
 * order of the entries is significant for that lookup.
 * NOTE(review): presumably entry N must correspond to flag bit N -- confirm
 * against the RADV_DEBUG_* definitions before inserting or reordering.
 */
static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{"spirv", RADV_DEBUG_DUMP_SPIRV},
	{"vmfaults", RADV_DEBUG_VM_FAULTS},
	{"zerovram", RADV_DEBUG_ZERO_VRAM},
	{"syncshaders", RADV_DEBUG_SYNC_SHADERS},
	{"preoptir", RADV_DEBUG_PREOPTIR},
	{"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
	{"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
	{"info", RADV_DEBUG_INFO},
	{"errors", RADV_DEBUG_ERRORS},
	{"startup", RADV_DEBUG_STARTUP},
	{"checkir", RADV_DEBUG_CHECKIR},
	{"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
	{"nobinning", RADV_DEBUG_NOBINNING},
	{"nongg", RADV_DEBUG_NO_NGG},
	{"allentrypoints", RADV_DEBUG_ALL_ENTRYPOINTS},
	{"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
	{"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
	{"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
	{"llvm", RADV_DEBUG_LLVM},
	{"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
	{"hang", RADV_DEBUG_HANG},
	{"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
	{NULL, 0}
};
559 
560 const char *
radv_get_debug_option_name(int id)561 radv_get_debug_option_name(int id)
562 {
563 	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
564 	return radv_debug_options[id].string;
565 }
566 
/* Name -> flag table for the RADV_PERFTEST environment variable.
 *
 * The table is terminated by a {NULL, 0} sentinel.
 * radv_get_perftest_option_name() indexes this array directly by id, so the
 * order of the entries is significant for that lookup.
 */
static const struct debug_control radv_perftest_options[] = {
	{"localbos", RADV_PERFTEST_LOCAL_BOS},
	{"dccmsaa", RADV_PERFTEST_DCC_MSAA},
	{"bolist", RADV_PERFTEST_BO_LIST},
	{"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
	{"cswave32", RADV_PERFTEST_CS_WAVE_32},
	{"pswave32", RADV_PERFTEST_PS_WAVE_32},
	{"gewave32", RADV_PERFTEST_GE_WAVE_32},
	{"dfsm", RADV_PERFTEST_DFSM},
	{NULL, 0}
};
578 
579 const char *
radv_get_perftest_option_name(int id)580 radv_get_perftest_option_name(int id)
581 {
582 	assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
583 	return radv_perftest_options[id].string;
584 }
585 
586 static void
radv_handle_per_app_options(struct radv_instance * instance,const VkApplicationInfo * info)587 radv_handle_per_app_options(struct radv_instance *instance,
588 			    const VkApplicationInfo *info)
589 {
590 	const char *name = info ? info->pApplicationName : NULL;
591 	const char *engine_name = info ? info->pEngineName : NULL;
592 
593 	if (name) {
594 		if (!strcmp(name, "DOOM_VFR")) {
595 			/* Work around a Doom VFR game bug */
596 			instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
597 		} else if (!strcmp(name, "Fledge")) {
598 			/*
599 			 * Zero VRAM for "The Surge 2"
600 			 *
601 			 * This avoid a hang when when rendering any level. Likely
602 			 * uninitialized data in an indirect draw.
603 			 */
604 			instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
605 		} else if (!strcmp(name, "No Man's Sky")) {
606 			/* Work around a NMS game bug */
607 			instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;
608 		} else if (!strcmp(name, "DOOMEternal")) {
609 			/* Zero VRAM for Doom Eternal to fix rendering issues. */
610 			instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
611 		} else if (!strcmp(name, "ShadowOfTheTomb")) {
612 			/* Work around flickering foliage for native Shadow of the Tomb Raider
613 			 * on GFX10.3 */
614 			instance->debug_flags |= RADV_DEBUG_INVARIANT_GEOM;
615 		}
616 	}
617 
618 	if (engine_name) {
619 		if (!strcmp(engine_name, "vkd3d")) {
620 			/* Zero VRAM for all VKD3D (DX12->VK) games to fix
621 			 * rendering issues.
622 			 */
623 			instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
624 		} else if (!strcmp(engine_name, "Quantic Dream Engine")) {
625 			/* Fix various artifacts in Detroit: Become Human */
626 			instance->debug_flags |= RADV_DEBUG_ZERO_VRAM |
627 			                         RADV_DEBUG_DISCARD_TO_DEMOTE;
628 
629 			/* Fix rendering issues in Detroit: Become Human
630 			 * because the game uses render loops (it
631 			 * samples/renders from/to the same depth/stencil
632 			 * texture inside the same draw) without input
633 			 * attachments and that is invalid Vulkan usage.
634 			 */
635 			instance->disable_tc_compat_htile_in_general = true;
636 		}
637 	}
638 
639 	instance->enable_mrt_output_nan_fixup =
640 		driQueryOptionb(&instance->dri_options,
641 				"radv_enable_mrt_output_nan_fixup");
642 
643 	if (driQueryOptionb(&instance->dri_options, "radv_no_dynamic_bounds"))
644 		instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
645 }
646 
/* driconf options known to RADV, grouped into a performance section and a
 * debug section. radv_init_dri_options() hands this table to
 * driParseOptionInfo().
 */
static const driOptionDescription radv_dri_options[] = {
	DRI_CONF_SECTION_PERFORMANCE
		DRI_CONF_ADAPTIVE_SYNC(true)
		DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
		DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
		DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
		DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING(false)
		DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP(false)
		DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)
		DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0)
	DRI_CONF_SECTION_END

	DRI_CONF_SECTION_DEBUG
		DRI_CONF_OVERRIDE_VRAM_SIZE()
		DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
	DRI_CONF_SECTION_END
};
664 
radv_init_dri_options(struct radv_instance * instance)665 static void  radv_init_dri_options(struct radv_instance *instance)
666 {
667 	driParseOptionInfo(&instance->available_dri_options, radv_dri_options, ARRAY_SIZE(radv_dri_options));
668 	driParseConfigFiles(&instance->dri_options,
669 	                    &instance->available_dri_options,
670 	                    0, "radv", NULL,
671 	                    instance->applicationName,
672 	                    instance->applicationVersion,
673 	                    instance->engineName,
674 	                    instance->engineVersion);
675 }
676 
radv_CreateInstance(const VkInstanceCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkInstance * pInstance)677 VkResult radv_CreateInstance(
678 	const VkInstanceCreateInfo*                 pCreateInfo,
679 	const VkAllocationCallbacks*                pAllocator,
680 	VkInstance*                                 pInstance)
681 {
682 	struct radv_instance *instance;
683 	VkResult result;
684 
685 	instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
686 			      VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
687 	if (!instance)
688 		return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
689 
690 	vk_object_base_init(NULL, &instance->base, VK_OBJECT_TYPE_INSTANCE);
691 
692 	if (pAllocator)
693 		instance->alloc = *pAllocator;
694 	else
695 		instance->alloc = default_alloc;
696 
697 	if (pCreateInfo->pApplicationInfo) {
698 		const VkApplicationInfo *app = pCreateInfo->pApplicationInfo;
699 
700 		instance->applicationName =
701 			vk_strdup(&instance->alloc, app->pApplicationName,
702 				  VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
703 		instance->applicationVersion = app->applicationVersion;
704 
705 		instance->engineName =
706 			vk_strdup(&instance->alloc, app->pEngineName,
707 				  VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
708 		instance->engineVersion = app->engineVersion;
709 		instance->apiVersion = app->apiVersion;
710 	}
711 
712 	if (instance->apiVersion == 0)
713 		instance->apiVersion = VK_API_VERSION_1_0;
714 
715 	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
716 						   radv_debug_options);
717 
718 	const char *radv_perftest_str = getenv("RADV_PERFTEST");
719 	instance->perftest_flags = parse_debug_string(radv_perftest_str,
720 						      radv_perftest_options);
721 
722 	if (radv_perftest_str) {
723 		/* Output warnings for famous RADV_PERFTEST options that no
724 		 * longer exist or are deprecated.
725 		 */
726 		if (strstr(radv_perftest_str, "aco")) {
727 			fprintf(stderr, "*******************************************************************************\n");
728 			fprintf(stderr, "* WARNING: Unknown option RADV_PERFTEST='aco'. ACO is enabled by default now. *\n");
729 			fprintf(stderr, "*******************************************************************************\n");
730 		}
731 		if (strstr(radv_perftest_str, "llvm")) {
732 			fprintf(stderr, "*********************************************************************************\n");
733 			fprintf(stderr, "* WARNING: Unknown option 'RADV_PERFTEST=llvm'. Did you mean 'RADV_DEBUG=llvm'? *\n");
734 			fprintf(stderr, "*********************************************************************************\n");
735 			abort();
736 		}
737 	}
738 
739 	if (instance->debug_flags & RADV_DEBUG_STARTUP)
740 		radv_logi("Created an instance");
741 
742 	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
743 		int idx;
744 		for (idx = 0; idx < RADV_INSTANCE_EXTENSION_COUNT; idx++) {
745 			if (!strcmp(pCreateInfo->ppEnabledExtensionNames[i],
746 				    radv_instance_extensions[idx].extensionName))
747 				break;
748 		}
749 
750 		if (idx >= RADV_INSTANCE_EXTENSION_COUNT ||
751 		    !radv_instance_extensions_supported.extensions[idx]) {
752 			vk_object_base_finish(&instance->base);
753 			vk_free2(&default_alloc, pAllocator, instance);
754 			return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT);
755 		}
756 
757 		instance->enabled_extensions.extensions[idx] = true;
758 	}
759 
760 	bool unchecked = instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS;
761 
762 	for (unsigned i = 0; i < ARRAY_SIZE(instance->dispatch.entrypoints); i++) {
763 		/* Vulkan requires that entrypoints for extensions which have
764 		 * not been enabled must not be advertised.
765 		 */
766 		if (!unchecked &&
767 		    !radv_instance_entrypoint_is_enabled(i, instance->apiVersion,
768 							 &instance->enabled_extensions)) {
769 			instance->dispatch.entrypoints[i] = NULL;
770 		} else {
771 			instance->dispatch.entrypoints[i] =
772 				radv_instance_dispatch_table.entrypoints[i];
773 		}
774 	}
775 
776 	 for (unsigned i = 0; i < ARRAY_SIZE(instance->physical_device_dispatch.entrypoints); i++) {
777 		/* Vulkan requires that entrypoints for extensions which have
778 		 * not been enabled must not be advertised.
779 		 */
780 		if (!unchecked &&
781 		    !radv_physical_device_entrypoint_is_enabled(i, instance->apiVersion,
782 								&instance->enabled_extensions)) {
783 			instance->physical_device_dispatch.entrypoints[i] = NULL;
784 		} else {
785 			instance->physical_device_dispatch.entrypoints[i] =
786 				radv_physical_device_dispatch_table.entrypoints[i];
787 		}
788 	}
789 
790 	for (unsigned i = 0; i < ARRAY_SIZE(instance->device_dispatch.entrypoints); i++) {
791 		/* Vulkan requires that entrypoints for extensions which have
792 		 * not been enabled must not be advertised.
793 		 */
794 		if (!unchecked &&
795 		    !radv_device_entrypoint_is_enabled(i, instance->apiVersion,
796 						       &instance->enabled_extensions, NULL)) {
797 			instance->device_dispatch.entrypoints[i] = NULL;
798 		} else {
799 			instance->device_dispatch.entrypoints[i] =
800 				radv_device_dispatch_table.entrypoints[i];
801 		}
802 	}
803 
804 	instance->physical_devices_enumerated = false;
805 	list_inithead(&instance->physical_devices);
806 
807 	result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
808 	if (result != VK_SUCCESS) {
809 		vk_object_base_finish(&instance->base);
810 		vk_free2(&default_alloc, pAllocator, instance);
811 		return vk_error(instance, result);
812 	}
813 
814 	glsl_type_singleton_init_or_ref();
815 
816 	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
817 
818 	radv_init_dri_options(instance);
819 	radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);
820 
821 	*pInstance = radv_instance_to_handle(instance);
822 
823 	return VK_SUCCESS;
824 }
825 
/* Destroy an instance created by radv_CreateInstance().
 *
 * A NULL handle is accepted and ignored. Otherwise every physical device
 * still linked into the instance is destroyed, followed by the copied
 * application/engine names, the GLSL type singleton reference, the driconf
 * state, the debug-report callbacks and finally the instance itself.
 *
 * \p pAllocator is not used here; memory is released through the allocator
 * stored in the instance.
 */
void radv_DestroyInstance(
	VkInstance                                  _instance,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (instance == NULL)
		return;

	/* The "safe" iterator is needed because each entry is freed while
	 * walking the list. */
	list_for_each_entry_safe(struct radv_physical_device, phys,
				 &instance->physical_devices, link)
		radv_physical_device_destroy(phys);

	vk_free(&instance->alloc, instance->engineName);
	vk_free(&instance->alloc, instance->applicationName);

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	glsl_type_singleton_decref();

	/* Option cache first, then the option descriptions. */
	driDestroyOptionCache(&instance->dri_options);
	driDestroyOptionInfo(&instance->available_dri_options);

	vk_debug_report_instance_destroy(&instance->debug_report_callbacks);

	vk_object_base_finish(&instance->base);
	vk_free(&instance->alloc, instance);
}
855 
856 static VkResult
radv_enumerate_physical_devices(struct radv_instance * instance)857 radv_enumerate_physical_devices(struct radv_instance *instance)
858 {
859 	if (instance->physical_devices_enumerated)
860 		return VK_SUCCESS;
861 
862 	instance->physical_devices_enumerated = true;
863 
864 	/* TODO: Check for more devices ? */
865 	drmDevicePtr devices[8];
866 	VkResult result = VK_SUCCESS;
867 	int max_devices;
868 
869 	if (getenv("RADV_FORCE_FAMILY")) {
870 		/* When RADV_FORCE_FAMILY is set, the driver creates a nul
871 		 * device that allows to test the compiler without having an
872 		 * AMDGPU instance.
873 		 */
874 		struct radv_physical_device *pdevice;
875 
876 		result = radv_physical_device_try_create(instance, NULL, &pdevice);
877 		if (result != VK_SUCCESS)
878 			return result;
879 
880 		list_addtail(&pdevice->link, &instance->physical_devices);
881 		return VK_SUCCESS;
882 	}
883 
884 	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
885 
886 	if (instance->debug_flags & RADV_DEBUG_STARTUP)
887 		radv_logi("Found %d drm nodes", max_devices);
888 
889 	if (max_devices < 1)
890 		return vk_error(instance, VK_SUCCESS);
891 
892 	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
893 		if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
894 		    devices[i]->bustype == DRM_BUS_PCI &&
895 		    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {
896 
897 			struct radv_physical_device *pdevice;
898 			result = radv_physical_device_try_create(instance, devices[i],
899 								 &pdevice);
900 			/* Incompatible DRM device, skip. */
901 			if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
902 				result = VK_SUCCESS;
903 				continue;
904 			}
905 
906 			/* Error creating the physical device, report the error. */
907 			if (result != VK_SUCCESS)
908 				break;
909 
910 			list_addtail(&pdevice->link, &instance->physical_devices);
911 		}
912 	}
913 	drmFreeDevices(devices, max_devices);
914 
915 	/* If we successfully enumerated any devices, call it success */
916 	return result;
917 }
918 
radv_EnumeratePhysicalDevices(VkInstance _instance,uint32_t * pPhysicalDeviceCount,VkPhysicalDevice * pPhysicalDevices)919 VkResult radv_EnumeratePhysicalDevices(
920 	VkInstance                                  _instance,
921 	uint32_t*                                   pPhysicalDeviceCount,
922 	VkPhysicalDevice*                           pPhysicalDevices)
923 {
924 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
925 	VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices,
926 			       pPhysicalDeviceCount);
927 
928 	VkResult result = radv_enumerate_physical_devices(instance);
929 	if (result != VK_SUCCESS)
930 		return result;
931 
932 	list_for_each_entry(struct radv_physical_device, pdevice,
933 			    &instance->physical_devices, link) {
934 		vk_outarray_append_typed(VkPhysicalDevice , &out, i) {
935 			*i = radv_physical_device_to_handle(pdevice);
936 		}
937 	}
938 
939 	return vk_outarray_status(&out);
940 }
941 
radv_EnumeratePhysicalDeviceGroups(VkInstance _instance,uint32_t * pPhysicalDeviceGroupCount,VkPhysicalDeviceGroupProperties * pPhysicalDeviceGroupProperties)942 VkResult radv_EnumeratePhysicalDeviceGroups(
943     VkInstance                                  _instance,
944     uint32_t*                                   pPhysicalDeviceGroupCount,
945     VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties)
946 {
947 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
948 	VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out,
949 			       pPhysicalDeviceGroupProperties,
950 			       pPhysicalDeviceGroupCount);
951 
952 	VkResult result = radv_enumerate_physical_devices(instance);
953 	if (result != VK_SUCCESS)
954 		return result;
955 
956 	list_for_each_entry(struct radv_physical_device, pdevice,
957 			    &instance->physical_devices, link) {
958 		vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p) {
959 			p->physicalDeviceCount = 1;
960 			memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
961 			p->physicalDevices[0] = radv_physical_device_to_handle(pdevice);
962 			p->subsetAllocation = false;
963 		}
964 	}
965 
966 	return vk_outarray_status(&out);
967 }
968 
/*
 * Fill *pFeatures with the core Vulkan 1.0 feature bits.
 *
 * Everything is a fixed value except textureCompressionETC2, which is taken
 * from radv_device_supports_etc(). Members not listed below end up zero.
 */
void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures*                   pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

	const VkPhysicalDeviceFeatures supported = {
		.robustBufferAccess = true,
		.fullDrawIndexUint32 = true,
		.imageCubeArray = true,
		.independentBlend = true,
		.geometryShader = true,
		.tessellationShader = true,
		.sampleRateShading = true,
		.dualSrcBlend = true,
		.logicOp = true,
		.multiDrawIndirect = true,
		.drawIndirectFirstInstance = true,
		.depthClamp = true,
		.depthBiasClamp = true,
		.fillModeNonSolid = true,
		.depthBounds = true,
		.wideLines = true,
		.largePoints = true,
		.alphaToOne = false,
		.multiViewport = true,
		.samplerAnisotropy = true,
		.textureCompressionETC2 = radv_device_supports_etc(pdevice),
		.textureCompressionASTC_LDR = false,
		.textureCompressionBC = true,
		.occlusionQueryPrecise = true,
		.pipelineStatisticsQuery = true,
		.vertexPipelineStoresAndAtomics = true,
		.fragmentStoresAndAtomics = true,
		.shaderTessellationAndGeometryPointSize = true,
		.shaderImageGatherExtended = true,
		.shaderStorageImageExtendedFormats = true,
		.shaderStorageImageMultisample = true,
		.shaderUniformBufferArrayDynamicIndexing = true,
		.shaderSampledImageArrayDynamicIndexing = true,
		.shaderStorageBufferArrayDynamicIndexing = true,
		.shaderStorageImageArrayDynamicIndexing = true,
		.shaderStorageImageReadWithoutFormat = true,
		.shaderStorageImageWriteWithoutFormat = true,
		.shaderClipDistance = true,
		.shaderCullDistance = true,
		.shaderFloat64 = true,
		.shaderInt64 = true,
		.shaderInt16 = true,
		.sparseBinding = true,
		.variableMultisampleRate = true,
		.shaderResourceMinLod = true,
		.inheritedQueries = true,
	};

	memset(pFeatures, 0, sizeof(*pFeatures));
	*pFeatures = supported;
}
1025 
1026 static void
radv_get_physical_device_features_1_1(struct radv_physical_device * pdevice,VkPhysicalDeviceVulkan11Features * f)1027 radv_get_physical_device_features_1_1(struct radv_physical_device *pdevice,
1028 				      VkPhysicalDeviceVulkan11Features *f)
1029 {
1030 	assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
1031 
1032 	f->storageBuffer16BitAccess            = true;
1033 	f->uniformAndStorageBuffer16BitAccess  = true;
1034 	f->storagePushConstant16               = true;
1035 	f->storageInputOutput16                = pdevice->rad_info.has_packed_math_16bit && (LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm);
1036 	f->multiview                           = true;
1037 	f->multiviewGeometryShader             = true;
1038 	f->multiviewTessellationShader         = true;
1039 	f->variablePointersStorageBuffer       = true;
1040 	f->variablePointers                    = true;
1041 	f->protectedMemory                     = false;
1042 	f->samplerYcbcrConversion              = true;
1043 	f->shaderDrawParameters                = true;
1044 }
1045 
1046 static void
radv_get_physical_device_features_1_2(struct radv_physical_device * pdevice,VkPhysicalDeviceVulkan12Features * f)1047 radv_get_physical_device_features_1_2(struct radv_physical_device *pdevice,
1048 				      VkPhysicalDeviceVulkan12Features *f)
1049 {
1050 	assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
1051 
1052 	f->samplerMirrorClampToEdge = true;
1053 	f->drawIndirectCount = true;
1054 	f->storageBuffer8BitAccess = true;
1055 	f->uniformAndStorageBuffer8BitAccess = true;
1056 	f->storagePushConstant8 = true;
1057 	f->shaderBufferInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
1058 	f->shaderSharedInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
1059 	f->shaderFloat16 = pdevice->rad_info.has_packed_math_16bit;
1060 	f->shaderInt8 = true;
1061 
1062 	f->descriptorIndexing = true;
1063 	f->shaderInputAttachmentArrayDynamicIndexing = true;
1064 	f->shaderUniformTexelBufferArrayDynamicIndexing = true;
1065 	f->shaderStorageTexelBufferArrayDynamicIndexing = true;
1066 	f->shaderUniformBufferArrayNonUniformIndexing = true;
1067 	f->shaderSampledImageArrayNonUniformIndexing = true;
1068 	f->shaderStorageBufferArrayNonUniformIndexing = true;
1069 	f->shaderStorageImageArrayNonUniformIndexing = true;
1070 	f->shaderInputAttachmentArrayNonUniformIndexing = true;
1071 	f->shaderUniformTexelBufferArrayNonUniformIndexing = true;
1072 	f->shaderStorageTexelBufferArrayNonUniformIndexing = true;
1073 	f->descriptorBindingUniformBufferUpdateAfterBind = true;
1074 	f->descriptorBindingSampledImageUpdateAfterBind = true;
1075 	f->descriptorBindingStorageImageUpdateAfterBind = true;
1076 	f->descriptorBindingStorageBufferUpdateAfterBind = true;
1077 	f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
1078 	f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
1079 	f->descriptorBindingUpdateUnusedWhilePending = true;
1080 	f->descriptorBindingPartiallyBound = true;
1081 	f->descriptorBindingVariableDescriptorCount = true;
1082 	f->runtimeDescriptorArray = true;
1083 
1084 	f->samplerFilterMinmax = true;
1085 	f->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
1086 	f->imagelessFramebuffer = true;
1087 	f->uniformBufferStandardLayout = true;
1088 	f->shaderSubgroupExtendedTypes = true;
1089 	f->separateDepthStencilLayouts = true;
1090 	f->hostQueryReset = true;
1091 	f->timelineSemaphore = pdevice->rad_info.has_syncobj_wait_for_submit;
1092 	f->bufferDeviceAddress = true;
1093 	f->bufferDeviceAddressCaptureReplay = false;
1094 	f->bufferDeviceAddressMultiDevice = false;
1095 	f->vulkanMemoryModel = true;
1096 	f->vulkanMemoryModelDeviceScope = true;
1097 	f->vulkanMemoryModelAvailabilityVisibilityChains = false;
1098 	f->shaderOutputViewportIndex = true;
1099 	f->shaderOutputLayer = true;
1100 	f->subgroupBroadcastDynamicId = true;
1101 }
1102 
/*
 * Fill pFeatures->features and every recognised structure chained through
 * pFeatures->pNext.
 *
 * The Vulkan 1.1 / 1.2 core feature sets are computed once into local
 * structures; the per-extension structures that were promoted to core copy
 * their members from those locals, so both views always agree. Structures
 * with an unrecognised sType are left untouched.
 */
void radv_GetPhysicalDeviceFeatures2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures2                  *pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);

	VkPhysicalDeviceVulkan11Features core_1_1 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
	};
	radv_get_physical_device_features_1_1(pdevice, &core_1_1);

	VkPhysicalDeviceVulkan12Features core_1_2 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
	};
	radv_get_physical_device_features_1_2(pdevice, &core_1_2);

/* Copy one member from the matching core_<major>_<minor> local into the
 * structure pointed to by the case-local "dst". */
#define COPY_CORE(major, minor, field) \
   dst->field = core_##major##_##minor.field

	vk_foreach_struct(ext, pFeatures->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
			VkPhysicalDeviceVariablePointersFeatures *dst = (void *)ext;
			COPY_CORE(1, 1, variablePointersStorageBuffer);
			COPY_CORE(1, 1, variablePointers);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
			VkPhysicalDeviceMultiviewFeatures *dst = (void *)ext;
			COPY_CORE(1, 1, multiview);
			COPY_CORE(1, 1, multiviewGeometryShader);
			COPY_CORE(1, 1, multiviewTessellationShader);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
			VkPhysicalDeviceShaderDrawParametersFeatures *dst = (void *)ext;
			COPY_CORE(1, 1, shaderDrawParameters);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
			VkPhysicalDeviceProtectedMemoryFeatures *dst = (void *)ext;
			COPY_CORE(1, 1, protectedMemory);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
			VkPhysicalDevice16BitStorageFeatures *dst = (void *)ext;
			COPY_CORE(1, 1, storageBuffer16BitAccess);
			COPY_CORE(1, 1, uniformAndStorageBuffer16BitAccess);
			COPY_CORE(1, 1, storagePushConstant16);
			COPY_CORE(1, 1, storageInputOutput16);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
			VkPhysicalDeviceSamplerYcbcrConversionFeatures *dst = (void *)ext;
			COPY_CORE(1, 1, samplerYcbcrConversion);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES: {
			VkPhysicalDeviceDescriptorIndexingFeatures *dst = (void *)ext;
			COPY_CORE(1, 2, shaderInputAttachmentArrayDynamicIndexing);
			COPY_CORE(1, 2, shaderUniformTexelBufferArrayDynamicIndexing);
			COPY_CORE(1, 2, shaderStorageTexelBufferArrayDynamicIndexing);
			COPY_CORE(1, 2, shaderUniformBufferArrayNonUniformIndexing);
			COPY_CORE(1, 2, shaderSampledImageArrayNonUniformIndexing);
			COPY_CORE(1, 2, shaderStorageBufferArrayNonUniformIndexing);
			COPY_CORE(1, 2, shaderStorageImageArrayNonUniformIndexing);
			COPY_CORE(1, 2, shaderInputAttachmentArrayNonUniformIndexing);
			COPY_CORE(1, 2, shaderUniformTexelBufferArrayNonUniformIndexing);
			COPY_CORE(1, 2, shaderStorageTexelBufferArrayNonUniformIndexing);
			COPY_CORE(1, 2, descriptorBindingUniformBufferUpdateAfterBind);
			COPY_CORE(1, 2, descriptorBindingSampledImageUpdateAfterBind);
			COPY_CORE(1, 2, descriptorBindingStorageImageUpdateAfterBind);
			COPY_CORE(1, 2, descriptorBindingStorageBufferUpdateAfterBind);
			COPY_CORE(1, 2, descriptorBindingUniformTexelBufferUpdateAfterBind);
			COPY_CORE(1, 2, descriptorBindingStorageTexelBufferUpdateAfterBind);
			COPY_CORE(1, 2, descriptorBindingUpdateUnusedWhilePending);
			COPY_CORE(1, 2, descriptorBindingPartiallyBound);
			COPY_CORE(1, 2, descriptorBindingVariableDescriptorCount);
			COPY_CORE(1, 2, runtimeDescriptorArray);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
			VkPhysicalDeviceConditionalRenderingFeaturesEXT *dst = (void *)ext;
			dst->conditionalRendering = true;
			dst->inheritedConditionalRendering = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *dst = (void *)ext;
			dst->vertexAttributeInstanceRateDivisor = true;
			dst->vertexAttributeInstanceRateZeroDivisor = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
			VkPhysicalDeviceTransformFeedbackFeaturesEXT *dst = (void *)ext;
			dst->transformFeedback = true;
			dst->geometryStreams = !pdevice->use_ngg_streamout;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
			VkPhysicalDeviceScalarBlockLayoutFeatures *dst = (void *)ext;
			COPY_CORE(1, 2, scalarBlockLayout);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
			VkPhysicalDeviceMemoryPriorityFeaturesEXT *dst = (void *)ext;
			dst->memoryPriority = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
			VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *dst = (void *)ext;
			dst->bufferDeviceAddress = true;
			dst->bufferDeviceAddressCaptureReplay = false;
			dst->bufferDeviceAddressMultiDevice = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES: {
			VkPhysicalDeviceBufferDeviceAddressFeatures *dst = (void *)ext;
			COPY_CORE(1, 2, bufferDeviceAddress);
			COPY_CORE(1, 2, bufferDeviceAddressCaptureReplay);
			COPY_CORE(1, 2, bufferDeviceAddressMultiDevice);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
			VkPhysicalDeviceDepthClipEnableFeaturesEXT *dst = (void *)ext;
			dst->depthClipEnable = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES: {
			VkPhysicalDeviceHostQueryResetFeatures *dst = (void *)ext;
			COPY_CORE(1, 2, hostQueryReset);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES: {
			VkPhysicalDevice8BitStorageFeatures *dst = (void *)ext;
			COPY_CORE(1, 2, storageBuffer8BitAccess);
			COPY_CORE(1, 2, uniformAndStorageBuffer8BitAccess);
			COPY_CORE(1, 2, storagePushConstant8);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {
			VkPhysicalDeviceShaderFloat16Int8Features *dst = (void *)ext;
			COPY_CORE(1, 2, shaderFloat16);
			COPY_CORE(1, 2, shaderInt8);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES: {
			VkPhysicalDeviceShaderAtomicInt64Features *dst = (void *)ext;
			COPY_CORE(1, 2, shaderBufferInt64Atomics);
			COPY_CORE(1, 2, shaderSharedInt64Atomics);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
			VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *dst = (void *)ext;
			dst->shaderDemoteToHelperInvocation = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
			VkPhysicalDeviceInlineUniformBlockFeaturesEXT *dst = (void *)ext;
			dst->inlineUniformBlock = true;
			dst->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
			VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *dst = (void *)ext;
			dst->computeDerivativeGroupQuads = false;
			dst->computeDerivativeGroupLinear = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
			VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *dst = (void *)ext;
			dst->ycbcrImageArrays = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES: {
			VkPhysicalDeviceUniformBufferStandardLayoutFeatures *dst = (void *)ext;
			COPY_CORE(1, 2, uniformBufferStandardLayout);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
			VkPhysicalDeviceIndexTypeUint8FeaturesEXT *dst = (void *)ext;
			dst->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES: {
			VkPhysicalDeviceImagelessFramebufferFeatures *dst = (void *)ext;
			COPY_CORE(1, 2, imagelessFramebuffer);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
			VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *dst = (void *)ext;
			dst->pipelineExecutableInfo = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
			VkPhysicalDeviceShaderClockFeaturesKHR *dst = (void *)ext;
			dst->shaderSubgroupClock = true;
			dst->shaderDeviceClock = pdevice->rad_info.chip_class >= GFX8;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
			VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *dst = (void *)ext;
			dst->texelBufferAlignment = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
			VkPhysicalDeviceTimelineSemaphoreFeatures *dst = (void *)ext;
			COPY_CORE(1, 2, timelineSemaphore);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
			VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *dst = (void *)ext;
			dst->subgroupSizeControl = true;
			dst->computeFullSubgroups = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {
			VkPhysicalDeviceCoherentMemoryFeaturesAMD *dst = (void *)ext;
			dst->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {
			VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *dst = (void *)ext;
			COPY_CORE(1, 2, shaderSubgroupExtendedTypes);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES_KHR: {
			VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *dst = (void *)ext;
			COPY_CORE(1, 2, separateDepthStencilLayouts);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: {
			/* The caller's own structure is filled directly. */
			radv_get_physical_device_features_1_1(pdevice, (void *)ext);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: {
			/* The caller's own structure is filled directly. */
			radv_get_physical_device_features_1_2(pdevice, (void *)ext);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
			VkPhysicalDeviceLineRasterizationFeaturesEXT *dst = (void *)ext;
			dst->rectangularLines = false;
			dst->bresenhamLines = true;
			dst->smoothLines = false;
			dst->stippledRectangularLines = false;
			dst->stippledBresenhamLines = true;
			dst->stippledSmoothLines = false;
			break;
		}
		case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
			/* NOTE(review): this is a create-info structure rather
			 * than a *Features structure, and a boolean is stored
			 * into overallocationBehavior - confirm this is
			 * intended. */
			VkDeviceMemoryOverallocationCreateInfoAMD *dst = (void *)ext;
			dst->overallocationBehavior = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
			VkPhysicalDeviceRobustness2FeaturesEXT *dst = (void *)ext;
			dst->robustBufferAccess2 = true;
			dst->robustImageAccess2 = true;
			dst->nullDescriptor = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
			VkPhysicalDeviceCustomBorderColorFeaturesEXT *dst = (void *)ext;
			dst->customBorderColors = true;
			dst->customBorderColorWithoutFormat = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {
			VkPhysicalDevicePrivateDataFeaturesEXT *dst = (void *)ext;
			dst->privateData = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
			VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *dst = (void *)ext;
			dst->pipelineCreationCacheControl = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR: {
			VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *dst = (void *)ext;
			COPY_CORE(1, 2, vulkanMemoryModel);
			COPY_CORE(1, 2, vulkanMemoryModelDeviceScope);
			COPY_CORE(1, 2, vulkanMemoryModelAvailabilityVisibilityChains);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
			VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *dst = (void *)ext;
			dst->extendedDynamicState = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
			VkPhysicalDeviceImageRobustnessFeaturesEXT *dst = (void *)ext;
			dst->robustImageAccess = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
			VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *dst = (void *)ext;
			dst->shaderBufferFloat32Atomics = true;
			dst->shaderBufferFloat32AtomicAdd = false;
			dst->shaderBufferFloat64Atomics = true;
			dst->shaderBufferFloat64AtomicAdd = false;
			dst->shaderSharedFloat32Atomics = true;
			dst->shaderSharedFloat32AtomicAdd = pdevice->rad_info.chip_class >= GFX8 &&
							    (!pdevice->use_llvm || LLVM_VERSION_MAJOR >= 10);
			dst->shaderSharedFloat64Atomics = true;
			dst->shaderSharedFloat64AtomicAdd = false;
			dst->shaderImageFloat32Atomics = true;
			dst->shaderImageFloat32AtomicAdd = false;
			dst->sparseImageFloat32Atomics = false;
			dst->sparseImageFloat32AtomicAdd = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
			VkPhysicalDevice4444FormatsFeaturesEXT *dst = (void *)ext;
			dst->formatA4R4G4B4 = true;
			dst->formatA4B4G4R4 = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES_KHR: {
			VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *dst = (void *)ext;
			dst->shaderTerminateInvocation = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT: {
			VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *dst = (void *)ext;
			dst->shaderImageInt64Atomics = LLVM_VERSION_MAJOR >= 11 || !pdevice->use_llvm;
			dst->sparseImageInt64Atomics = false;
			break;
		}
		default:
			/* Unrecognised structure: leave it as the caller passed it. */
			break;
		}
	}
#undef COPY_CORE
}
1484 
1485 static size_t
radv_max_descriptor_set_size()1486 radv_max_descriptor_set_size()
1487 {
1488 	/* make sure that the entire descriptor set is addressable with a signed
1489 	 * 32-bit int. So the sum of all limits scaled by descriptor size has to
1490 	 * be at most 2 GiB. the combined image & samples object count as one of
1491 	 * both. This limit is for the pipeline layout, not for the set layout, but
1492 	 * there is no set limit, so we just set a pipeline limit. I don't think
1493 	 * any app is going to hit this soon. */
1494 	return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS
1495 	                     - MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1496 	          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1497 	           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1498 	           32 /* sampler, largest when combined with image */ +
1499 	           64 /* sampled image */ +
1500 	           64 /* storage image */);
1501 }
1502 
1503 static uint32_t
radv_uniform_buffer_offset_alignment(const struct radv_physical_device * pdevice)1504 radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdevice)
1505 {
1506 	uint32_t uniform_offset_alignment = driQueryOptioni(&pdevice->instance->dri_options,
1507 	                                                   "radv_override_uniform_offset_alignment");
1508 	if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
1509 		fprintf(stderr, "ERROR: invalid radv_override_uniform_offset_alignment setting %d:"
1510 		                "not a power of two\n", uniform_offset_alignment);
1511 		uniform_offset_alignment = 0;
1512 	}
1513 
1514 	/* Take at least the hardware limit. */
1515 	return MAX2(uniform_offset_alignment, 4);
1516 }
1517 
/**
 * Fill *pProperties with the core properties and limits of a physical
 * device.
 *
 * Most limits are compile-time constants. The ones derived at run time are
 * the descriptor limits (radv_max_descriptor_set_size()), the uniform buffer
 * offset alignment (which honours a driconf override) and the timestamp
 * period (derived from rad_info.clock_crystal_freq).
 *
 * \param physicalDevice  handle of the radv_physical_device to describe
 * \param pProperties     output structure; every member is overwritten
 */
void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties*                 pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	/* Bits 0..3: 1, 2, 4 and 8 samples. */
	VkSampleCountFlags sample_counts = 0xf;

	size_t max_descriptor_set_size = radv_max_descriptor_set_size();

	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D                      = (1 << 14),
		.maxImageDimension2D                      = (1 << 14),
		.maxImageDimension3D                      = (1 << 11),
		.maxImageDimensionCube                    = (1 << 14),
		.maxImageArrayLayers                      = (1 << 11),
		.maxTexelBufferElements                   = UINT32_MAX,
		.maxUniformBufferRange                    = UINT32_MAX,
		.maxStorageBufferRange                    = UINT32_MAX,
		.maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount                 = UINT32_MAX,
		.maxSamplerAllocationCount                = 64 * 1024,
		.bufferImageGranularity                   = 64, /* A cache line */
		.sparseAddressSpaceSize                   = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
		.maxBoundDescriptorSets                   = MAX_SETS,
		.maxPerStageDescriptorSamplers            = max_descriptor_set_size,
		.maxPerStageDescriptorUniformBuffers      = max_descriptor_set_size,
		.maxPerStageDescriptorStorageBuffers      = max_descriptor_set_size,
		.maxPerStageDescriptorSampledImages       = max_descriptor_set_size,
		.maxPerStageDescriptorStorageImages       = max_descriptor_set_size,
		.maxPerStageDescriptorInputAttachments    = max_descriptor_set_size,
		.maxPerStageResources                     = max_descriptor_set_size,
		.maxDescriptorSetSamplers                 = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffers           = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_UNIFORM_BUFFERS,
		.maxDescriptorSetStorageBuffers           = max_descriptor_set_size,
		.maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_STORAGE_BUFFERS,
		.maxDescriptorSetSampledImages            = max_descriptor_set_size,
		.maxDescriptorSetStorageImages            = max_descriptor_set_size,
		.maxDescriptorSetInputAttachments         = max_descriptor_set_size,
		.maxVertexInputAttributes                 = MAX_VERTEX_ATTRIBS,
		.maxVertexInputBindings                   = MAX_VBS,
		.maxVertexInputAttributeOffset            = 2047,
		.maxVertexInputBindingStride              = 2048,
		.maxVertexOutputComponents                = 128,
		.maxTessellationGenerationLevel           = 64,
		.maxTessellationPatchSize                 = 32,
		.maxTessellationControlPerVertexInputComponents = 128,
		.maxTessellationControlPerVertexOutputComponents = 128,
		.maxTessellationControlPerPatchOutputComponents = 120,
		.maxTessellationControlTotalOutputComponents = 4096,
		.maxTessellationEvaluationInputComponents = 128,
		.maxTessellationEvaluationOutputComponents = 128,
		.maxGeometryShaderInvocations             = 127,
		.maxGeometryInputComponents               = 64,
		.maxGeometryOutputComponents              = 128,
		.maxGeometryOutputVertices                = 256,
		.maxGeometryTotalOutputComponents         = 1024,
		.maxFragmentInputComponents               = 128,
		.maxFragmentOutputAttachments             = 8,
		.maxFragmentDualSrcAttachments            = 1,
		.maxFragmentCombinedOutputResources       = 8,
		.maxComputeSharedMemorySize               = 32768,
		.maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations           = 1024,
		.maxComputeWorkGroupSize = {
			1024,
			1024,
			1024
		},
		.subPixelPrecisionBits                    = 8,
		.subTexelPrecisionBits                    = 8,
		.mipmapPrecisionBits                      = 8,
		.maxDrawIndexedIndexValue                 = UINT32_MAX,
		.maxDrawIndirectCount                     = UINT32_MAX,
		.maxSamplerLodBias                        = 16,
		.maxSamplerAnisotropy                     = 16,
		.maxViewports                             = MAX_VIEWPORTS,
		.maxViewportDimensions                    = { (1 << 14), (1 << 14) },
		.viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits                     = 8,
		.minMemoryMapAlignment                    = 4096, /* A page */
		.minTexelBufferOffsetAlignment            = 4,
		.minUniformBufferOffsetAlignment          = radv_uniform_buffer_offset_alignment(pdevice),
		.minStorageBufferOffsetAlignment          = 4,
		.minTexelOffset                           = -32,
		.maxTexelOffset                           = 31,
		.minTexelGatherOffset                     = -32,
		.maxTexelGatherOffset                     = 31,
		.minInterpolationOffset                   = -2,
		.maxInterpolationOffset                   = 2,
		.subPixelInterpolationOffsetBits          = 8,
		.maxFramebufferWidth                      = (1 << 14),
		.maxFramebufferHeight                     = (1 << 14),
		.maxFramebufferLayers                     = (1 << 10),
		.framebufferColorSampleCounts             = sample_counts,
		.framebufferDepthSampleCounts             = sample_counts,
		.framebufferStencilSampleCounts           = sample_counts,
		.framebufferNoAttachmentsSampleCounts     = sample_counts,
		.maxColorAttachments                      = MAX_RTS,
		.sampledImageColorSampleCounts            = sample_counts,
		.sampledImageIntegerSampleCounts          = sample_counts,
		.sampledImageDepthSampleCounts            = sample_counts,
		.sampledImageStencilSampleCounts          = sample_counts,
		.storageImageSampleCounts                 = sample_counts,
		.maxSampleMaskWords                       = 1,
		.timestampComputeAndGraphics              = true,
		.timestampPeriod                          = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
		.maxClipDistances                         = 8,
		.maxCullDistances                         = 8,
		.maxCombinedClipAndCullDistances          = 8,
		.discreteQueuePriorities                  = 2,
		.pointSizeRange                           = { 0.0, 8191.875 },
		.lineWidthRange                           = { 0.0, 8191.875 },
		.pointSizeGranularity                     = (1.0 / 8.0),
		.lineWidthGranularity                     = (1.0 / 8.0),
		.strictLines                              = false, /* FINISHME */
		.standardSampleLocations                  = true,
		.optimalBufferCopyOffsetAlignment         = 128,
		.optimalBufferCopyRowPitchAlignment       = 128,
		.nonCoherentAtomSize                      = 64,
	};

	/* The compound literal zero-initializes every member that is not
	 * named here (deviceName and pipelineCacheUUID are filled in below).
	 */
	*pProperties = (VkPhysicalDeviceProperties) {
		.apiVersion = radv_physical_device_api_version(pdevice),
		.driverVersion = vk_get_driver_version(),
		.vendorID = ATI_VENDOR_ID,
		.deviceID = pdevice->rad_info.pci_id,
		.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
		.limits = limits,
		.sparseProperties = {0},
	};

	/* Bounded copy: deviceName is a fixed-size array, so never write past
	 * it regardless of how long pdevice->name is. snprintf() always
	 * NUL-terminates and truncates instead of overflowing.
	 */
	snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
	         "%s", pdevice->name);
	memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}
1653 
1654 static void
radv_get_physical_device_properties_1_1(struct radv_physical_device * pdevice,VkPhysicalDeviceVulkan11Properties * p)1655 radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
1656 					VkPhysicalDeviceVulkan11Properties *p)
1657 {
1658 	assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
1659 
1660 	memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
1661 	memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
1662 	memset(p->deviceLUID, 0, VK_LUID_SIZE);
1663 	/* The LUID is for Windows. */
1664 	p->deviceLUIDValid = false;
1665 	p->deviceNodeMask = 0;
1666 
1667 	p->subgroupSize = RADV_SUBGROUP_SIZE;
1668 	p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS |
1669 				     VK_SHADER_STAGE_COMPUTE_BIT;
1670 	p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
1671 					 VK_SUBGROUP_FEATURE_VOTE_BIT |
1672 					 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
1673 					 VK_SUBGROUP_FEATURE_BALLOT_BIT |
1674 					 VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
1675 					 VK_SUBGROUP_FEATURE_QUAD_BIT |
1676 					 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
1677 					 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
1678 	p->subgroupQuadOperationsInAllStages = true;
1679 
1680 	p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
1681 	p->maxMultiviewViewCount = MAX_VIEWS;
1682 	p->maxMultiviewInstanceIndex = INT_MAX;
1683 	p->protectedNoFault = false;
1684 	p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
1685 	p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1686 }
1687 
1688 static void
radv_get_physical_device_properties_1_2(struct radv_physical_device * pdevice,VkPhysicalDeviceVulkan12Properties * p)1689 radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
1690 					VkPhysicalDeviceVulkan12Properties *p)
1691 {
1692 	assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
1693 
1694 	p->driverID = VK_DRIVER_ID_MESA_RADV;
1695 	snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
1696 	snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
1697 		 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 " (%s)",
1698 		 radv_get_compiler_string(pdevice));
1699 	p->conformanceVersion = (VkConformanceVersion) {
1700 		.major = 1,
1701 		.minor = 2,
1702 		.subminor = 3,
1703 		.patch = 0,
1704 	};
1705 
1706 	/* On AMD hardware, denormals and rounding modes for fp16/fp64 are
1707 	 * controlled by the same config register.
1708 	 */
1709 	if (pdevice->rad_info.has_packed_math_16bit) {
1710 		p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
1711 		p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
1712 	} else {
1713 		p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
1714 		p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
1715 	}
1716 
1717 	/* With LLVM, do not allow both preserving and flushing denorms because
1718 	 * different shaders in the same pipeline can have different settings and
1719 	 * this won't work for merged shaders. To make it work, this requires LLVM
1720 	 * support for changing the register. The same logic applies for the
1721 	 * rounding modes because they are configured with the same config
1722 	 * register.
1723 	 */
1724 	p->shaderDenormFlushToZeroFloat32 = true;
1725 	p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;
1726 	p->shaderRoundingModeRTEFloat32 = true;
1727 	p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
1728 	p->shaderSignedZeroInfNanPreserveFloat32 = true;
1729 
1730 	p->shaderDenormFlushToZeroFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
1731 	p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
1732 	p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
1733 	p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
1734 	p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
1735 
1736 	p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
1737 	p->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
1738 	p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;
1739 	p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
1740 	p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
1741 
1742 	p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
1743 	p->shaderUniformBufferArrayNonUniformIndexingNative = false;
1744 	p->shaderSampledImageArrayNonUniformIndexingNative = false;
1745 	p->shaderStorageBufferArrayNonUniformIndexingNative = false;
1746 	p->shaderStorageImageArrayNonUniformIndexingNative = false;
1747 	p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
1748 	p->robustBufferAccessUpdateAfterBind = false;
1749 	p->quadDivergentImplicitLod = false;
1750 
1751 	size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
1752 		MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1753 			(32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1754 			 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1755 			 32 /* sampler, largest when combined with image */ +
1756 			 64 /* sampled image */ +
1757 			 64 /* storage image */);
1758 	p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
1759 	p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1760 	p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1761 	p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
1762 	p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
1763 	p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
1764 	p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
1765 	p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
1766 	p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1767 	p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
1768 	p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1769 	p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
1770 	p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
1771 	p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
1772 	p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
1773 
1774 	/* We support all of the depth resolve modes */
1775 	p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
1776 					    VK_RESOLVE_MODE_AVERAGE_BIT_KHR |
1777 					    VK_RESOLVE_MODE_MIN_BIT_KHR |
1778 					    VK_RESOLVE_MODE_MAX_BIT_KHR;
1779 
1780 	/* Average doesn't make sense for stencil so we don't support that */
1781 	p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
1782 					      VK_RESOLVE_MODE_MIN_BIT_KHR |
1783 					      VK_RESOLVE_MODE_MAX_BIT_KHR;
1784 
1785 	p->independentResolveNone = true;
1786 	p->independentResolve = true;
1787 
1788 	/* GFX6-8 only support single channel min/max filter. */
1789 	p->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
1790 	p->filterMinmaxSingleComponentFormats = true;
1791 
1792 	p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
1793 
1794 	p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
1795 }
1796 
/**
 * Fill pProperties->properties via radv_GetPhysicalDeviceProperties(), then
 * walk the pProperties->pNext chain and fill every extension struct whose
 * sType is handled in the switch below. Structs with an unrecognized sType
 * are left untouched.
 *
 * \param physicalDevice  handle of the radv_physical_device to describe
 * \param pProperties     output structure heading the pNext chain
 */
void radv_GetPhysicalDeviceProperties2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties2                *pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

	/* Fill local copies of the Vulkan 1.1 and 1.2 core property structs
	 * once; the per-extension cases below copy individual fields out of
	 * them with the CORE_* macros.
	 */
	VkPhysicalDeviceVulkan11Properties core_1_1 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
	};
	radv_get_physical_device_properties_1_1(pdevice, &core_1_1);

	VkPhysicalDeviceVulkan12Properties core_1_2 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
	};
	radv_get_physical_device_properties_1_2(pdevice, &core_1_2);

/* Copy core_<major>_<minor>.core_property into properties->ext_property.
 * The expansion references a local variable named `properties`, so every
 * case that uses these macros must declare its struct pointer under
 * exactly that name.
 */
#define CORE_RENAMED_PROPERTY(major, minor, ext_property, core_property) \
   memcpy(&properties->ext_property, &core_##major##_##minor.core_property, \
          sizeof(core_##major##_##minor.core_property))

/* Same as CORE_RENAMED_PROPERTY for fields that have the same name in the
 * extension struct and in the core struct.
 */
#define CORE_PROPERTY(major, minor, property) \
   CORE_RENAMED_PROPERTY(major, minor, property, property)

	vk_foreach_struct(ext, pProperties->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
			VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
				(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
			properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
			VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties*)ext;
			CORE_PROPERTY(1, 1, deviceUUID);
			CORE_PROPERTY(1, 1, driverUUID);
			CORE_PROPERTY(1, 1, deviceLUID);
			CORE_PROPERTY(1, 1, deviceLUIDValid);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
			VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties*)ext;
			CORE_PROPERTY(1, 1, maxMultiviewViewCount);
			CORE_PROPERTY(1, 1, maxMultiviewInstanceIndex);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
			VkPhysicalDevicePointClippingProperties *properties =
			    (VkPhysicalDevicePointClippingProperties*)ext;
			CORE_PROPERTY(1, 1, pointClippingBehavior);
			break;
		}
		case  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
			VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
			    (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
			properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
			VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
			    (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
			properties->minImportedHostPointerAlignment = 4096;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
			VkPhysicalDeviceSubgroupProperties *properties =
			    (VkPhysicalDeviceSubgroupProperties*)ext;
			/* The core 1.1 struct prefixes these fields with
			 * "subgroup"; the extension struct does not.
			 */
			CORE_PROPERTY(1, 1, subgroupSize);
			CORE_RENAMED_PROPERTY(1, 1, supportedStages,
						    subgroupSupportedStages);
			CORE_RENAMED_PROPERTY(1, 1, supportedOperations,
						    subgroupSupportedOperations);
			CORE_RENAMED_PROPERTY(1, 1, quadOperationsInAllStages,
						    subgroupQuadOperationsInAllStages);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
			VkPhysicalDeviceMaintenance3Properties *properties =
			    (VkPhysicalDeviceMaintenance3Properties*)ext;
			CORE_PROPERTY(1, 1, maxPerSetDescriptors);
			CORE_PROPERTY(1, 1, maxMemoryAllocationSize);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES: {
			VkPhysicalDeviceSamplerFilterMinmaxProperties *properties =
				(VkPhysicalDeviceSamplerFilterMinmaxProperties *)ext;
			CORE_PROPERTY(1, 2, filterMinmaxImageComponentMapping);
			CORE_PROPERTY(1, 2, filterMinmaxSingleComponentFormats);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
			VkPhysicalDeviceShaderCorePropertiesAMD *properties =
				(VkPhysicalDeviceShaderCorePropertiesAMD *)ext;

			/* wavefrontSize is fixed at 64 here, and the wave64
			 * variants of the rad_info fields are used throughout.
			 */

			/* Shader engines. */
			properties->shaderEngineCount =
				pdevice->rad_info.max_se;
			properties->shaderArraysPerEngineCount =
				pdevice->rad_info.max_sh_per_se;
			properties->computeUnitsPerShaderArray =
				pdevice->rad_info.min_good_cu_per_sa;
			properties->simdPerComputeUnit =
				pdevice->rad_info.num_simd_per_compute_unit;
			properties->wavefrontsPerSimd =
				pdevice->rad_info.max_wave64_per_simd;
			properties->wavefrontSize = 64;

			/* SGPR. */
			properties->sgprsPerSimd =
				pdevice->rad_info.num_physical_sgprs_per_simd;
			properties->minSgprAllocation =
				pdevice->rad_info.min_sgpr_alloc;
			properties->maxSgprAllocation =
				pdevice->rad_info.max_sgpr_alloc;
			properties->sgprAllocationGranularity =
				pdevice->rad_info.sgpr_alloc_granularity;

			/* VGPR. */
			properties->vgprsPerSimd =
				pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
			properties->minVgprAllocation =
				pdevice->rad_info.min_wave64_vgpr_alloc;
			properties->maxVgprAllocation =
				pdevice->rad_info.max_vgpr_alloc;
			properties->vgprAllocationGranularity =
				pdevice->rad_info.wave64_vgpr_alloc_granularity;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
			VkPhysicalDeviceShaderCoreProperties2AMD *properties =
				(VkPhysicalDeviceShaderCoreProperties2AMD *)ext;

			properties->shaderCoreFeatures = 0;
			properties->activeComputeUnitCount =
				pdevice->rad_info.num_good_compute_units;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
				(VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
			properties->maxVertexAttribDivisor = UINT32_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES: {
			VkPhysicalDeviceDescriptorIndexingProperties *properties =
				(VkPhysicalDeviceDescriptorIndexingProperties*)ext;
			CORE_PROPERTY(1, 2, maxUpdateAfterBindDescriptorsInAllPools);
			CORE_PROPERTY(1, 2, shaderUniformBufferArrayNonUniformIndexingNative);
			CORE_PROPERTY(1, 2, shaderSampledImageArrayNonUniformIndexingNative);
			CORE_PROPERTY(1, 2, shaderStorageBufferArrayNonUniformIndexingNative);
			CORE_PROPERTY(1, 2, shaderStorageImageArrayNonUniformIndexingNative);
			CORE_PROPERTY(1, 2, shaderInputAttachmentArrayNonUniformIndexingNative);
			CORE_PROPERTY(1, 2, robustBufferAccessUpdateAfterBind);
			CORE_PROPERTY(1, 2, quadDivergentImplicitLod);
			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSamplers);
			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindUniformBuffers);
			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageBuffers);
			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSampledImages);
			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageImages);
			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindInputAttachments);
			CORE_PROPERTY(1, 2, maxPerStageUpdateAfterBindResources);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSamplers);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffers);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffersDynamic);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffers);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffersDynamic);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSampledImages);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageImages);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindInputAttachments);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
			VkPhysicalDeviceProtectedMemoryProperties *properties =
				(VkPhysicalDeviceProtectedMemoryProperties *)ext;
			CORE_PROPERTY(1, 1, protectedNoFault);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
			VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
				(VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
			/* Every optional capability is reported as zero/false. */
			properties->primitiveOverestimationSize = 0;
			properties->maxExtraPrimitiveOverestimationSize = 0;
			properties->extraPrimitiveOverestimationSizeGranularity = 0;
			properties->primitiveUnderestimation = false;
			properties->conservativePointAndLineRasterization = false;
			properties->degenerateTrianglesRasterized = false;
			properties->degenerateLinesRasterized = false;
			properties->fullyCoveredFragmentShaderInputVariable = false;
			properties->conservativeRasterizationPostDepthCoverage = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
			VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
				(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
			properties->pciDomain = pdevice->bus_info.domain;
			properties->pciBus = pdevice->bus_info.bus;
			properties->pciDevice = pdevice->bus_info.dev;
			properties->pciFunction = pdevice->bus_info.func;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES: {
			VkPhysicalDeviceDriverProperties *properties =
				(VkPhysicalDeviceDriverProperties *) ext;
			CORE_PROPERTY(1, 2, driverID);
			CORE_PROPERTY(1, 2, driverName);
			CORE_PROPERTY(1, 2, driverInfo);
			CORE_PROPERTY(1, 2, conformanceVersion);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
			VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
				(VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
			properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
			properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
			properties->maxTransformFeedbackBufferSize = UINT32_MAX;
			properties->maxTransformFeedbackStreamDataSize = 512;
			properties->maxTransformFeedbackBufferDataSize = UINT32_MAX;
			properties->maxTransformFeedbackBufferDataStride = 512;
			/* Queries and line/triangle streams are only reported
			 * when NGG streamout is not in use.
			 */
			properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;
			properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;
			properties->transformFeedbackRasterizationStreamSelect = false;
			properties->transformFeedbackDraw = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
			VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
				(VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;

			props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
			/* NOTE(review): the per-stage block counts below are
			 * derived from MAX_INLINE_UNIFORM_BLOCK_SIZE rather than
			 * MAX_INLINE_UNIFORM_BLOCK_COUNT - confirm this is intended.
			 */
			props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
			props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
			props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
			props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
			VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
				(VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
			properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT |
								 VK_SAMPLE_COUNT_4_BIT |
								 VK_SAMPLE_COUNT_8_BIT;
			properties->maxSampleLocationGridSize = (VkExtent2D){ 2 , 2 };
			/* 0.9375 == 15/16, the largest value representable
			 * with the 4 sub-pixel bits reported below.
			 */
			properties->sampleLocationCoordinateRange[0] = 0.0f;
			properties->sampleLocationCoordinateRange[1] = 0.9375f;
			properties->sampleLocationSubPixelBits = 4;
			properties->variableSampleLocations = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES: {
			VkPhysicalDeviceDepthStencilResolveProperties *properties =
				(VkPhysicalDeviceDepthStencilResolveProperties *)ext;
			CORE_PROPERTY(1, 2, supportedDepthResolveModes);
			CORE_PROPERTY(1, 2, supportedStencilResolveModes);
			CORE_PROPERTY(1, 2, independentResolveNone);
			CORE_PROPERTY(1, 2, independentResolve);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
			VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *properties =
				(VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
			properties->storageTexelBufferOffsetAlignmentBytes = 4;
			properties->storageTexelBufferOffsetSingleTexelAlignment = true;
			properties->uniformTexelBufferOffsetAlignmentBytes = 4;
			properties->uniformTexelBufferOffsetSingleTexelAlignment = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES : {
			VkPhysicalDeviceFloatControlsProperties *properties =
				(VkPhysicalDeviceFloatControlsProperties *)ext;
			CORE_PROPERTY(1, 2, denormBehaviorIndependence);
			CORE_PROPERTY(1, 2, roundingModeIndependence);
			CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat16);
			CORE_PROPERTY(1, 2, shaderDenormPreserveFloat16);
			CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat16);
			CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat16);
			CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat16);
			CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat32);
			CORE_PROPERTY(1, 2, shaderDenormPreserveFloat32);
			CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat32);
			CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat32);
			CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat32);
			CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat64);
			CORE_PROPERTY(1, 2, shaderDenormPreserveFloat64);
			CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat64);
			CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat64);
			CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat64);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: {
			VkPhysicalDeviceTimelineSemaphoreProperties *properties =
				(VkPhysicalDeviceTimelineSemaphoreProperties *) ext;
			CORE_PROPERTY(1, 2, maxTimelineSemaphoreValueDifference);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
			VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
				(VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
			/* Defaults: a fixed subgroup size of 64 with no stage
			 * allowed to request a specific size.
			 */
			props->minSubgroupSize = 64;
			props->maxSubgroupSize = 64;
			props->maxComputeWorkgroupSubgroups = UINT32_MAX;
			props->requiredSubgroupSizeStages = 0;

			if (pdevice->rad_info.chip_class >= GFX10) {
				/* Only GFX10+ supports wave32. */
				props->minSubgroupSize = 32;
				props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
			}
			break;
		}
		/* The caller chained a whole core struct: fill it in place
		 * rather than copying field by field from the local copies.
		 */
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES:
			radv_get_physical_device_properties_1_1(pdevice, (void *)ext);
			break;
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES:
			radv_get_physical_device_properties_1_2(pdevice, (void *)ext);
			break;
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
			VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
				(VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
			props->lineSubPixelPrecisionBits = 4;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
			VkPhysicalDeviceRobustness2PropertiesEXT *properties =
				(VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
			properties->robustStorageBufferAccessSizeAlignment = 4;
			properties->robustUniformBufferAccessSizeAlignment = 4;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
			VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
				(VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
			props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
			break;
		}
		default:
			/* Unknown or unsupported struct: leave it untouched. */
			break;
		}
	}
}
2136 
/*
 * Shared implementation behind the queue family property queries.
 *
 * pQueueFamilyProperties is an array of pointers to the structures to fill.
 * When it is NULL only the number of available families is stored in *pCount.
 * Otherwise *pCount is the number of entries the caller can accept on input
 * and the number of entries actually written on output.
 */
static void radv_get_physical_device_queue_family_properties(
	struct radv_physical_device*                pdevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties**                    pQueueFamilyProperties)
{
	/* A separate compute family is exposed only when compute rings are
	 * reported and the compute queue has not been disabled by debug flag.
	 */
	const bool has_compute_family =
		pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
		!(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE);

	if (!pQueueFamilyProperties) {
		*pCount = has_compute_family ? 2 : 1;
		return;
	}

	const uint32_t capacity = *pCount;
	if (capacity == 0)
		return;

	uint32_t written = 0;

	/* First family: graphics, compute, transfer and sparse binding. */
	*pQueueFamilyProperties[written] = (VkQueueFamilyProperties) {
		.queueFlags = VK_QUEUE_GRAPHICS_BIT |
		              VK_QUEUE_COMPUTE_BIT |
		              VK_QUEUE_TRANSFER_BIT |
		              VK_QUEUE_SPARSE_BINDING_BIT,
		.queueCount = 1,
		.timestampValidBits = 64,
		.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
	};
	written++;

	/* Second family: one queue per compute ring, if the caller has room. */
	if (has_compute_family && capacity > written) {
		*pQueueFamilyProperties[written] = (VkQueueFamilyProperties) {
			.queueFlags = VK_QUEUE_COMPUTE_BIT |
			              VK_QUEUE_TRANSFER_BIT |
			              VK_QUEUE_SPARSE_BINDING_BIT,
			.queueCount = pdevice->rad_info.num_rings[RING_COMPUTE],
			.timestampValidBits = 64,
			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
		};
		written++;
	}

	*pCount = written;
}
2186 
/*
 * Vulkan entry point: report the queue families of a physical device.
 *
 * With a NULL pQueueFamilyProperties only the family count is returned in
 * *pCount; otherwise up to *pCount entries of the caller's array are filled
 * and *pCount is updated to the number written.
 */
void radv_GetPhysicalDeviceQueueFamilyProperties(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties*                    pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}

	/* Only form pointers to elements the caller actually provided
	 * (*pCount of them): computing an address beyond one-past-the-end of
	 * the caller's array is undefined behaviour even if it is never
	 * dereferenced. The helper never writes an entry at or above *pCount,
	 * so the remaining slots can stay NULL.
	 */
	VkQueueFamilyProperties *properties[3] = { NULL, NULL, NULL };
	for (uint32_t i = 0;
	     i < sizeof(properties) / sizeof(properties[0]) && i < *pCount;
	     i++)
		properties[i] = &pQueueFamilyProperties[i];

	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}
2205 
/*
 * Vulkan entry point: report the queue families of a physical device using
 * the extensible VkQueueFamilyProperties2 structures.
 *
 * With a NULL pQueueFamilyProperties only the family count is returned in
 * *pCount; otherwise the embedded queueFamilyProperties member of up to
 * *pCount entries is filled and *pCount is updated to the number written.
 */
void radv_GetPhysicalDeviceQueueFamilyProperties2(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties2                   *pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}

	/* Only form pointers into elements the caller actually provided
	 * (*pCount of them): computing an address beyond the end of the
	 * caller's array is undefined behaviour even if it is never
	 * dereferenced. The helper never writes an entry at or above *pCount,
	 * so the remaining slots can stay NULL.
	 */
	VkQueueFamilyProperties *properties[3] = { NULL, NULL, NULL };
	for (uint32_t i = 0;
	     i < sizeof(properties) / sizeof(properties[0]) && i < *pCount;
	     i++)
		properties[i] = &pQueueFamilyProperties[i].queueFamilyProperties;

	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}
2224 
/*
 * Vulkan entry point: copy the memory properties stored on the physical
 * device into the caller's structure.
 */
void radv_GetPhysicalDeviceMemoryProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

	/* Plain structure copy of the stored properties. */
	*pMemoryProperties = pdevice->memory_properties;
}
2233 
2234 static void
radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryBudgetPropertiesEXT * memoryBudget)2235 radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
2236 				  VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2237 {
2238 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
2239 	VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
2240 
2241 	/* For all memory heaps, the computation of budget is as follow:
2242 	 *	heap_budget = heap_size - global_heap_usage + app_heap_usage
2243 	 *
2244 	 * The Vulkan spec 1.1.97 says that the budget should include any
2245 	 * currently allocated device memory.
2246 	 *
2247 	 * Note that the application heap usages are not really accurate (eg.
2248 	 * in presence of shared buffers).
2249 	 */
2250 	unsigned mask = device->heaps;
2251 	unsigned heap = 0;
2252 	while (mask) {
2253 		uint64_t internal_usage = 0, total_usage = 0;
2254 		unsigned type = 1u << u_bit_scan(&mask);
2255 
2256 		switch(type) {
2257 		case RADV_HEAP_VRAM:
2258 			internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
2259 			total_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE);
2260 			break;
2261 		case RADV_HEAP_VRAM_VIS:
2262 			internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS);
2263 			if (!(device->heaps & RADV_HEAP_VRAM))
2264 				internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
2265 			total_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE);
2266 			break;
2267 		case RADV_HEAP_GTT:
2268 			internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
2269 			total_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE);
2270 			break;
2271 		}
2272 
2273 		uint64_t free_space = device->memory_properties.memoryHeaps[heap].size -
2274 			MIN2(device->memory_properties.memoryHeaps[heap].size,
2275 			     total_usage);
2276 		memoryBudget->heapBudget[heap] = free_space + internal_usage;
2277 		memoryBudget->heapUsage[heap] = internal_usage;
2278 		++heap;
2279 	}
2280 
2281 	assert(heap == memory_properties->memoryHeapCount);
2282 
2283 	/* The heapBudget and heapUsage values must be zero for array elements
2284 	 * greater than or equal to
2285 	 * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
2286 	 */
2287 	for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
2288 		memoryBudget->heapBudget[i] = 0;
2289 		memoryBudget->heapUsage[i] = 0;
2290 	}
2291 }
2292 
/*
 * Vulkan entry point: return the core memory properties and, when the caller
 * chained a VkPhysicalDeviceMemoryBudgetPropertiesEXT structure, fill it too.
 */
void radv_GetPhysicalDeviceMemoryProperties2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceMemoryProperties2          *pMemoryProperties)
{
	VkPhysicalDeviceMemoryBudgetPropertiesEXT *budget_ext;

	radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
					       &pMemoryProperties->memoryProperties);

	/* The budget structure is optional; look it up in the pNext chain. */
	budget_ext = vk_find_struct(pMemoryProperties->pNext,
				    PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
	if (!budget_ext)
		return;

	radv_get_memory_budget_properties(physicalDevice, budget_ext);
}
2306 
radv_GetMemoryHostPointerPropertiesEXT(VkDevice _device,VkExternalMemoryHandleTypeFlagBits handleType,const void * pHostPointer,VkMemoryHostPointerPropertiesEXT * pMemoryHostPointerProperties)2307 VkResult radv_GetMemoryHostPointerPropertiesEXT(
2308 	VkDevice                                    _device,
2309 	VkExternalMemoryHandleTypeFlagBits          handleType,
2310 	const void                                 *pHostPointer,
2311 	VkMemoryHostPointerPropertiesEXT           *pMemoryHostPointerProperties)
2312 {
2313 	RADV_FROM_HANDLE(radv_device, device, _device);
2314 
2315 	switch (handleType)
2316 	{
2317 	case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
2318 		const struct radv_physical_device *physical_device = device->physical_device;
2319 		uint32_t memoryTypeBits = 0;
2320 		for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
2321 			if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
2322 			    !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
2323 				memoryTypeBits = (1 << i);
2324 				break;
2325 			}
2326 		}
2327 		pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
2328 		return VK_SUCCESS;
2329 	}
2330 	default:
2331 		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
2332 	}
2333 }
2334 
2335 static enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT * pObj)2336 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
2337 {
2338 	/* Default to MEDIUM when a specific global priority isn't requested */
2339 	if (!pObj)
2340 		return RADEON_CTX_PRIORITY_MEDIUM;
2341 
2342 	switch(pObj->globalPriority) {
2343 	case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
2344 		return RADEON_CTX_PRIORITY_REALTIME;
2345 	case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
2346 		return RADEON_CTX_PRIORITY_HIGH;
2347 	case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
2348 		return RADEON_CTX_PRIORITY_MEDIUM;
2349 	case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
2350 		return RADEON_CTX_PRIORITY_LOW;
2351 	default:
2352 		unreachable("Illegal global priority value");
2353 		return RADEON_CTX_PRIORITY_INVALID;
2354 	}
2355 }
2356 
2357 static int
radv_queue_init(struct radv_device * device,struct radv_queue * queue,uint32_t queue_family_index,int idx,VkDeviceQueueCreateFlags flags,const VkDeviceQueueGlobalPriorityCreateInfoEXT * global_priority)2358 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
2359 		uint32_t queue_family_index, int idx,
2360 		VkDeviceQueueCreateFlags flags,
2361 		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
2362 {
2363 	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
2364 	queue->device = device;
2365 	queue->queue_family_index = queue_family_index;
2366 	queue->queue_idx = idx;
2367 	queue->priority = radv_get_queue_global_priority(global_priority);
2368 	queue->flags = flags;
2369 	queue->hw_ctx = NULL;
2370 
2371 	VkResult result = device->ws->ctx_create(device->ws, queue->priority, &queue->hw_ctx);
2372 	if (result != VK_SUCCESS)
2373 		return vk_error(device->instance, result);
2374 
2375 	list_inithead(&queue->pending_submissions);
2376 	pthread_mutex_init(&queue->pending_mutex, NULL);
2377 
2378 	pthread_mutex_init(&queue->thread_mutex, NULL);
2379 	queue->thread_submission = NULL;
2380 	queue->thread_running = queue->thread_exit = false;
2381 	result = radv_create_pthread_cond(&queue->thread_cond);
2382 	if (result != VK_SUCCESS)
2383 		return vk_error(device->instance, result);
2384 
2385 	return VK_SUCCESS;
2386 }
2387 
2388 static void
radv_queue_finish(struct radv_queue * queue)2389 radv_queue_finish(struct radv_queue *queue)
2390 {
2391 	if (queue->thread_running) {
2392 		p_atomic_set(&queue->thread_exit, true);
2393 		pthread_cond_broadcast(&queue->thread_cond);
2394 		pthread_join(queue->submission_thread, NULL);
2395 	}
2396 	pthread_cond_destroy(&queue->thread_cond);
2397 	pthread_mutex_destroy(&queue->pending_mutex);
2398 	pthread_mutex_destroy(&queue->thread_mutex);
2399 
2400 	if (queue->hw_ctx)
2401 		queue->device->ws->ctx_destroy(queue->hw_ctx);
2402 
2403 	if (queue->initial_full_flush_preamble_cs)
2404 		queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
2405 	if (queue->initial_preamble_cs)
2406 		queue->device->ws->cs_destroy(queue->initial_preamble_cs);
2407 	if (queue->continue_preamble_cs)
2408 		queue->device->ws->cs_destroy(queue->continue_preamble_cs);
2409 	if (queue->descriptor_bo)
2410 		queue->device->ws->buffer_destroy(queue->descriptor_bo);
2411 	if (queue->scratch_bo)
2412 		queue->device->ws->buffer_destroy(queue->scratch_bo);
2413 	if (queue->esgs_ring_bo)
2414 		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
2415 	if (queue->gsvs_ring_bo)
2416 		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
2417 	if (queue->tess_rings_bo)
2418 		queue->device->ws->buffer_destroy(queue->tess_rings_bo);
2419 	if (queue->gds_bo)
2420 		queue->device->ws->buffer_destroy(queue->gds_bo);
2421 	if (queue->gds_oa_bo)
2422 		queue->device->ws->buffer_destroy(queue->gds_oa_bo);
2423 	if (queue->compute_scratch_bo)
2424 		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
2425 }
2426 
2427 static void
radv_bo_list_init(struct radv_bo_list * bo_list)2428 radv_bo_list_init(struct radv_bo_list *bo_list)
2429 {
2430 	u_rwlock_init(&bo_list->rwlock);
2431 	bo_list->list.count = bo_list->capacity = 0;
2432 	bo_list->list.bos = NULL;
2433 }
2434 
2435 static void
radv_bo_list_finish(struct radv_bo_list * bo_list)2436 radv_bo_list_finish(struct radv_bo_list *bo_list)
2437 {
2438 	free(bo_list->list.bos);
2439 	u_rwlock_destroy(&bo_list->rwlock);
2440 }
2441 
radv_bo_list_add(struct radv_device * device,struct radeon_winsys_bo * bo)2442 VkResult radv_bo_list_add(struct radv_device *device,
2443 			  struct radeon_winsys_bo *bo)
2444 {
2445 	struct radv_bo_list *bo_list = &device->bo_list;
2446 
2447 	if (bo->is_local)
2448 		return VK_SUCCESS;
2449 
2450 	if (unlikely(!device->use_global_bo_list))
2451 		return VK_SUCCESS;
2452 
2453 	u_rwlock_wrlock(&bo_list->rwlock);
2454 	if (bo_list->list.count == bo_list->capacity) {
2455 		unsigned capacity = MAX2(4, bo_list->capacity * 2);
2456 		void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo*));
2457 
2458 		if (!data) {
2459 			u_rwlock_wrunlock(&bo_list->rwlock);
2460 			return VK_ERROR_OUT_OF_HOST_MEMORY;
2461 		}
2462 
2463 		bo_list->list.bos = (struct radeon_winsys_bo**)data;
2464 		bo_list->capacity = capacity;
2465 	}
2466 
2467 	bo_list->list.bos[bo_list->list.count++] = bo;
2468 	u_rwlock_wrunlock(&bo_list->rwlock);
2469 	return VK_SUCCESS;
2470 }
2471 
radv_bo_list_remove(struct radv_device * device,struct radeon_winsys_bo * bo)2472 void radv_bo_list_remove(struct radv_device *device,
2473 			 struct radeon_winsys_bo *bo)
2474 {
2475 	struct radv_bo_list *bo_list = &device->bo_list;
2476 
2477 	if (bo->is_local)
2478 		return;
2479 
2480 	if (unlikely(!device->use_global_bo_list))
2481 		return;
2482 
2483 	u_rwlock_wrlock(&bo_list->rwlock);
2484 	/* Loop the list backwards so we find the most recently added
2485 	 * memory first. */
2486 	for(unsigned i = bo_list->list.count; i-- > 0;) {
2487 		if (bo_list->list.bos[i] == bo) {
2488 			bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1];
2489 			--bo_list->list.count;
2490 			break;
2491 		}
2492 	}
2493 	u_rwlock_wrunlock(&bo_list->rwlock);
2494 }
2495 
2496 static void
radv_device_init_gs_info(struct radv_device * device)2497 radv_device_init_gs_info(struct radv_device *device)
2498 {
2499 	device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
2500 						       device->physical_device->rad_info.family);
2501 }
2502 
radv_get_device_extension_index(const char * name)2503 static int radv_get_device_extension_index(const char *name)
2504 {
2505 	for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
2506 		if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
2507 			return i;
2508 	}
2509 	return -1;
2510 }
2511 
/*
 * Read an integer debug option from the environment.
 *
 * name:          environment variable to read.
 * default_value: value returned when the variable is unset, does not start
 *                with a number, or holds a number that does not fit.
 *
 * The value is parsed by strtol() with base 0, so decimal, octal ("0...")
 * and hexadecimal ("0x...") notations are accepted; characters following
 * the number are ignored.
 */
static int
radv_get_int_debug_option(const char *name, int default_value)
{
	const char *str = getenv(name);
	char *endptr;
	long value;

	if (!str)
		return default_value;

	value = strtol(str, &endptr, 0);
	if (str == endptr) {
		/* No digits found. */
		return default_value;
	}

	/* strtol() yields a long; reject anything that would be silently
	 * truncated when narrowed to the int returned to callers. The bounds
	 * come from <stdint.h>, assuming int is at least 32 bits wide.
	 */
	if (value < INT32_MIN || value > INT32_MAX)
		return default_value;

	return (int)value;
}
2533 
/*
 * Tell whether thread tracing was requested through the environment: either
 * RADV_THREAD_TRACE parses to a non-negative integer or
 * RADV_THREAD_TRACE_TRIGGER is set.
 */
static bool radv_thread_trace_enabled(void)
{
	/* -1 is the default, so an unset or unparsable RADV_THREAD_TRACE
	 * does not enable tracing on its own.
	 */
	return radv_get_int_debug_option("RADV_THREAD_TRACE", -1) >= 0 ||
	       getenv("RADV_THREAD_TRACE_TRIGGER") != NULL;
}
2539 
2540 static void
radv_device_init_dispatch(struct radv_device * device)2541 radv_device_init_dispatch(struct radv_device *device)
2542 {
2543 	const struct radv_instance *instance = device->physical_device->instance;
2544 	const struct radv_device_dispatch_table *dispatch_table_layer = NULL;
2545 	bool unchecked = instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS;
2546 
2547 	if (radv_thread_trace_enabled()) {
2548 		/* Use device entrypoints from the SQTT layer if enabled. */
2549 		dispatch_table_layer = &sqtt_device_dispatch_table;
2550 	}
2551 
2552 	for (unsigned i = 0; i < ARRAY_SIZE(device->dispatch.entrypoints); i++) {
2553 		/* Vulkan requires that entrypoints for extensions which have not been
2554 		 * enabled must not be advertised.
2555 		 */
2556 		if (!unchecked &&
2557 		    !radv_device_entrypoint_is_enabled(i, instance->apiVersion,
2558 						       &instance->enabled_extensions,
2559 						       &device->enabled_extensions)) {
2560 			device->dispatch.entrypoints[i] = NULL;
2561 		} else if (dispatch_table_layer &&
2562 			   dispatch_table_layer->entrypoints[i]) {
2563 			device->dispatch.entrypoints[i] =
2564 				dispatch_table_layer->entrypoints[i];
2565 		} else {
2566 			device->dispatch.entrypoints[i] =
2567 				radv_device_dispatch_table.entrypoints[i];
2568 		}
2569 	}
2570 }
2571 
2572 static VkResult
radv_create_pthread_cond(pthread_cond_t * cond)2573 radv_create_pthread_cond(pthread_cond_t *cond)
2574 {
2575 	pthread_condattr_t condattr;
2576 	if (pthread_condattr_init(&condattr)) {
2577 		return VK_ERROR_INITIALIZATION_FAILED;
2578 	}
2579 
2580 	if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC)) {
2581 		pthread_condattr_destroy(&condattr);
2582 		return VK_ERROR_INITIALIZATION_FAILED;
2583 	}
2584 	if (pthread_cond_init(cond, &condattr)) {
2585 		pthread_condattr_destroy(&condattr);
2586 		return VK_ERROR_INITIALIZATION_FAILED;
2587 	}
2588 	pthread_condattr_destroy(&condattr);
2589 	return VK_SUCCESS;
2590 }
2591 
2592 static VkResult
check_physical_device_features(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceFeatures * features)2593 check_physical_device_features(VkPhysicalDevice physicalDevice,
2594 			       const VkPhysicalDeviceFeatures *features)
2595 {
2596 	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
2597 	VkPhysicalDeviceFeatures supported_features;
2598 	radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
2599 	VkBool32 *supported_feature = (VkBool32 *)&supported_features;
2600 	VkBool32 *enabled_feature = (VkBool32 *)features;
2601 	unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
2602 	for (uint32_t i = 0; i < num_features; i++) {
2603 		if (enabled_feature[i] && !supported_feature[i])
2604 			return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
2605 	}
2606 
2607 	return VK_SUCCESS;
2608 }
2609 
radv_device_init_border_color(struct radv_device * device)2610 static VkResult radv_device_init_border_color(struct radv_device *device)
2611 {
2612 	device->border_color_data.bo =
2613 	device->ws->buffer_create(device->ws,
2614 					RADV_BORDER_COLOR_BUFFER_SIZE,
2615 					4096,
2616 					RADEON_DOMAIN_VRAM,
2617 					RADEON_FLAG_CPU_ACCESS |
2618 					RADEON_FLAG_READ_ONLY |
2619 					RADEON_FLAG_NO_INTERPROCESS_SHARING,
2620 					RADV_BO_PRIORITY_SHADER);
2621 
2622 	if (device->border_color_data.bo == NULL)
2623 		return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2624 
2625 	device->border_color_data.colors_gpu_ptr =
2626 		device->ws->buffer_map(device->border_color_data.bo);
2627 	if (!device->border_color_data.colors_gpu_ptr)
2628 		return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2629 	pthread_mutex_init(&device->border_color_data.mutex, NULL);
2630 
2631 	return VK_SUCCESS;
2632 }
2633 
radv_device_finish_border_color(struct radv_device * device)2634 static void radv_device_finish_border_color(struct radv_device *device)
2635 {
2636 	if (device->border_color_data.bo) {
2637 		device->ws->buffer_destroy(device->border_color_data.bo);
2638 
2639 		pthread_mutex_destroy(&device->border_color_data.mutex);
2640 	}
2641 }
2642 
2643 VkResult
_radv_device_set_lost(struct radv_device * device,const char * file,int line,const char * msg,...)2644 _radv_device_set_lost(struct radv_device *device,
2645 		      const char *file, int line,
2646 		      const char *msg, ...)
2647 {
2648 	VkResult err;
2649 	va_list ap;
2650 
2651 	p_atomic_inc(&device->lost);
2652 
2653 	va_start(ap, msg);
2654 	err = __vk_errorv(device->physical_device->instance, device,
2655 			  VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,
2656 			  VK_ERROR_DEVICE_LOST, file, line, msg, ap);
2657 	va_end(ap);
2658 
2659 	return err;
2660 }
2661 
radv_CreateDevice(VkPhysicalDevice physicalDevice,const VkDeviceCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkDevice * pDevice)2662 VkResult radv_CreateDevice(
2663 	VkPhysicalDevice                            physicalDevice,
2664 	const VkDeviceCreateInfo*                   pCreateInfo,
2665 	const VkAllocationCallbacks*                pAllocator,
2666 	VkDevice*                                   pDevice)
2667 {
2668 	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
2669 	VkResult result;
2670 	struct radv_device *device;
2671 
2672 	bool keep_shader_info = false;
2673 	bool robust_buffer_access = false;
2674 	bool overallocation_disallowed = false;
2675 	bool custom_border_colors = false;
2676 
2677 	/* Check enabled features */
2678 	if (pCreateInfo->pEnabledFeatures) {
2679 		result = check_physical_device_features(physicalDevice,
2680 							pCreateInfo->pEnabledFeatures);
2681 		if (result != VK_SUCCESS)
2682 			return result;
2683 
2684 		if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
2685 			robust_buffer_access = true;
2686 	}
2687 
2688 	vk_foreach_struct_const(ext, pCreateInfo->pNext) {
2689 		switch (ext->sType) {
2690 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
2691 			const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
2692 			result = check_physical_device_features(physicalDevice,
2693 								&features->features);
2694 			if (result != VK_SUCCESS)
2695 				return result;
2696 
2697 			if (features->features.robustBufferAccess)
2698 				robust_buffer_access = true;
2699 			break;
2700 		}
2701 		case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
2702 			const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
2703 			if (overallocation->overallocationBehavior == VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
2704 				overallocation_disallowed = true;
2705 			break;
2706 		}
2707 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
2708 			const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = (const void *)ext;
2709 			custom_border_colors = border_color_features->customBorderColors;
2710 			break;
2711 		}
2712 		default:
2713 			break;
2714 		}
2715 	}
2716 
2717 	device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
2718 			    sizeof(*device), 8,
2719 			    VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2720 	if (!device)
2721 		return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2722 
2723 	vk_device_init(&device->vk, pCreateInfo,
2724 		       &physical_device->instance->alloc, pAllocator);
2725 
2726 	device->instance = physical_device->instance;
2727 	device->physical_device = physical_device;
2728 
2729 	device->ws = physical_device->ws;
2730 
2731 	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
2732 		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
2733 		int index = radv_get_device_extension_index(ext_name);
2734 		if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
2735 			vk_free(&device->vk.alloc, device);
2736 			return vk_error(physical_device->instance, VK_ERROR_EXTENSION_NOT_PRESENT);
2737 		}
2738 
2739 		device->enabled_extensions.extensions[index] = true;
2740 	}
2741 
2742 	radv_device_init_dispatch(device);
2743 
2744 	keep_shader_info = device->enabled_extensions.AMD_shader_info;
2745 
2746 	/* With update after bind we can't attach bo's to the command buffer
2747 	 * from the descriptor set anymore, so we have to use a global BO list.
2748 	 */
2749 	device->use_global_bo_list =
2750 		(device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
2751 		device->enabled_extensions.EXT_descriptor_indexing ||
2752 		device->enabled_extensions.EXT_buffer_device_address ||
2753 		device->enabled_extensions.KHR_buffer_device_address;
2754 
2755 	device->robust_buffer_access = robust_buffer_access;
2756 
2757 	mtx_init(&device->shader_slab_mutex, mtx_plain);
2758 	list_inithead(&device->shader_slabs);
2759 
2760 	device->overallocation_disallowed = overallocation_disallowed;
2761 	mtx_init(&device->overallocation_mutex, mtx_plain);
2762 
2763 	radv_bo_list_init(&device->bo_list);
2764 
2765 	for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2766 		const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
2767 		uint32_t qfi = queue_create->queueFamilyIndex;
2768 		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
2769 			vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
2770 
2771 		assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
2772 
2773 		device->queues[qfi] = vk_alloc(&device->vk.alloc,
2774 					       queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2775 		if (!device->queues[qfi]) {
2776 			result = VK_ERROR_OUT_OF_HOST_MEMORY;
2777 			goto fail;
2778 		}
2779 
2780 		memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
2781 
2782 		device->queue_count[qfi] = queue_create->queueCount;
2783 
2784 		for (unsigned q = 0; q < queue_create->queueCount; q++) {
2785 			result = radv_queue_init(device, &device->queues[qfi][q],
2786 						 qfi, q, queue_create->flags,
2787 						 global_priority);
2788 			if (result != VK_SUCCESS)
2789 				goto fail;
2790 		}
2791 	}
2792 
2793 	device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
2794 			      !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
2795 
2796 	/* Disable DFSM by default. As of 2019-09-15 Talos on Low is still 3% slower on Raven. */
2797 	device->dfsm_allowed = device->pbb_allowed &&
2798 	                       (device->instance->perftest_flags & RADV_PERFTEST_DFSM);
2799 
2800 	device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
2801 
2802 	/* The maximum number of scratch waves. Scratch space isn't divided
2803 	 * evenly between CUs. The number is only a function of the number of CUs.
2804 	 * We can decrease the constant to decrease the scratch buffer size.
2805 	 *
2806 	 * sctx->scratch_waves must be >= the maximum possible size of
2807 	 * 1 threadgroup, so that the hw doesn't hang from being unable
2808 	 * to start any.
2809 	 *
2810 	 * The recommended value is 4 per CU at most. Higher numbers don't
2811 	 * bring much benefit, but they still occupy chip resources (think
2812 	 * async compute). I've seen ~2% performance difference between 4 and 32.
2813 	 */
2814 	uint32_t max_threads_per_block = 2048;
2815 	device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
2816 				     max_threads_per_block / 64);
2817 
2818 	device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
2819 
2820 	if (device->physical_device->rad_info.chip_class >= GFX7) {
2821 		/* If the KMD allows it (there is a KMD hw register for it),
2822 		 * allow launching waves out-of-order.
2823 		 */
2824 		device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
2825 	}
2826 
2827 	radv_device_init_gs_info(device);
2828 
2829 	device->tess_offchip_block_dw_size =
2830 		device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
2831 
2832 	if (getenv("RADV_TRACE_FILE")) {
2833 		fprintf(stderr, "***********************************************************************************\n");
2834 		fprintf(stderr, "* WARNING: RADV_TRACE_FILE=<file> is deprecated and replaced by RADV_DEBUG=hang *\n");
2835 		fprintf(stderr, "***********************************************************************************\n");
2836 		abort();
2837 	}
2838 
2839 	if (device->instance->debug_flags & RADV_DEBUG_HANG) {
2840 		/* Enable GPU hangs detection and dump logs if a GPU hang is
2841 		 * detected.
2842 		 */
2843 		keep_shader_info = true;
2844 
2845 		if (!radv_init_trace(device))
2846 			goto fail;
2847 
2848 		fprintf(stderr, "*****************************************************************************\n");
2849 		fprintf(stderr, "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
2850 		fprintf(stderr, "*****************************************************************************\n");
2851 
2852 		/* Wait for idle after every draw/dispatch to identify the
2853 		 * first bad call.
2854 		 */
2855 		device->instance->debug_flags |= RADV_DEBUG_SYNC_SHADERS;
2856 
2857 		radv_dump_enabled_options(device, stderr);
2858 	}
2859 
2860 	if (radv_thread_trace_enabled()) {
2861 		fprintf(stderr, "*************************************************\n");
2862 		fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
2863 		fprintf(stderr, "*************************************************\n");
2864 
2865 		if (device->physical_device->rad_info.chip_class < GFX8) {
2866 			fprintf(stderr, "GPU hardware not supported: refer to "
2867 					"the RGP documentation for the list of "
2868 					"supported GPUs!\n");
2869 			abort();
2870 		}
2871 
2872 		if (device->physical_device->rad_info.chip_class > GFX10) {
2873 			fprintf(stderr, "radv: Thread trace is not supported "
2874 					"for that GPU!\n");
2875 			exit(1);
2876 		}
2877 
2878 		/* Default buffer size set to 1MB per SE. */
2879 		device->thread_trace_buffer_size =
2880 			radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 1024 * 1024);
2881 		device->thread_trace_start_frame = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
2882 
2883 		const char *trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER");
2884 		if (trigger_file)
2885 			device->thread_trace_trigger_file = strdup(trigger_file);
2886 
2887 		if (!radv_thread_trace_init(device))
2888 			goto fail;
2889 	}
2890 
2891 	if (getenv("RADV_TRAP_HANDLER")) {
2892 		/* TODO: Add support for more hardware. */
2893 		assert(device->physical_device->rad_info.chip_class == GFX8);
2894 
2895 		fprintf(stderr, "**********************************************************************\n");
2896 		fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
2897 		fprintf(stderr, "**********************************************************************\n");
2898 
2899 		/* To get the disassembly of the faulty shaders, we have to
2900 		 * keep some shader info around.
2901 		 */
2902 		keep_shader_info = true;
2903 
2904 		if (!radv_trap_handler_init(device))
2905 			goto fail;
2906 	}
2907 
2908 	device->keep_shader_info = keep_shader_info;
2909 	result = radv_device_init_meta(device);
2910 	if (result != VK_SUCCESS)
2911 		goto fail;
2912 
2913 	radv_device_init_msaa(device);
2914 
2915  	/* If the border color extension is enabled, let's create the buffer we need. */
2916 	if (custom_border_colors) {
2917 		result = radv_device_init_border_color(device);
2918 		if (result != VK_SUCCESS)
2919 			goto fail;
2920 	}
2921 
2922 	for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
2923 		device->empty_cs[family] = device->ws->cs_create(device->ws, family);
2924 		if (!device->empty_cs[family])
2925 			goto fail;
2926 
2927 		switch (family) {
2928 		case RADV_QUEUE_GENERAL:
2929 			radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
2930 			radeon_emit(device->empty_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
2931 			radeon_emit(device->empty_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
2932 			break;
2933 		case RADV_QUEUE_COMPUTE:
2934 			radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
2935 			radeon_emit(device->empty_cs[family], 0);
2936 			break;
2937 		}
2938 
2939 		result = device->ws->cs_finalize(device->empty_cs[family]);
2940 		if (result != VK_SUCCESS)
2941 			goto fail;
2942 	}
2943 
2944 	if (device->physical_device->rad_info.chip_class >= GFX7)
2945 		cik_create_gfx_config(device);
2946 
2947 	VkPipelineCacheCreateInfo ci;
2948 	ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
2949 	ci.pNext = NULL;
2950 	ci.flags = 0;
2951 	ci.pInitialData = NULL;
2952 	ci.initialDataSize = 0;
2953 	VkPipelineCache pc;
2954 	result = radv_CreatePipelineCache(radv_device_to_handle(device),
2955 					  &ci, NULL, &pc);
2956 	if (result != VK_SUCCESS)
2957 		goto fail_meta;
2958 
2959 	device->mem_cache = radv_pipeline_cache_from_handle(pc);
2960 
2961 	result = radv_create_pthread_cond(&device->timeline_cond);
2962 	if (result != VK_SUCCESS)
2963 		goto fail_mem_cache;
2964 
2965 	device->force_aniso =
2966 		MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
2967 	if (device->force_aniso >= 0) {
2968 		fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
2969 			1 << util_logbase2(device->force_aniso));
2970 	}
2971 
2972 	*pDevice = radv_device_to_handle(device);
2973 	return VK_SUCCESS;
2974 
2975 fail_mem_cache:
2976 	radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
2977 fail_meta:
2978 	radv_device_finish_meta(device);
2979 fail:
2980 	radv_bo_list_finish(&device->bo_list);
2981 
2982 	radv_thread_trace_finish(device);
2983 	free(device->thread_trace_trigger_file);
2984 
2985 	radv_trap_handler_finish(device);
2986 
2987 	if (device->trace_bo)
2988 		device->ws->buffer_destroy(device->trace_bo);
2989 
2990 	if (device->gfx_init)
2991 		device->ws->buffer_destroy(device->gfx_init);
2992 
2993 	radv_device_finish_border_color(device);
2994 
2995 	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2996 		for (unsigned q = 0; q < device->queue_count[i]; q++)
2997 			radv_queue_finish(&device->queues[i][q]);
2998 		if (device->queue_count[i])
2999 			vk_free(&device->vk.alloc, device->queues[i]);
3000 	}
3001 
3002 	vk_free(&device->vk.alloc, device);
3003 	return result;
3004 }
3005 
/**
 * Destroy a logical device and release the resources it holds.
 *
 * A NULL device handle is accepted and ignored. pAllocator is not consulted
 * here; the device structure itself is released through device->vk.alloc.
 *
 * The teardown order below is deliberate and must be kept: buffers and
 * queues first, then meta state and the in-memory pipeline cache, then the
 * trap handler, shader slabs, timeline condition variable, BO list and
 * thread trace state, and finally the device allocation.
 */
void radv_DestroyDevice(
	VkDevice                                    _device,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	if (device == NULL) {
		return;
	}

	/* Optional buffers: only destroyed when they were created. */
	if (device->trace_bo != NULL) {
		device->ws->buffer_destroy(device->trace_bo);
	}
	if (device->gfx_init != NULL) {
		device->ws->buffer_destroy(device->gfx_init);
	}

	radv_device_finish_border_color(device);

	/* Per queue family: finish every queue, free the queue array and
	 * destroy the family's empty command stream.
	 */
	for (unsigned family = 0; family < RADV_MAX_QUEUE_FAMILIES; family++) {
		for (unsigned idx = 0; idx < device->queue_count[family]; idx++) {
			radv_queue_finish(&device->queues[family][idx]);
		}

		if (device->queue_count[family] != 0) {
			vk_free(&device->vk.alloc, device->queues[family]);
		}

		if (device->empty_cs[family] != NULL) {
			device->ws->cs_destroy(device->empty_cs[family]);
		}
	}

	radv_device_finish_meta(device);

	radv_DestroyPipelineCache(radv_device_to_handle(device),
				  radv_pipeline_cache_to_handle(device->mem_cache),
				  NULL);

	radv_trap_handler_finish(device);

	radv_destroy_shader_slabs(device);

	pthread_cond_destroy(&device->timeline_cond);
	radv_bo_list_finish(&device->bo_list);

	free(device->thread_trace_trigger_file);
	radv_thread_trace_finish(device);

	vk_free(&device->vk.alloc, device);
}
3048 
radv_EnumerateInstanceLayerProperties(uint32_t * pPropertyCount,VkLayerProperties * pProperties)3049 VkResult radv_EnumerateInstanceLayerProperties(
3050 	uint32_t*                                   pPropertyCount,
3051 	VkLayerProperties*                          pProperties)
3052 {
3053 	if (pProperties == NULL) {
3054 		*pPropertyCount = 0;
3055 		return VK_SUCCESS;
3056 	}
3057 
3058 	/* None supported at this time */
3059 	return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3060 }
3061 
radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice,uint32_t * pPropertyCount,VkLayerProperties * pProperties)3062 VkResult radv_EnumerateDeviceLayerProperties(
3063 	VkPhysicalDevice                            physicalDevice,
3064 	uint32_t*                                   pPropertyCount,
3065 	VkLayerProperties*                          pProperties)
3066 {
3067 	if (pProperties == NULL) {
3068 		*pPropertyCount = 0;
3069 		return VK_SUCCESS;
3070 	}
3071 
3072 	/* None supported at this time */
3073 	return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3074 }
3075 
/**
 * Look up a queue of the device by family index, queue index and flags.
 *
 * The queue is selected directly from device->queues using the indices in
 * pQueueInfo; no range checking is done here. *pQueue receives the queue
 * handle when the requested flags equal the flags stored in the queue, and
 * VK_NULL_HANDLE otherwise.
 */
void radv_GetDeviceQueue2(
	VkDevice                                    _device,
	const VkDeviceQueueInfo2*                   pQueueInfo,
	VkQueue*                                    pQueue)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_queue *candidate =
		&device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];

	/* From the Vulkan 1.1.70 spec:
	 *
	 * "The queue returned by vkGetDeviceQueue2 must have the same
	 * flags value from this structure as that used at device
	 * creation time in a VkDeviceQueueCreateInfo instance. If no
	 * matching flags were specified at device creation time then
	 * pQueue will return VK_NULL_HANDLE."
	 */
	if (candidate->flags == pQueueInfo->flags) {
		*pQueue = radv_queue_to_handle(candidate);
	} else {
		*pQueue = VK_NULL_HANDLE;
	}
}
3100 
/**
 * Look up a queue of the device by family index and queue index.
 *
 * Implemented on top of radv_GetDeviceQueue2() with a query structure whose
 * flags are zero, so only queues stored with zero flags are returned;
 * otherwise *pQueue is set as radv_GetDeviceQueue2() sets it.
 */
void radv_GetDeviceQueue(
	VkDevice                                    _device,
	uint32_t                                    queueFamilyIndex,
	uint32_t                                    queueIndex,
	VkQueue*                                    pQueue)
{
	const VkDeviceQueueInfo2 legacy_query = {
		.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
		.pNext = NULL,
		.flags = 0,
		.queueFamilyIndex = queueFamilyIndex,
		.queueIndex = queueIndex,
	};

	radv_GetDeviceQueue2(_device, &legacy_query, pQueue);
}
3115 
3116 static void
fill_geom_tess_rings(struct radv_queue * queue,uint32_t * map,bool add_sample_positions,uint32_t esgs_ring_size,struct radeon_winsys_bo * esgs_ring_bo,uint32_t gsvs_ring_size,struct radeon_winsys_bo * gsvs_ring_bo,uint32_t tess_factor_ring_size,uint32_t tess_offchip_ring_offset,uint32_t tess_offchip_ring_size,struct radeon_winsys_bo * tess_rings_bo)3117 fill_geom_tess_rings(struct radv_queue *queue,
3118 		     uint32_t *map,
3119 		     bool add_sample_positions,
3120 		     uint32_t esgs_ring_size,
3121 		     struct radeon_winsys_bo *esgs_ring_bo,
3122 		     uint32_t gsvs_ring_size,
3123 		     struct radeon_winsys_bo *gsvs_ring_bo,
3124 		     uint32_t tess_factor_ring_size,
3125 		     uint32_t tess_offchip_ring_offset,
3126 		     uint32_t tess_offchip_ring_size,
3127 		     struct radeon_winsys_bo *tess_rings_bo)
3128 {
3129 	uint32_t *desc = &map[4];
3130 
3131 	if (esgs_ring_bo) {
3132 		uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);
3133 
3134 		/* stride 0, num records - size, add tid, swizzle, elsize4,
3135 		   index stride 64 */
3136 		desc[0] = esgs_va;
3137 		desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
3138 			  S_008F04_SWIZZLE_ENABLE(true);
3139 		desc[2] = esgs_ring_size;
3140 		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3141 			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3142 			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3143 			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
3144 			  S_008F0C_INDEX_STRIDE(3) |
3145 			  S_008F0C_ADD_TID_ENABLE(1);
3146 
3147 		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3148 			desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3149 				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
3150 				   S_008F0C_RESOURCE_LEVEL(1);
3151 		} else {
3152 			desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3153 				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
3154 				   S_008F0C_ELEMENT_SIZE(1);
3155 		}
3156 
3157 		/* GS entry for ES->GS ring */
3158 		/* stride 0, num records - size, elsize0,
3159 		   index stride 0 */
3160 		desc[4] = esgs_va;
3161 		desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
3162 		desc[6] = esgs_ring_size;
3163 		desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3164 			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3165 			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3166 			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3167 
3168 		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3169 			desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3170 				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
3171 				   S_008F0C_RESOURCE_LEVEL(1);
3172 		} else {
3173 			desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3174 				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3175 		}
3176 	}
3177 
3178 	desc += 8;
3179 
3180 	if (gsvs_ring_bo) {
3181 		uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
3182 
3183 		/* VS entry for GS->VS ring */
3184 		/* stride 0, num records - size, elsize0,
3185 		   index stride 0 */
3186 		desc[0] = gsvs_va;
3187 		desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
3188 		desc[2] = gsvs_ring_size;
3189 		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3190 			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3191 			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3192 			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3193 
3194 		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3195 			desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3196 				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
3197 				   S_008F0C_RESOURCE_LEVEL(1);
3198 		} else {
3199 			desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3200 				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3201 		}
3202 
3203 		/* stride gsvs_itemsize, num records 64
3204 		   elsize 4, index stride 16 */
3205 		/* shader will patch stride and desc[2] */
3206 		desc[4] = gsvs_va;
3207 		desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
3208 			  S_008F04_SWIZZLE_ENABLE(1);
3209 		desc[6] = 0;
3210 		desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3211 			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3212 			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3213 			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
3214 			  S_008F0C_INDEX_STRIDE(1) |
3215 			  S_008F0C_ADD_TID_ENABLE(true);
3216 
3217 		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3218 			desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3219 				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
3220 				   S_008F0C_RESOURCE_LEVEL(1);
3221 		} else {
3222 			desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3223 				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
3224 				   S_008F0C_ELEMENT_SIZE(1);
3225 		}
3226 
3227 	}
3228 
3229 	desc += 8;
3230 
3231 	if (tess_rings_bo) {
3232 		uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
3233 		uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;
3234 
3235 		desc[0] = tess_va;
3236 		desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
3237 		desc[2] = tess_factor_ring_size;
3238 		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3239 			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3240 			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3241 			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3242 
3243 		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3244 			desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3245 				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
3246 				   S_008F0C_RESOURCE_LEVEL(1);
3247 		} else {
3248 			desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3249 				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3250 		}
3251 
3252 		desc[4] = tess_offchip_va;
3253 		desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
3254 		desc[6] = tess_offchip_ring_size;
3255 		desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3256 			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3257 			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3258 			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3259 
3260 		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3261 			desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3262 				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
3263 				   S_008F0C_RESOURCE_LEVEL(1);
3264 		} else {
3265 			desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3266 				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3267 		}
3268 	}
3269 
3270 	desc += 8;
3271 
3272 	if (add_sample_positions) {
3273 		/* add sample positions after all rings */
3274 		memcpy(desc, queue->device->sample_locations_1x, 8);
3275 		desc += 2;
3276 		memcpy(desc, queue->device->sample_locations_2x, 16);
3277 		desc += 4;
3278 		memcpy(desc, queue->device->sample_locations_4x, 32);
3279 		desc += 8;
3280 		memcpy(desc, queue->device->sample_locations_8x, 64);
3281 	}
3282 }
3283 
3284 static unsigned
radv_get_hs_offchip_param(struct radv_device * device,uint32_t * max_offchip_buffers_p)3285 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
3286 {
3287 	bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
3288 		device->physical_device->rad_info.family != CHIP_CARRIZO &&
3289 		device->physical_device->rad_info.family != CHIP_STONEY;
3290 	unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
3291 	unsigned max_offchip_buffers;
3292 	unsigned offchip_granularity;
3293 	unsigned hs_offchip_param;
3294 
3295 	/*
3296 	 * Per RadeonSI:
3297 	 * This must be one less than the maximum number due to a hw limitation.
3298          * Various hardware bugs need thGFX7
3299 	 *
3300 	 * Per AMDVLK:
3301 	 * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
3302 	 * Gfx7 should limit max_offchip_buffers to 508
3303 	 * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
3304 	 *
3305 	 * Follow AMDVLK here.
3306 	 */
3307 	if (device->physical_device->rad_info.chip_class >= GFX10) {
3308 		max_offchip_buffers_per_se = 256;
3309 	} else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
3310 		   device->physical_device->rad_info.chip_class == GFX7 ||
3311 		   device->physical_device->rad_info.chip_class == GFX6)
3312 		--max_offchip_buffers_per_se;
3313 
3314 	max_offchip_buffers = max_offchip_buffers_per_se *
3315 		device->physical_device->rad_info.max_se;
3316 
3317 	/* Hawaii has a bug with offchip buffers > 256 that can be worked
3318 	 * around by setting 4K granularity.
3319 	 */
3320 	if (device->tess_offchip_block_dw_size == 4096) {
3321 		assert(device->physical_device->rad_info.family == CHIP_HAWAII);
3322 		offchip_granularity = V_03093C_X_4K_DWORDS;
3323 	} else {
3324 		assert(device->tess_offchip_block_dw_size == 8192);
3325 		offchip_granularity = V_03093C_X_8K_DWORDS;
3326 	}
3327 
3328 	switch (device->physical_device->rad_info.chip_class) {
3329 	case GFX6:
3330 		max_offchip_buffers = MIN2(max_offchip_buffers, 126);
3331 		break;
3332 	case GFX7:
3333 	case GFX8:
3334 	case GFX9:
3335 		max_offchip_buffers = MIN2(max_offchip_buffers, 508);
3336 		break;
3337 	case GFX10:
3338 		break;
3339 	default:
3340 		break;
3341 	}
3342 
3343 	*max_offchip_buffers_p = max_offchip_buffers;
3344 	if (device->physical_device->rad_info.chip_class >= GFX10_3) {
3345 		hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
3346 				   S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
3347 	} else if (device->physical_device->rad_info.chip_class >= GFX7) {
3348 		if (device->physical_device->rad_info.chip_class >= GFX8)
3349 			--max_offchip_buffers;
3350 		hs_offchip_param =
3351 			S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
3352 			S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
3353 	} else {
3354 		hs_offchip_param =
3355 			S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
3356 	}
3357 	return hs_offchip_param;
3358 }
3359 
3360 static void
radv_emit_gs_ring_sizes(struct radv_queue * queue,struct radeon_cmdbuf * cs,struct radeon_winsys_bo * esgs_ring_bo,uint32_t esgs_ring_size,struct radeon_winsys_bo * gsvs_ring_bo,uint32_t gsvs_ring_size)3361 radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3362 			struct radeon_winsys_bo *esgs_ring_bo,
3363 			uint32_t esgs_ring_size,
3364 			struct radeon_winsys_bo *gsvs_ring_bo,
3365 			uint32_t gsvs_ring_size)
3366 {
3367 	if (!esgs_ring_bo && !gsvs_ring_bo)
3368 		return;
3369 
3370 	if (esgs_ring_bo)
3371 		radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
3372 
3373 	if (gsvs_ring_bo)
3374 		radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
3375 
3376 	if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
3377 		radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
3378 		radeon_emit(cs, esgs_ring_size >> 8);
3379 		radeon_emit(cs, gsvs_ring_size >> 8);
3380 	} else {
3381 		radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
3382 		radeon_emit(cs, esgs_ring_size >> 8);
3383 		radeon_emit(cs, gsvs_ring_size >> 8);
3384 	}
3385 }
3386 
3387 static void
radv_emit_tess_factor_ring(struct radv_queue * queue,struct radeon_cmdbuf * cs,unsigned hs_offchip_param,unsigned tf_ring_size,struct radeon_winsys_bo * tess_rings_bo)3388 radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3389 			   unsigned hs_offchip_param, unsigned tf_ring_size,
3390 			   struct radeon_winsys_bo *tess_rings_bo)
3391 {
3392 	uint64_t tf_va;
3393 
3394 	if (!tess_rings_bo)
3395 		return;
3396 
3397 	tf_va = radv_buffer_get_va(tess_rings_bo);
3398 
3399 	radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
3400 
3401 	if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
3402 		radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
3403 				       S_030938_SIZE(tf_ring_size / 4));
3404 		radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
3405 				       tf_va >> 8);
3406 
3407 		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3408 			radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI_UMD,
3409 					       S_030984_BASE_HI(tf_va >> 40));
3410 		} else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
3411 			radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
3412 					       S_030944_BASE_HI(tf_va >> 40));
3413 		}
3414 		radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
3415 				       hs_offchip_param);
3416 	} else {
3417 		radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
3418 				      S_008988_SIZE(tf_ring_size / 4));
3419 		radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
3420 				      tf_va >> 8);
3421 		radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
3422 				     hs_offchip_param);
3423 	}
3424 }
3425 
3426 static void
radv_emit_graphics_scratch(struct radv_queue * queue,struct radeon_cmdbuf * cs,uint32_t size_per_wave,uint32_t waves,struct radeon_winsys_bo * scratch_bo)3427 radv_emit_graphics_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3428                            uint32_t size_per_wave, uint32_t waves,
3429                            struct radeon_winsys_bo *scratch_bo)
3430 {
3431 	if (queue->queue_family_index != RADV_QUEUE_GENERAL)
3432 		return;
3433 
3434 	if (!scratch_bo)
3435 		return;
3436 
3437 	radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
3438 
3439 	radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
3440 	                       S_0286E8_WAVES(waves) |
3441 	                       S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
3442 }
3443 
3444 static void
radv_emit_compute_scratch(struct radv_queue * queue,struct radeon_cmdbuf * cs,uint32_t size_per_wave,uint32_t waves,struct radeon_winsys_bo * compute_scratch_bo)3445 radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3446                           uint32_t size_per_wave, uint32_t waves,
3447                           struct radeon_winsys_bo *compute_scratch_bo)
3448 {
3449 	uint64_t scratch_va;
3450 
3451 	if (!compute_scratch_bo)
3452 		return;
3453 
3454 	scratch_va = radv_buffer_get_va(compute_scratch_bo);
3455 
3456 	radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
3457 
3458 	radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
3459 	radeon_emit(cs, scratch_va);
3460 	radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
3461 			S_008F04_SWIZZLE_ENABLE(1));
3462 
3463 	radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
3464 	                 S_00B860_WAVES(waves) |
3465 	                 S_00B860_WAVESIZE(round_up_u32(size_per_wave, 1024)));
3466 }
3467 
3468 static void
radv_emit_global_shader_pointers(struct radv_queue * queue,struct radeon_cmdbuf * cs,struct radeon_winsys_bo * descriptor_bo)3469 radv_emit_global_shader_pointers(struct radv_queue *queue,
3470 				 struct radeon_cmdbuf *cs,
3471 				 struct radeon_winsys_bo *descriptor_bo)
3472 {
3473 	uint64_t va;
3474 
3475 	if (!descriptor_bo)
3476 		return;
3477 
3478 	va = radv_buffer_get_va(descriptor_bo);
3479 
3480 	radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
3481 
3482 	if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3483 		uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
3484 				   R_00B130_SPI_SHADER_USER_DATA_VS_0,
3485 				   R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
3486 				   R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
3487 
3488 		for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3489 			radv_emit_shader_pointer(queue->device, cs, regs[i],
3490 						 va, true);
3491 		}
3492 	} else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
3493 		uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
3494 				   R_00B130_SPI_SHADER_USER_DATA_VS_0,
3495 				   R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
3496 				   R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
3497 
3498 		for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3499 			radv_emit_shader_pointer(queue->device, cs, regs[i],
3500 						 va, true);
3501 		}
3502 	} else {
3503 		uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
3504 				   R_00B130_SPI_SHADER_USER_DATA_VS_0,
3505 				   R_00B230_SPI_SHADER_USER_DATA_GS_0,
3506 				   R_00B330_SPI_SHADER_USER_DATA_ES_0,
3507 				   R_00B430_SPI_SHADER_USER_DATA_HS_0,
3508 				   R_00B530_SPI_SHADER_USER_DATA_LS_0};
3509 
3510 		for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3511 			radv_emit_shader_pointer(queue->device, cs, regs[i],
3512 						 va, true);
3513 		}
3514 	}
3515 }
3516 
3517 static void
radv_emit_trap_handler(struct radv_queue * queue,struct radeon_cmdbuf * cs,struct radeon_winsys_bo * tma_bo)3518 radv_emit_trap_handler(struct radv_queue *queue,
3519 		       struct radeon_cmdbuf *cs,
3520 		       struct radeon_winsys_bo *tma_bo)
3521 {
3522 	struct radv_device *device = queue->device;
3523 	struct radeon_winsys_bo *tba_bo;
3524 	uint64_t tba_va, tma_va;
3525 
3526 	if (!device->trap_handler_shader || !tma_bo)
3527 		return;
3528 
3529 	tba_bo = device->trap_handler_shader->bo;
3530 
3531 	tba_va = radv_buffer_get_va(tba_bo) + device->trap_handler_shader->bo_offset;
3532 	tma_va = radv_buffer_get_va(tma_bo);
3533 
3534 	radv_cs_add_buffer(queue->device->ws, cs, tba_bo);
3535 	radv_cs_add_buffer(queue->device->ws, cs, tma_bo);
3536 
3537 	if (queue->queue_family_index == RADV_QUEUE_GENERAL) {
3538 		uint32_t regs[] = {R_00B000_SPI_SHADER_TBA_LO_PS,
3539 				   R_00B100_SPI_SHADER_TBA_LO_VS,
3540 				   R_00B200_SPI_SHADER_TBA_LO_GS,
3541 				   R_00B300_SPI_SHADER_TBA_LO_ES,
3542 				   R_00B400_SPI_SHADER_TBA_LO_HS,
3543 				   R_00B500_SPI_SHADER_TBA_LO_LS};
3544 
3545 		for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3546 			radeon_set_sh_reg_seq(cs, regs[i], 4);
3547 			radeon_emit(cs, tba_va >> 8);
3548 			radeon_emit(cs, tba_va >> 40);
3549 			radeon_emit(cs, tma_va >> 8);
3550 			radeon_emit(cs, tma_va >> 40);
3551 		}
3552 	} else {
3553 		radeon_set_sh_reg_seq(cs, R_00B838_COMPUTE_TBA_LO, 4);
3554 		radeon_emit(cs, tba_va >> 8);
3555 		radeon_emit(cs, tba_va >> 40);
3556 		radeon_emit(cs, tma_va >> 8);
3557 		radeon_emit(cs, tma_va >> 40);
3558 	}
3559 }
3560 
3561 static void
radv_init_graphics_state(struct radeon_cmdbuf * cs,struct radv_queue * queue)3562 radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
3563 {
3564 	struct radv_device *device = queue->device;
3565 
3566 	if (device->gfx_init) {
3567 		uint64_t va = radv_buffer_get_va(device->gfx_init);
3568 
3569 		radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
3570 		radeon_emit(cs, va);
3571 		radeon_emit(cs, va >> 32);
3572 		radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
3573 
3574 		radv_cs_add_buffer(device->ws, cs, device->gfx_init);
3575 	} else {
3576 		si_emit_graphics(device, cs);
3577 	}
3578 }
3579 
3580 static void
radv_init_compute_state(struct radeon_cmdbuf * cs,struct radv_queue * queue)3581 radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
3582 {
3583 	si_emit_compute(queue->device, cs);
3584 }
3585 
3586 static VkResult
radv_get_preamble_cs(struct radv_queue * queue,uint32_t scratch_size_per_wave,uint32_t scratch_waves,uint32_t compute_scratch_size_per_wave,uint32_t compute_scratch_waves,uint32_t esgs_ring_size,uint32_t gsvs_ring_size,bool needs_tess_rings,bool needs_gds,bool needs_gds_oa,bool needs_sample_positions,struct radeon_cmdbuf ** initial_full_flush_preamble_cs,struct radeon_cmdbuf ** initial_preamble_cs,struct radeon_cmdbuf ** continue_preamble_cs)3587 radv_get_preamble_cs(struct radv_queue *queue,
3588 		     uint32_t scratch_size_per_wave,
3589 		     uint32_t scratch_waves,
3590 		     uint32_t compute_scratch_size_per_wave,
3591 		     uint32_t compute_scratch_waves,
3592 		     uint32_t esgs_ring_size,
3593 		     uint32_t gsvs_ring_size,
3594 		     bool needs_tess_rings,
3595 		     bool needs_gds,
3596 		     bool needs_gds_oa,
3597 		     bool needs_sample_positions,
3598 		     struct radeon_cmdbuf **initial_full_flush_preamble_cs,
3599                      struct radeon_cmdbuf **initial_preamble_cs,
3600                      struct radeon_cmdbuf **continue_preamble_cs)
3601 {
3602 	struct radeon_winsys_bo *scratch_bo = NULL;
3603 	struct radeon_winsys_bo *descriptor_bo = NULL;
3604 	struct radeon_winsys_bo *compute_scratch_bo = NULL;
3605 	struct radeon_winsys_bo *esgs_ring_bo = NULL;
3606 	struct radeon_winsys_bo *gsvs_ring_bo = NULL;
3607 	struct radeon_winsys_bo *tess_rings_bo = NULL;
3608 	struct radeon_winsys_bo *gds_bo = NULL;
3609 	struct radeon_winsys_bo *gds_oa_bo = NULL;
3610 	struct radeon_cmdbuf *dest_cs[3] = {0};
3611 	bool add_tess_rings = false, add_gds = false, add_gds_oa = false, add_sample_positions = false;
3612 	unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
3613 	unsigned max_offchip_buffers;
3614 	unsigned hs_offchip_param = 0;
3615 	unsigned tess_offchip_ring_offset;
3616 	uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
3617 	if (!queue->has_tess_rings) {
3618 		if (needs_tess_rings)
3619 			add_tess_rings = true;
3620 	}
3621 	if (!queue->has_gds) {
3622 		if (needs_gds)
3623 			add_gds = true;
3624 	}
3625 	if (!queue->has_gds_oa) {
3626 		if (needs_gds_oa)
3627 			add_gds_oa = true;
3628 	}
3629 	if (!queue->has_sample_positions) {
3630 		if (needs_sample_positions)
3631 			add_sample_positions = true;
3632 	}
3633 	tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
3634 	hs_offchip_param = radv_get_hs_offchip_param(queue->device,
3635 						     &max_offchip_buffers);
3636 	tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
3637 	tess_offchip_ring_size = max_offchip_buffers *
3638 		queue->device->tess_offchip_block_dw_size * 4;
3639 
3640 	scratch_size_per_wave = MAX2(scratch_size_per_wave, queue->scratch_size_per_wave);
3641 	if (scratch_size_per_wave)
3642 		scratch_waves = MIN2(scratch_waves, UINT32_MAX / scratch_size_per_wave);
3643 	else
3644 		scratch_waves = 0;
3645 
3646 	compute_scratch_size_per_wave = MAX2(compute_scratch_size_per_wave, queue->compute_scratch_size_per_wave);
3647 	if (compute_scratch_size_per_wave)
3648 		compute_scratch_waves = MIN2(compute_scratch_waves, UINT32_MAX / compute_scratch_size_per_wave);
3649 	else
3650 		compute_scratch_waves = 0;
3651 
3652 	if (scratch_size_per_wave <= queue->scratch_size_per_wave &&
3653 	    scratch_waves <= queue->scratch_waves &&
3654 	    compute_scratch_size_per_wave <= queue->compute_scratch_size_per_wave &&
3655 	    compute_scratch_waves <= queue->compute_scratch_waves &&
3656 	    esgs_ring_size <= queue->esgs_ring_size &&
3657 	    gsvs_ring_size <= queue->gsvs_ring_size &&
3658 	    !add_tess_rings && !add_gds && !add_gds_oa && !add_sample_positions &&
3659 	    queue->initial_preamble_cs) {
3660 		*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
3661 		*initial_preamble_cs = queue->initial_preamble_cs;
3662 		*continue_preamble_cs = queue->continue_preamble_cs;
3663 		if (!scratch_size_per_wave && !compute_scratch_size_per_wave &&
3664 		    !esgs_ring_size && !gsvs_ring_size && !needs_tess_rings &&
3665 		    !needs_gds && !needs_gds_oa && !needs_sample_positions)
3666 			*continue_preamble_cs = NULL;
3667 		return VK_SUCCESS;
3668 	}
3669 
3670 	uint32_t scratch_size = scratch_size_per_wave * scratch_waves;
3671 	uint32_t queue_scratch_size = queue->scratch_size_per_wave * queue->scratch_waves;
3672 	if (scratch_size > queue_scratch_size) {
3673 		scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
3674 		                                              scratch_size,
3675 		                                              4096,
3676 		                                              RADEON_DOMAIN_VRAM,
3677 		                                              ring_bo_flags,
3678 		                                              RADV_BO_PRIORITY_SCRATCH);
3679 		if (!scratch_bo)
3680 			goto fail;
3681 	} else
3682 		scratch_bo = queue->scratch_bo;
3683 
3684 	uint32_t compute_scratch_size = compute_scratch_size_per_wave * compute_scratch_waves;
3685 	uint32_t compute_queue_scratch_size = queue->compute_scratch_size_per_wave * queue->compute_scratch_waves;
3686 	if (compute_scratch_size > compute_queue_scratch_size) {
3687 		compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
3688 		                                                      compute_scratch_size,
3689 		                                                      4096,
3690 		                                                      RADEON_DOMAIN_VRAM,
3691 		                                                      ring_bo_flags,
3692 		                                                      RADV_BO_PRIORITY_SCRATCH);
3693 		if (!compute_scratch_bo)
3694 			goto fail;
3695 
3696 	} else
3697 		compute_scratch_bo = queue->compute_scratch_bo;
3698 
3699 	if (esgs_ring_size > queue->esgs_ring_size) {
3700 		esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
3701 								esgs_ring_size,
3702 								4096,
3703 								RADEON_DOMAIN_VRAM,
3704 								ring_bo_flags,
3705 								RADV_BO_PRIORITY_SCRATCH);
3706 		if (!esgs_ring_bo)
3707 			goto fail;
3708 	} else {
3709 		esgs_ring_bo = queue->esgs_ring_bo;
3710 		esgs_ring_size = queue->esgs_ring_size;
3711 	}
3712 
3713 	if (gsvs_ring_size > queue->gsvs_ring_size) {
3714 		gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
3715 								gsvs_ring_size,
3716 								4096,
3717 								RADEON_DOMAIN_VRAM,
3718 								ring_bo_flags,
3719 								RADV_BO_PRIORITY_SCRATCH);
3720 		if (!gsvs_ring_bo)
3721 			goto fail;
3722 	} else {
3723 		gsvs_ring_bo = queue->gsvs_ring_bo;
3724 		gsvs_ring_size = queue->gsvs_ring_size;
3725 	}
3726 
3727 	if (add_tess_rings) {
3728 		tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
3729 								 tess_offchip_ring_offset + tess_offchip_ring_size,
3730 								 256,
3731 								 RADEON_DOMAIN_VRAM,
3732 								 ring_bo_flags,
3733 								 RADV_BO_PRIORITY_SCRATCH);
3734 		if (!tess_rings_bo)
3735 			goto fail;
3736 	} else {
3737 		tess_rings_bo = queue->tess_rings_bo;
3738 	}
3739 
3740 	if (add_gds) {
3741 		assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
3742 
3743 		/* 4 streamout GDS counters.
3744 		 * We need 256B (64 dw) of GDS, otherwise streamout hangs.
3745 		 */
3746 		gds_bo = queue->device->ws->buffer_create(queue->device->ws,
3747 							  256, 4,
3748 							  RADEON_DOMAIN_GDS,
3749 							  ring_bo_flags,
3750 							  RADV_BO_PRIORITY_SCRATCH);
3751 		if (!gds_bo)
3752 			goto fail;
3753 	} else {
3754 		gds_bo = queue->gds_bo;
3755 	}
3756 
3757 	if (add_gds_oa) {
3758 		assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
3759 
3760 		gds_oa_bo = queue->device->ws->buffer_create(queue->device->ws,
3761 							     4, 1,
3762 							     RADEON_DOMAIN_OA,
3763 							     ring_bo_flags,
3764 							     RADV_BO_PRIORITY_SCRATCH);
3765 		if (!gds_oa_bo)
3766 			goto fail;
3767 	} else {
3768 		gds_oa_bo = queue->gds_oa_bo;
3769 	}
3770 
3771 	if (scratch_bo != queue->scratch_bo ||
3772 	    esgs_ring_bo != queue->esgs_ring_bo ||
3773 	    gsvs_ring_bo != queue->gsvs_ring_bo ||
3774 	    tess_rings_bo != queue->tess_rings_bo ||
3775 	    add_sample_positions) {
3776 		uint32_t size = 0;
3777 		if (gsvs_ring_bo || esgs_ring_bo ||
3778 		    tess_rings_bo || add_sample_positions) {
3779 			size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
3780 			if (add_sample_positions)
3781 				size += 128; /* 64+32+16+8 = 120 bytes */
3782 		}
3783 		else if (scratch_bo)
3784 			size = 8; /* 2 dword */
3785 
3786 		descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
3787 		                                                 size,
3788 		                                                 4096,
3789 		                                                 RADEON_DOMAIN_VRAM,
3790 		                                                 RADEON_FLAG_CPU_ACCESS |
3791 								 RADEON_FLAG_NO_INTERPROCESS_SHARING |
3792 								 RADEON_FLAG_READ_ONLY,
3793 								 RADV_BO_PRIORITY_DESCRIPTOR);
3794 		if (!descriptor_bo)
3795 			goto fail;
3796 	} else
3797 		descriptor_bo = queue->descriptor_bo;
3798 
3799 	if (descriptor_bo != queue->descriptor_bo) {
3800 		uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
3801 		if (!map)
3802 			goto fail;
3803 
3804 		if (scratch_bo) {
3805 			uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
3806 			uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
3807 				         S_008F04_SWIZZLE_ENABLE(1);
3808 			map[0] = scratch_va;
3809 			map[1] = rsrc1;
3810 		}
3811 
3812 		if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)
3813 			fill_geom_tess_rings(queue, map, add_sample_positions,
3814 					     esgs_ring_size, esgs_ring_bo,
3815 					     gsvs_ring_size, gsvs_ring_bo,
3816 					     tess_factor_ring_size,
3817 					     tess_offchip_ring_offset,
3818 					     tess_offchip_ring_size,
3819 					     tess_rings_bo);
3820 
3821 		queue->device->ws->buffer_unmap(descriptor_bo);
3822 	}
3823 
3824 	for(int i = 0; i < 3; ++i) {
3825 		enum rgp_flush_bits sqtt_flush_bits = 0;
3826 		struct radeon_cmdbuf *cs = NULL;
3827 		cs = queue->device->ws->cs_create(queue->device->ws,
3828 						  queue->queue_family_index ? RING_COMPUTE : RING_GFX);
3829 		if (!cs)
3830 			goto fail;
3831 
3832 		dest_cs[i] = cs;
3833 
3834 		if (scratch_bo)
3835 			radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
3836 
3837 		/* Emit initial configuration. */
3838 		switch (queue->queue_family_index) {
3839 		case RADV_QUEUE_GENERAL:
3840 			radv_init_graphics_state(cs, queue);
3841 			break;
3842 		case RADV_QUEUE_COMPUTE:
3843 			radv_init_compute_state(cs, queue);
3844 			break;
3845 		case RADV_QUEUE_TRANSFER:
3846 			break;
3847 		}
3848 
3849 		if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo)  {
3850 			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
3851 			radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
3852 
3853 			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
3854 			radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
3855 		}
3856 
3857 		radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size,
3858 					gsvs_ring_bo, gsvs_ring_size);
3859 		radv_emit_tess_factor_ring(queue, cs, hs_offchip_param,
3860 					   tess_factor_ring_size, tess_rings_bo);
3861 		radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
3862 		radv_emit_compute_scratch(queue, cs, compute_scratch_size_per_wave,
3863 		                          compute_scratch_waves, compute_scratch_bo);
3864 		radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave,
3865 		                           scratch_waves, scratch_bo);
3866 		radv_emit_trap_handler(queue, cs, queue->device->tma_bo);
3867 
3868 		if (gds_bo)
3869 			radv_cs_add_buffer(queue->device->ws, cs, gds_bo);
3870 		if (gds_oa_bo)
3871 			radv_cs_add_buffer(queue->device->ws, cs, gds_oa_bo);
3872 
3873 		if (queue->device->trace_bo)
3874 			radv_cs_add_buffer(queue->device->ws, cs, queue->device->trace_bo);
3875 
3876 		if (queue->device->border_color_data.bo)
3877 			radv_cs_add_buffer(queue->device->ws, cs,
3878 					   queue->device->border_color_data.bo);
3879 
3880 		if (i == 0) {
3881 			si_cs_emit_cache_flush(cs,
3882 			                       queue->device->physical_device->rad_info.chip_class,
3883 					       NULL, 0,
3884 			                       queue->queue_family_index == RING_COMPUTE &&
3885 			                         queue->device->physical_device->rad_info.chip_class >= GFX7,
3886 			                       (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
3887 			                       RADV_CMD_FLAG_INV_ICACHE |
3888 			                       RADV_CMD_FLAG_INV_SCACHE |
3889 			                       RADV_CMD_FLAG_INV_VCACHE |
3890 			                       RADV_CMD_FLAG_INV_L2 |
3891 					       RADV_CMD_FLAG_START_PIPELINE_STATS, &sqtt_flush_bits, 0);
3892 		} else if (i == 1) {
3893 			si_cs_emit_cache_flush(cs,
3894 			                       queue->device->physical_device->rad_info.chip_class,
3895 					       NULL, 0,
3896 			                       queue->queue_family_index == RING_COMPUTE &&
3897 			                         queue->device->physical_device->rad_info.chip_class >= GFX7,
3898 			                       RADV_CMD_FLAG_INV_ICACHE |
3899 			                       RADV_CMD_FLAG_INV_SCACHE |
3900 			                       RADV_CMD_FLAG_INV_VCACHE |
3901 			                       RADV_CMD_FLAG_INV_L2 |
3902 					       RADV_CMD_FLAG_START_PIPELINE_STATS, &sqtt_flush_bits, 0);
3903 		}
3904 
3905 		if (queue->device->ws->cs_finalize(cs) != VK_SUCCESS)
3906 			goto fail;
3907 	}
3908 
3909 	if (queue->initial_full_flush_preamble_cs)
3910 			queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
3911 
3912 	if (queue->initial_preamble_cs)
3913 			queue->device->ws->cs_destroy(queue->initial_preamble_cs);
3914 
3915 	if (queue->continue_preamble_cs)
3916 			queue->device->ws->cs_destroy(queue->continue_preamble_cs);
3917 
3918 	queue->initial_full_flush_preamble_cs = dest_cs[0];
3919 	queue->initial_preamble_cs = dest_cs[1];
3920 	queue->continue_preamble_cs = dest_cs[2];
3921 
3922 	if (scratch_bo != queue->scratch_bo) {
3923 		if (queue->scratch_bo)
3924 			queue->device->ws->buffer_destroy(queue->scratch_bo);
3925 		queue->scratch_bo = scratch_bo;
3926 	}
3927 	queue->scratch_size_per_wave = scratch_size_per_wave;
3928 	queue->scratch_waves = scratch_waves;
3929 
3930 	if (compute_scratch_bo != queue->compute_scratch_bo) {
3931 		if (queue->compute_scratch_bo)
3932 			queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
3933 		queue->compute_scratch_bo = compute_scratch_bo;
3934 	}
3935 	queue->compute_scratch_size_per_wave = compute_scratch_size_per_wave;
3936 	queue->compute_scratch_waves = compute_scratch_waves;
3937 
3938 	if (esgs_ring_bo != queue->esgs_ring_bo) {
3939 		if (queue->esgs_ring_bo)
3940 			queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
3941 		queue->esgs_ring_bo = esgs_ring_bo;
3942 		queue->esgs_ring_size = esgs_ring_size;
3943 	}
3944 
3945 	if (gsvs_ring_bo != queue->gsvs_ring_bo) {
3946 		if (queue->gsvs_ring_bo)
3947 			queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
3948 		queue->gsvs_ring_bo = gsvs_ring_bo;
3949 		queue->gsvs_ring_size = gsvs_ring_size;
3950 	}
3951 
3952 	if (tess_rings_bo != queue->tess_rings_bo) {
3953 		queue->tess_rings_bo = tess_rings_bo;
3954 		queue->has_tess_rings = true;
3955 	}
3956 
3957 	if (gds_bo != queue->gds_bo) {
3958 		queue->gds_bo = gds_bo;
3959 		queue->has_gds = true;
3960 	}
3961 
3962 	if (gds_oa_bo != queue->gds_oa_bo) {
3963 		queue->gds_oa_bo = gds_oa_bo;
3964 		queue->has_gds_oa = true;
3965 	}
3966 
3967 	if (descriptor_bo != queue->descriptor_bo) {
3968 		if (queue->descriptor_bo)
3969 			queue->device->ws->buffer_destroy(queue->descriptor_bo);
3970 
3971 		queue->descriptor_bo = descriptor_bo;
3972 	}
3973 
3974 	if (add_sample_positions)
3975 		queue->has_sample_positions = true;
3976 
3977 	*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
3978 	*initial_preamble_cs = queue->initial_preamble_cs;
3979 	*continue_preamble_cs = queue->continue_preamble_cs;
3980 	if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
3981 			*continue_preamble_cs = NULL;
3982 	return VK_SUCCESS;
3983 fail:
3984 	for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
3985 		if (dest_cs[i])
3986 			queue->device->ws->cs_destroy(dest_cs[i]);
3987 	if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
3988 		queue->device->ws->buffer_destroy(descriptor_bo);
3989 	if (scratch_bo && scratch_bo != queue->scratch_bo)
3990 		queue->device->ws->buffer_destroy(scratch_bo);
3991 	if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
3992 		queue->device->ws->buffer_destroy(compute_scratch_bo);
3993 	if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
3994 		queue->device->ws->buffer_destroy(esgs_ring_bo);
3995 	if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
3996 		queue->device->ws->buffer_destroy(gsvs_ring_bo);
3997 	if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
3998 		queue->device->ws->buffer_destroy(tess_rings_bo);
3999 	if (gds_bo && gds_bo != queue->gds_bo)
4000 		queue->device->ws->buffer_destroy(gds_bo);
4001 	if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
4002 		queue->device->ws->buffer_destroy(gds_oa_bo);
4003 
4004 	return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4005 }
4006 
radv_alloc_sem_counts(struct radv_device * device,struct radv_winsys_sem_counts * counts,int num_sems,struct radv_semaphore_part ** sems,const uint64_t * timeline_values,VkFence _fence,bool is_signal)4007 static VkResult radv_alloc_sem_counts(struct radv_device *device,
4008 				      struct radv_winsys_sem_counts *counts,
4009 				      int num_sems,
4010 				      struct radv_semaphore_part **sems,
4011 				      const uint64_t *timeline_values,
4012 				      VkFence _fence,
4013 				      bool is_signal)
4014 {
4015 	int syncobj_idx = 0, non_reset_idx = 0, sem_idx = 0, timeline_idx = 0;
4016 
4017 	if (num_sems == 0 && _fence == VK_NULL_HANDLE)
4018 		return VK_SUCCESS;
4019 
4020 	for (uint32_t i = 0; i < num_sems; i++) {
4021 		switch(sems[i]->kind) {
4022 		case RADV_SEMAPHORE_SYNCOBJ:
4023 			counts->syncobj_count++;
4024 			counts->syncobj_reset_count++;
4025 			break;
4026 		case RADV_SEMAPHORE_WINSYS:
4027 			counts->sem_count++;
4028 			break;
4029 		case RADV_SEMAPHORE_NONE:
4030 			break;
4031 		case RADV_SEMAPHORE_TIMELINE:
4032 			counts->syncobj_count++;
4033 			break;
4034 		case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
4035 			counts->timeline_syncobj_count++;
4036 			break;
4037 		}
4038 	}
4039 
4040 	if (_fence != VK_NULL_HANDLE) {
4041 		RADV_FROM_HANDLE(radv_fence, fence, _fence);
4042 
4043 		struct radv_fence_part *part =
4044 			fence->temporary.kind != RADV_FENCE_NONE ?
4045 			&fence->temporary : &fence->permanent;
4046 		if (part->kind == RADV_FENCE_SYNCOBJ)
4047 			counts->syncobj_count++;
4048 	}
4049 
4050 	if (counts->syncobj_count || counts->timeline_syncobj_count) {
4051 		counts->points = (uint64_t *)malloc(
4052 			sizeof(*counts->syncobj) * counts->syncobj_count +
4053 			(sizeof(*counts->syncobj) + sizeof(*counts->points)) * counts->timeline_syncobj_count);
4054 		if (!counts->points)
4055 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4056 		counts->syncobj = (uint32_t*)(counts->points + counts->timeline_syncobj_count);
4057 	}
4058 
4059 	if (counts->sem_count) {
4060 		counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
4061 		if (!counts->sem) {
4062 			free(counts->syncobj);
4063 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4064 		}
4065 	}
4066 
4067 	non_reset_idx = counts->syncobj_reset_count;
4068 
4069 	for (uint32_t i = 0; i < num_sems; i++) {
4070 		switch(sems[i]->kind) {
4071 		case RADV_SEMAPHORE_NONE:
4072 			unreachable("Empty semaphore");
4073 			break;
4074 		case RADV_SEMAPHORE_SYNCOBJ:
4075 			counts->syncobj[syncobj_idx++] = sems[i]->syncobj;
4076 			break;
4077 		case RADV_SEMAPHORE_WINSYS:
4078 			counts->sem[sem_idx++] = sems[i]->ws_sem;
4079 			break;
4080 		case RADV_SEMAPHORE_TIMELINE: {
4081 			pthread_mutex_lock(&sems[i]->timeline.mutex);
4082 			struct radv_timeline_point *point = NULL;
4083 			if (is_signal) {
4084 				point = radv_timeline_add_point_locked(device, &sems[i]->timeline, timeline_values[i]);
4085 			} else {
4086 				point = radv_timeline_find_point_at_least_locked(device, &sems[i]->timeline, timeline_values[i]);
4087 			}
4088 
4089 			pthread_mutex_unlock(&sems[i]->timeline.mutex);
4090 
4091 			if (point) {
4092 				counts->syncobj[non_reset_idx++] = point->syncobj;
4093 			} else {
4094 				/* Explicitly remove the semaphore so we might not find
4095 				 * a point later post-submit. */
4096 				sems[i] = NULL;
4097 			}
4098 			break;
4099 		}
4100 		case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
4101 			counts->syncobj[counts->syncobj_count + timeline_idx] = sems[i]->syncobj;
4102 			counts->points[timeline_idx] = timeline_values[i];
4103 			++timeline_idx;
4104 			break;
4105 		}
4106 	}
4107 
4108 	if (_fence != VK_NULL_HANDLE) {
4109 		RADV_FROM_HANDLE(radv_fence, fence, _fence);
4110 
4111 		struct radv_fence_part *part =
4112 			fence->temporary.kind != RADV_FENCE_NONE ?
4113 			&fence->temporary : &fence->permanent;
4114 		if (part->kind == RADV_FENCE_SYNCOBJ)
4115 			counts->syncobj[non_reset_idx++] = part->syncobj;
4116 	}
4117 
4118 	assert(MAX2(syncobj_idx, non_reset_idx) <= counts->syncobj_count);
4119 	counts->syncobj_count = MAX2(syncobj_idx, non_reset_idx);
4120 
4121 	return VK_SUCCESS;
4122 }
4123 
4124 static void
radv_free_sem_info(struct radv_winsys_sem_info * sem_info)4125 radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
4126 {
4127 	free(sem_info->wait.points);
4128 	free(sem_info->wait.sem);
4129 	free(sem_info->signal.points);
4130 	free(sem_info->signal.sem);
4131 }
4132 
4133 
radv_free_temp_syncobjs(struct radv_device * device,int num_sems,struct radv_semaphore_part * sems)4134 static void radv_free_temp_syncobjs(struct radv_device *device,
4135 				    int num_sems,
4136 				    struct radv_semaphore_part *sems)
4137 {
4138 	for (uint32_t i = 0; i < num_sems; i++) {
4139 		radv_destroy_semaphore_part(device, sems + i);
4140 	}
4141 }
4142 
4143 static VkResult
radv_alloc_sem_info(struct radv_device * device,struct radv_winsys_sem_info * sem_info,int num_wait_sems,struct radv_semaphore_part ** wait_sems,const uint64_t * wait_values,int num_signal_sems,struct radv_semaphore_part ** signal_sems,const uint64_t * signal_values,VkFence fence)4144 radv_alloc_sem_info(struct radv_device *device,
4145 		    struct radv_winsys_sem_info *sem_info,
4146 		    int num_wait_sems,
4147 		    struct radv_semaphore_part **wait_sems,
4148 		    const uint64_t *wait_values,
4149 		    int num_signal_sems,
4150 		    struct radv_semaphore_part **signal_sems,
4151 		    const uint64_t *signal_values,
4152 		    VkFence fence)
4153 {
4154 	VkResult ret;
4155 	memset(sem_info, 0, sizeof(*sem_info));
4156 
4157 	ret = radv_alloc_sem_counts(device, &sem_info->wait, num_wait_sems, wait_sems, wait_values, VK_NULL_HANDLE, false);
4158 	if (ret)
4159 		return ret;
4160 	ret = radv_alloc_sem_counts(device, &sem_info->signal, num_signal_sems, signal_sems, signal_values, fence, true);
4161 	if (ret)
4162 		radv_free_sem_info(sem_info);
4163 
4164 	/* caller can override these */
4165 	sem_info->cs_emit_wait = true;
4166 	sem_info->cs_emit_signal = true;
4167 	return ret;
4168 }
4169 
4170 static void
radv_finalize_timelines(struct radv_device * device,uint32_t num_wait_sems,struct radv_semaphore_part ** wait_sems,const uint64_t * wait_values,uint32_t num_signal_sems,struct radv_semaphore_part ** signal_sems,const uint64_t * signal_values,struct list_head * processing_list)4171 radv_finalize_timelines(struct radv_device *device,
4172                         uint32_t num_wait_sems,
4173                         struct radv_semaphore_part **wait_sems,
4174                         const uint64_t *wait_values,
4175                         uint32_t num_signal_sems,
4176                         struct radv_semaphore_part **signal_sems,
4177                         const uint64_t *signal_values,
4178                         struct list_head *processing_list)
4179 {
4180 	for (uint32_t i = 0; i < num_wait_sems; ++i) {
4181 		if (wait_sems[i] && wait_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
4182 			pthread_mutex_lock(&wait_sems[i]->timeline.mutex);
4183 			struct radv_timeline_point *point =
4184 				radv_timeline_find_point_at_least_locked(device, &wait_sems[i]->timeline, wait_values[i]);
4185 			point->wait_count -= 2;
4186 			pthread_mutex_unlock(&wait_sems[i]->timeline.mutex);
4187 		}
4188 	}
4189 	for (uint32_t i = 0; i < num_signal_sems; ++i) {
4190 		if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
4191 			pthread_mutex_lock(&signal_sems[i]->timeline.mutex);
4192 			struct radv_timeline_point *point =
4193 				radv_timeline_find_point_at_least_locked(device, &signal_sems[i]->timeline, signal_values[i]);
4194 			signal_sems[i]->timeline.highest_submitted =
4195 				MAX2(signal_sems[i]->timeline.highest_submitted, point->value);
4196 			point->wait_count -= 2;
4197 			radv_timeline_trigger_waiters_locked(&signal_sems[i]->timeline, processing_list);
4198 			pthread_mutex_unlock(&signal_sems[i]->timeline.mutex);
4199 		} else if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) {
4200 			signal_sems[i]->timeline_syncobj.max_point =
4201 				MAX2(signal_sems[i]->timeline_syncobj.max_point, signal_values[i]);
4202 		}
4203 	}
4204 }
4205 
4206 static VkResult
radv_sparse_buffer_bind_memory(struct radv_device * device,const VkSparseBufferMemoryBindInfo * bind)4207 radv_sparse_buffer_bind_memory(struct radv_device *device,
4208                                const VkSparseBufferMemoryBindInfo *bind)
4209 {
4210 	RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
4211 	VkResult result;
4212 
4213 	for (uint32_t i = 0; i < bind->bindCount; ++i) {
4214 		struct radv_device_memory *mem = NULL;
4215 
4216 		if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4217 			mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4218 
4219 		result = device->ws->buffer_virtual_bind(buffer->bo,
4220 							 bind->pBinds[i].resourceOffset,
4221 							 bind->pBinds[i].size,
4222 							 mem ? mem->bo : NULL,
4223 							 bind->pBinds[i].memoryOffset);
4224 		if (result != VK_SUCCESS)
4225 			return result;
4226 	}
4227 
4228 	return VK_SUCCESS;
4229 }
4230 
4231 static VkResult
radv_sparse_image_opaque_bind_memory(struct radv_device * device,const VkSparseImageOpaqueMemoryBindInfo * bind)4232 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
4233                                      const VkSparseImageOpaqueMemoryBindInfo *bind)
4234 {
4235 	RADV_FROM_HANDLE(radv_image, image, bind->image);
4236 	VkResult result;
4237 
4238 	for (uint32_t i = 0; i < bind->bindCount; ++i) {
4239 		struct radv_device_memory *mem = NULL;
4240 
4241 		if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4242 			mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4243 
4244 		result = device->ws->buffer_virtual_bind(image->bo,
4245 							 bind->pBinds[i].resourceOffset,
4246 							 bind->pBinds[i].size,
4247 							 mem ? mem->bo : NULL,
4248 							 bind->pBinds[i].memoryOffset);
4249 		if (result != VK_SUCCESS)
4250 			return result;
4251 	}
4252 
4253 	return VK_SUCCESS;
4254 }
4255 
4256 static VkResult
radv_get_preambles(struct radv_queue * queue,const VkCommandBuffer * cmd_buffers,uint32_t cmd_buffer_count,struct radeon_cmdbuf ** initial_full_flush_preamble_cs,struct radeon_cmdbuf ** initial_preamble_cs,struct radeon_cmdbuf ** continue_preamble_cs)4257 radv_get_preambles(struct radv_queue *queue,
4258                    const VkCommandBuffer *cmd_buffers,
4259                    uint32_t cmd_buffer_count,
4260                    struct radeon_cmdbuf **initial_full_flush_preamble_cs,
4261                    struct radeon_cmdbuf **initial_preamble_cs,
4262                    struct radeon_cmdbuf **continue_preamble_cs)
4263 {
4264 	uint32_t scratch_size_per_wave = 0, waves_wanted = 0;
4265 	uint32_t compute_scratch_size_per_wave = 0, compute_waves_wanted = 0;
4266 	uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
4267 	bool tess_rings_needed = false;
4268 	bool gds_needed = false;
4269 	bool gds_oa_needed = false;
4270 	bool sample_positions_needed = false;
4271 
4272 	for (uint32_t j = 0; j < cmd_buffer_count; j++) {
4273 		RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
4274 				 cmd_buffers[j]);
4275 
4276 		scratch_size_per_wave = MAX2(scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
4277 		waves_wanted = MAX2(waves_wanted, cmd_buffer->scratch_waves_wanted);
4278 		compute_scratch_size_per_wave = MAX2(compute_scratch_size_per_wave,
4279 		                                     cmd_buffer->compute_scratch_size_per_wave_needed);
4280 		compute_waves_wanted = MAX2(compute_waves_wanted,
4281 		                            cmd_buffer->compute_scratch_waves_wanted);
4282 		esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
4283 		gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
4284 		tess_rings_needed |= cmd_buffer->tess_rings_needed;
4285 		gds_needed |= cmd_buffer->gds_needed;
4286 		gds_oa_needed |= cmd_buffer->gds_oa_needed;
4287 		sample_positions_needed |= cmd_buffer->sample_positions_needed;
4288 	}
4289 
4290 	return radv_get_preamble_cs(queue, scratch_size_per_wave, waves_wanted,
4291 	                            compute_scratch_size_per_wave, compute_waves_wanted,
4292 	                            esgs_ring_size, gsvs_ring_size, tess_rings_needed,
4293 	                            gds_needed, gds_oa_needed, sample_positions_needed,
4294 	                            initial_full_flush_preamble_cs,
4295 	                            initial_preamble_cs, continue_preamble_cs);
4296 }
4297 
4298 struct radv_deferred_queue_submission {
4299 	struct radv_queue *queue;
4300 	VkCommandBuffer *cmd_buffers;
4301 	uint32_t cmd_buffer_count;
4302 
4303 	/* Sparse bindings that happen on a queue. */
4304 	VkSparseBufferMemoryBindInfo *buffer_binds;
4305 	uint32_t buffer_bind_count;
4306 	VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
4307 	uint32_t image_opaque_bind_count;
4308 
4309 	bool flush_caches;
4310 	VkShaderStageFlags wait_dst_stage_mask;
4311 	struct radv_semaphore_part **wait_semaphores;
4312 	uint32_t wait_semaphore_count;
4313 	struct radv_semaphore_part **signal_semaphores;
4314 	uint32_t signal_semaphore_count;
4315 	VkFence fence;
4316 
4317 	uint64_t *wait_values;
4318 	uint64_t *signal_values;
4319 
4320 	struct radv_semaphore_part *temporary_semaphore_parts;
4321 	uint32_t temporary_semaphore_part_count;
4322 
4323 	struct list_head queue_pending_list;
4324 	uint32_t submission_wait_count;
4325 	struct radv_timeline_waiter *wait_nodes;
4326 
4327 	struct list_head processing_list;
4328 };
4329 
4330 struct radv_queue_submission {
4331 	const VkCommandBuffer *cmd_buffers;
4332 	uint32_t cmd_buffer_count;
4333 
4334 	/* Sparse bindings that happen on a queue. */
4335 	const VkSparseBufferMemoryBindInfo *buffer_binds;
4336 	uint32_t buffer_bind_count;
4337 	const VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
4338 	uint32_t image_opaque_bind_count;
4339 
4340 	bool flush_caches;
4341 	VkPipelineStageFlags wait_dst_stage_mask;
4342 	const VkSemaphore *wait_semaphores;
4343 	uint32_t wait_semaphore_count;
4344 	const VkSemaphore *signal_semaphores;
4345 	uint32_t signal_semaphore_count;
4346 	VkFence fence;
4347 
4348 	const uint64_t *wait_values;
4349 	uint32_t wait_value_count;
4350 	const uint64_t *signal_values;
4351 	uint32_t signal_value_count;
4352 };
4353 
4354 static VkResult
4355 radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,
4356                               uint32_t decrement,
4357                               struct list_head *processing_list);
4358 
4359 static VkResult
radv_create_deferred_submission(struct radv_queue * queue,const struct radv_queue_submission * submission,struct radv_deferred_queue_submission ** out)4360 radv_create_deferred_submission(struct radv_queue *queue,
4361                                 const struct radv_queue_submission *submission,
4362                                 struct radv_deferred_queue_submission **out)
4363 {
4364 	struct radv_deferred_queue_submission *deferred = NULL;
4365 	size_t size = sizeof(struct radv_deferred_queue_submission);
4366 
4367 	uint32_t temporary_count = 0;
4368 	for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4369 		RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
4370 		if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE)
4371 			++temporary_count;
4372 	}
4373 
4374 	size += submission->cmd_buffer_count * sizeof(VkCommandBuffer);
4375 	size += submission->buffer_bind_count * sizeof(VkSparseBufferMemoryBindInfo);
4376 	size += submission->image_opaque_bind_count * sizeof(VkSparseImageOpaqueMemoryBindInfo);
4377 	size += submission->wait_semaphore_count * sizeof(struct radv_semaphore_part *);
4378 	size += temporary_count * sizeof(struct radv_semaphore_part);
4379 	size += submission->signal_semaphore_count * sizeof(struct radv_semaphore_part *);
4380 	size += submission->wait_value_count * sizeof(uint64_t);
4381 	size += submission->signal_value_count * sizeof(uint64_t);
4382 	size += submission->wait_semaphore_count * sizeof(struct radv_timeline_waiter);
4383 
4384 	deferred = calloc(1, size);
4385 	if (!deferred)
4386 		return VK_ERROR_OUT_OF_HOST_MEMORY;
4387 
4388 	deferred->queue = queue;
4389 
4390 	deferred->cmd_buffers = (void*)(deferred + 1);
4391 	deferred->cmd_buffer_count = submission->cmd_buffer_count;
4392 	if (submission->cmd_buffer_count) {
4393 		memcpy(deferred->cmd_buffers, submission->cmd_buffers,
4394 		       submission->cmd_buffer_count * sizeof(*deferred->cmd_buffers));
4395 	}
4396 
4397 	deferred->buffer_binds = (void*)(deferred->cmd_buffers + submission->cmd_buffer_count);
4398 	deferred->buffer_bind_count = submission->buffer_bind_count;
4399 	if (submission->buffer_bind_count) {
4400 		memcpy(deferred->buffer_binds, submission->buffer_binds,
4401 		       submission->buffer_bind_count * sizeof(*deferred->buffer_binds));
4402 	}
4403 
4404 	deferred->image_opaque_binds = (void*)(deferred->buffer_binds + submission->buffer_bind_count);
4405 	deferred->image_opaque_bind_count = submission->image_opaque_bind_count;
4406 	if (submission->image_opaque_bind_count) {
4407 		memcpy(deferred->image_opaque_binds, submission->image_opaque_binds,
4408 		       submission->image_opaque_bind_count * sizeof(*deferred->image_opaque_binds));
4409 	}
4410 
4411 	deferred->flush_caches = submission->flush_caches;
4412 	deferred->wait_dst_stage_mask = submission->wait_dst_stage_mask;
4413 
4414 	deferred->wait_semaphores = (void*)(deferred->image_opaque_binds + deferred->image_opaque_bind_count);
4415 	deferred->wait_semaphore_count = submission->wait_semaphore_count;
4416 
4417 	deferred->signal_semaphores = (void*)(deferred->wait_semaphores + deferred->wait_semaphore_count);
4418 	deferred->signal_semaphore_count = submission->signal_semaphore_count;
4419 
4420 	deferred->fence = submission->fence;
4421 
4422 	deferred->temporary_semaphore_parts = (void*)(deferred->signal_semaphores + deferred->signal_semaphore_count);
4423 	deferred->temporary_semaphore_part_count = temporary_count;
4424 
4425 	uint32_t temporary_idx = 0;
4426 	for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4427 		RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
4428 		if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
4429 			deferred->wait_semaphores[i] = &deferred->temporary_semaphore_parts[temporary_idx];
4430 			deferred->temporary_semaphore_parts[temporary_idx] = semaphore->temporary;
4431 			semaphore->temporary.kind = RADV_SEMAPHORE_NONE;
4432 			++temporary_idx;
4433 		} else
4434 			deferred->wait_semaphores[i] = &semaphore->permanent;
4435 	}
4436 
4437 	for (uint32_t i = 0; i < submission->signal_semaphore_count; ++i) {
4438 		RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->signal_semaphores[i]);
4439 		if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
4440 			deferred->signal_semaphores[i] = &semaphore->temporary;
4441 		} else {
4442 			deferred->signal_semaphores[i] = &semaphore->permanent;
4443 		}
4444 	}
4445 
4446 	deferred->wait_values = (void*)(deferred->temporary_semaphore_parts + temporary_count);
4447 	if (submission->wait_value_count) {
4448 		memcpy(deferred->wait_values, submission->wait_values, submission->wait_value_count * sizeof(uint64_t));
4449 	}
4450 	deferred->signal_values = deferred->wait_values + submission->wait_value_count;
4451 	if (submission->signal_value_count) {
4452 		memcpy(deferred->signal_values, submission->signal_values, submission->signal_value_count * sizeof(uint64_t));
4453 	}
4454 
4455 	deferred->wait_nodes = (void*)(deferred->signal_values + submission->signal_value_count);
4456 	/* This is worst-case. radv_queue_enqueue_submission will fill in further, but this
4457 	 * ensure the submission is not accidentally triggered early when adding wait timelines. */
4458 	deferred->submission_wait_count = 1 + submission->wait_semaphore_count;
4459 
4460 	*out = deferred;
4461 	return VK_SUCCESS;
4462 }
4463 
4464 static VkResult
radv_queue_enqueue_submission(struct radv_deferred_queue_submission * submission,struct list_head * processing_list)4465 radv_queue_enqueue_submission(struct radv_deferred_queue_submission *submission,
4466                               struct list_head *processing_list)
4467 {
4468 	uint32_t wait_cnt = 0;
4469 	struct radv_timeline_waiter *waiter = submission->wait_nodes;
4470 	for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4471 		if (submission->wait_semaphores[i]->kind == RADV_SEMAPHORE_TIMELINE) {
4472 			pthread_mutex_lock(&submission->wait_semaphores[i]->timeline.mutex);
4473 			if (submission->wait_semaphores[i]->timeline.highest_submitted < submission->wait_values[i]) {
4474 				++wait_cnt;
4475 				waiter->value = submission->wait_values[i];
4476 				waiter->submission = submission;
4477 				list_addtail(&waiter->list, &submission->wait_semaphores[i]->timeline.waiters);
4478 				++waiter;
4479 			}
4480 			pthread_mutex_unlock(&submission->wait_semaphores[i]->timeline.mutex);
4481 		}
4482 	}
4483 
4484 	pthread_mutex_lock(&submission->queue->pending_mutex);
4485 
4486 	bool is_first = list_is_empty(&submission->queue->pending_submissions);
4487 	list_addtail(&submission->queue_pending_list, &submission->queue->pending_submissions);
4488 
4489 	pthread_mutex_unlock(&submission->queue->pending_mutex);
4490 
4491 	/* If there is already a submission in the queue, that will decrement the counter by 1 when
4492 	 * submitted, but if the queue was empty, we decrement ourselves as there is no previous
4493 	 * submission. */
4494 	uint32_t decrement = submission->wait_semaphore_count - wait_cnt + (is_first ? 1 : 0);
4495 
4496 	/* if decrement is zero, then we don't have a refcounted reference to the
4497 	 * submission anymore, so it is not safe to access the submission. */
4498 	if (!decrement)
4499 		return VK_SUCCESS;
4500 
4501 	return radv_queue_trigger_submission(submission, decrement, processing_list);
4502 }
4503 
4504 static void
radv_queue_submission_update_queue(struct radv_deferred_queue_submission * submission,struct list_head * processing_list)4505 radv_queue_submission_update_queue(struct radv_deferred_queue_submission *submission,
4506                                    struct list_head *processing_list)
4507 {
4508 	pthread_mutex_lock(&submission->queue->pending_mutex);
4509 	list_del(&submission->queue_pending_list);
4510 
4511 	/* trigger the next submission in the queue. */
4512 	if (!list_is_empty(&submission->queue->pending_submissions)) {
4513 		struct radv_deferred_queue_submission *next_submission =
4514 			list_first_entry(&submission->queue->pending_submissions,
4515 			                 struct radv_deferred_queue_submission,
4516 			                 queue_pending_list);
4517 		radv_queue_trigger_submission(next_submission, 1, processing_list);
4518 	}
4519 	pthread_mutex_unlock(&submission->queue->pending_mutex);
4520 
4521 	pthread_cond_broadcast(&submission->queue->device->timeline_cond);
4522 }
4523 
4524 static VkResult
radv_queue_submit_deferred(struct radv_deferred_queue_submission * submission,struct list_head * processing_list)4525 radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission,
4526                            struct list_head *processing_list)
4527 {
4528 	RADV_FROM_HANDLE(radv_fence, fence, submission->fence);
4529 	struct radv_queue *queue = submission->queue;
4530 	struct radeon_winsys_ctx *ctx = queue->hw_ctx;
4531 	uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
4532 	struct radeon_winsys_fence *base_fence = NULL;
4533 	bool do_flush = submission->flush_caches || submission->wait_dst_stage_mask;
4534 	bool can_patch = true;
4535 	uint32_t advance;
4536 	struct radv_winsys_sem_info sem_info;
4537 	VkResult result;
4538 	struct radeon_cmdbuf *initial_preamble_cs = NULL;
4539 	struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;
4540 	struct radeon_cmdbuf *continue_preamble_cs = NULL;
4541 
4542 	if (fence) {
4543 		/* Under most circumstances, out fences won't be temporary.
4544 		 * However, the spec does allow it for opaque_fd.
4545 		 *
4546 		 * From the Vulkan 1.0.53 spec:
4547 		 *
4548 		 *    "If the import is temporary, the implementation must
4549 		 *    restore the semaphore to its prior permanent state after
4550 		 *    submitting the next semaphore wait operation."
4551 		 */
4552 		struct radv_fence_part *part =
4553 			fence->temporary.kind != RADV_FENCE_NONE ?
4554 			&fence->temporary : &fence->permanent;
4555 		if (part->kind == RADV_FENCE_WINSYS)
4556 			base_fence = part->fence;
4557 	}
4558 
4559 	result = radv_get_preambles(queue, submission->cmd_buffers,
4560 	                            submission->cmd_buffer_count,
4561 	                            &initial_preamble_cs,
4562 	                            &initial_flush_preamble_cs,
4563 	                            &continue_preamble_cs);
4564 	if (result != VK_SUCCESS)
4565 		goto fail;
4566 
4567 	result = radv_alloc_sem_info(queue->device,
4568 				     &sem_info,
4569 				     submission->wait_semaphore_count,
4570 				     submission->wait_semaphores,
4571 				     submission->wait_values,
4572 				     submission->signal_semaphore_count,
4573 				     submission->signal_semaphores,
4574 				     submission->signal_values,
4575 				     submission->fence);
4576 	if (result != VK_SUCCESS)
4577 		goto fail;
4578 
4579 	for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
4580 		result = radv_sparse_buffer_bind_memory(queue->device,
4581 							submission->buffer_binds + i);
4582 		if (result != VK_SUCCESS)
4583 			goto fail;
4584 	}
4585 
4586 	for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
4587 		result = radv_sparse_image_opaque_bind_memory(queue->device,
4588 							      submission->image_opaque_binds + i);
4589 		if (result != VK_SUCCESS)
4590 			goto fail;
4591 	}
4592 
4593 	if (!submission->cmd_buffer_count) {
4594 		result = queue->device->ws->cs_submit(ctx, queue->queue_idx,
4595 						      &queue->device->empty_cs[queue->queue_family_index],
4596 						      1, NULL, NULL,
4597 						      &sem_info, NULL,
4598 						      false, base_fence);
4599 		if (result != VK_SUCCESS)
4600 			goto fail;
4601 	} else {
4602 		struct radeon_cmdbuf **cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
4603 		                                         (submission->cmd_buffer_count));
4604 
4605 		for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) {
4606 			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]);
4607 			assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4608 
4609 			cs_array[j] = cmd_buffer->cs;
4610 			if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
4611 				can_patch = false;
4612 
4613 			cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
4614 		}
4615 
4616 		for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {
4617 			struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
4618 			const struct radv_winsys_bo_list *bo_list = NULL;
4619 
4620 			advance = MIN2(max_cs_submission,
4621 			               submission->cmd_buffer_count - j);
4622 
4623 			if (queue->device->trace_bo)
4624 				*queue->device->trace_id_ptr = 0;
4625 
4626 			sem_info.cs_emit_wait = j == 0;
4627 			sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;
4628 
4629 			if (unlikely(queue->device->use_global_bo_list)) {
4630 				u_rwlock_rdlock(&queue->device->bo_list.rwlock);
4631 				bo_list = &queue->device->bo_list.list;
4632 			}
4633 
4634 			result = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
4635 							      advance, initial_preamble, continue_preamble_cs,
4636 							      &sem_info, bo_list,
4637 							      can_patch, base_fence);
4638 
4639 			if (unlikely(queue->device->use_global_bo_list))
4640 				u_rwlock_rdunlock(&queue->device->bo_list.rwlock);
4641 
4642 			if (result != VK_SUCCESS)
4643 				goto fail;
4644 
4645 			if (queue->device->trace_bo) {
4646 				radv_check_gpu_hangs(queue, cs_array[j]);
4647 			}
4648 
4649 			if (queue->device->tma_bo) {
4650 				radv_check_trap_handler(queue);
4651 			}
4652 		}
4653 
4654 		free(cs_array);
4655 	}
4656 
4657 	radv_free_temp_syncobjs(queue->device,
4658 				submission->temporary_semaphore_part_count,
4659 				submission->temporary_semaphore_parts);
4660 	radv_finalize_timelines(queue->device,
4661 	                        submission->wait_semaphore_count,
4662 	                        submission->wait_semaphores,
4663 	                        submission->wait_values,
4664 	                        submission->signal_semaphore_count,
4665 	                        submission->signal_semaphores,
4666 	                        submission->signal_values,
4667 	                        processing_list);
4668 	/* Has to happen after timeline finalization to make sure the
4669 	 * condition variable is only triggered when timelines and queue have
4670 	 * been updated. */
4671 	radv_queue_submission_update_queue(submission, processing_list);
4672 	radv_free_sem_info(&sem_info);
4673 	free(submission);
4674 	return VK_SUCCESS;
4675 
4676 fail:
4677 	if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
4678 		/* When something bad happened during the submission, such as
4679 		 * an out of memory issue, it might be hard to recover from
4680 		 * this inconsistent state. To avoid this sort of problem, we
4681 		 * assume that we are in a really bad situation and return
4682 		 * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt
4683 		 * to submit the same job again to this device.
4684 		 */
4685 		result = radv_device_set_lost(queue->device, "vkQueueSubmit() failed");
4686 	}
4687 
4688 	radv_free_temp_syncobjs(queue->device,
4689 				submission->temporary_semaphore_part_count,
4690 				submission->temporary_semaphore_parts);
4691 	free(submission);
4692 	return result;
4693 }
4694 
4695 static VkResult
radv_process_submissions(struct list_head * processing_list)4696 radv_process_submissions(struct list_head *processing_list)
4697 {
4698 	while(!list_is_empty(processing_list)) {
4699 		struct radv_deferred_queue_submission *submission =
4700 			list_first_entry(processing_list, struct radv_deferred_queue_submission, processing_list);
4701 		list_del(&submission->processing_list);
4702 
4703 		VkResult result = radv_queue_submit_deferred(submission, processing_list);
4704 		if (result != VK_SUCCESS)
4705 			return result;
4706 	}
4707 	return VK_SUCCESS;
4708 }
4709 
4710 static VkResult
wait_for_submission_timelines_available(struct radv_deferred_queue_submission * submission,uint64_t timeout)4711 wait_for_submission_timelines_available(struct radv_deferred_queue_submission *submission,
4712                                         uint64_t timeout)
4713 {
4714 	struct radv_device *device = submission->queue->device;
4715 	uint32_t syncobj_count = 0;
4716 	uint32_t syncobj_idx = 0;
4717 
4718 	for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4719 		if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
4720 			continue;
4721 
4722 		if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
4723 			continue;
4724 		++syncobj_count;
4725 	}
4726 
4727 	if (!syncobj_count)
4728 		return VK_SUCCESS;
4729 
4730 	uint64_t *points = malloc((sizeof(uint64_t) + sizeof(uint32_t)) * syncobj_count);
4731 	if (!points)
4732 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4733 
4734 	uint32_t *syncobj = (uint32_t*)(points + syncobj_count);
4735 
4736 	for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4737 		if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
4738 			continue;
4739 
4740 		if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
4741 			continue;
4742 
4743 		syncobj[syncobj_idx] = submission->wait_semaphores[i]->syncobj;
4744 		points[syncobj_idx] = submission->wait_values[i];
4745 		++syncobj_idx;
4746 	}
4747 	bool success = device->ws->wait_timeline_syncobj(device->ws, syncobj, points, syncobj_idx, true, true, timeout);
4748 
4749 	free(points);
4750 	return success ? VK_SUCCESS : VK_TIMEOUT;
4751 }
4752 
radv_queue_submission_thread_run(void * q)4753 static void* radv_queue_submission_thread_run(void *q)
4754 {
4755 	struct radv_queue *queue = q;
4756 
4757 	pthread_mutex_lock(&queue->thread_mutex);
4758 	while (!p_atomic_read(&queue->thread_exit)) {
4759 		struct radv_deferred_queue_submission *submission = queue->thread_submission;
4760 		struct list_head processing_list;
4761 		VkResult result = VK_SUCCESS;
4762 		if (!submission) {
4763 			pthread_cond_wait(&queue->thread_cond, &queue->thread_mutex);
4764 			continue;
4765 		}
4766 		pthread_mutex_unlock(&queue->thread_mutex);
4767 
4768 		/* Wait at most 5 seconds so we have a chance to notice shutdown when
4769 		 * a semaphore never gets signaled. If it takes longer we just retry
4770 		 * the wait next iteration. */
4771 		result = wait_for_submission_timelines_available(submission,
4772 		                                                 radv_get_absolute_timeout(5000000000));
4773 		if (result != VK_SUCCESS) {
4774 			pthread_mutex_lock(&queue->thread_mutex);
4775 			continue;
4776 		}
4777 
4778 		/* The lock isn't held but nobody will add one until we finish
4779 		 * the current submission. */
4780 		p_atomic_set(&queue->thread_submission, NULL);
4781 
4782 		list_inithead(&processing_list);
4783 		list_addtail(&submission->processing_list, &processing_list);
4784 		result = radv_process_submissions(&processing_list);
4785 
4786 		pthread_mutex_lock(&queue->thread_mutex);
4787 	}
4788 	pthread_mutex_unlock(&queue->thread_mutex);
4789 	return NULL;
4790 }
4791 
4792 static VkResult
radv_queue_trigger_submission(struct radv_deferred_queue_submission * submission,uint32_t decrement,struct list_head * processing_list)4793 radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,
4794                               uint32_t decrement,
4795                               struct list_head *processing_list)
4796 {
4797 	struct radv_queue *queue = submission->queue;
4798 	int ret;
4799 	if  (p_atomic_add_return(&submission->submission_wait_count, -decrement))
4800 		return VK_SUCCESS;
4801 
4802 	if (wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(0)) == VK_SUCCESS) {
4803 		list_addtail(&submission->processing_list, processing_list);
4804 		return VK_SUCCESS;
4805 	}
4806 
4807 	pthread_mutex_lock(&queue->thread_mutex);
4808 
4809 	/* A submission can only be ready for the thread if it doesn't have
4810 	 * any predecessors in the same queue, so there can only be one such
4811 	 * submission at a time. */
4812 	assert(queue->thread_submission == NULL);
4813 
4814 	/* Only start the thread on demand to save resources for the many games
4815 	 * which only use binary semaphores. */
4816 	if (!queue->thread_running) {
4817 		ret  = pthread_create(&queue->submission_thread, NULL,
4818 		                      radv_queue_submission_thread_run, queue);
4819 		if (ret) {
4820 			pthread_mutex_unlock(&queue->thread_mutex);
4821 			return vk_errorf(queue->device->instance,
4822 			                 VK_ERROR_DEVICE_LOST,
4823 			                 "Failed to start submission thread");
4824 		}
4825 		queue->thread_running = true;
4826 	}
4827 
4828 	queue->thread_submission = submission;
4829 	pthread_mutex_unlock(&queue->thread_mutex);
4830 
4831 	pthread_cond_signal(&queue->thread_cond);
4832 	return VK_SUCCESS;
4833 }
4834 
radv_queue_submit(struct radv_queue * queue,const struct radv_queue_submission * submission)4835 static VkResult radv_queue_submit(struct radv_queue *queue,
4836                                   const struct radv_queue_submission *submission)
4837 {
4838 	struct radv_deferred_queue_submission *deferred = NULL;
4839 
4840 	VkResult result = radv_create_deferred_submission(queue, submission, &deferred);
4841 	if (result != VK_SUCCESS)
4842 		return result;
4843 
4844 	struct list_head processing_list;
4845 	list_inithead(&processing_list);
4846 
4847 	result = radv_queue_enqueue_submission(deferred, &processing_list);
4848 	if (result != VK_SUCCESS) {
4849 		/* If anything is in the list we leak. */
4850 		assert(list_is_empty(&processing_list));
4851 		return result;
4852 	}
4853 	return radv_process_submissions(&processing_list);
4854 }
4855 
4856 bool
radv_queue_internal_submit(struct radv_queue * queue,struct radeon_cmdbuf * cs)4857 radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)
4858 {
4859 	struct radeon_winsys_ctx *ctx = queue->hw_ctx;
4860 	struct radv_winsys_sem_info sem_info;
4861 	VkResult result;
4862 
4863 	result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, 0, 0,
4864 				     0, NULL, VK_NULL_HANDLE);
4865 	if (result != VK_SUCCESS)
4866 		return false;
4867 
4868 	result = queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1,
4869 					      NULL, NULL, &sem_info, NULL,
4870 					      false, NULL);
4871 	radv_free_sem_info(&sem_info);
4872 	if (result != VK_SUCCESS)
4873 		return false;
4874 
4875 	return true;
4876 
4877 }
4878 
4879 /* Signals fence as soon as all the work currently put on queue is done. */
radv_signal_fence(struct radv_queue * queue,VkFence fence)4880 static VkResult radv_signal_fence(struct radv_queue *queue,
4881                               VkFence fence)
4882 {
4883 	return radv_queue_submit(queue, &(struct radv_queue_submission) {
4884 			.fence = fence
4885 		});
4886 }
4887 
radv_submit_has_effects(const VkSubmitInfo * info)4888 static bool radv_submit_has_effects(const VkSubmitInfo *info)
4889 {
4890 	return info->commandBufferCount ||
4891 	       info->waitSemaphoreCount ||
4892 	       info->signalSemaphoreCount;
4893 }
4894 
radv_QueueSubmit(VkQueue _queue,uint32_t submitCount,const VkSubmitInfo * pSubmits,VkFence fence)4895 VkResult radv_QueueSubmit(
4896 	VkQueue                                     _queue,
4897 	uint32_t                                    submitCount,
4898 	const VkSubmitInfo*                         pSubmits,
4899 	VkFence                                     fence)
4900 {
4901 	RADV_FROM_HANDLE(radv_queue, queue, _queue);
4902 	VkResult result;
4903 	uint32_t fence_idx = 0;
4904 	bool flushed_caches = false;
4905 
4906 	if (radv_device_is_lost(queue->device))
4907 		return VK_ERROR_DEVICE_LOST;
4908 
4909 	if (fence != VK_NULL_HANDLE) {
4910 		for (uint32_t i = 0; i < submitCount; ++i)
4911 			if (radv_submit_has_effects(pSubmits + i))
4912 				fence_idx = i;
4913 	} else
4914 		fence_idx = UINT32_MAX;
4915 
4916 	for (uint32_t i = 0; i < submitCount; i++) {
4917 		if (!radv_submit_has_effects(pSubmits + i) && fence_idx != i)
4918 			continue;
4919 
4920 		VkPipelineStageFlags wait_dst_stage_mask = 0;
4921 		for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) {
4922 			wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j];
4923 		}
4924 
4925 		const VkTimelineSemaphoreSubmitInfo *timeline_info =
4926 			vk_find_struct_const(pSubmits[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
4927 
4928 		result = radv_queue_submit(queue, &(struct radv_queue_submission) {
4929 				.cmd_buffers = pSubmits[i].pCommandBuffers,
4930 				.cmd_buffer_count = pSubmits[i].commandBufferCount,
4931 				.wait_dst_stage_mask = wait_dst_stage_mask,
4932 				.flush_caches = !flushed_caches,
4933 				.wait_semaphores = pSubmits[i].pWaitSemaphores,
4934 				.wait_semaphore_count = pSubmits[i].waitSemaphoreCount,
4935 				.signal_semaphores = pSubmits[i].pSignalSemaphores,
4936 				.signal_semaphore_count = pSubmits[i].signalSemaphoreCount,
4937 				.fence = i == fence_idx ? fence : VK_NULL_HANDLE,
4938 				.wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
4939 				.wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0,
4940 				.signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
4941 				.signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0,
4942 			});
4943 		if (result != VK_SUCCESS)
4944 			return result;
4945 
4946 		flushed_caches  = true;
4947 	}
4948 
4949 	if (fence != VK_NULL_HANDLE && !submitCount) {
4950 		result = radv_signal_fence(queue, fence);
4951 		if (result != VK_SUCCESS)
4952 			return result;
4953 	}
4954 
4955 	return VK_SUCCESS;
4956 }
4957 
4958 static const char *
radv_get_queue_family_name(struct radv_queue * queue)4959 radv_get_queue_family_name(struct radv_queue *queue)
4960 {
4961 	switch (queue->queue_family_index) {
4962 	case RADV_QUEUE_GENERAL:
4963 		return "graphics";
4964 	case RADV_QUEUE_COMPUTE:
4965 		return "compute";
4966 	case RADV_QUEUE_TRANSFER:
4967 		return "transfer";
4968 	default:
4969 		unreachable("Unknown queue family");
4970 	}
4971 }
4972 
radv_QueueWaitIdle(VkQueue _queue)4973 VkResult radv_QueueWaitIdle(
4974 	VkQueue                                     _queue)
4975 {
4976 	RADV_FROM_HANDLE(radv_queue, queue, _queue);
4977 
4978 	if (radv_device_is_lost(queue->device))
4979 		return VK_ERROR_DEVICE_LOST;
4980 
4981 	pthread_mutex_lock(&queue->pending_mutex);
4982 	while (!list_is_empty(&queue->pending_submissions)) {
4983 		pthread_cond_wait(&queue->device->timeline_cond, &queue->pending_mutex);
4984 	}
4985 	pthread_mutex_unlock(&queue->pending_mutex);
4986 
4987 	if (!queue->device->ws->ctx_wait_idle(queue->hw_ctx,
4988 					      radv_queue_family_to_ring(queue->queue_family_index),
4989 					      queue->queue_idx)) {
4990 		return radv_device_set_lost(queue->device,
4991 					    "Failed to wait for a '%s' queue "
4992 					    "to be idle. GPU hang ?",
4993 					    radv_get_queue_family_name(queue));
4994 	}
4995 
4996 	return VK_SUCCESS;
4997 }
4998 
radv_DeviceWaitIdle(VkDevice _device)4999 VkResult radv_DeviceWaitIdle(
5000 	VkDevice                                    _device)
5001 {
5002 	RADV_FROM_HANDLE(radv_device, device, _device);
5003 
5004 	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
5005 		for (unsigned q = 0; q < device->queue_count[i]; q++) {
5006 			VkResult result =
5007 				radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
5008 
5009 			if (result != VK_SUCCESS)
5010 				return result;
5011 		}
5012 	}
5013 	return VK_SUCCESS;
5014 }
5015 
radv_EnumerateInstanceExtensionProperties(const char * pLayerName,uint32_t * pPropertyCount,VkExtensionProperties * pProperties)5016 VkResult radv_EnumerateInstanceExtensionProperties(
5017     const char*                                 pLayerName,
5018     uint32_t*                                   pPropertyCount,
5019     VkExtensionProperties*                      pProperties)
5020 {
5021 	VK_OUTARRAY_MAKE_TYPED(VkExtensionProperties, out, pProperties,
5022 			       pPropertyCount);
5023 
5024 	for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
5025 		if (radv_instance_extensions_supported.extensions[i]) {
5026 			vk_outarray_append_typed(VkExtensionProperties, &out, prop) {
5027 				*prop = radv_instance_extensions[i];
5028 			}
5029 		}
5030 	}
5031 
5032 	return vk_outarray_status(&out);
5033 }
5034 
radv_EnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice,const char * pLayerName,uint32_t * pPropertyCount,VkExtensionProperties * pProperties)5035 VkResult radv_EnumerateDeviceExtensionProperties(
5036     VkPhysicalDevice                            physicalDevice,
5037     const char*                                 pLayerName,
5038     uint32_t*                                   pPropertyCount,
5039     VkExtensionProperties*                      pProperties)
5040 {
5041 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
5042 	VK_OUTARRAY_MAKE_TYPED(VkExtensionProperties, out, pProperties,
5043 			       pPropertyCount);
5044 
5045 	for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
5046 		if (device->supported_extensions.extensions[i]) {
5047 			vk_outarray_append_typed(VkExtensionProperties, &out, prop) {
5048 				*prop = radv_device_extensions[i];
5049 			}
5050 		}
5051 	}
5052 
5053 	return vk_outarray_status(&out);
5054 }
5055 
radv_GetInstanceProcAddr(VkInstance _instance,const char * pName)5056 PFN_vkVoidFunction radv_GetInstanceProcAddr(
5057 	VkInstance                                  _instance,
5058 	const char*                                 pName)
5059 {
5060 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
5061 
5062 	/* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly
5063 	 * when we have to return valid function pointers, NULL, or it's left
5064 	 * undefined.  See the table for exact details.
5065 	 */
5066 	if (pName == NULL)
5067 		return NULL;
5068 
5069 #define LOOKUP_RADV_ENTRYPOINT(entrypoint) \
5070 	if (strcmp(pName, "vk" #entrypoint) == 0) \
5071 		return (PFN_vkVoidFunction)radv_##entrypoint
5072 
5073 	LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
5074 	LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties);
5075 	LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion);
5076 	LOOKUP_RADV_ENTRYPOINT(CreateInstance);
5077 
5078 	/* GetInstanceProcAddr() can also be called with a NULL instance.
5079 	 * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057
5080 	 */
5081 	LOOKUP_RADV_ENTRYPOINT(GetInstanceProcAddr);
5082 
5083 #undef LOOKUP_RADV_ENTRYPOINT
5084 
5085 	if (instance == NULL)
5086 		return NULL;
5087 
5088 	int idx = radv_get_instance_entrypoint_index(pName);
5089 	if (idx >= 0)
5090 		return instance->dispatch.entrypoints[idx];
5091 
5092 	idx = radv_get_physical_device_entrypoint_index(pName);
5093 	if (idx >= 0)
5094 		return instance->physical_device_dispatch.entrypoints[idx];
5095 
5096 	idx = radv_get_device_entrypoint_index(pName);
5097 	if (idx >= 0)
5098 		return instance->device_dispatch.entrypoints[idx];
5099 
5100 	return NULL;
5101 }
5102 
5103 /* The loader wants us to expose a second GetInstanceProcAddr function
5104  * to work around certain LD_PRELOAD issues seen in apps.
5105  */
5106 PUBLIC
5107 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
5108 	VkInstance                                  instance,
5109 	const char*                                 pName);
5110 
5111 PUBLIC
vk_icdGetInstanceProcAddr(VkInstance instance,const char * pName)5112 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
5113 	VkInstance                                  instance,
5114 	const char*                                 pName)
5115 {
5116 	return radv_GetInstanceProcAddr(instance, pName);
5117 }
5118 
5119 PUBLIC
5120 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
5121 	VkInstance                                  _instance,
5122 	const char*                                 pName);
5123 
5124 PUBLIC
vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,const char * pName)5125 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
5126 	VkInstance                                  _instance,
5127 	const char*                                 pName)
5128 {
5129 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
5130 
5131 	if (!pName || !instance)
5132 		return NULL;
5133 
5134 	int idx = radv_get_physical_device_entrypoint_index(pName);
5135 	if (idx < 0)
5136 		return NULL;
5137 
5138 	return instance->physical_device_dispatch.entrypoints[idx];
5139 }
5140 
radv_GetDeviceProcAddr(VkDevice _device,const char * pName)5141 PFN_vkVoidFunction radv_GetDeviceProcAddr(
5142 	VkDevice                                    _device,
5143 	const char*                                 pName)
5144 {
5145 	RADV_FROM_HANDLE(radv_device, device, _device);
5146 
5147 	if (!device || !pName)
5148 		return NULL;
5149 
5150 	int idx = radv_get_device_entrypoint_index(pName);
5151 	if (idx < 0)
5152 		return NULL;
5153 
5154 	return device->dispatch.entrypoints[idx];
5155 }
5156 
radv_get_memory_fd(struct radv_device * device,struct radv_device_memory * memory,int * pFD)5157 bool radv_get_memory_fd(struct radv_device *device,
5158 			struct radv_device_memory *memory,
5159 			int *pFD)
5160 {
5161 	struct radeon_bo_metadata metadata;
5162 
5163 	if (memory->image && memory->image->tiling != VK_IMAGE_TILING_LINEAR) {
5164 		radv_init_metadata(device, memory->image, &metadata);
5165 		device->ws->buffer_set_metadata(memory->bo, &metadata);
5166 	}
5167 
5168 	return device->ws->buffer_get_fd(device->ws, memory->bo,
5169 					 pFD);
5170 }
5171 
5172 
5173 void
radv_free_memory(struct radv_device * device,const VkAllocationCallbacks * pAllocator,struct radv_device_memory * mem)5174 radv_free_memory(struct radv_device *device,
5175 		 const VkAllocationCallbacks* pAllocator,
5176 		 struct radv_device_memory *mem)
5177 {
5178 	if (mem == NULL)
5179 		return;
5180 
5181 #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
5182 	if (mem->android_hardware_buffer)
5183 		AHardwareBuffer_release(mem->android_hardware_buffer);
5184 #endif
5185 
5186 	if (mem->bo) {
5187 		if (device->overallocation_disallowed) {
5188 			mtx_lock(&device->overallocation_mutex);
5189 			device->allocated_memory_size[mem->heap_index] -= mem->alloc_size;
5190 			mtx_unlock(&device->overallocation_mutex);
5191 		}
5192 
5193 		radv_bo_list_remove(device, mem->bo);
5194 		device->ws->buffer_destroy(mem->bo);
5195 		mem->bo = NULL;
5196 	}
5197 
5198 	vk_object_base_finish(&mem->base);
5199 	vk_free2(&device->vk.alloc, pAllocator, mem);
5200 }
5201 
radv_alloc_memory(struct radv_device * device,const VkMemoryAllocateInfo * pAllocateInfo,const VkAllocationCallbacks * pAllocator,VkDeviceMemory * pMem)5202 static VkResult radv_alloc_memory(struct radv_device *device,
5203 				  const VkMemoryAllocateInfo*     pAllocateInfo,
5204 				  const VkAllocationCallbacks*    pAllocator,
5205 				  VkDeviceMemory*                 pMem)
5206 {
5207 	struct radv_device_memory *mem;
5208 	VkResult result;
5209 	enum radeon_bo_domain domain;
5210 	uint32_t flags = 0;
5211 
5212 	assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
5213 
5214 	const VkImportMemoryFdInfoKHR *import_info =
5215 		vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
5216 	const VkMemoryDedicatedAllocateInfo *dedicate_info =
5217 		vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
5218 	const VkExportMemoryAllocateInfo *export_info =
5219 		vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
5220 	const struct VkImportAndroidHardwareBufferInfoANDROID *ahb_import_info =
5221 		vk_find_struct_const(pAllocateInfo->pNext,
5222 		                     IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID);
5223 	const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
5224 		vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
5225 
5226 	const struct wsi_memory_allocate_info *wsi_info =
5227 		vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
5228 
5229 	if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&
5230 	    !(export_info && (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
5231 		/* Apparently, this is allowed */
5232 		*pMem = VK_NULL_HANDLE;
5233 		return VK_SUCCESS;
5234 	}
5235 
5236 	mem = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8,
5237 			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
5238 	if (mem == NULL)
5239 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
5240 
5241 	vk_object_base_init(&device->vk, &mem->base,
5242 			    VK_OBJECT_TYPE_DEVICE_MEMORY);
5243 
5244 	if (wsi_info && wsi_info->implicit_sync)
5245 		flags |= RADEON_FLAG_IMPLICIT_SYNC;
5246 
5247 	if (dedicate_info) {
5248 		mem->image = radv_image_from_handle(dedicate_info->image);
5249 		mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
5250 	} else {
5251 		mem->image = NULL;
5252 		mem->buffer = NULL;
5253 	}
5254 
5255 	float priority_float = 0.5;
5256 	const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
5257 		vk_find_struct_const(pAllocateInfo->pNext,
5258 				     MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
5259 	if (priority_ext)
5260 		priority_float = priority_ext->priority;
5261 
5262 	unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
5263 	                         (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));
5264 
5265 	mem->user_ptr = NULL;
5266 	mem->bo = NULL;
5267 
5268 #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
5269 	mem->android_hardware_buffer = NULL;
5270 #endif
5271 
5272 	if (ahb_import_info) {
5273 		result = radv_import_ahb_memory(device, mem, priority, ahb_import_info);
5274 		if (result != VK_SUCCESS)
5275 			goto fail;
5276 	} else if(export_info && (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {
5277 		result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo);
5278 		if (result != VK_SUCCESS)
5279 			goto fail;
5280 	} else if (import_info) {
5281 		assert(import_info->handleType ==
5282 		       VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
5283 		       import_info->handleType ==
5284 		       VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
5285 		mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
5286 						     priority, NULL);
5287 		if (!mem->bo) {
5288 			result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
5289 			goto fail;
5290 		} else {
5291 			close(import_info->fd);
5292 		}
5293 
5294 		if (mem->image && mem->image->plane_count == 1 &&
5295 		    !vk_format_is_depth_or_stencil(mem->image->vk_format) &&
5296 		    mem->image->info.samples == 1) {
5297 			struct radeon_bo_metadata metadata;
5298 			device->ws->buffer_get_metadata(mem->bo, &metadata);
5299 
5300 			struct radv_image_create_info create_info = {
5301 				.no_metadata_planes = true,
5302 				.bo_metadata = &metadata
5303 			};
5304 
5305 			/* This gives a basic ability to import radeonsi images
5306 			 * that don't have DCC. This is not guaranteed by any
5307 			 * spec and can be removed after we support modifiers. */
5308 			result = radv_image_create_layout(device, create_info, mem->image);
5309 			if (result != VK_SUCCESS) {
5310 				device->ws->buffer_destroy(mem->bo);
5311 				goto fail;
5312 			}
5313 		}
5314 	} else if (host_ptr_info) {
5315 		assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
5316 		mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
5317 		                                      pAllocateInfo->allocationSize,
5318 		                                      priority);
5319 		if (!mem->bo) {
5320 			result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
5321 			goto fail;
5322 		} else {
5323 			mem->user_ptr = host_ptr_info->pHostPointer;
5324 		}
5325 	} else {
5326 		uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
5327 		uint32_t heap_index;
5328 
5329 		heap_index = device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex;
5330 		domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
5331 		flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];
5332 
5333 		if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes)) {
5334 			flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
5335 			if (device->use_global_bo_list) {
5336 				flags |= RADEON_FLAG_PREFER_LOCAL_BO;
5337 			}
5338 		}
5339 
5340 		if (device->overallocation_disallowed) {
5341 			uint64_t total_size =
5342 				device->physical_device->memory_properties.memoryHeaps[heap_index].size;
5343 
5344 			mtx_lock(&device->overallocation_mutex);
5345 			if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
5346 				mtx_unlock(&device->overallocation_mutex);
5347 				result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
5348 				goto fail;
5349 			}
5350 			device->allocated_memory_size[heap_index] += alloc_size;
5351 			mtx_unlock(&device->overallocation_mutex);
5352 		}
5353 
5354 		mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
5355 		                                    domain, flags, priority);
5356 
5357 		if (!mem->bo) {
5358 			if (device->overallocation_disallowed) {
5359 				mtx_lock(&device->overallocation_mutex);
5360 				device->allocated_memory_size[heap_index] -= alloc_size;
5361 				mtx_unlock(&device->overallocation_mutex);
5362 			}
5363 			result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
5364 			goto fail;
5365 		}
5366 
5367 		mem->heap_index = heap_index;
5368 		mem->alloc_size = alloc_size;
5369 	}
5370 
5371 	if (!wsi_info) {
5372 		result = radv_bo_list_add(device, mem->bo);
5373 		if (result != VK_SUCCESS)
5374 			goto fail;
5375 	}
5376 
5377 	*pMem = radv_device_memory_to_handle(mem);
5378 
5379 	return VK_SUCCESS;
5380 
5381 fail:
5382 	radv_free_memory(device, pAllocator,mem);
5383 
5384 	return result;
5385 }
5386 
radv_AllocateMemory(VkDevice _device,const VkMemoryAllocateInfo * pAllocateInfo,const VkAllocationCallbacks * pAllocator,VkDeviceMemory * pMem)5387 VkResult radv_AllocateMemory(
5388 	VkDevice                                    _device,
5389 	const VkMemoryAllocateInfo*                 pAllocateInfo,
5390 	const VkAllocationCallbacks*                pAllocator,
5391 	VkDeviceMemory*                             pMem)
5392 {
5393 	RADV_FROM_HANDLE(radv_device, device, _device);
5394 	return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
5395 }
5396 
/* Entry point for freeing device memory: converts both handles and hands
 * the object to radv_free_memory(). */
void radv_FreeMemory(
	VkDevice                                    _device,
	VkDeviceMemory                              _mem,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device_memory, memory, _mem);
	RADV_FROM_HANDLE(radv_device, dev, _device);

	radv_free_memory(dev, pAllocator, memory);
}
5407 
radv_MapMemory(VkDevice _device,VkDeviceMemory _memory,VkDeviceSize offset,VkDeviceSize size,VkMemoryMapFlags flags,void ** ppData)5408 VkResult radv_MapMemory(
5409 	VkDevice                                    _device,
5410 	VkDeviceMemory                              _memory,
5411 	VkDeviceSize                                offset,
5412 	VkDeviceSize                                size,
5413 	VkMemoryMapFlags                            flags,
5414 	void**                                      ppData)
5415 {
5416 	RADV_FROM_HANDLE(radv_device, device, _device);
5417 	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
5418 
5419 	if (mem == NULL) {
5420 		*ppData = NULL;
5421 		return VK_SUCCESS;
5422 	}
5423 
5424 	if (mem->user_ptr)
5425 		*ppData = mem->user_ptr;
5426 	else
5427 		*ppData = device->ws->buffer_map(mem->bo);
5428 
5429 	if (*ppData) {
5430 		*ppData += offset;
5431 		return VK_SUCCESS;
5432 	}
5433 
5434 	return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
5435 }
5436 
/* Unmap a memory object. NULL handles and memory backed by a host pointer
 * (mem->user_ptr) have nothing to unmap; everything else goes through the
 * winsys buffer_unmap hook. */
void radv_UnmapMemory(
	VkDevice                                    _device,
	VkDeviceMemory                              _memory)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

	if (mem != NULL && mem->user_ptr == NULL)
		device->ws->buffer_unmap(mem->bo);
}
5450 
radv_FlushMappedMemoryRanges(VkDevice _device,uint32_t memoryRangeCount,const VkMappedMemoryRange * pMemoryRanges)5451 VkResult radv_FlushMappedMemoryRanges(
5452 	VkDevice                                    _device,
5453 	uint32_t                                    memoryRangeCount,
5454 	const VkMappedMemoryRange*                  pMemoryRanges)
5455 {
5456 	return VK_SUCCESS;
5457 }
5458 
radv_InvalidateMappedMemoryRanges(VkDevice _device,uint32_t memoryRangeCount,const VkMappedMemoryRange * pMemoryRanges)5459 VkResult radv_InvalidateMappedMemoryRanges(
5460 	VkDevice                                    _device,
5461 	uint32_t                                    memoryRangeCount,
5462 	const VkMappedMemoryRange*                  pMemoryRanges)
5463 {
5464 	return VK_SUCCESS;
5465 }
5466 
/* Report the memory requirements of a buffer.
 *
 * Every memory type of the physical device is allowed. The alignment is
 * 4096 for sparse-binding buffers and 16 otherwise, and the size is the
 * buffer size rounded up to that alignment. */
void radv_GetBufferMemoryRequirements(
	VkDevice                                    _device,
	VkBuffer                                    _buffer,
	VkMemoryRequirements*                       pMemoryRequirements)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
	const bool is_sparse =
		(buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) != 0;

	/* One bit per memory type exposed by the physical device. */
	pMemoryRequirements->memoryTypeBits =
		(1u << device->physical_device->memory_properties.memoryTypeCount) - 1;

	pMemoryRequirements->alignment = is_sparse ? 4096 : 16;
	pMemoryRequirements->size = align64(buffer->size,
					    pMemoryRequirements->alignment);
}
5484 
/* Extended buffer memory requirements query.
 *
 * Fills the core requirements through radv_GetBufferMemoryRequirements()
 * and then answers any VkMemoryDedicatedRequirements found in the output
 * pNext chain: buffers neither require nor prefer a dedicated allocation. */
void radv_GetBufferMemoryRequirements2(
	VkDevice                                     device,
	const VkBufferMemoryRequirementsInfo2       *pInfo,
	VkMemoryRequirements2                       *pMemoryRequirements)
{
	radv_GetBufferMemoryRequirements(device, pInfo->buffer,
					 &pMemoryRequirements->memoryRequirements);

	vk_foreach_struct(ext, pMemoryRequirements->pNext) {
		/* Other extension structures are left untouched. */
		if (ext->sType == VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS) {
			VkMemoryDedicatedRequirements *dedicated =
				(VkMemoryDedicatedRequirements *) ext;

			dedicated->requiresDedicatedAllocation = false;
			dedicated->prefersDedicatedAllocation = false;
		}
	}
}
5506 
/* Report the memory requirements of an image: every memory type of the
 * physical device is allowed, and size and alignment are copied from the
 * image object. */
void radv_GetImageMemoryRequirements(
	VkDevice                                    _device,
	VkImage                                     _image,
	VkMemoryRequirements*                       pMemoryRequirements)
{
	RADV_FROM_HANDLE(radv_image, image, _image);
	RADV_FROM_HANDLE(radv_device, device, _device);

	pMemoryRequirements->alignment = image->alignment;
	pMemoryRequirements->size = image->size;

	/* One bit per memory type exposed by the physical device. */
	pMemoryRequirements->memoryTypeBits =
		(1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
}
5520 
/* Extended image memory requirements query.
 *
 * Fills the core requirements through radv_GetImageMemoryRequirements()
 * and then answers any VkMemoryDedicatedRequirements found in the output
 * pNext chain: a dedicated allocation is both required and preferred for
 * shareable images whose tiling is not linear. */
void radv_GetImageMemoryRequirements2(
	VkDevice                                    device,
	const VkImageMemoryRequirementsInfo2       *pInfo,
	VkMemoryRequirements2                      *pMemoryRequirements)
{
	RADV_FROM_HANDLE(radv_image, image, pInfo->image);

	radv_GetImageMemoryRequirements(device, pInfo->image,
					&pMemoryRequirements->memoryRequirements);

	vk_foreach_struct(ext, pMemoryRequirements->pNext) {
		/* Other extension structures are left untouched. */
		if (ext->sType == VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS) {
			VkMemoryDedicatedRequirements *dedicated =
				(VkMemoryDedicatedRequirements *) ext;

			dedicated->requiresDedicatedAllocation =
				image->shareable &&
				image->tiling != VK_IMAGE_TILING_LINEAR;
			dedicated->prefersDedicatedAllocation =
				dedicated->requiresDedicatedAllocation;
		}
	}
}
5546 
/* Sparse image memory requirements query. The body only invokes stub();
 * none of the output parameters are written in this function.
 * NOTE(review): stub() presumably reports an unimplemented entry point;
 * confirm against its definition. */
void radv_GetImageSparseMemoryRequirements(
	VkDevice                                    device,
	VkImage                                     image,
	uint32_t*                                   pSparseMemoryRequirementCount,
	VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
{
	stub();
}
5555 
/* Extended sparse image memory requirements query. The body only invokes
 * stub(); none of the output parameters are written in this function.
 * NOTE(review): stub() presumably reports an unimplemented entry point;
 * confirm against its definition. */
void radv_GetImageSparseMemoryRequirements2(
	VkDevice                                    device,
	const VkImageSparseMemoryRequirementsInfo2 *pInfo,
	uint32_t*                                   pSparseMemoryRequirementCount,
	VkSparseImageMemoryRequirements2           *pSparseMemoryRequirements)
{
	stub();
}
5564 
/* Report the committed size of a memory object; this always writes 0 and
 * does not look at the device or memory handles. */
void radv_GetDeviceMemoryCommitment(
	VkDevice                                    device,
	VkDeviceMemory                              memory,
	VkDeviceSize*                               pCommittedMemoryInBytes)
{
	const VkDeviceSize committed = 0;

	*pCommittedMemoryInBytes = committed;
}
5572 
radv_BindBufferMemory2(VkDevice device,uint32_t bindInfoCount,const VkBindBufferMemoryInfo * pBindInfos)5573 VkResult radv_BindBufferMemory2(VkDevice device,
5574                                 uint32_t bindInfoCount,
5575                                 const VkBindBufferMemoryInfo *pBindInfos)
5576 {
5577 	for (uint32_t i = 0; i < bindInfoCount; ++i) {
5578 		RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
5579 		RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
5580 
5581 		if (mem) {
5582 			buffer->bo = mem->bo;
5583 			buffer->offset = pBindInfos[i].memoryOffset;
5584 		} else {
5585 			buffer->bo = NULL;
5586 		}
5587 	}
5588 	return VK_SUCCESS;
5589 }
5590 
radv_BindBufferMemory(VkDevice device,VkBuffer buffer,VkDeviceMemory memory,VkDeviceSize memoryOffset)5591 VkResult radv_BindBufferMemory(
5592 	VkDevice                                    device,
5593 	VkBuffer                                    buffer,
5594 	VkDeviceMemory                              memory,
5595 	VkDeviceSize                                memoryOffset)
5596 {
5597 	const VkBindBufferMemoryInfo info = {
5598 		.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
5599 		.buffer = buffer,
5600 		.memory = memory,
5601 		.memoryOffset = memoryOffset
5602 	};
5603 
5604 	return radv_BindBufferMemory2(device, 1, &info);
5605 }
5606 
radv_BindImageMemory2(VkDevice device,uint32_t bindInfoCount,const VkBindImageMemoryInfo * pBindInfos)5607 VkResult radv_BindImageMemory2(VkDevice device,
5608                                uint32_t bindInfoCount,
5609                                const VkBindImageMemoryInfo *pBindInfos)
5610 {
5611 	for (uint32_t i = 0; i < bindInfoCount; ++i) {
5612 		RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
5613 		RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
5614 
5615 		if (mem) {
5616 			image->bo = mem->bo;
5617 			image->offset = pBindInfos[i].memoryOffset;
5618 		} else {
5619 			image->bo = NULL;
5620 			image->offset = 0;
5621 		}
5622 	}
5623 	return VK_SUCCESS;
5624 }
5625 
5626 
radv_BindImageMemory(VkDevice device,VkImage image,VkDeviceMemory memory,VkDeviceSize memoryOffset)5627 VkResult radv_BindImageMemory(
5628 	VkDevice                                    device,
5629 	VkImage                                     image,
5630 	VkDeviceMemory                              memory,
5631 	VkDeviceSize                                memoryOffset)
5632 {
5633 	const VkBindImageMemoryInfo info = {
5634 		.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
5635 		.image = image,
5636 		.memory = memory,
5637 		.memoryOffset = memoryOffset
5638 	};
5639 
5640 	return radv_BindImageMemory2(device, 1, &info);
5641 }
5642 
radv_sparse_bind_has_effects(const VkBindSparseInfo * info)5643 static bool radv_sparse_bind_has_effects(const VkBindSparseInfo *info)
5644 {
5645 	return info->bufferBindCount ||
5646 	       info->imageOpaqueBindCount ||
5647 	       info->imageBindCount ||
5648 	       info->waitSemaphoreCount ||
5649 	       info->signalSemaphoreCount;
5650 }
5651 
radv_QueueBindSparse(VkQueue _queue,uint32_t bindInfoCount,const VkBindSparseInfo * pBindInfo,VkFence fence)5652  VkResult radv_QueueBindSparse(
5653 	VkQueue                                     _queue,
5654 	uint32_t                                    bindInfoCount,
5655 	const VkBindSparseInfo*                     pBindInfo,
5656 	VkFence                                     fence)
5657 {
5658 	RADV_FROM_HANDLE(radv_queue, queue, _queue);
5659 	VkResult result;
5660 	uint32_t fence_idx = 0;
5661 
5662 	if (radv_device_is_lost(queue->device))
5663 		return VK_ERROR_DEVICE_LOST;
5664 
5665 	if (fence != VK_NULL_HANDLE) {
5666 		for (uint32_t i = 0; i < bindInfoCount; ++i)
5667 			if (radv_sparse_bind_has_effects(pBindInfo + i))
5668 				fence_idx = i;
5669 	} else
5670 		fence_idx = UINT32_MAX;
5671 
5672 	for (uint32_t i = 0; i < bindInfoCount; ++i) {
5673 		if (i != fence_idx && !radv_sparse_bind_has_effects(pBindInfo + i))
5674 			continue;
5675 
5676 		const VkTimelineSemaphoreSubmitInfo *timeline_info =
5677 			vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
5678 
5679 		VkResult result = radv_queue_submit(queue, &(struct radv_queue_submission) {
5680 				.buffer_binds = pBindInfo[i].pBufferBinds,
5681 				.buffer_bind_count = pBindInfo[i].bufferBindCount,
5682 				.image_opaque_binds = pBindInfo[i].pImageOpaqueBinds,
5683 				.image_opaque_bind_count = pBindInfo[i].imageOpaqueBindCount,
5684 				.wait_semaphores = pBindInfo[i].pWaitSemaphores,
5685 				.wait_semaphore_count = pBindInfo[i].waitSemaphoreCount,
5686 				.signal_semaphores = pBindInfo[i].pSignalSemaphores,
5687 				.signal_semaphore_count = pBindInfo[i].signalSemaphoreCount,
5688 				.fence = i == fence_idx ? fence : VK_NULL_HANDLE,
5689 				.wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
5690 				.wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0,
5691 				.signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
5692 				.signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0,
5693 			});
5694 
5695 		if (result != VK_SUCCESS)
5696 			return result;
5697 	}
5698 
5699 	if (fence != VK_NULL_HANDLE && !bindInfoCount) {
5700 		result = radv_signal_fence(queue, fence);
5701 		if (result != VK_SUCCESS)
5702 			return result;
5703 	}
5704 
5705 	return VK_SUCCESS;
5706 }
5707 
5708 static void
radv_destroy_fence_part(struct radv_device * device,struct radv_fence_part * part)5709 radv_destroy_fence_part(struct radv_device *device,
5710 			struct radv_fence_part *part)
5711 {
5712 	switch (part->kind) {
5713 	case RADV_FENCE_NONE:
5714 		break;
5715 	case RADV_FENCE_WINSYS:
5716 		device->ws->destroy_fence(part->fence);
5717 		break;
5718 	case RADV_FENCE_SYNCOBJ:
5719 		device->ws->destroy_syncobj(device->ws, part->syncobj);
5720 		break;
5721 	default:
5722 		unreachable("Invalid fence type");
5723 	}
5724 
5725 	part->kind = RADV_FENCE_NONE;
5726 }
5727 
5728 static void
radv_destroy_fence(struct radv_device * device,const VkAllocationCallbacks * pAllocator,struct radv_fence * fence)5729 radv_destroy_fence(struct radv_device *device,
5730 		   const VkAllocationCallbacks *pAllocator,
5731 		   struct radv_fence *fence)
5732 {
5733 	radv_destroy_fence_part(device, &fence->temporary);
5734 	radv_destroy_fence_part(device, &fence->permanent);
5735 
5736 	vk_object_base_finish(&fence->base);
5737 	vk_free2(&device->vk.alloc, pAllocator, fence);
5738 }
5739 
radv_CreateFence(VkDevice _device,const VkFenceCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkFence * pFence)5740 VkResult radv_CreateFence(
5741 	VkDevice                                    _device,
5742 	const VkFenceCreateInfo*                    pCreateInfo,
5743 	const VkAllocationCallbacks*                pAllocator,
5744 	VkFence*                                    pFence)
5745 {
5746 	RADV_FROM_HANDLE(radv_device, device, _device);
5747 	const VkExportFenceCreateInfo *export =
5748 		vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO);
5749 	VkExternalFenceHandleTypeFlags handleTypes =
5750 		export ? export->handleTypes : 0;
5751 	struct radv_fence *fence;
5752 
5753 	fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8,
5754 			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
5755 	if (!fence)
5756 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
5757 
5758 	vk_object_base_init(&device->vk, &fence->base, VK_OBJECT_TYPE_FENCE);
5759 
5760 	if (device->always_use_syncobj || handleTypes) {
5761 		fence->permanent.kind = RADV_FENCE_SYNCOBJ;
5762 
5763 		bool create_signaled = false;
5764 		if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
5765 			create_signaled = true;
5766 
5767 		int ret = device->ws->create_syncobj(device->ws, create_signaled,
5768 						     &fence->permanent.syncobj);
5769 		if (ret) {
5770 			radv_destroy_fence(device, pAllocator, fence);
5771 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
5772 		}
5773 	} else {
5774 		fence->permanent.kind = RADV_FENCE_WINSYS;
5775 
5776 		fence->permanent.fence = device->ws->create_fence();
5777 		if (!fence->permanent.fence) {
5778 			vk_free2(&device->vk.alloc, pAllocator, fence);
5779 			radv_destroy_fence(device, pAllocator, fence);
5780 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
5781 		}
5782 		if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
5783 			device->ws->signal_fence(fence->permanent.fence);
5784 	}
5785 
5786 	*pFence = radv_fence_to_handle(fence);
5787 
5788 	return VK_SUCCESS;
5789 }
5790 
5791 
/* Entry point for fence destruction; a NULL fence handle is ignored. */
void radv_DestroyFence(
	VkDevice                                    _device,
	VkFence                                     _fence,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);

	if (fence)
		radv_destroy_fence(device, pAllocator, fence);
}
5805 
radv_all_fences_plain_and_submitted(struct radv_device * device,uint32_t fenceCount,const VkFence * pFences)5806 static bool radv_all_fences_plain_and_submitted(struct radv_device *device,
5807                                                 uint32_t fenceCount, const VkFence *pFences)
5808 {
5809 	for (uint32_t i = 0; i < fenceCount; ++i) {
5810 		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5811 
5812 		struct radv_fence_part *part =
5813 			fence->temporary.kind != RADV_FENCE_NONE ?
5814 			&fence->temporary : &fence->permanent;
5815 		if (part->kind != RADV_FENCE_WINSYS ||
5816 		    !device->ws->is_fence_waitable(part->fence))
5817 			return false;
5818 	}
5819 	return true;
5820 }
5821 
radv_all_fences_syncobj(uint32_t fenceCount,const VkFence * pFences)5822 static bool radv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
5823 {
5824 	for (uint32_t i = 0; i < fenceCount; ++i) {
5825 		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5826 
5827 		struct radv_fence_part *part =
5828 			fence->temporary.kind != RADV_FENCE_NONE ?
5829 			&fence->temporary : &fence->permanent;
5830 		if (part->kind != RADV_FENCE_SYNCOBJ)
5831 			return false;
5832 	}
5833 	return true;
5834 }
5835 
radv_WaitForFences(VkDevice _device,uint32_t fenceCount,const VkFence * pFences,VkBool32 waitAll,uint64_t timeout)5836 VkResult radv_WaitForFences(
5837 	VkDevice                                    _device,
5838 	uint32_t                                    fenceCount,
5839 	const VkFence*                              pFences,
5840 	VkBool32                                    waitAll,
5841 	uint64_t                                    timeout)
5842 {
5843 	RADV_FROM_HANDLE(radv_device, device, _device);
5844 
5845 	if (radv_device_is_lost(device))
5846 		return VK_ERROR_DEVICE_LOST;
5847 
5848 	timeout = radv_get_absolute_timeout(timeout);
5849 
5850 	if (device->always_use_syncobj &&
5851 	    radv_all_fences_syncobj(fenceCount, pFences))
5852 	{
5853 		uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
5854 		if (!handles)
5855 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
5856 
5857 		for (uint32_t i = 0; i < fenceCount; ++i) {
5858 			RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5859 
5860 			struct radv_fence_part *part =
5861 				fence->temporary.kind != RADV_FENCE_NONE ?
5862 				&fence->temporary : &fence->permanent;
5863 
5864 			assert(part->kind == RADV_FENCE_SYNCOBJ);
5865 			handles[i] = part->syncobj;
5866 		}
5867 
5868 		bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
5869 
5870 		free(handles);
5871 		return success ? VK_SUCCESS : VK_TIMEOUT;
5872 	}
5873 
5874 	if (!waitAll && fenceCount > 1) {
5875 		/* Not doing this by default for waitAll, due to needing to allocate twice. */
5876 		if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(device, fenceCount, pFences)) {
5877 			uint32_t wait_count = 0;
5878 			struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
5879 			if (!fences)
5880 				return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
5881 
5882 			for (uint32_t i = 0; i < fenceCount; ++i) {
5883 				RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5884 
5885 				struct radv_fence_part *part =
5886 					fence->temporary.kind != RADV_FENCE_NONE ?
5887 					&fence->temporary : &fence->permanent;
5888 				assert(part->kind == RADV_FENCE_WINSYS);
5889 
5890 				if (device->ws->fence_wait(device->ws, part->fence, false, 0)) {
5891 					free(fences);
5892 					return VK_SUCCESS;
5893 				}
5894 
5895 				fences[wait_count++] = part->fence;
5896 			}
5897 
5898 			bool success = device->ws->fences_wait(device->ws, fences, wait_count,
5899 							       waitAll, timeout - radv_get_current_time());
5900 
5901 			free(fences);
5902 			return success ? VK_SUCCESS : VK_TIMEOUT;
5903 		}
5904 
5905 		while(radv_get_current_time() <= timeout) {
5906 			for (uint32_t i = 0; i < fenceCount; ++i) {
5907 				if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
5908 					return VK_SUCCESS;
5909 			}
5910 		}
5911 		return VK_TIMEOUT;
5912 	}
5913 
5914 	for (uint32_t i = 0; i < fenceCount; ++i) {
5915 		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5916 		bool expired = false;
5917 
5918 		struct radv_fence_part *part =
5919 			fence->temporary.kind != RADV_FENCE_NONE ?
5920 			&fence->temporary : &fence->permanent;
5921 
5922 		switch (part->kind) {
5923 		case RADV_FENCE_NONE:
5924 			break;
5925 		case RADV_FENCE_WINSYS:
5926 			if (!device->ws->is_fence_waitable(part->fence)) {
5927 				while (!device->ws->is_fence_waitable(part->fence) &&
5928 				      radv_get_current_time() <= timeout)
5929 					/* Do nothing */;
5930 			}
5931 
5932 			expired = device->ws->fence_wait(device->ws,
5933 							 part->fence,
5934 							 true, timeout);
5935 			if (!expired)
5936 				return VK_TIMEOUT;
5937 			break;
5938 		case RADV_FENCE_SYNCOBJ:
5939 			if (!device->ws->wait_syncobj(device->ws,
5940 						      &part->syncobj, 1, true,
5941 						      timeout))
5942 				return VK_TIMEOUT;
5943 			break;
5944 		default:
5945 			unreachable("Invalid fence type");
5946 		}
5947 	}
5948 
5949 	return VK_SUCCESS;
5950 }
5951 
radv_ResetFences(VkDevice _device,uint32_t fenceCount,const VkFence * pFences)5952 VkResult radv_ResetFences(VkDevice _device,
5953 			  uint32_t fenceCount,
5954 			  const VkFence *pFences)
5955 {
5956 	RADV_FROM_HANDLE(radv_device, device, _device);
5957 
5958 	for (unsigned i = 0; i < fenceCount; ++i) {
5959 		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5960 
5961 		/* From the Vulkan 1.0.53 spec:
5962 		 *
5963 		 *    "If any member of pFences currently has its payload
5964 		 *    imported with temporary permanence, that fence’s prior
5965 		 *    permanent payload is irst restored. The remaining
5966 		 *    operations described therefore operate on the restored
5967 		 *    payload."
5968 		 */
5969 		if (fence->temporary.kind != RADV_FENCE_NONE)
5970 			radv_destroy_fence_part(device, &fence->temporary);
5971 
5972 		struct radv_fence_part *part = &fence->permanent;
5973 
5974 		switch (part->kind) {
5975 		case RADV_FENCE_WINSYS:
5976 			device->ws->reset_fence(part->fence);
5977 			break;
5978 		case RADV_FENCE_SYNCOBJ:
5979 			device->ws->reset_syncobj(device->ws, part->syncobj);
5980 			break;
5981 		default:
5982 			unreachable("Invalid fence type");
5983 		}
5984 	}
5985 
5986 	return VK_SUCCESS;
5987 }
5988 
radv_GetFenceStatus(VkDevice _device,VkFence _fence)5989 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
5990 {
5991 	RADV_FROM_HANDLE(radv_device, device, _device);
5992 	RADV_FROM_HANDLE(radv_fence, fence, _fence);
5993 
5994 	struct radv_fence_part *part =
5995 		fence->temporary.kind != RADV_FENCE_NONE ?
5996 		&fence->temporary : &fence->permanent;
5997 
5998 	if (radv_device_is_lost(device))
5999 		return VK_ERROR_DEVICE_LOST;
6000 
6001 	switch (part->kind) {
6002 	case RADV_FENCE_NONE:
6003 		break;
6004 	case RADV_FENCE_WINSYS:
6005 		if (!device->ws->fence_wait(device->ws, part->fence, false, 0))
6006 			return VK_NOT_READY;
6007 		break;
6008 	case RADV_FENCE_SYNCOBJ: {
6009 		bool success = device->ws->wait_syncobj(device->ws,
6010 							&part->syncobj, 1, true, 0);
6011 		if (!success)
6012 			return VK_NOT_READY;
6013 		break;
6014 	}
6015 	default:
6016 		unreachable("Invalid fence type");
6017 	}
6018 
6019 	return VK_SUCCESS;
6020 }
6021 
6022 
6023 // Queue semaphore functions
6024 
6025 static void
radv_create_timeline(struct radv_timeline * timeline,uint64_t value)6026 radv_create_timeline(struct radv_timeline *timeline, uint64_t value)
6027 {
6028 	timeline->highest_signaled = value;
6029 	timeline->highest_submitted = value;
6030 	list_inithead(&timeline->points);
6031 	list_inithead(&timeline->free_points);
6032 	list_inithead(&timeline->waiters);
6033 	pthread_mutex_init(&timeline->mutex, NULL);
6034 }
6035 
6036 static void
radv_destroy_timeline(struct radv_device * device,struct radv_timeline * timeline)6037 radv_destroy_timeline(struct radv_device *device,
6038                       struct radv_timeline *timeline)
6039 {
6040 	list_for_each_entry_safe(struct radv_timeline_point, point,
6041 	                         &timeline->free_points, list) {
6042 		list_del(&point->list);
6043 		device->ws->destroy_syncobj(device->ws, point->syncobj);
6044 		free(point);
6045 	}
6046 	list_for_each_entry_safe(struct radv_timeline_point, point,
6047 	                         &timeline->points, list) {
6048 		list_del(&point->list);
6049 		device->ws->destroy_syncobj(device->ws, point->syncobj);
6050 		free(point);
6051 	}
6052 	pthread_mutex_destroy(&timeline->mutex);
6053 }
6054 
6055 static void
radv_timeline_gc_locked(struct radv_device * device,struct radv_timeline * timeline)6056 radv_timeline_gc_locked(struct radv_device *device,
6057                         struct radv_timeline *timeline)
6058 {
6059 	list_for_each_entry_safe(struct radv_timeline_point, point,
6060 	                         &timeline->points, list) {
6061 		if (point->wait_count || point->value > timeline->highest_submitted)
6062 			return;
6063 
6064 		if (device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, 0)) {
6065 			timeline->highest_signaled = point->value;
6066 			list_del(&point->list);
6067 			list_add(&point->list, &timeline->free_points);
6068 		}
6069 	}
6070 }
6071 
6072 static struct radv_timeline_point *
radv_timeline_find_point_at_least_locked(struct radv_device * device,struct radv_timeline * timeline,uint64_t p)6073 radv_timeline_find_point_at_least_locked(struct radv_device *device,
6074                                          struct radv_timeline *timeline,
6075                                          uint64_t p)
6076 {
6077 	radv_timeline_gc_locked(device, timeline);
6078 
6079 	if (p <= timeline->highest_signaled)
6080 		return NULL;
6081 
6082 	list_for_each_entry(struct radv_timeline_point, point,
6083 	                    &timeline->points, list) {
6084 		if (point->value >= p) {
6085 			++point->wait_count;
6086 			return point;
6087 		}
6088 	}
6089 	return NULL;
6090 }
6091 
6092 static struct radv_timeline_point *
radv_timeline_add_point_locked(struct radv_device * device,struct radv_timeline * timeline,uint64_t p)6093 radv_timeline_add_point_locked(struct radv_device *device,
6094                                struct radv_timeline *timeline,
6095                                uint64_t p)
6096 {
6097 	radv_timeline_gc_locked(device, timeline);
6098 
6099 	struct radv_timeline_point *ret = NULL;
6100 	struct radv_timeline_point *prev = NULL;
6101 	int r;
6102 
6103 	if (p <= timeline->highest_signaled)
6104 		return NULL;
6105 
6106 	list_for_each_entry(struct radv_timeline_point, point,
6107 	                    &timeline->points, list) {
6108 		if (point->value == p) {
6109 			return NULL;
6110 		}
6111 
6112 		if (point->value < p)
6113 			prev = point;
6114 	}
6115 
6116 	if (list_is_empty(&timeline->free_points)) {
6117 		ret = malloc(sizeof(struct radv_timeline_point));
6118 		r = device->ws->create_syncobj(device->ws, false, &ret->syncobj);
6119 		if (r) {
6120 			free(ret);
6121 			return NULL;
6122 		}
6123 	} else {
6124 		ret = list_first_entry(&timeline->free_points, struct radv_timeline_point, list);
6125 		list_del(&ret->list);
6126 
6127 		device->ws->reset_syncobj(device->ws, ret->syncobj);
6128 	}
6129 
6130 	ret->value = p;
6131 	ret->wait_count = 1;
6132 
6133 	if (prev) {
6134 		list_add(&ret->list, &prev->list);
6135 	} else {
6136 		list_addtail(&ret->list, &timeline->points);
6137 	}
6138 	return ret;
6139 }
6140 
6141 
6142 static VkResult
radv_timeline_wait(struct radv_device * device,struct radv_timeline * timeline,uint64_t value,uint64_t abs_timeout)6143 radv_timeline_wait(struct radv_device *device,
6144                    struct radv_timeline *timeline,
6145                    uint64_t value,
6146                    uint64_t abs_timeout)
6147 {
6148 	pthread_mutex_lock(&timeline->mutex);
6149 
6150 	while(timeline->highest_submitted < value) {
6151 		struct timespec abstime;
6152 		timespec_from_nsec(&abstime, abs_timeout);
6153 
6154 		pthread_cond_timedwait(&device->timeline_cond, &timeline->mutex, &abstime);
6155 
6156 		if (radv_get_current_time() >= abs_timeout && timeline->highest_submitted < value) {
6157 			pthread_mutex_unlock(&timeline->mutex);
6158 			return VK_TIMEOUT;
6159 		}
6160 	}
6161 
6162 	struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(device, timeline, value);
6163 	pthread_mutex_unlock(&timeline->mutex);
6164 	if (!point)
6165 		return VK_SUCCESS;
6166 
6167 	bool success = device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, abs_timeout);
6168 
6169 	pthread_mutex_lock(&timeline->mutex);
6170 	point->wait_count--;
6171 	pthread_mutex_unlock(&timeline->mutex);
6172 	return success ? VK_SUCCESS : VK_TIMEOUT;
6173 }
6174 
6175 static void
radv_timeline_trigger_waiters_locked(struct radv_timeline * timeline,struct list_head * processing_list)6176 radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
6177                                      struct list_head *processing_list)
6178 {
6179 	list_for_each_entry_safe(struct radv_timeline_waiter, waiter,
6180 	                         &timeline->waiters, list) {
6181 		if (waiter->value > timeline->highest_submitted)
6182 			continue;
6183 
6184 		radv_queue_trigger_submission(waiter->submission, 1, processing_list);
6185 		list_del(&waiter->list);
6186 	}
6187 }
6188 
6189 static
radv_destroy_semaphore_part(struct radv_device * device,struct radv_semaphore_part * part)6190 void radv_destroy_semaphore_part(struct radv_device *device,
6191                                  struct radv_semaphore_part *part)
6192 {
6193 	switch(part->kind) {
6194 	case RADV_SEMAPHORE_NONE:
6195 		break;
6196 	case RADV_SEMAPHORE_WINSYS:
6197 		device->ws->destroy_sem(part->ws_sem);
6198 		break;
6199 	case RADV_SEMAPHORE_TIMELINE:
6200 		radv_destroy_timeline(device, &part->timeline);
6201 		break;
6202 	case RADV_SEMAPHORE_SYNCOBJ:
6203 	case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
6204 		device->ws->destroy_syncobj(device->ws, part->syncobj);
6205 		break;
6206 	}
6207 	part->kind = RADV_SEMAPHORE_NONE;
6208 }
6209 
6210 static VkSemaphoreTypeKHR
radv_get_semaphore_type(const void * pNext,uint64_t * initial_value)6211 radv_get_semaphore_type(const void *pNext, uint64_t *initial_value)
6212 {
6213 	const VkSemaphoreTypeCreateInfo *type_info =
6214 		vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO);
6215 
6216 	if (!type_info)
6217 		return VK_SEMAPHORE_TYPE_BINARY;
6218 
6219 	if (initial_value)
6220 		*initial_value = type_info->initialValue;
6221 	return type_info->semaphoreType;
6222 }
6223 
6224 static void
radv_destroy_semaphore(struct radv_device * device,const VkAllocationCallbacks * pAllocator,struct radv_semaphore * sem)6225 radv_destroy_semaphore(struct radv_device *device,
6226 		       const VkAllocationCallbacks *pAllocator,
6227 		       struct radv_semaphore *sem)
6228 {
6229 	radv_destroy_semaphore_part(device, &sem->temporary);
6230 	radv_destroy_semaphore_part(device, &sem->permanent);
6231 	vk_object_base_finish(&sem->base);
6232 	vk_free2(&device->vk.alloc, pAllocator, sem);
6233 }
6234 
radv_CreateSemaphore(VkDevice _device,const VkSemaphoreCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkSemaphore * pSemaphore)6235 VkResult radv_CreateSemaphore(
6236 	VkDevice                                    _device,
6237 	const VkSemaphoreCreateInfo*                pCreateInfo,
6238 	const VkAllocationCallbacks*                pAllocator,
6239 	VkSemaphore*                                pSemaphore)
6240 {
6241 	RADV_FROM_HANDLE(radv_device, device, _device);
6242 	const VkExportSemaphoreCreateInfo *export =
6243 		vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
6244 	VkExternalSemaphoreHandleTypeFlags handleTypes =
6245 		export ? export->handleTypes : 0;
6246 	uint64_t initial_value = 0;
6247 	VkSemaphoreTypeKHR type = radv_get_semaphore_type(pCreateInfo->pNext, &initial_value);
6248 
6249 	struct radv_semaphore *sem = vk_alloc2(&device->vk.alloc, pAllocator,
6250 					       sizeof(*sem), 8,
6251 					       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6252 	if (!sem)
6253 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6254 
6255 	vk_object_base_init(&device->vk, &sem->base,
6256 			    VK_OBJECT_TYPE_SEMAPHORE);
6257 
6258 	sem->temporary.kind = RADV_SEMAPHORE_NONE;
6259 	sem->permanent.kind = RADV_SEMAPHORE_NONE;
6260 
6261 	if (type == VK_SEMAPHORE_TYPE_TIMELINE &&
6262 	    device->physical_device->rad_info.has_timeline_syncobj) {
6263 		int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);
6264 		if (ret) {
6265 			radv_destroy_semaphore(device, pAllocator, sem);
6266 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6267 		}
6268 		device->ws->signal_syncobj(device->ws, sem->permanent.syncobj, initial_value);
6269 		sem->permanent.timeline_syncobj.max_point = initial_value;
6270 		sem->permanent.kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
6271 	} else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
6272 		radv_create_timeline(&sem->permanent.timeline, initial_value);
6273 		sem->permanent.kind = RADV_SEMAPHORE_TIMELINE;
6274 	} else if (device->always_use_syncobj || handleTypes) {
6275 		assert (device->physical_device->rad_info.has_syncobj);
6276 		int ret = device->ws->create_syncobj(device->ws, false,
6277 						     &sem->permanent.syncobj);
6278 		if (ret) {
6279 			radv_destroy_semaphore(device, pAllocator, sem);
6280 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6281 		}
6282 		sem->permanent.kind = RADV_SEMAPHORE_SYNCOBJ;
6283 	} else {
6284 		sem->permanent.ws_sem = device->ws->create_sem(device->ws);
6285 		if (!sem->permanent.ws_sem) {
6286 			radv_destroy_semaphore(device, pAllocator, sem);
6287 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6288 		}
6289 		sem->permanent.kind = RADV_SEMAPHORE_WINSYS;
6290 	}
6291 
6292 	*pSemaphore = radv_semaphore_to_handle(sem);
6293 	return VK_SUCCESS;
6294 }
6295 
/*
 * Destroy a semaphore handle. A null handle is accepted and ignored.
 */
void radv_DestroySemaphore(
	VkDevice                                    _device,
	VkSemaphore                                 _semaphore,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);

	if (_semaphore)
		radv_destroy_semaphore(device, pAllocator, sem);
}
6308 
6309 VkResult
radv_GetSemaphoreCounterValue(VkDevice _device,VkSemaphore _semaphore,uint64_t * pValue)6310 radv_GetSemaphoreCounterValue(VkDevice _device,
6311 			      VkSemaphore _semaphore,
6312 			      uint64_t* pValue)
6313 {
6314 	RADV_FROM_HANDLE(radv_device, device, _device);
6315 	RADV_FROM_HANDLE(radv_semaphore, semaphore, _semaphore);
6316 
6317 	if (radv_device_is_lost(device))
6318 		return VK_ERROR_DEVICE_LOST;
6319 
6320 	struct radv_semaphore_part *part =
6321 		semaphore->temporary.kind != RADV_SEMAPHORE_NONE ? &semaphore->temporary : &semaphore->permanent;
6322 
6323 	switch (part->kind) {
6324 	case RADV_SEMAPHORE_TIMELINE: {
6325 		pthread_mutex_lock(&part->timeline.mutex);
6326 		radv_timeline_gc_locked(device, &part->timeline);
6327 		*pValue = part->timeline.highest_signaled;
6328 		pthread_mutex_unlock(&part->timeline.mutex);
6329 		return VK_SUCCESS;
6330 	}
6331 	case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
6332 		return device->ws->query_syncobj(device->ws, part->syncobj, pValue);
6333 	}
6334 	case RADV_SEMAPHORE_NONE:
6335 	case RADV_SEMAPHORE_SYNCOBJ:
6336 	case RADV_SEMAPHORE_WINSYS:
6337 		unreachable("Invalid semaphore type");
6338 	}
6339 	unreachable("Unhandled semaphore type");
6340 }
6341 
6342 
6343 static VkResult
radv_wait_timelines(struct radv_device * device,const VkSemaphoreWaitInfo * pWaitInfo,uint64_t abs_timeout)6344 radv_wait_timelines(struct radv_device *device,
6345                     const VkSemaphoreWaitInfo* pWaitInfo,
6346                     uint64_t abs_timeout)
6347 {
6348 	if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) && pWaitInfo->semaphoreCount > 1) {
6349 		for (;;) {
6350 			for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
6351 				RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
6352 				VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], 0);
6353 
6354 				if (result == VK_SUCCESS)
6355 					return VK_SUCCESS;
6356 			}
6357 			if (radv_get_current_time() > abs_timeout)
6358 				return VK_TIMEOUT;
6359 		}
6360 	}
6361 
6362 	for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
6363 		RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
6364 		VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], abs_timeout);
6365 
6366 		if (result != VK_SUCCESS)
6367 			return result;
6368 	}
6369 	return VK_SUCCESS;
6370 }
6371 VkResult
radv_WaitSemaphores(VkDevice _device,const VkSemaphoreWaitInfo * pWaitInfo,uint64_t timeout)6372 radv_WaitSemaphores(VkDevice _device,
6373 		    const VkSemaphoreWaitInfo* pWaitInfo,
6374 		    uint64_t timeout)
6375 {
6376 	RADV_FROM_HANDLE(radv_device, device, _device);
6377 
6378 	if (radv_device_is_lost(device))
6379 		return VK_ERROR_DEVICE_LOST;
6380 
6381 	uint64_t abs_timeout = radv_get_absolute_timeout(timeout);
6382 
6383 	if (radv_semaphore_from_handle(pWaitInfo->pSemaphores[0])->permanent.kind == RADV_SEMAPHORE_TIMELINE)
6384 		return radv_wait_timelines(device, pWaitInfo, abs_timeout);
6385 
6386 	if (pWaitInfo->semaphoreCount > UINT32_MAX / sizeof(uint32_t))
6387 		return vk_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "semaphoreCount integer overflow");
6388 
6389 	bool wait_all = !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR);
6390 	uint32_t *handles = malloc(sizeof(*handles) * pWaitInfo->semaphoreCount);
6391 	if (!handles)
6392 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6393 
6394 	for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
6395 		RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
6396 		handles[i] = semaphore->permanent.syncobj;
6397 	}
6398 
6399 	bool success = device->ws->wait_timeline_syncobj(device->ws, handles, pWaitInfo->pValues,
6400 	                                                 pWaitInfo->semaphoreCount, wait_all, false,
6401 	                                                 abs_timeout);
6402 	free(handles);
6403 	return success ? VK_SUCCESS : VK_TIMEOUT;
6404 }
6405 
6406 VkResult
radv_SignalSemaphore(VkDevice _device,const VkSemaphoreSignalInfo * pSignalInfo)6407 radv_SignalSemaphore(VkDevice _device,
6408                      const VkSemaphoreSignalInfo* pSignalInfo)
6409 {
6410 	RADV_FROM_HANDLE(radv_device, device, _device);
6411 	RADV_FROM_HANDLE(radv_semaphore, semaphore, pSignalInfo->semaphore);
6412 
6413 	struct radv_semaphore_part *part =
6414 		semaphore->temporary.kind != RADV_SEMAPHORE_NONE ? &semaphore->temporary : &semaphore->permanent;
6415 
6416 	switch(part->kind) {
6417 	case RADV_SEMAPHORE_TIMELINE: {
6418 		pthread_mutex_lock(&part->timeline.mutex);
6419 		radv_timeline_gc_locked(device, &part->timeline);
6420 		part->timeline.highest_submitted = MAX2(part->timeline.highest_submitted, pSignalInfo->value);
6421 		part->timeline.highest_signaled = MAX2(part->timeline.highest_signaled, pSignalInfo->value);
6422 
6423 		struct list_head processing_list;
6424 		list_inithead(&processing_list);
6425 		radv_timeline_trigger_waiters_locked(&part->timeline, &processing_list);
6426 		pthread_mutex_unlock(&part->timeline.mutex);
6427 
6428 		VkResult result = radv_process_submissions(&processing_list);
6429 
6430 		/* This needs to happen after radv_process_submissions, so
6431 		 * that any submitted submissions that are now unblocked get
6432 		 * processed before we wake the application. This way we
6433 		 * ensure that any binary semaphores that are now unblocked
6434 		 * are usable by the application. */
6435 		pthread_cond_broadcast(&device->timeline_cond);
6436 
6437 		return result;
6438 	}
6439 	case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
6440 		part->timeline_syncobj.max_point = MAX2(part->timeline_syncobj.max_point, pSignalInfo->value);
6441 		device->ws->signal_syncobj(device->ws, part->syncobj, pSignalInfo->value);
6442 		break;
6443 	}
6444 	case RADV_SEMAPHORE_NONE:
6445 	case RADV_SEMAPHORE_SYNCOBJ:
6446 	case RADV_SEMAPHORE_WINSYS:
6447 		unreachable("Invalid semaphore type");
6448 	}
6449 	return VK_SUCCESS;
6450 }
6451 
radv_destroy_event(struct radv_device * device,const VkAllocationCallbacks * pAllocator,struct radv_event * event)6452 static void radv_destroy_event(struct radv_device *device,
6453                                const VkAllocationCallbacks* pAllocator,
6454                                struct radv_event *event)
6455 {
6456 	if (event->bo)
6457 		device->ws->buffer_destroy(event->bo);
6458 
6459 	vk_object_base_finish(&event->base);
6460 	vk_free2(&device->vk.alloc, pAllocator, event);
6461 }
6462 
radv_CreateEvent(VkDevice _device,const VkEventCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkEvent * pEvent)6463 VkResult radv_CreateEvent(
6464 	VkDevice                                    _device,
6465 	const VkEventCreateInfo*                    pCreateInfo,
6466 	const VkAllocationCallbacks*                pAllocator,
6467 	VkEvent*                                    pEvent)
6468 {
6469 	RADV_FROM_HANDLE(radv_device, device, _device);
6470 	struct radv_event *event = vk_alloc2(&device->vk.alloc, pAllocator,
6471 					       sizeof(*event), 8,
6472 					       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6473 
6474 	if (!event)
6475 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6476 
6477 	vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);
6478 
6479 	event->bo = device->ws->buffer_create(device->ws, 8, 8,
6480 					      RADEON_DOMAIN_GTT,
6481 					      RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
6482 					      RADV_BO_PRIORITY_FENCE);
6483 	if (!event->bo) {
6484 		radv_destroy_event(device, pAllocator, event);
6485 		return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
6486 	}
6487 
6488 	event->map = (uint64_t*)device->ws->buffer_map(event->bo);
6489 	if (!event->map) {
6490 		radv_destroy_event(device, pAllocator, event);
6491 		return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
6492 	}
6493 
6494 	*pEvent = radv_event_to_handle(event);
6495 
6496 	return VK_SUCCESS;
6497 }
6498 
/*
 * Destroy an event handle. A null event is accepted and ignored.
 */
void radv_DestroyEvent(
	VkDevice                                    _device,
	VkEvent                                     _event,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_event, event, _event);

	if (event)
		radv_destroy_event(device, pAllocator, event);
}
6512 
radv_GetEventStatus(VkDevice _device,VkEvent _event)6513 VkResult radv_GetEventStatus(
6514 	VkDevice                                    _device,
6515 	VkEvent                                     _event)
6516 {
6517 	RADV_FROM_HANDLE(radv_device, device, _device);
6518 	RADV_FROM_HANDLE(radv_event, event, _event);
6519 
6520 	if (radv_device_is_lost(device))
6521 		return VK_ERROR_DEVICE_LOST;
6522 
6523 	if (*event->map == 1)
6524 		return VK_EVENT_SET;
6525 	return VK_EVENT_RESET;
6526 }
6527 
radv_SetEvent(VkDevice _device,VkEvent _event)6528 VkResult radv_SetEvent(
6529 	VkDevice                                    _device,
6530 	VkEvent                                     _event)
6531 {
6532 	RADV_FROM_HANDLE(radv_event, event, _event);
6533 	*event->map = 1;
6534 
6535 	return VK_SUCCESS;
6536 }
6537 
radv_ResetEvent(VkDevice _device,VkEvent _event)6538 VkResult radv_ResetEvent(
6539     VkDevice                                    _device,
6540     VkEvent                                     _event)
6541 {
6542 	RADV_FROM_HANDLE(radv_event, event, _event);
6543 	*event->map = 0;
6544 
6545 	return VK_SUCCESS;
6546 }
6547 
6548 static void
radv_destroy_buffer(struct radv_device * device,const VkAllocationCallbacks * pAllocator,struct radv_buffer * buffer)6549 radv_destroy_buffer(struct radv_device *device,
6550 		    const VkAllocationCallbacks *pAllocator,
6551 		    struct radv_buffer *buffer)
6552 {
6553 	if ((buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo)
6554 		device->ws->buffer_destroy(buffer->bo);
6555 
6556 	vk_object_base_finish(&buffer->base);
6557 	vk_free2(&device->vk.alloc, pAllocator, buffer);
6558 }
6559 
radv_CreateBuffer(VkDevice _device,const VkBufferCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkBuffer * pBuffer)6560 VkResult radv_CreateBuffer(
6561 	VkDevice                                    _device,
6562 	const VkBufferCreateInfo*                   pCreateInfo,
6563 	const VkAllocationCallbacks*                pAllocator,
6564 	VkBuffer*                                   pBuffer)
6565 {
6566 	RADV_FROM_HANDLE(radv_device, device, _device);
6567 	struct radv_buffer *buffer;
6568 
6569 	if (pCreateInfo->size > RADV_MAX_MEMORY_ALLOCATION_SIZE)
6570 		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
6571 
6572 	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
6573 
6574 	buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
6575 			     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6576 	if (buffer == NULL)
6577 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6578 
6579 	vk_object_base_init(&device->vk, &buffer->base, VK_OBJECT_TYPE_BUFFER);
6580 
6581 	buffer->size = pCreateInfo->size;
6582 	buffer->usage = pCreateInfo->usage;
6583 	buffer->bo = NULL;
6584 	buffer->offset = 0;
6585 	buffer->flags = pCreateInfo->flags;
6586 
6587 	buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
6588 						 EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;
6589 
6590 	if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
6591 		buffer->bo = device->ws->buffer_create(device->ws,
6592 		                                       align64(buffer->size, 4096),
6593 		                                       4096, 0, RADEON_FLAG_VIRTUAL,
6594 		                                       RADV_BO_PRIORITY_VIRTUAL);
6595 		if (!buffer->bo) {
6596 			radv_destroy_buffer(device, pAllocator, buffer);
6597 			return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
6598 		}
6599 	}
6600 
6601 	*pBuffer = radv_buffer_to_handle(buffer);
6602 
6603 	return VK_SUCCESS;
6604 }
6605 
/*
 * Destroy a buffer handle. A null buffer is accepted and ignored.
 */
void radv_DestroyBuffer(
	VkDevice                                    _device,
	VkBuffer                                    _buffer,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	if (buffer)
		radv_destroy_buffer(device, pAllocator, buffer);
}
6619 
radv_GetBufferDeviceAddress(VkDevice device,const VkBufferDeviceAddressInfo * pInfo)6620 VkDeviceAddress radv_GetBufferDeviceAddress(
6621 	VkDevice                                    device,
6622 	const VkBufferDeviceAddressInfo*         pInfo)
6623 {
6624 	RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
6625 	return radv_buffer_get_va(buffer->bo) + buffer->offset;
6626 }
6627 
6628 
/*
 * Opaque capture address of a buffer. Both arguments are ignored and 0 is
 * always returned.
 */
uint64_t radv_GetBufferOpaqueCaptureAddress(VkDevice device,
					    const VkBufferDeviceAddressInfo* pInfo)
{
	(void)device;
	(void)pInfo;

	return 0;
}
6634 
/*
 * Opaque capture address of a device memory object. Both arguments are
 * ignored and 0 is always returned.
 */
uint64_t radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
						  const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
{
	(void)device;
	(void)pInfo;

	return 0;
}
6640 
6641 static inline unsigned
si_tile_mode_index(const struct radv_image_plane * plane,unsigned level,bool stencil)6642 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
6643 {
6644 	if (stencil)
6645 		return plane->surface.u.legacy.stencil_tiling_index[level];
6646 	else
6647 		return plane->surface.u.legacy.tiling_index[level];
6648 }
6649 
radv_surface_max_layer_count(struct radv_image_view * iview)6650 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
6651 {
6652 	return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
6653 }
6654 
6655 static uint32_t
radv_init_dcc_control_reg(struct radv_device * device,struct radv_image_view * iview)6656 radv_init_dcc_control_reg(struct radv_device *device,
6657 			  struct radv_image_view *iview)
6658 {
6659 	unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
6660 	unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
6661 	unsigned max_compressed_block_size;
6662 	unsigned independent_128b_blocks;
6663 	unsigned independent_64b_blocks;
6664 
6665 	if (!radv_dcc_enabled(iview->image, iview->base_mip))
6666 		return 0;
6667 
6668 	if (!device->physical_device->rad_info.has_dedicated_vram) {
6669 		/* amdvlk: [min-compressed-block-size] should be set to 32 for
6670 		 * dGPU and 64 for APU because all of our APUs to date use
6671 		 * DIMMs which have a request granularity size of 64B while all
6672 		 * other chips have a 32B request size.
6673 		 */
6674 		min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
6675 	}
6676 
6677 	if (device->physical_device->rad_info.chip_class >= GFX10) {
6678 		max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
6679 		independent_64b_blocks = 0;
6680 		independent_128b_blocks = 1;
6681 	} else {
6682 		independent_128b_blocks = 0;
6683 
6684 		if (iview->image->info.samples > 1) {
6685 			if (iview->image->planes[0].surface.bpe == 1)
6686 				max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
6687 			else if (iview->image->planes[0].surface.bpe == 2)
6688 				max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
6689 		}
6690 
6691 		if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
6692 					   VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
6693 					   VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
6694 			/* If this DCC image is potentially going to be used in texture
6695 			 * fetches, we need some special settings.
6696 			 */
6697 			independent_64b_blocks = 1;
6698 			max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
6699 		} else {
6700 			/* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
6701 			 * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
6702 			 * big as possible for better compression state.
6703 			 */
6704 			independent_64b_blocks = 0;
6705 			max_compressed_block_size = max_uncompressed_block_size;
6706 		}
6707 	}
6708 
6709 	return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
6710 	       S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
6711 	       S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
6712 	       S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks) |
6713 	       S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);
6714 }
6715 
6716 void
radv_initialise_color_surface(struct radv_device * device,struct radv_color_buffer_info * cb,struct radv_image_view * iview)6717 radv_initialise_color_surface(struct radv_device *device,
6718 			      struct radv_color_buffer_info *cb,
6719 			      struct radv_image_view *iview)
6720 {
6721 	const struct vk_format_description *desc;
6722 	unsigned ntype, format, swap, endian;
6723 	unsigned blend_clamp = 0, blend_bypass = 0;
6724 	uint64_t va;
6725 	const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
6726 	const struct radeon_surf *surf = &plane->surface;
6727 
6728 	desc = vk_format_description(iview->vk_format);
6729 
6730 	memset(cb, 0, sizeof(*cb));
6731 
6732 	/* Intensity is implemented as Red, so treat it that way. */
6733 	cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
6734 
6735 	va = radv_buffer_get_va(iview->bo) + iview->image->offset + plane->offset;
6736 
6737 	cb->cb_color_base = va >> 8;
6738 
6739 	if (device->physical_device->rad_info.chip_class >= GFX9) {
6740 		if (device->physical_device->rad_info.chip_class >= GFX10) {
6741 			cb->cb_color_attrib3 |=	S_028EE0_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
6742 				S_028EE0_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
6743 				S_028EE0_CMASK_PIPE_ALIGNED(1) |
6744 				S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.dcc.pipe_aligned);
6745 		} else {
6746 			struct gfx9_surf_meta_flags meta = {
6747 				.rb_aligned = 1,
6748 				.pipe_aligned = 1,
6749 			};
6750 
6751 			if (surf->dcc_offset)
6752 				meta = surf->u.gfx9.dcc;
6753 
6754 			cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
6755 				S_028C74_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
6756 				S_028C74_RB_ALIGNED(meta.rb_aligned) |
6757 				S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
6758 			cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.surf.epitch);
6759 		}
6760 
6761 		cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
6762 		cb->cb_color_base |= surf->tile_swizzle;
6763 	} else {
6764 		const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
6765 		unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
6766 
6767 		cb->cb_color_base += level_info->offset >> 8;
6768 		if (level_info->mode == RADEON_SURF_MODE_2D)
6769 			cb->cb_color_base |= surf->tile_swizzle;
6770 
6771 		pitch_tile_max = level_info->nblk_x / 8 - 1;
6772 		slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
6773 		tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);
6774 
6775 		cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
6776 		cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
6777 		cb->cb_color_cmask_slice = surf->u.legacy.cmask_slice_tile_max;
6778 
6779 		cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
6780 
6781 		if (radv_image_has_fmask(iview->image)) {
6782 			if (device->physical_device->rad_info.chip_class >= GFX7)
6783 				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.fmask.pitch_in_pixels / 8 - 1);
6784 			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.fmask.tiling_index);
6785 			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.fmask.slice_tile_max);
6786 		} else {
6787 			/* This must be set for fast clear to work without FMASK. */
6788 			if (device->physical_device->rad_info.chip_class >= GFX7)
6789 				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
6790 			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
6791 			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
6792 		}
6793 	}
6794 
6795 	/* CMASK variables */
6796 	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
6797 	va += surf->cmask_offset;
6798 	cb->cb_color_cmask = va >> 8;
6799 
6800 	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
6801 	va += surf->dcc_offset;
6802 
6803 	if (radv_dcc_enabled(iview->image, iview->base_mip) &&
6804 	    device->physical_device->rad_info.chip_class <= GFX8)
6805 		va += plane->surface.u.legacy.level[iview->base_mip].dcc_offset;
6806 
6807 	unsigned dcc_tile_swizzle = surf->tile_swizzle;
6808 	dcc_tile_swizzle &= (surf->dcc_alignment - 1) >> 8;
6809 
6810 	cb->cb_dcc_base = va >> 8;
6811 	cb->cb_dcc_base |= dcc_tile_swizzle;
6812 
6813 	/* GFX10 field has the same base shift as the GFX6 field. */
6814 	uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
6815 	cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
6816 		S_028C6C_SLICE_MAX_GFX10(max_slice);
6817 
6818 	if (iview->image->info.samples > 1) {
6819 		unsigned log_samples = util_logbase2(iview->image->info.samples);
6820 
6821 		cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
6822 			S_028C74_NUM_FRAGMENTS(log_samples);
6823 	}
6824 
6825 	if (radv_image_has_fmask(iview->image)) {
6826 		va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->fmask_offset;
6827 		cb->cb_color_fmask = va >> 8;
6828 		cb->cb_color_fmask |= surf->fmask_tile_swizzle;
6829 	} else {
6830 		cb->cb_color_fmask = cb->cb_color_base;
6831 	}
6832 
6833 	ntype = radv_translate_color_numformat(iview->vk_format,
6834 					       desc,
6835 					       vk_format_get_first_non_void_channel(iview->vk_format));
6836 	format = radv_translate_colorformat(iview->vk_format);
6837 	if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
6838 		radv_finishme("Illegal color\n");
6839 	swap = radv_translate_colorswap(iview->vk_format, false);
6840 	endian = radv_colorformat_endian_swap(format);
6841 
6842 	/* blend clamp should be set for all NORM/SRGB types */
6843 	if (ntype == V_028C70_NUMBER_UNORM ||
6844 	    ntype == V_028C70_NUMBER_SNORM ||
6845 	    ntype == V_028C70_NUMBER_SRGB)
6846 		blend_clamp = 1;
6847 
6848 	/* set blend bypass according to docs if SINT/UINT or
6849 	   8/24 COLOR variants */
6850 	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
6851 	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
6852 	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
6853 		blend_clamp = 0;
6854 		blend_bypass = 1;
6855 	}
6856 #if 0
6857 	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
6858 	    (format == V_028C70_COLOR_8 ||
6859 	     format == V_028C70_COLOR_8_8 ||
6860 	     format == V_028C70_COLOR_8_8_8_8))
6861 		->color_is_int8 = true;
6862 #endif
6863 	cb->cb_color_info = S_028C70_FORMAT(format) |
6864 		S_028C70_COMP_SWAP(swap) |
6865 		S_028C70_BLEND_CLAMP(blend_clamp) |
6866 		S_028C70_BLEND_BYPASS(blend_bypass) |
6867 		S_028C70_SIMPLE_FLOAT(1) |
6868 		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
6869 				    ntype != V_028C70_NUMBER_SNORM &&
6870 				    ntype != V_028C70_NUMBER_SRGB &&
6871 				    format != V_028C70_COLOR_8_24 &&
6872 				    format != V_028C70_COLOR_24_8) |
6873 		S_028C70_NUMBER_TYPE(ntype) |
6874 		S_028C70_ENDIAN(endian);
6875 	if (radv_image_has_fmask(iview->image)) {
6876 		cb->cb_color_info |= S_028C70_COMPRESSION(1);
6877 		if (device->physical_device->rad_info.chip_class == GFX6) {
6878 			unsigned fmask_bankh = util_logbase2(surf->u.legacy.fmask.bankh);
6879 			cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
6880 		}
6881 
6882 		if (radv_image_is_tc_compat_cmask(iview->image)) {
6883 			/* Allow the texture block to read FMASK directly
6884 			 * without decompressing it. This bit must be cleared
6885 			 * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
6886 			 * otherwise the operation doesn't happen.
6887 			 */
6888 			cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
6889 
6890 			/* Set CMASK into a tiling format that allows the
6891 			 * texture block to read it.
6892 			 */
6893 			cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
6894 		}
6895 	}
6896 
6897 	if (radv_image_has_cmask(iview->image) &&
6898 	    !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
6899 		cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
6900 
6901 	if (radv_dcc_enabled(iview->image, iview->base_mip))
6902 		cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
6903 
6904 	cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
6905 
6906 	/* This must be set for fast clear to work without FMASK. */
6907 	if (!radv_image_has_fmask(iview->image) &&
6908 	    device->physical_device->rad_info.chip_class == GFX6) {
6909 		unsigned bankh = util_logbase2(surf->u.legacy.bankh);
6910 		cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
6911 	}
6912 
6913 	if (device->physical_device->rad_info.chip_class >= GFX9) {
6914 		const struct vk_format_description *format_desc = vk_format_description(iview->image->vk_format);
6915 
6916 		unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
6917 		  (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
6918 		unsigned width = iview->extent.width / (iview->plane_id ? format_desc->width_divisor : 1);
6919 		unsigned height = iview->extent.height / (iview->plane_id ? format_desc->height_divisor : 1);
6920 
6921 		if (device->physical_device->rad_info.chip_class >= GFX10) {
6922 			cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);
6923 
6924 			cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |
6925 					        S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
6926 					        S_028EE0_RESOURCE_LEVEL(1);
6927 		} else {
6928 			cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->base_mip);
6929 			cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
6930 					       S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
6931 		}
6932 
6933 		cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) |
6934 			S_028C68_MIP0_HEIGHT(height - 1) |
6935 			S_028C68_MAX_MIP(iview->image->info.levels - 1);
6936 	}
6937 }
6938 
6939 static unsigned
radv_calc_decompress_on_z_planes(struct radv_device * device,struct radv_image_view * iview)6940 radv_calc_decompress_on_z_planes(struct radv_device *device,
6941 				 struct radv_image_view *iview)
6942 {
6943 	unsigned max_zplanes = 0;
6944 
6945 	assert(radv_image_is_tc_compat_htile(iview->image));
6946 
6947 	if (device->physical_device->rad_info.chip_class >= GFX9) {
6948 		/* Default value for 32-bit depth surfaces. */
6949 		max_zplanes = 4;
6950 
6951 		if (iview->vk_format == VK_FORMAT_D16_UNORM &&
6952 		    iview->image->info.samples > 1)
6953 			max_zplanes = 2;
6954 
6955 		max_zplanes = max_zplanes + 1;
6956 	} else {
6957 		if (iview->vk_format == VK_FORMAT_D16_UNORM) {
6958 			/* Do not enable Z plane compression for 16-bit depth
6959 			 * surfaces because isn't supported on GFX8. Only
6960 			 * 32-bit depth surfaces are supported by the hardware.
6961 			 * This allows to maintain shader compatibility and to
6962 			 * reduce the number of depth decompressions.
6963 			 */
6964 			max_zplanes = 1;
6965 		} else {
6966 			if (iview->image->info.samples <= 1)
6967 				max_zplanes = 5;
6968 			else if (iview->image->info.samples <= 4)
6969 				max_zplanes = 3;
6970 			else
6971 				max_zplanes = 2;
6972 		}
6973 	}
6974 
6975 	return max_zplanes;
6976 }
6977 
/**
 * Fill @ds with the depth/stencil register values describing @iview.
 *
 * @ds is zeroed first, so every field not written below ends up as 0.
 * Only single-plane image formats are accepted (asserted). The function has
 * two main paths: one for GFX9 and newer, and a legacy path for older
 * generations that is further split into GFX7+ and pre-GFX7 tiling setup.
 *
 * @param device  Device whose chip_class selects the register layout.
 * @param ds      Output register state.
 * @param iview   Depth/stencil image view to describe.
 */
void
radv_initialise_ds_surface(struct radv_device *device,
			   struct radv_ds_buffer_info *ds,
			   struct radv_image_view *iview)
{
	unsigned level = iview->base_mip;
	unsigned format, stencil_format;
	uint64_t va, s_offs, z_offs;
	bool stencil_only = false;
	const struct radv_image_plane *plane = &iview->image->planes[0];
	const struct radeon_surf *surf = &plane->surface;

	assert(vk_format_get_plane_count(iview->image->vk_format) == 1);

	memset(ds, 0, sizeof(*ds));

	/* Polygon-offset format control and offset scale are chosen from the
	 * image format. S8_UINT only flags the surface as stencil-only, and
	 * any other format leaves both values at the zero set by memset.
	 */
	switch (iview->image->vk_format) {
	case VK_FORMAT_D24_UNORM_S8_UINT:
	case VK_FORMAT_X8_D24_UNORM_PACK32:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
		ds->offset_scale = 2.0f;
		break;
	case VK_FORMAT_D16_UNORM:
	case VK_FORMAT_D16_UNORM_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
		ds->offset_scale = 4.0f;
		break;
	case VK_FORMAT_D32_SFLOAT:
	case VK_FORMAT_D32_SFLOAT_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
			S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
		ds->offset_scale = 1.0f;
		break;
	case VK_FORMAT_S8_UINT:
		stencil_only = true;
		break;
	default:
		break;
	}

	format = radv_translate_dbformat(iview->image->vk_format);
	stencil_format = surf->has_stencil ?
		V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

	/* Layer range of the view; GFX10+ additionally stores bits 11 and up
	 * of both slice indices in separate *_HI fields.
	 */
	uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
	ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
		S_028008_SLICE_MAX(max_slice);
	if (device->physical_device->rad_info.chip_class >= GFX10) {
		ds->db_depth_view |= S_028008_SLICE_START_HI(iview->base_layer >> 11) |
				     S_028008_SLICE_MAX_HI(max_slice >> 11);
	}

	/* HTILE registers stay 0 unless HTILE is enabled for this level below. */
	ds->db_htile_data_base = 0;
	ds->db_htile_surface = 0;

	/* Both depth and stencil addresses start at the image's base VA; the
	 * per-generation code below adds the plane/level offsets.
	 */
	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
	s_offs = z_offs = va;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* GFX9+: depth is expected at offset 0 (asserted); only the
		 * stencil address gets an extra offset.
		 */
		assert(surf->u.gfx9.surf_offset == 0);
		s_offs += surf->u.gfx9.stencil_offset;

		ds->db_z_info = S_028038_FORMAT(format) |
			S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
			S_028038_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
			S_028038_MAXMIP(iview->image->info.levels - 1) |
			S_028038_ZRANGE_PRECISION(1);
		ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
			S_02803C_SW_MODE(surf->u.gfx9.stencil.swizzle_mode);

		/* The EPITCH registers are only programmed on GFX9 itself. */
		if (device->physical_device->rad_info.chip_class == GFX9) {
			ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.surf.epitch);
			ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.stencil.epitch);
		}

		/* The mip level is selected through the view register, while
		 * the size register holds the dimensions of the whole image.
		 */
		ds->db_depth_view |= S_028008_MIPID(level);
		ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
			S_02801C_Y_MAX(iview->image->info.height - 1);

		if (radv_htile_enabled(iview->image, level)) {
			ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);

			if (radv_image_is_tc_compat_htile(iview->image)) {
				unsigned max_zplanes =
					radv_calc_decompress_on_z_planes(device, iview);

				ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);

				/* ITERATE_FLUSH is set through different
				 * register macros on GFX10+ and on GFX9.
				 */
				if (device->physical_device->rad_info.chip_class >= GFX10) {
					ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
					ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
				} else {
					ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
					ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
				}
			}

			if (!surf->has_stencil)
				/* Use all of the htile_buffer for depth if there's no stencil. */
				ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
			/* HTILE base is stored shifted right by 8 bits. */
			va = radv_buffer_get_va(iview->bo) + iview->image->offset +
				surf->htile_offset;
			ds->db_htile_data_base = va >> 8;
			ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
				S_028ABC_PIPE_ALIGNED(1);

			if (device->physical_device->rad_info.chip_class == GFX9) {
				ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
			}
		}
	} else {
		/* Legacy (pre-GFX9) path. The size registers use the depth
		 * level info, or the stencil level info for stencil-only
		 * images.
		 */
		const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];

		if (stencil_only)
			level_info = &surf->u.legacy.stencil_level[level];

		/* Depth and stencil each get their own per-level offset. */
		z_offs += surf->u.legacy.level[level].offset;
		s_offs += surf->u.legacy.stencil_level[level].offset;

		ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
		ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
		ds->db_stencil_info = S_028044_FORMAT(stencil_format);

		if (iview->image->info.samples > 1)
			ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));

		if (device->physical_device->rad_info.chip_class >= GFX7) {
			/* GFX7+: the tiling parameters are extracted from the
			 * tile-mode and macrotile-mode arrays in radeon_info
			 * and written out field by field.
			 */
			struct radeon_info *info = &device->physical_device->rad_info;
			unsigned tiling_index = surf->u.legacy.tiling_index[level];
			unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level];
			unsigned macro_index = surf->u.legacy.macro_tile_index;
			unsigned tile_mode = info->si_tile_mode_array[tiling_index];
			unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
			unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

			/* Stencil-only images use the stencil tile mode for
			 * the depth-side fields as well.
			 */
			if (stencil_only)
				tile_mode = stencil_tile_mode;

			ds->db_depth_info |=
				S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
				S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
				S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
				S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
				S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
				S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
			ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
			ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
		} else {
			/* Before GFX7 the registers take tile mode indices.
			 * For stencil-only images the stencil index is also
			 * OR'ed into the Z info register.
			 */
			unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
			ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
			tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
			ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
			if (stencil_only)
				ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
		}

		/* Pitch and height are programmed in units of 8 blocks, and the
		 * slice in units of 64 blocks, each minus one.
		 */
		ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
			S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
		ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);

		if (radv_htile_enabled(iview->image, level)) {
			ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);

			if (!surf->has_stencil &&
			    !radv_image_is_tc_compat_htile(iview->image))
				/* Use all of the htile_buffer for depth if there's no stencil. */
				ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);

			/* HTILE base is stored shifted right by 8 bits. */
			va = radv_buffer_get_va(iview->bo) + iview->image->offset +
				surf->htile_offset;
			ds->db_htile_data_base = va >> 8;
			ds->db_htile_surface = S_028ABC_FULL_CACHE(1);

			if (radv_image_is_tc_compat_htile(iview->image)) {
				unsigned max_zplanes =
					radv_calc_decompress_on_z_planes(device, iview);

				ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
				ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
			}
		}
	}

	/* Read and write bases are identical; addresses are stored shifted
	 * right by 8 bits.
	 */
	ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
	ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
}
7163 
radv_CreateFramebuffer(VkDevice _device,const VkFramebufferCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkFramebuffer * pFramebuffer)7164 VkResult radv_CreateFramebuffer(
7165 	VkDevice                                    _device,
7166 	const VkFramebufferCreateInfo*              pCreateInfo,
7167 	const VkAllocationCallbacks*                pAllocator,
7168 	VkFramebuffer*                              pFramebuffer)
7169 {
7170 	RADV_FROM_HANDLE(radv_device, device, _device);
7171 	struct radv_framebuffer *framebuffer;
7172 	const VkFramebufferAttachmentsCreateInfo *imageless_create_info =
7173 		vk_find_struct_const(pCreateInfo->pNext,
7174 			FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);
7175 
7176 	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
7177 
7178 	size_t size = sizeof(*framebuffer);
7179 	if (!imageless_create_info)
7180 		size += sizeof(struct radv_image_view*) * pCreateInfo->attachmentCount;
7181 	framebuffer = vk_alloc2(&device->vk.alloc, pAllocator, size, 8,
7182 				  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7183 	if (framebuffer == NULL)
7184 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
7185 
7186 	vk_object_base_init(&device->vk, &framebuffer->base,
7187 			    VK_OBJECT_TYPE_FRAMEBUFFER);
7188 
7189 	framebuffer->attachment_count = pCreateInfo->attachmentCount;
7190 	framebuffer->width = pCreateInfo->width;
7191 	framebuffer->height = pCreateInfo->height;
7192 	framebuffer->layers = pCreateInfo->layers;
7193 	if (imageless_create_info) {
7194 		for (unsigned i = 0; i < imageless_create_info->attachmentImageInfoCount; ++i) {
7195 			const VkFramebufferAttachmentImageInfo *attachment =
7196 				imageless_create_info->pAttachmentImageInfos + i;
7197 			framebuffer->width = MIN2(framebuffer->width, attachment->width);
7198 			framebuffer->height = MIN2(framebuffer->height, attachment->height);
7199 			framebuffer->layers = MIN2(framebuffer->layers, attachment->layerCount);
7200 		}
7201 	} else {
7202 		for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
7203 			VkImageView _iview = pCreateInfo->pAttachments[i];
7204 			struct radv_image_view *iview = radv_image_view_from_handle(_iview);
7205 			framebuffer->attachments[i] = iview;
7206 			framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
7207 			framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
7208 			framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
7209 		}
7210 	}
7211 
7212 	*pFramebuffer = radv_framebuffer_to_handle(framebuffer);
7213 	return VK_SUCCESS;
7214 }
7215 
/**
 * Destroy a framebuffer object. A null framebuffer handle is ignored.
 */
void radv_DestroyFramebuffer(
	VkDevice                                    _device,
	VkFramebuffer                               _fb,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);

	if (fb) {
		vk_object_base_finish(&fb->base);
		vk_free2(&device->vk.alloc, pAllocator, fb);
	}
}
7229 
/**
 * Translate a Vulkan sampler address mode into the matching SQ_TEX wrap/clamp
 * encoding. Any other value is treated as unreachable.
 */
static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
{
	unsigned hw_wrap;

	switch (address_mode) {
	case VK_SAMPLER_ADDRESS_MODE_REPEAT:
		hw_wrap = V_008F30_SQ_TEX_WRAP;
		break;
	case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
		hw_wrap = V_008F30_SQ_TEX_MIRROR;
		break;
	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
		hw_wrap = V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
		break;
	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
		hw_wrap = V_008F30_SQ_TEX_CLAMP_BORDER;
		break;
	case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
		hw_wrap = V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
		break;
	default:
		unreachable("illegal tex wrap mode");
	}

	return hw_wrap;
}
7248 
7249 static unsigned
radv_tex_compare(VkCompareOp op)7250 radv_tex_compare(VkCompareOp op)
7251 {
7252 	switch (op) {
7253 	case VK_COMPARE_OP_NEVER:
7254 		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
7255 	case VK_COMPARE_OP_LESS:
7256 		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
7257 	case VK_COMPARE_OP_EQUAL:
7258 		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
7259 	case VK_COMPARE_OP_LESS_OR_EQUAL:
7260 		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
7261 	case VK_COMPARE_OP_GREATER:
7262 		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
7263 	case VK_COMPARE_OP_NOT_EQUAL:
7264 		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
7265 	case VK_COMPARE_OP_GREATER_OR_EQUAL:
7266 		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
7267 	case VK_COMPARE_OP_ALWAYS:
7268 		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
7269 	default:
7270 		unreachable("illegal compare mode");
7271 		break;
7272 	}
7273 }
7274 
7275 static unsigned
radv_tex_filter(VkFilter filter,unsigned max_ansio)7276 radv_tex_filter(VkFilter filter, unsigned max_ansio)
7277 {
7278 	switch (filter) {
7279 	case VK_FILTER_NEAREST:
7280 		return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
7281 			V_008F38_SQ_TEX_XY_FILTER_POINT);
7282 	case VK_FILTER_LINEAR:
7283 		return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
7284 			V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
7285 	case VK_FILTER_CUBIC_IMG:
7286 	default:
7287 		fprintf(stderr, "illegal texture filter");
7288 		return 0;
7289 	}
7290 }
7291 
7292 static unsigned
radv_tex_mipfilter(VkSamplerMipmapMode mode)7293 radv_tex_mipfilter(VkSamplerMipmapMode mode)
7294 {
7295 	switch (mode) {
7296 	case VK_SAMPLER_MIPMAP_MODE_NEAREST:
7297 		return V_008F38_SQ_TEX_Z_FILTER_POINT;
7298 	case VK_SAMPLER_MIPMAP_MODE_LINEAR:
7299 		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
7300 	default:
7301 		return V_008F38_SQ_TEX_Z_FILTER_NONE;
7302 	}
7303 }
7304 
7305 static unsigned
radv_tex_bordercolor(VkBorderColor bcolor)7306 radv_tex_bordercolor(VkBorderColor bcolor)
7307 {
7308 	switch (bcolor) {
7309 	case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
7310 	case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
7311 		return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
7312 	case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
7313 	case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
7314 		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
7315 	case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
7316 	case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
7317 		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
7318 	case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
7319 	case VK_BORDER_COLOR_INT_CUSTOM_EXT:
7320 		return V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
7321 	default:
7322 		break;
7323 	}
7324 	return 0;
7325 }
7326 
/**
 * Map a maximum anisotropy value to a ratio code in the range 0..4.
 *
 * The code is the number of thresholds 2, 4, 8 and 16 that @filter reaches:
 * values below 2 give 0, 2..3 give 1, 4..7 give 2, 8..15 give 3 and anything
 * from 16 upwards gives 4.
 */
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
	unsigned ratio_code = 0;
	unsigned threshold;

	for (threshold = 2; threshold <= 16 && filter >= threshold; threshold <<= 1)
		ratio_code++;

	return ratio_code;
}
7340 
7341 static unsigned
radv_tex_filter_mode(VkSamplerReductionMode mode)7342 radv_tex_filter_mode(VkSamplerReductionMode mode)
7343 {
7344 	switch (mode) {
7345 	case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
7346 		return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
7347 	case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
7348 		return V_008F30_SQ_IMG_FILTER_MODE_MIN;
7349 	case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
7350 		return V_008F30_SQ_IMG_FILTER_MODE_MAX;
7351 	default:
7352 		break;
7353 	}
7354 	return 0;
7355 }
7356 
7357 static uint32_t
radv_get_max_anisotropy(struct radv_device * device,const VkSamplerCreateInfo * pCreateInfo)7358 radv_get_max_anisotropy(struct radv_device *device,
7359 			const VkSamplerCreateInfo *pCreateInfo)
7360 {
7361 	if (device->force_aniso >= 0)
7362 		return device->force_aniso;
7363 
7364 	if (pCreateInfo->anisotropyEnable &&
7365 	    pCreateInfo->maxAnisotropy > 1.0f)
7366 		return (uint32_t)pCreateInfo->maxAnisotropy;
7367 
7368 	return 0;
7369 }
7370 
/**
 * Convert a float to signed fixed point with @frac_bits fractional bits by
 * scaling it by 2^frac_bits; the conversion to int drops the remaining
 * fraction.
 */
static inline int S_FIXED(float value, unsigned frac_bits)
{
	const int one = 1 << frac_bits;

	return value * one;
}
7375 
/**
 * Claim the first unused custom border color slot and store @value in it.
 *
 * The search and the update run under the border color mutex. Returns the
 * claimed slot index, or RADV_BORDER_COLOR_COUNT when every slot is in use
 * (nothing is written in that case).
 */
static uint32_t radv_register_border_color(struct radv_device *device,
					   VkClearColorValue   value)
{
	uint32_t slot = 0;

	pthread_mutex_lock(&device->border_color_data.mutex);

	/* Skip over slots that are already taken. */
	while (slot < RADV_BORDER_COLOR_COUNT &&
	       device->border_color_data.used[slot])
		slot++;

	if (slot < RADV_BORDER_COLOR_COUNT) {
		/* Copy to the GPU wrt endian-ness. */
		util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot],
					&value,
					sizeof(VkClearColorValue));

		device->border_color_data.used[slot] = true;
	}

	pthread_mutex_unlock(&device->border_color_data.mutex);

	return slot;
}
7399 
radv_unregister_border_color(struct radv_device * device,uint32_t slot)7400 static void radv_unregister_border_color(struct radv_device *device,
7401 					 uint32_t            slot)
7402 {
7403 	pthread_mutex_lock(&device->border_color_data.mutex);
7404 
7405 	device->border_color_data.used[slot] = false;
7406 
7407 	pthread_mutex_unlock(&device->border_color_data.mutex);
7408 }
7409 
7410 static void
radv_init_sampler(struct radv_device * device,struct radv_sampler * sampler,const VkSamplerCreateInfo * pCreateInfo)7411 radv_init_sampler(struct radv_device *device,
7412 		  struct radv_sampler *sampler,
7413 		  const VkSamplerCreateInfo *pCreateInfo)
7414 {
7415 	uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
7416 	uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
7417 	bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||
7418 			   device->physical_device->rad_info.chip_class == GFX9;
7419 	unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
7420 	unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
7421 	bool trunc_coord = pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST;
7422 	bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
7423 				 pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
7424 				 pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
7425 	VkBorderColor border_color = uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
7426 	uint32_t border_color_ptr;
7427 
7428 	const struct VkSamplerReductionModeCreateInfo *sampler_reduction =
7429 		vk_find_struct_const(pCreateInfo->pNext,
7430 				     SAMPLER_REDUCTION_MODE_CREATE_INFO);
7431 	if (sampler_reduction)
7432 		filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
7433 
7434 	if (pCreateInfo->compareEnable)
7435 		depth_compare_func = radv_tex_compare(pCreateInfo->compareOp);
7436 
7437 	sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;
7438 
7439 	if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT || border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
7440 		const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
7441 			vk_find_struct_const(pCreateInfo->pNext,
7442 					     SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
7443 
7444 		assert(custom_border_color);
7445 
7446 		sampler->border_color_slot =
7447 			radv_register_border_color(device, custom_border_color->customBorderColor);
7448 
7449 		/* Did we fail to find a slot? */
7450 		if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {
7451 			fprintf(stderr, "WARNING: no free border color slots, defaulting to TRANS_BLACK.\n");
7452 			border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
7453 		}
7454 	}
7455 
7456 	/* If we don't have a custom color, set the ptr to 0 */
7457 	border_color_ptr = sampler->border_color_slot != RADV_BORDER_COLOR_COUNT
7458 		? sampler->border_color_slot
7459 		: 0;
7460 
7461 	sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
7462 			     S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
7463 			     S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
7464 			     S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
7465 			     S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
7466 			     S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
7467 			     S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
7468 			     S_008F30_ANISO_BIAS(max_aniso_ratio) |
7469 			     S_008F30_DISABLE_CUBE_WRAP(0) |
7470 			     S_008F30_COMPAT_MODE(compat_mode) |
7471 			     S_008F30_FILTER_MODE(filter_mode) |
7472 			     S_008F30_TRUNC_COORD(trunc_coord));
7473 	sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
7474 			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
7475 			     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
7476 	sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
7477 			     S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
7478 			     S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
7479 			     S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
7480 			     S_008F38_MIP_POINT_PRECLAMP(0));
7481 	sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(border_color_ptr) |
7482 			     S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)));
7483 
7484 	if (device->physical_device->rad_info.chip_class >= GFX10) {
7485 		sampler->state[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);
7486 	} else {
7487 		sampler->state[2] |=
7488 			S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
7489 			S_008F38_FILTER_PREC_FIX(1) |
7490 			S_008F38_ANISO_OVERRIDE_GFX8(device->physical_device->rad_info.chip_class >= GFX8);
7491 	}
7492 }
7493 
radv_CreateSampler(VkDevice _device,const VkSamplerCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkSampler * pSampler)7494 VkResult radv_CreateSampler(
7495 	VkDevice                                    _device,
7496 	const VkSamplerCreateInfo*                  pCreateInfo,
7497 	const VkAllocationCallbacks*                pAllocator,
7498 	VkSampler*                                  pSampler)
7499 {
7500 	RADV_FROM_HANDLE(radv_device, device, _device);
7501 	struct radv_sampler *sampler;
7502 
7503 	const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
7504 		vk_find_struct_const(pCreateInfo->pNext,
7505 				     SAMPLER_YCBCR_CONVERSION_INFO);
7506 
7507 	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
7508 
7509 	sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
7510 			      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7511 	if (!sampler)
7512 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
7513 
7514 	vk_object_base_init(&device->vk, &sampler->base,
7515 			    VK_OBJECT_TYPE_SAMPLER);
7516 
7517 	radv_init_sampler(device, sampler, pCreateInfo);
7518 
7519 	sampler->ycbcr_sampler = ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion): NULL;
7520 	*pSampler = radv_sampler_to_handle(sampler);
7521 
7522 	return VK_SUCCESS;
7523 }
7524 
/**
 * Destroy a sampler object. A null sampler handle is ignored.
 *
 * If the sampler holds a custom border color slot, the slot is released
 * before the object is freed.
 */
void radv_DestroySampler(
	VkDevice                                    _device,
	VkSampler                                   _sampler,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);

	if (sampler) {
		const bool owns_slot =
			sampler->border_color_slot != RADV_BORDER_COLOR_COUNT;

		if (owns_slot)
			radv_unregister_border_color(device, sampler->border_color_slot);

		vk_object_base_finish(&sampler->base);
		vk_free2(&device->vk.alloc, pAllocator, sampler);
	}
}
7542 
7543 /* vk_icd.h does not declare this function, so we declare it here to
7544  * suppress Wmissing-prototypes.
7545  */
7546 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
7547 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
7548 
7549 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t * pSupportedVersion)7550 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
7551 {
7552 	/* For the full details on loader interface versioning, see
7553 	* <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
7554 	* What follows is a condensed summary, to help you navigate the large and
7555 	* confusing official doc.
7556 	*
7557 	*   - Loader interface v0 is incompatible with later versions. We don't
7558 	*     support it.
7559 	*
7560 	*   - In loader interface v1:
7561 	*       - The first ICD entrypoint called by the loader is
7562 	*         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
7563 	*         entrypoint.
7564 	*       - The ICD must statically expose no other Vulkan symbol unless it is
7565 	*         linked with -Bsymbolic.
7566 	*       - Each dispatchable Vulkan handle created by the ICD must be
7567 	*         a pointer to a struct whose first member is VK_LOADER_DATA. The
7568 	*         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
7569 	*       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
7570 	*         vkDestroySurfaceKHR(). The ICD must be capable of working with
7571 	*         such loader-managed surfaces.
7572 	*
7573 	*    - Loader interface v2 differs from v1 in:
7574 	*       - The first ICD entrypoint called by the loader is
7575 	*         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
7576 	*         statically expose this entrypoint.
7577 	*
7578 	*    - Loader interface v3 differs from v2 in:
7579 	*        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
7580 	*          vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
7581 	*          because the loader no longer does so.
7582 	*/
7583 	*pSupportedVersion = MIN2(*pSupportedVersion, 4u);
7584 	return VK_SUCCESS;
7585 }
7586 
radv_GetMemoryFdKHR(VkDevice _device,const VkMemoryGetFdInfoKHR * pGetFdInfo,int * pFD)7587 VkResult radv_GetMemoryFdKHR(VkDevice _device,
7588 			     const VkMemoryGetFdInfoKHR *pGetFdInfo,
7589 			     int *pFD)
7590 {
7591 	RADV_FROM_HANDLE(radv_device, device, _device);
7592 	RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
7593 
7594 	assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
7595 
7596 	/* At the moment, we support only the below handle types. */
7597 	assert(pGetFdInfo->handleType ==
7598 	       VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
7599 	       pGetFdInfo->handleType ==
7600 	       VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
7601 
7602 	bool ret = radv_get_memory_fd(device, memory, pFD);
7603 	if (ret == false)
7604 		return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
7605 	return VK_SUCCESS;
7606 }
7607 
radv_compute_valid_memory_types_attempt(struct radv_physical_device * dev,enum radeon_bo_domain domains,enum radeon_bo_flag flags,enum radeon_bo_flag ignore_flags)7608 static uint32_t radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,
7609                                                         enum radeon_bo_domain domains,
7610                                                         enum radeon_bo_flag flags,
7611                                                         enum radeon_bo_flag ignore_flags)
7612 {
7613 	/* Don't count GTT/CPU as relevant:
7614 	 *
7615 	 * - We're not fully consistent between the two.
7616 	 * - Sometimes VRAM gets VRAM|GTT.
7617 	 */
7618 	const enum radeon_bo_domain relevant_domains = RADEON_DOMAIN_VRAM |
7619 	                                               RADEON_DOMAIN_GDS |
7620 	                                               RADEON_DOMAIN_OA;
7621 	uint32_t bits = 0;
7622 	for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
7623 		if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
7624 			continue;
7625 
7626 		if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
7627 			continue;
7628 
7629 		bits |= 1u << i;
7630 	}
7631 
7632 	return bits;
7633 }
7634 
radv_compute_valid_memory_types(struct radv_physical_device * dev,enum radeon_bo_domain domains,enum radeon_bo_flag flags)7635 static uint32_t radv_compute_valid_memory_types(struct radv_physical_device *dev,
7636                                                 enum radeon_bo_domain domains,
7637                                                 enum radeon_bo_flag flags)
7638 {
7639 	enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
7640 	uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
7641 
7642 	if (!bits) {
7643 		ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
7644 		bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
7645 	}
7646 
7647 	return bits;
7648 }
radv_GetMemoryFdPropertiesKHR(VkDevice _device,VkExternalMemoryHandleTypeFlagBits handleType,int fd,VkMemoryFdPropertiesKHR * pMemoryFdProperties)7649 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
7650 				       VkExternalMemoryHandleTypeFlagBits handleType,
7651 				       int fd,
7652 				       VkMemoryFdPropertiesKHR *pMemoryFdProperties)
7653 {
7654 	RADV_FROM_HANDLE(radv_device, device, _device);
7655 
7656 	switch (handleType) {
7657 	case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
7658 		enum radeon_bo_domain domains;
7659 		enum radeon_bo_flag flags;
7660 		if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
7661 			return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
7662 
7663 		pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(device->physical_device, domains, flags);
7664 		return VK_SUCCESS;
7665 	}
7666 	default:
7667 		/* The valid usage section for this function says:
7668 		 *
7669 		 *    "handleType must not be one of the handle types defined as
7670 		 *    opaque."
7671 		 *
7672 		 * So opaque handle types fall into the default "unsupported" case.
7673 		 */
7674 		return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
7675 	}
7676 }
7677 
radv_import_opaque_fd(struct radv_device * device,int fd,uint32_t * syncobj)7678 static VkResult radv_import_opaque_fd(struct radv_device *device,
7679                                       int fd,
7680                                       uint32_t *syncobj)
7681 {
7682 	uint32_t syncobj_handle = 0;
7683 	int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
7684 	if (ret != 0)
7685 		return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
7686 
7687 	if (*syncobj)
7688 		device->ws->destroy_syncobj(device->ws, *syncobj);
7689 
7690 	*syncobj = syncobj_handle;
7691 	close(fd);
7692 
7693 	return VK_SUCCESS;
7694 }
7695 
radv_import_sync_fd(struct radv_device * device,int fd,uint32_t * syncobj)7696 static VkResult radv_import_sync_fd(struct radv_device *device,
7697                                     int fd,
7698                                     uint32_t *syncobj)
7699 {
7700 	/* If we create a syncobj we do it locally so that if we have an error, we don't
7701 	 * leave a syncobj in an undetermined state in the fence. */
7702 	uint32_t syncobj_handle =  *syncobj;
7703 	if (!syncobj_handle) {
7704 		bool create_signaled = fd == -1 ? true : false;
7705 
7706 		int ret = device->ws->create_syncobj(device->ws, create_signaled,
7707 						     &syncobj_handle);
7708 		if (ret) {
7709 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
7710 		}
7711 	} else {
7712 		if (fd == -1)
7713 			device->ws->signal_syncobj(device->ws, syncobj_handle, 0);
7714 	}
7715 
7716 	if (fd != -1) {
7717 		int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
7718 		if (ret)
7719 			return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
7720 		close(fd);
7721 	}
7722 
7723 	*syncobj = syncobj_handle;
7724 
7725 	return VK_SUCCESS;
7726 }
7727 
radv_ImportSemaphoreFdKHR(VkDevice _device,const VkImportSemaphoreFdInfoKHR * pImportSemaphoreFdInfo)7728 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
7729 				   const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
7730 {
7731 	RADV_FROM_HANDLE(radv_device, device, _device);
7732 	RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
7733 	VkResult result;
7734 	struct radv_semaphore_part *dst = NULL;
7735 	bool timeline = sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
7736 
7737 	if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
7738 		assert(!timeline);
7739 		dst = &sem->temporary;
7740 	} else {
7741 		dst = &sem->permanent;
7742 	}
7743 
7744 	uint32_t syncobj = (dst->kind == RADV_SEMAPHORE_SYNCOBJ ||
7745 	                    dst->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) ? dst->syncobj : 0;
7746 
7747 	switch(pImportSemaphoreFdInfo->handleType) {
7748 		case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
7749 			result = radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
7750 			break;
7751 		case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
7752 			assert(!timeline);
7753 			result = radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
7754 			break;
7755 		default:
7756 			unreachable("Unhandled semaphore handle type");
7757 	}
7758 
7759 	if (result == VK_SUCCESS) {
7760 		dst->syncobj = syncobj;
7761 		dst->kind = RADV_SEMAPHORE_SYNCOBJ;
7762 		if (timeline) {
7763 			dst->kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
7764 			dst->timeline_syncobj.max_point = 0;
7765 		}
7766 	}
7767 
7768 	return result;
7769 }
7770 
radv_GetSemaphoreFdKHR(VkDevice _device,const VkSemaphoreGetFdInfoKHR * pGetFdInfo,int * pFd)7771 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
7772 				const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
7773 				int *pFd)
7774 {
7775 	RADV_FROM_HANDLE(radv_device, device, _device);
7776 	RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
7777 	int ret;
7778 	uint32_t syncobj_handle;
7779 
7780 	if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
7781 		assert(sem->temporary.kind == RADV_SEMAPHORE_SYNCOBJ ||
7782 		       sem->temporary.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
7783 		syncobj_handle = sem->temporary.syncobj;
7784 	} else {
7785 		assert(sem->permanent.kind == RADV_SEMAPHORE_SYNCOBJ ||
7786 		       sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
7787 		syncobj_handle = sem->permanent.syncobj;
7788 	}
7789 
7790 	switch(pGetFdInfo->handleType) {
7791 	case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
7792 		ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
7793 		if (ret)
7794 			return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
7795 		break;
7796 	case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
7797 		ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
7798 		if (ret)
7799 			return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
7800 
7801 		if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
7802 			radv_destroy_semaphore_part(device, &sem->temporary);
7803 		} else {
7804 			device->ws->reset_syncobj(device->ws, syncobj_handle);
7805 		}
7806 		break;
7807 	default:
7808 		unreachable("Unhandled semaphore handle type");
7809 	}
7810 
7811 	return VK_SUCCESS;
7812 }
7813 
/* Fill pExternalSemaphoreProperties for the queried handle type.
 *
 *  - Timeline semaphores: only OPAQUE_FD, and only when the device has
 *    timeline syncobjs; otherwise nothing is supported.
 *  - Other semaphores with has_syncobj_wait_for_submit: OPAQUE_FD and
 *    SYNC_FD are both supported and mutually compatible.
 *  - Other semaphores without it: only OPAQUE_FD.
 *
 * Whenever any handle type is supported, both the exportable and the
 * importable feature bits are reported; otherwise all fields are zero.
 */
void radv_GetPhysicalDeviceExternalSemaphoreProperties(
	VkPhysicalDevice                            physicalDevice,
	const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
	VkExternalSemaphoreProperties               *pExternalSemaphoreProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSemaphoreTypeKHR type = radv_get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);

	const bool wants_opaque_fd =
		pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
	const bool wants_sync_fd =
		pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
	uint32_t handle_types = 0;

	if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
		if (pdevice->rad_info.has_timeline_syncobj && wants_opaque_fd)
			handle_types = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
	} else if (pdevice->rad_info.has_syncobj_wait_for_submit &&
	           (wants_opaque_fd || wants_sync_fd)) {
		/* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */
		handle_types = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
		               VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
	} else if (wants_opaque_fd) {
		handle_types = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
	}

	pExternalSemaphoreProperties->exportFromImportedHandleTypes = handle_types;
	pExternalSemaphoreProperties->compatibleHandleTypes = handle_types;
	pExternalSemaphoreProperties->externalSemaphoreFeatures = handle_types ?
		(VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
		 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT) : 0;
}
7852 
radv_ImportFenceFdKHR(VkDevice _device,const VkImportFenceFdInfoKHR * pImportFenceFdInfo)7853 VkResult radv_ImportFenceFdKHR(VkDevice _device,
7854 				   const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
7855 {
7856 	RADV_FROM_HANDLE(radv_device, device, _device);
7857 	RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
7858 	struct radv_fence_part *dst = NULL;
7859 	VkResult result;
7860 
7861 	if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
7862 		dst = &fence->temporary;
7863 	} else {
7864 		dst = &fence->permanent;
7865 	}
7866 
7867 	uint32_t syncobj = dst->kind == RADV_FENCE_SYNCOBJ ? dst->syncobj : 0;
7868 
7869 	switch(pImportFenceFdInfo->handleType) {
7870 		case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
7871 			result = radv_import_opaque_fd(device, pImportFenceFdInfo->fd, &syncobj);
7872 			break;
7873 		case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
7874 			result = radv_import_sync_fd(device, pImportFenceFdInfo->fd, &syncobj);
7875 			break;
7876 		default:
7877 			unreachable("Unhandled fence handle type");
7878 	}
7879 
7880 	if (result == VK_SUCCESS) {
7881 		dst->syncobj = syncobj;
7882 		dst->kind = RADV_FENCE_SYNCOBJ;
7883 	}
7884 
7885 	return result;
7886 }
7887 
radv_GetFenceFdKHR(VkDevice _device,const VkFenceGetFdInfoKHR * pGetFdInfo,int * pFd)7888 VkResult radv_GetFenceFdKHR(VkDevice _device,
7889 				const VkFenceGetFdInfoKHR *pGetFdInfo,
7890 				int *pFd)
7891 {
7892 	RADV_FROM_HANDLE(radv_device, device, _device);
7893 	RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
7894 	int ret;
7895 
7896 	struct radv_fence_part *part =
7897 		fence->temporary.kind != RADV_FENCE_NONE ?
7898 		&fence->temporary : &fence->permanent;
7899 
7900 	switch(pGetFdInfo->handleType) {
7901 	case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
7902 		ret = device->ws->export_syncobj(device->ws, part->syncobj, pFd);
7903 		if (ret)
7904 			return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
7905 		break;
7906 	case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
7907 		ret = device->ws->export_syncobj_to_sync_file(device->ws,
7908 							      part->syncobj, pFd);
7909 		if (ret)
7910 			return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
7911 
7912 		if (part == &fence->temporary) {
7913 			radv_destroy_fence_part(device, part);
7914 		} else {
7915 			device->ws->reset_syncobj(device->ws, part->syncobj);
7916 		}
7917 		break;
7918 	default:
7919 		unreachable("Unhandled fence handle type");
7920 	}
7921 
7922 	return VK_SUCCESS;
7923 }
7924 
/* Fill pExternalFenceProperties for the queried handle type.
 *
 * When the device has has_syncobj_wait_for_submit and the handle type is
 * OPAQUE_FD or SYNC_FD, both handle types are reported as compatible and
 * exportable-from-imported, and the fence is exportable and importable.
 * Otherwise every field is zero (no external fence support).
 */
void radv_GetPhysicalDeviceExternalFenceProperties(
	VkPhysicalDevice                            physicalDevice,
	const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
	VkExternalFenceProperties               *pExternalFenceProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

	if (pdevice->rad_info.has_syncobj_wait_for_submit &&
	    (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||
	     pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT)) {
		pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
		pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
		/* Use the fence feature enum here; the semaphore "importable"
		 * bit does not belong in a fence feature mask. */
		pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT |
			VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
	} else {
		pExternalFenceProperties->exportFromImportedHandleTypes = 0;
		pExternalFenceProperties->compatibleHandleTypes = 0;
		pExternalFenceProperties->externalFenceFeatures = 0;
	}
}
7945 
7946 VkResult
radv_CreateDebugReportCallbackEXT(VkInstance _instance,const VkDebugReportCallbackCreateInfoEXT * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkDebugReportCallbackEXT * pCallback)7947 radv_CreateDebugReportCallbackEXT(VkInstance _instance,
7948                                  const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
7949                                  const VkAllocationCallbacks* pAllocator,
7950                                  VkDebugReportCallbackEXT* pCallback)
7951 {
7952 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
7953 	return vk_create_debug_report_callback(&instance->debug_report_callbacks,
7954 	                                       pCreateInfo, pAllocator, &instance->alloc,
7955 	                                       pCallback);
7956 }
7957 
7958 void
radv_DestroyDebugReportCallbackEXT(VkInstance _instance,VkDebugReportCallbackEXT _callback,const VkAllocationCallbacks * pAllocator)7959 radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
7960                                   VkDebugReportCallbackEXT _callback,
7961                                   const VkAllocationCallbacks* pAllocator)
7962 {
7963 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
7964 	vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
7965 	                                 _callback, pAllocator, &instance->alloc);
7966 }
7967 
7968 void
radv_DebugReportMessageEXT(VkInstance _instance,VkDebugReportFlagsEXT flags,VkDebugReportObjectTypeEXT objectType,uint64_t object,size_t location,int32_t messageCode,const char * pLayerPrefix,const char * pMessage)7969 radv_DebugReportMessageEXT(VkInstance _instance,
7970                           VkDebugReportFlagsEXT flags,
7971                           VkDebugReportObjectTypeEXT objectType,
7972                           uint64_t object,
7973                           size_t location,
7974                           int32_t messageCode,
7975                           const char* pLayerPrefix,
7976                           const char* pMessage)
7977 {
7978 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
7979 	vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
7980 	                object, location, messageCode, pLayerPrefix, pMessage);
7981 }
7982 
7983 void
radv_GetDeviceGroupPeerMemoryFeatures(VkDevice device,uint32_t heapIndex,uint32_t localDeviceIndex,uint32_t remoteDeviceIndex,VkPeerMemoryFeatureFlags * pPeerMemoryFeatures)7984 radv_GetDeviceGroupPeerMemoryFeatures(
7985     VkDevice                                    device,
7986     uint32_t                                    heapIndex,
7987     uint32_t                                    localDeviceIndex,
7988     uint32_t                                    remoteDeviceIndex,
7989     VkPeerMemoryFeatureFlags*                   pPeerMemoryFeatures)
7990 {
7991 	assert(localDeviceIndex == remoteDeviceIndex);
7992 
7993 	*pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
7994 	                       VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
7995 	                       VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
7996 	                       VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
7997 }
7998 
/* Time domains enumerated by
 * radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(). The raw monotonic
 * domain is only listed when the platform headers define
 * CLOCK_MONOTONIC_RAW.
 */
static const VkTimeDomainEXT radv_time_domains[] = {
	VK_TIME_DOMAIN_DEVICE_EXT,
	VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#ifdef CLOCK_MONOTONIC_RAW
	VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
};
8006 
radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(VkPhysicalDevice physicalDevice,uint32_t * pTimeDomainCount,VkTimeDomainEXT * pTimeDomains)8007 VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
8008 	VkPhysicalDevice                             physicalDevice,
8009 	uint32_t                                     *pTimeDomainCount,
8010 	VkTimeDomainEXT                              *pTimeDomains)
8011 {
8012 	int d;
8013 	VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains,
8014 			       pTimeDomainCount);
8015 
8016 	for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
8017 		vk_outarray_append_typed(VkTimeDomainEXT, &out, i) {
8018 			*i = radv_time_domains[d];
8019 		}
8020 	}
8021 
8022 	return vk_outarray_status(&out);
8023 }
8024 
8025 static uint64_t
radv_clock_gettime(clockid_t clock_id)8026 radv_clock_gettime(clockid_t clock_id)
8027 {
8028 	struct timespec current;
8029 	int ret;
8030 
8031 	ret = clock_gettime(clock_id, &current);
8032 #ifdef CLOCK_MONOTONIC_RAW
8033 	if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
8034 		ret = clock_gettime(CLOCK_MONOTONIC, &current);
8035 #endif
8036 	if (ret < 0)
8037 		return 0;
8038 
8039 	return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
8040 }
8041 
radv_GetCalibratedTimestampsEXT(VkDevice _device,uint32_t timestampCount,const VkCalibratedTimestampInfoEXT * pTimestampInfos,uint64_t * pTimestamps,uint64_t * pMaxDeviation)8042 VkResult radv_GetCalibratedTimestampsEXT(
8043 	VkDevice                                     _device,
8044 	uint32_t                                     timestampCount,
8045 	const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
8046 	uint64_t                                     *pTimestamps,
8047 	uint64_t                                     *pMaxDeviation)
8048 {
8049 	RADV_FROM_HANDLE(radv_device, device, _device);
8050 	uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
8051 	int d;
8052 	uint64_t begin, end;
8053         uint64_t max_clock_period = 0;
8054 
8055 #ifdef CLOCK_MONOTONIC_RAW
8056 	begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
8057 #else
8058 	begin = radv_clock_gettime(CLOCK_MONOTONIC);
8059 #endif
8060 
8061 	for (d = 0; d < timestampCount; d++) {
8062 		switch (pTimestampInfos[d].timeDomain) {
8063 		case VK_TIME_DOMAIN_DEVICE_EXT:
8064 			pTimestamps[d] = device->ws->query_value(device->ws,
8065 								 RADEON_TIMESTAMP);
8066                         uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
8067                         max_clock_period = MAX2(max_clock_period, device_period);
8068 			break;
8069 		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
8070 			pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
8071                         max_clock_period = MAX2(max_clock_period, 1);
8072 			break;
8073 
8074 #ifdef CLOCK_MONOTONIC_RAW
8075 		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
8076 			pTimestamps[d] = begin;
8077 			break;
8078 #endif
8079 		default:
8080 			pTimestamps[d] = 0;
8081 			break;
8082 		}
8083 	}
8084 
8085 #ifdef CLOCK_MONOTONIC_RAW
8086 	end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
8087 #else
8088 	end = radv_clock_gettime(CLOCK_MONOTONIC);
8089 #endif
8090 
8091         /*
8092          * The maximum deviation is the sum of the interval over which we
8093          * perform the sampling and the maximum period of any sampled
8094          * clock. That's because the maximum skew between any two sampled
8095          * clock edges is when the sampled clock with the largest period is
8096          * sampled at the end of that period but right at the beginning of the
8097          * sampling interval and some other clock is sampled right at the
8098          * begining of its sampling period and right at the end of the
8099          * sampling interval. Let's assume the GPU has the longest clock
8100          * period and that the application is sampling GPU and monotonic:
8101          *
8102          *                               s                 e
8103          *			 w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
8104          *	Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
8105          *
8106          *                               g
8107          *		  0         1         2         3
8108          *	GPU       -----_____-----_____-----_____-----_____
8109          *
8110          *                                                m
8111          *					    x y z 0 1 2 3 4 5 6 7 8 9 a b c
8112          *	Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
8113          *
8114          *	Interval                     <----------------->
8115          *	Deviation           <-------------------------->
8116          *
8117          *		s  = read(raw)       2
8118          *		g  = read(GPU)       1
8119          *		m  = read(monotonic) 2
8120          *		e  = read(raw)       b
8121          *
8122          * We round the sample interval up by one tick to cover sampling error
8123          * in the interval clock
8124          */
8125 
8126         uint64_t sample_interval = end - begin + 1;
8127 
8128         *pMaxDeviation = sample_interval + max_clock_period;
8129 
8130 	return VK_SUCCESS;
8131 }
8132 
/* Report the maximum sample-location grid size for a sample count.
 *
 * A 2x2 grid is reported when `samples` contains the 2, 4 or 8 sample
 * bit, and a 0x0 grid otherwise. `physicalDevice` is not consulted.
 */
void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
    VkPhysicalDevice                            physicalDevice,
    VkSampleCountFlagBits                       samples,
    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
{
	const bool has_grid = (samples & (VK_SAMPLE_COUNT_2_BIT |
					  VK_SAMPLE_COUNT_4_BIT |
					  VK_SAMPLE_COUNT_8_BIT)) != 0;
	const uint32_t grid_dim = has_grid ? 2 : 0;

	pMultisampleProperties->maxSampleLocationGridSize =
		(VkExtent2D){ grid_dim, grid_dim };
}
8146 
radv_CreatePrivateDataSlotEXT(VkDevice _device,const VkPrivateDataSlotCreateInfoEXT * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPrivateDataSlotEXT * pPrivateDataSlot)8147 VkResult radv_CreatePrivateDataSlotEXT(
8148     VkDevice                                    _device,
8149     const VkPrivateDataSlotCreateInfoEXT*       pCreateInfo,
8150     const VkAllocationCallbacks*                pAllocator,
8151     VkPrivateDataSlotEXT*                       pPrivateDataSlot)
8152 {
8153 	RADV_FROM_HANDLE(radv_device, device, _device);
8154 	return vk_private_data_slot_create(&device->vk, pCreateInfo, pAllocator,
8155 					   pPrivateDataSlot);
8156 }
8157 
/* Destroy a private data slot by forwarding to
 * vk_private_data_slot_destroy() on the device's vk base object.
 */
void radv_DestroyPrivateDataSlotEXT(
    VkDevice                                    _device,
    VkPrivateDataSlotEXT                        privateDataSlot,
    const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, dev, _device);

	vk_private_data_slot_destroy(&dev->vk,
				     privateDataSlot,
				     pAllocator);
}
8166 
radv_SetPrivateDataEXT(VkDevice _device,VkObjectType objectType,uint64_t objectHandle,VkPrivateDataSlotEXT privateDataSlot,uint64_t data)8167 VkResult radv_SetPrivateDataEXT(
8168     VkDevice                                    _device,
8169     VkObjectType                                objectType,
8170     uint64_t                                    objectHandle,
8171     VkPrivateDataSlotEXT                        privateDataSlot,
8172     uint64_t                                    data)
8173 {
8174 	RADV_FROM_HANDLE(radv_device, device, _device);
8175 	return vk_object_base_set_private_data(&device->vk, objectType,
8176 					       objectHandle, privateDataSlot,
8177 					       data);
8178 }
8179 
/* Read the value stored in the given private data slot of an object into
 * *pData by forwarding to vk_object_base_get_private_data().
 */
void radv_GetPrivateDataEXT(
    VkDevice                                    _device,
    VkObjectType                                objectType,
    uint64_t                                    objectHandle,
    VkPrivateDataSlotEXT                        privateDataSlot,
    uint64_t*                                   pData)
{
	RADV_FROM_HANDLE(radv_device, dev, _device);

	vk_object_base_get_private_data(&dev->vk,
					objectType, objectHandle,
					privateDataSlot, pData);
}
8191