1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #include <stdlib.h>
26 #include <amdgpu.h>
27 #include <amdgpu_drm.h>
28 #include <assert.h>
29 
30 #include "ac_debug.h"
31 #include "radv_radeon_winsys.h"
32 #include "radv_amdgpu_cs.h"
33 #include "radv_amdgpu_bo.h"
34 #include "sid.h"
35 
36 
/* Slot count of the per-CS virtual buffer hash table.  Lookups mask the
 * hash with (size - 1), so the value has to remain a power of two. */
enum {
	VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1 << 10
};
40 
41 struct radv_amdgpu_cs {
42 	struct radeon_winsys_cs base;
43 	struct radv_amdgpu_winsys *ws;
44 
45 	struct amdgpu_cs_ib_info    ib;
46 
47 	struct radeon_winsys_bo     *ib_buffer;
48 	uint8_t                 *ib_mapped;
49 	unsigned                    max_num_buffers;
50 	unsigned                    num_buffers;
51 	amdgpu_bo_handle            *handles;
52 	uint8_t                     *priorities;
53 
54 	struct radeon_winsys_bo     **old_ib_buffers;
55 	unsigned                    num_old_ib_buffers;
56 	unsigned                    max_num_old_ib_buffers;
57 	unsigned                    *ib_size_ptr;
58 	bool                        failed;
59 	bool                        is_chained;
60 
61 	int                         buffer_hash_table[1024];
62 	unsigned                    hw_ip;
63 
64 	unsigned                    num_virtual_buffers;
65 	unsigned                    max_num_virtual_buffers;
66 	struct radeon_winsys_bo     **virtual_buffers;
67 	uint8_t                     *virtual_buffer_priorities;
68 	int                         *virtual_buffer_hash_table;
69 
70 	/* For chips that don't support chaining. */
71 	struct radeon_winsys_cs     *old_cs_buffers;
72 	unsigned                    num_old_cs_buffers;
73 };
74 
/* Downcast a generic winsys CS to the amdgpu implementation.  This relies
 * on `base` being the first member of struct radv_amdgpu_cs. */
static inline struct radv_amdgpu_cs *
radv_amdgpu_cs(struct radeon_winsys_cs *base)
{
	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)base;

	return cs;
}
80 
ring_to_hw_ip(enum ring_type ring)81 static int ring_to_hw_ip(enum ring_type ring)
82 {
83 	switch (ring) {
84 	case RING_GFX:
85 		return AMDGPU_HW_IP_GFX;
86 	case RING_DMA:
87 		return AMDGPU_HW_IP_DMA;
88 	case RING_COMPUTE:
89 		return AMDGPU_HW_IP_COMPUTE;
90 	default:
91 		unreachable("unsupported ring");
92 	}
93 }
94 
/* Forward declarations of file-local helpers; radv_amdgpu_cs_submit() is
 * called by the submission paths before its definition appears. */
static int
radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx,
			uint32_t ip_type,
			uint32_t ring,
			struct radv_winsys_sem_info *sem_info);

static int
radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
		      struct amdgpu_cs_request *request,
		      struct radv_winsys_sem_info *sem_info);
102 
radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx * ctx,struct radv_amdgpu_fence * fence,struct amdgpu_cs_request * req)103 static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
104 					 struct radv_amdgpu_fence *fence,
105 					 struct amdgpu_cs_request *req)
106 {
107 	fence->fence.context = ctx->ctx;
108 	fence->fence.ip_type = req->ip_type;
109 	fence->fence.ip_instance = req->ip_instance;
110 	fence->fence.ring = req->ring;
111 	fence->fence.fence = req->seq_no;
112 	fence->user_ptr = (volatile uint64_t*)(ctx->fence_map + (req->ip_type * MAX_RINGS_PER_TYPE + req->ring) * sizeof(uint64_t));
113 }
114 
radv_amdgpu_create_fence()115 static struct radeon_winsys_fence *radv_amdgpu_create_fence()
116 {
117 	struct radv_amdgpu_fence *fence = calloc(1, sizeof(struct radv_amdgpu_fence));
118 	return (struct radeon_winsys_fence*)fence;
119 }
120 
/* Release a fence obtained from radv_amdgpu_create_fence(); NULL is fine. */
static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence *_fence)
{
	free((struct radv_amdgpu_fence *)_fence);
}
126 
radv_amdgpu_fence_wait(struct radeon_winsys * _ws,struct radeon_winsys_fence * _fence,bool absolute,uint64_t timeout)127 static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws,
128 			      struct radeon_winsys_fence *_fence,
129 			      bool absolute,
130 			      uint64_t timeout)
131 {
132 	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
133 	unsigned flags = absolute ? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE : 0;
134 	int r;
135 	uint32_t expired = 0;
136 
137 	if (fence->user_ptr) {
138 		if (*fence->user_ptr >= fence->fence.fence)
139 			return true;
140 		if (!absolute && !timeout)
141 			return false;
142 	}
143 
144 	/* Now use the libdrm query. */
145 	r = amdgpu_cs_query_fence_status(&fence->fence,
146 	                                 timeout,
147 	                                 flags,
148 	                                 &expired);
149 
150 	if (r) {
151 		fprintf(stderr, "amdgpu: radv_amdgpu_cs_query_fence_status failed.\n");
152 		return false;
153 	}
154 
155 	if (expired)
156 		return true;
157 
158 	return false;
159 }
160 
radv_amdgpu_cs_destroy(struct radeon_winsys_cs * rcs)161 static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
162 {
163 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);
164 
165 	if (cs->ib_buffer)
166 		cs->ws->base.buffer_destroy(cs->ib_buffer);
167 	else
168 		free(cs->base.buf);
169 
170 	for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
171 		cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
172 
173 	for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
174 		struct radeon_winsys_cs *rcs = &cs->old_cs_buffers[i];
175 		free(rcs->buf);
176 	}
177 
178 	free(cs->old_cs_buffers);
179 	free(cs->old_ib_buffers);
180 	free(cs->virtual_buffers);
181 	free(cs->virtual_buffer_priorities);
182 	free(cs->virtual_buffer_hash_table);
183 	free(cs->handles);
184 	free(cs->priorities);
185 	free(cs);
186 }
187 
radv_amdgpu_init_cs(struct radv_amdgpu_cs * cs,enum ring_type ring_type)188 static void radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
189 				enum ring_type ring_type)
190 {
191 	for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
192 		cs->buffer_hash_table[i] = -1;
193 
194 	cs->hw_ip = ring_to_hw_ip(ring_type);
195 }
196 
197 static struct radeon_winsys_cs *
radv_amdgpu_cs_create(struct radeon_winsys * ws,enum ring_type ring_type)198 radv_amdgpu_cs_create(struct radeon_winsys *ws,
199 		      enum ring_type ring_type)
200 {
201 	struct radv_amdgpu_cs *cs;
202 	uint32_t ib_size = 20 * 1024 * 4;
203 	cs = calloc(1, sizeof(struct radv_amdgpu_cs));
204 	if (!cs)
205 		return NULL;
206 
207 	cs->ws = radv_amdgpu_winsys(ws);
208 	radv_amdgpu_init_cs(cs, ring_type);
209 
210 	if (cs->ws->use_ib_bos) {
211 		cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
212 						  RADEON_DOMAIN_GTT,
213 						  RADEON_FLAG_CPU_ACCESS |
214 						  RADEON_FLAG_NO_INTERPROCESS_SHARING |
215 						  RADEON_FLAG_READ_ONLY);
216 		if (!cs->ib_buffer) {
217 			free(cs);
218 			return NULL;
219 		}
220 
221 		cs->ib_mapped = ws->buffer_map(cs->ib_buffer);
222 		if (!cs->ib_mapped) {
223 			ws->buffer_destroy(cs->ib_buffer);
224 			free(cs);
225 			return NULL;
226 		}
227 
228 		cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
229 		cs->base.buf = (uint32_t *)cs->ib_mapped;
230 		cs->base.max_dw = ib_size / 4 - 4;
231 		cs->ib_size_ptr = &cs->ib.size;
232 		cs->ib.size = 0;
233 
234 		ws->cs_add_buffer(&cs->base, cs->ib_buffer, 8);
235 	} else {
236 		cs->base.buf = malloc(16384);
237 		cs->base.max_dw = 4096;
238 		if (!cs->base.buf) {
239 			free(cs);
240 			return NULL;
241 		}
242 	}
243 
244 	return &cs->base;
245 }
246 
radv_amdgpu_cs_grow(struct radeon_winsys_cs * _cs,size_t min_size)247 static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size)
248 {
249 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
250 
251 	if (cs->failed) {
252 		cs->base.cdw = 0;
253 		return;
254 	}
255 
256 	if (!cs->ws->use_ib_bos) {
257 		const uint64_t limit_dws = 0xffff8;
258 		uint64_t ib_dws = MAX2(cs->base.cdw + min_size,
259 				       MIN2(cs->base.max_dw * 2, limit_dws));
260 
261 		/* The total ib size cannot exceed limit_dws dwords. */
262 		if (ib_dws > limit_dws)
263 		{
264 			/* The maximum size in dwords has been reached,
265 			 * try to allocate a new one.
266 			 */
267 			if (cs->num_old_cs_buffers + 1 >= AMDGPU_CS_MAX_IBS_PER_SUBMIT) {
268 				/* TODO: Allow to submit more than 4 IBs. */
269 				fprintf(stderr, "amdgpu: Maximum number of IBs "
270 						"per submit reached.\n");
271 				cs->failed = true;
272 				cs->base.cdw = 0;
273 				return;
274 			}
275 
276 			cs->old_cs_buffers =
277 				realloc(cs->old_cs_buffers,
278 				        (cs->num_old_cs_buffers + 1) * sizeof(*cs->old_cs_buffers));
279 			if (!cs->old_cs_buffers) {
280 				cs->failed = true;
281 				cs->base.cdw = 0;
282 				return;
283 			}
284 
285 			/* Store the current one for submitting it later. */
286 			cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = cs->base.cdw;
287 			cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = cs->base.max_dw;
288 			cs->old_cs_buffers[cs->num_old_cs_buffers].buf = cs->base.buf;
289 			cs->num_old_cs_buffers++;
290 
291 			/* Reset the cs, it will be re-allocated below. */
292 			cs->base.cdw = 0;
293 			cs->base.buf = NULL;
294 
295 			/* Re-compute the number of dwords to allocate. */
296 			ib_dws = MAX2(cs->base.cdw + min_size,
297 				      MIN2(cs->base.max_dw * 2, limit_dws));
298 			if (ib_dws > limit_dws) {
299 				fprintf(stderr, "amdgpu: Too high number of "
300 						"dwords to allocate\n");
301 				cs->failed = true;
302 				return;
303 			}
304 		}
305 
306 		uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
307 		if (new_buf) {
308 			cs->base.buf = new_buf;
309 			cs->base.max_dw = ib_dws;
310 		} else {
311 			cs->failed = true;
312 			cs->base.cdw = 0;
313 		}
314 		return;
315 	}
316 
317 	uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2);
318 
319 	/* max that fits in the chain size field. */
320 	ib_size = MIN2(ib_size, 0xfffff);
321 
322 	while (!cs->base.cdw || (cs->base.cdw & 7) != 4)
323 		cs->base.buf[cs->base.cdw++] = 0xffff1000;
324 
325 	*cs->ib_size_ptr |= cs->base.cdw + 4;
326 
327 	if (cs->num_old_ib_buffers == cs->max_num_old_ib_buffers) {
328 		cs->max_num_old_ib_buffers = MAX2(1, cs->max_num_old_ib_buffers * 2);
329 		cs->old_ib_buffers = realloc(cs->old_ib_buffers,
330 					     cs->max_num_old_ib_buffers * sizeof(void*));
331 	}
332 
333 	cs->old_ib_buffers[cs->num_old_ib_buffers++] = cs->ib_buffer;
334 
335 	cs->ib_buffer = cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0,
336 						   RADEON_DOMAIN_GTT,
337 						   RADEON_FLAG_CPU_ACCESS |
338 						   RADEON_FLAG_NO_INTERPROCESS_SHARING |
339 						   RADEON_FLAG_READ_ONLY);
340 
341 	if (!cs->ib_buffer) {
342 		cs->base.cdw = 0;
343 		cs->failed = true;
344 		cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
345 	}
346 
347 	cs->ib_mapped = cs->ws->base.buffer_map(cs->ib_buffer);
348 	if (!cs->ib_mapped) {
349 		cs->ws->base.buffer_destroy(cs->ib_buffer);
350 		cs->base.cdw = 0;
351 		cs->failed = true;
352 		cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
353 	}
354 
355 	cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);
356 
357 	cs->base.buf[cs->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
358 	cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
359 	cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va >> 32;
360 	cs->ib_size_ptr = cs->base.buf + cs->base.cdw;
361 	cs->base.buf[cs->base.cdw++] = S_3F2_CHAIN(1) | S_3F2_VALID(1);
362 
363 	cs->base.buf = (uint32_t *)cs->ib_mapped;
364 	cs->base.cdw = 0;
365 	cs->base.max_dw = ib_size / 4 - 4;
366 
367 }
368 
radv_amdgpu_cs_finalize(struct radeon_winsys_cs * _cs)369 static bool radv_amdgpu_cs_finalize(struct radeon_winsys_cs *_cs)
370 {
371 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
372 
373 	if (cs->ws->use_ib_bos) {
374 		while (!cs->base.cdw || (cs->base.cdw & 7) != 0)
375 			cs->base.buf[cs->base.cdw++] = 0xffff1000;
376 
377 		*cs->ib_size_ptr |= cs->base.cdw;
378 
379 		cs->is_chained = false;
380 	}
381 
382 	return !cs->failed;
383 }
384 
radv_amdgpu_cs_reset(struct radeon_winsys_cs * _cs)385 static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
386 {
387 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
388 	cs->base.cdw = 0;
389 	cs->failed = false;
390 
391 	for (unsigned i = 0; i < cs->num_buffers; ++i) {
392 		unsigned hash = ((uintptr_t)cs->handles[i] >> 6) &
393 		                 (ARRAY_SIZE(cs->buffer_hash_table) - 1);
394 		cs->buffer_hash_table[hash] = -1;
395 	}
396 
397 	for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
398 		unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
399 		cs->virtual_buffer_hash_table[hash] = -1;
400 	}
401 
402 	cs->num_buffers = 0;
403 	cs->num_virtual_buffers = 0;
404 
405 	if (cs->ws->use_ib_bos) {
406 		cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);
407 
408 		for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
409 			cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
410 
411 		cs->num_old_ib_buffers = 0;
412 		cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
413 		cs->ib_size_ptr = &cs->ib.size;
414 		cs->ib.size = 0;
415 	} else {
416 		for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
417 			struct radeon_winsys_cs *rcs = &cs->old_cs_buffers[i];
418 			free(rcs->buf);
419 		}
420 
421 		free(cs->old_cs_buffers);
422 		cs->old_cs_buffers = NULL;
423 		cs->num_old_cs_buffers = 0;
424 	}
425 }
426 
/*
 * Look up `bo` in the CS's buffer list.
 *
 * Returns its index into cs->handles[], or -1 when it is not tracked.
 * An empty hash slot is taken to mean "not present"; a slot occupied by a
 * different BO falls back to a linear scan, and a hit re-points the slot
 * at the found entry.
 */
static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs,
				      amdgpu_bo_handle bo)
{
	unsigned slot = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
	int cached = cs->buffer_hash_table[slot];
	unsigned pos = 0;

	if (cached == -1)
		return -1;

	if (cs->handles[cached] == bo)
		return cached;

	while (pos < cs->num_buffers && cs->handles[pos] != bo)
		++pos;

	if (pos == cs->num_buffers)
		return -1;

	cs->buffer_hash_table[slot] = pos;
	return pos;
}
448 
/*
 * Add a BO handle to the CS's buffer list.
 *
 * A handle that is already tracked only has its priority raised to the
 * maximum of the old and new value.  When the tracking arrays cannot be
 * enlarged the BO is not added and the CS is marked as failed, which
 * radv_amdgpu_cs_finalize() reports to the caller.
 */
static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
					       amdgpu_bo_handle bo,
					       uint8_t priority)
{
	unsigned hash;
	int index = radv_amdgpu_cs_find_buffer(cs, bo);

	if (index != -1) {
		cs->priorities[index] = MAX2(cs->priorities[index], priority);
		return;
	}

	if (cs->num_buffers == cs->max_num_buffers) {
		unsigned new_count = MAX2(1, cs->max_num_buffers * 2);
		amdgpu_bo_handle *new_handles;
		uint8_t *new_priorities;

		/* realloc() keeps the old block alive on failure, so only
		 * replace a pointer once the call has succeeded.
		 */
		new_handles = realloc(cs->handles, new_count * sizeof(*new_handles));
		if (!new_handles) {
			cs->failed = true;
			return;
		}
		cs->handles = new_handles;

		new_priorities = realloc(cs->priorities, new_count * sizeof(*new_priorities));
		if (!new_priorities) {
			/* handles[] is already larger; that is harmless as
			 * max_num_buffers still holds the old capacity.
			 */
			cs->failed = true;
			return;
		}
		cs->priorities = new_priorities;

		cs->max_num_buffers = new_count;
	}

	cs->handles[cs->num_buffers] = bo;
	cs->priorities[cs->num_buffers] = priority;

	hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
	cs->buffer_hash_table[hash] = cs->num_buffers;

	++cs->num_buffers;
}
476 
radv_amdgpu_cs_add_virtual_buffer(struct radeon_winsys_cs * _cs,struct radeon_winsys_bo * bo,uint8_t priority)477 static void radv_amdgpu_cs_add_virtual_buffer(struct radeon_winsys_cs *_cs,
478                                               struct radeon_winsys_bo *bo,
479                                               uint8_t priority)
480 {
481 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
482 	unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
483 
484 
485 	if (!cs->virtual_buffer_hash_table) {
486 		cs->virtual_buffer_hash_table = malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
487 		for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
488 			cs->virtual_buffer_hash_table[i] = -1;
489 	}
490 
491 	if (cs->virtual_buffer_hash_table[hash] >= 0) {
492 		int idx = cs->virtual_buffer_hash_table[hash];
493 		if (cs->virtual_buffers[idx] == bo) {
494 			cs->virtual_buffer_priorities[idx] = MAX2(cs->virtual_buffer_priorities[idx], priority);
495 			return;
496 		}
497 		for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
498 			if (cs->virtual_buffers[i] == bo) {
499 				cs->virtual_buffer_priorities[i] = MAX2(cs->virtual_buffer_priorities[i], priority);
500 				cs->virtual_buffer_hash_table[hash] = i;
501 				return;
502 			}
503 		}
504 	}
505 
506 	if(cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
507 		cs->max_num_virtual_buffers = MAX2(2, cs->max_num_virtual_buffers * 2);
508 		cs->virtual_buffers = realloc(cs->virtual_buffers, sizeof(struct radv_amdgpu_virtual_virtual_buffer*) * cs->max_num_virtual_buffers);
509 		cs->virtual_buffer_priorities = realloc(cs->virtual_buffer_priorities, sizeof(uint8_t) * cs->max_num_virtual_buffers);
510 	}
511 
512 	cs->virtual_buffers[cs->num_virtual_buffers] = bo;
513 	cs->virtual_buffer_priorities[cs->num_virtual_buffers] = priority;
514 
515 	cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
516 	++cs->num_virtual_buffers;
517 
518 }
519 
radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs * _cs,struct radeon_winsys_bo * _bo,uint8_t priority)520 static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
521 				 struct radeon_winsys_bo *_bo,
522 				 uint8_t priority)
523 {
524 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
525 	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
526 
527 	if (bo->is_virtual)  {
528 		radv_amdgpu_cs_add_virtual_buffer(_cs, _bo, priority);
529 		return;
530 	}
531 
532 	if (bo->base.is_local)
533 		return;
534 
535 	radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
536 }
537 
radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs * _parent,struct radeon_winsys_cs * _child)538 static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
539 					     struct radeon_winsys_cs *_child)
540 {
541 	struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent);
542 	struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child);
543 
544 	for (unsigned i = 0; i < child->num_buffers; ++i) {
545 		radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i],
546 						   child->priorities[i]);
547 	}
548 
549 	for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
550 		radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i],
551 		                          child->virtual_buffer_priorities[i]);
552 	}
553 
554 	if (parent->ws->use_ib_bos) {
555 		if (parent->base.cdw + 4 > parent->base.max_dw)
556 			radv_amdgpu_cs_grow(&parent->base, 4);
557 
558 		parent->base.buf[parent->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
559 		parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address;
560 		parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address >> 32;
561 		parent->base.buf[parent->base.cdw++] = child->ib.size;
562 	} else {
563 		if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
564 			radv_amdgpu_cs_grow(&parent->base, child->base.cdw);
565 
566 		memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw);
567 		parent->base.cdw += child->base.cdw;
568 	}
569 }
570 
/*
 * Build the libdrm BO list for one submission.
 *
 * @cs_array/@count               command streams that get submitted
 * @extra_bo_array/@num_extra_bo  additional BOs, added with priority 8
 * @extra_cs                      optional extra CS (the callers pass the
 *                                preamble) whose buffers are included too
 * @bo_list                       receives the list handle, or 0 when
 *                                there is nothing to put into a list
 *
 * Three strategies are used:
 *  - debug_all_bos: every BO on the winsys' global list is submitted;
 *  - a single CS without extras or virtual buffers: its arrays are passed
 *    to libdrm directly;
 *  - otherwise the buffers of all CSs, including the backing BOs of their
 *    virtual buffers, are merged into one de-duplicated list in which
 *    each BO keeps the highest priority it was referenced with.
 *
 * Returns 0 on success, -ENOMEM when a temporary array cannot be
 * allocated, or the error returned by amdgpu_bo_list_create().
 */
static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
				      struct radeon_winsys_cs **cs_array,
				      unsigned count,
				      struct radv_amdgpu_winsys_bo **extra_bo_array,
				      unsigned num_extra_bo,
				      struct radeon_winsys_cs *extra_cs,
				      amdgpu_bo_list_handle *bo_list)
{
	int r = 0;

	if (ws->debug_all_bos) {
		struct radv_amdgpu_winsys_bo *bo;
		amdgpu_bo_handle *handles;
		unsigned num = 0;

		/* The global list must not change while it is copied. */
		pthread_mutex_lock(&ws->global_bo_list_lock);

		handles = malloc(sizeof(handles[0]) * ws->num_buffers);
		if (!handles) {
			pthread_mutex_unlock(&ws->global_bo_list_lock);
			return -ENOMEM;
		}

		LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) {
			assert(num < ws->num_buffers);
			handles[num++] = bo->bo;
		}

		r = amdgpu_bo_list_create(ws->dev, ws->num_buffers,
					  handles, NULL,
					  bo_list);
		free(handles);
		pthread_mutex_unlock(&ws->global_bo_list_lock);
	} else if (count == 1 && !num_extra_bo && !extra_cs &&
	           !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
		struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
		if (cs->num_buffers == 0) {
			*bo_list = 0;
			return 0;
		}
		r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
					  cs->priorities, bo_list);
	} else {
		/* extra_cs, when present, is handled as entry `count`. */
		const unsigned cs_total = count + !!extra_cs;
		unsigned total_buffer_count = num_extra_bo;
		unsigned unique_bo_count = num_extra_bo;

		/* Upper bound for the merged list.  It has to cover every
		 * buffer the merge loop below may append, including the
		 * virtual buffers of extra_cs.
		 */
		for (unsigned i = 0; i < cs_total; ++i) {
			struct radv_amdgpu_cs *cs =
				radv_amdgpu_cs(i == count ? extra_cs : cs_array[i]);

			total_buffer_count += cs->num_buffers;
			for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
				total_buffer_count += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
		}

		if (total_buffer_count == 0) {
			*bo_list = 0;
			return 0;
		}
		amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count);
		uint8_t *priorities = malloc(sizeof(uint8_t) * total_buffer_count);
		if (!handles || !priorities) {
			free(handles);
			free(priorities);
			return -ENOMEM;
		}

		for (unsigned i = 0; i < num_extra_bo; i++) {
			handles[i] = extra_bo_array[i]->bo;
			priorities[i] = 8;
		}

		for (unsigned i = 0; i < cs_total; ++i) {
			struct radv_amdgpu_cs *cs =
				radv_amdgpu_cs(i == count ? extra_cs : cs_array[i]);

			if (unique_bo_count == 0 && cs->num_buffers) {
				/* Nothing merged yet: the CS's own list has no
				 * duplicates, so it can be copied wholesale.
				 */
				memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle));
				memcpy(priorities, cs->priorities, cs->num_buffers * sizeof(uint8_t));
				unique_bo_count = cs->num_buffers;
			} else {
				/* Only compare against entries merged from
				 * earlier sources; the buffers of a single CS
				 * are unique among themselves.
				 */
				const unsigned unique_bo_so_far = unique_bo_count;

				for (unsigned j = 0; j < cs->num_buffers; ++j) {
					bool found = false;
					for (unsigned k = 0; k < unique_bo_so_far; ++k) {
						if (handles[k] == cs->handles[j]) {
							found = true;
							priorities[k] = MAX2(priorities[k],
									     cs->priorities[j]);
							break;
						}
					}
					if (!found) {
						handles[unique_bo_count] = cs->handles[j];
						priorities[unique_bo_count] = cs->priorities[j];
						++unique_bo_count;
					}
				}
			}

			/* Virtual buffers are always processed, also for a CS
			 * that references no regular buffer at all.  Each of
			 * their backing BOs is compared against the whole
			 * list built so far.
			 */
			for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
				struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
				for (unsigned k = 0; k < virtual_bo->bo_count; ++k) {
					struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
					bool found = false;
					for (unsigned m = 0; m < unique_bo_count; ++m) {
						if (handles[m] == bo->bo) {
							found = true;
							priorities[m] = MAX2(priorities[m],
									cs->virtual_buffer_priorities[j]);
							break;
						}
					}
					if (!found) {
						handles[unique_bo_count] = bo->bo;
						priorities[unique_bo_count] = cs->virtual_buffer_priorities[j];
						++unique_bo_count;
					}
				}
			}
		}

		if (unique_bo_count > 0) {
			r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
						  priorities, bo_list);
		} else {
			*bo_list = 0;
		}

		free(handles);
		free(priorities);
	}

	return r;
}
712 
radv_set_cs_fence(struct radv_amdgpu_ctx * ctx,int ip_type,int ring)713 static struct amdgpu_cs_fence_info radv_set_cs_fence(struct radv_amdgpu_ctx *ctx, int ip_type, int ring)
714 {
715 	struct amdgpu_cs_fence_info ret = {0};
716 	if (ctx->fence_map) {
717 		ret.handle = radv_amdgpu_winsys_bo(ctx->fence_bo)->bo;
718 		ret.offset = (ip_type * MAX_RINGS_PER_TYPE + ring) * sizeof(uint64_t);
719 	}
720 	return ret;
721 }
722 
radv_assign_last_submit(struct radv_amdgpu_ctx * ctx,struct amdgpu_cs_request * request)723 static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
724 				    struct amdgpu_cs_request *request)
725 {
726 	radv_amdgpu_request_to_fence(ctx,
727 	                             &ctx->last_submission[request->ip_type][request->ring],
728 	                             request);
729 }
730 
radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx * _ctx,int queue_idx,struct radv_winsys_sem_info * sem_info,struct radeon_winsys_cs ** cs_array,unsigned cs_count,struct radeon_winsys_cs * initial_preamble_cs,struct radeon_winsys_cs * continue_preamble_cs,struct radeon_winsys_fence * _fence)731 static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
732 						int queue_idx,
733 						struct radv_winsys_sem_info *sem_info,
734 						struct radeon_winsys_cs **cs_array,
735 						unsigned cs_count,
736 						struct radeon_winsys_cs *initial_preamble_cs,
737 						struct radeon_winsys_cs *continue_preamble_cs,
738 						struct radeon_winsys_fence *_fence)
739 {
740 	int r;
741 	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
742 	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
743 	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
744 	amdgpu_bo_list_handle bo_list;
745 	struct amdgpu_cs_request request = {0};
746 	struct amdgpu_cs_ib_info ibs[2];
747 
748 	for (unsigned i = cs_count; i--;) {
749 		struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
750 
751 		if (cs->is_chained) {
752 			*cs->ib_size_ptr -= 4;
753 			cs->is_chained = false;
754 		}
755 
756 		if (i + 1 < cs_count) {
757 			struct radv_amdgpu_cs *next = radv_amdgpu_cs(cs_array[i + 1]);
758 			assert(cs->base.cdw + 4 <= cs->base.max_dw);
759 
760 			cs->is_chained = true;
761 			*cs->ib_size_ptr += 4;
762 
763 			cs->base.buf[cs->base.cdw + 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
764 			cs->base.buf[cs->base.cdw + 1] = next->ib.ib_mc_address;
765 			cs->base.buf[cs->base.cdw + 2] = next->ib.ib_mc_address >> 32;
766 			cs->base.buf[cs->base.cdw + 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next->ib.size;
767 		}
768 	}
769 
770 	r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, 0, initial_preamble_cs,
771 	                               &bo_list);
772 	if (r) {
773 		fprintf(stderr, "amdgpu: buffer list creation failed for the "
774 				"chained submission(%d)\n", r);
775 		return r;
776 	}
777 
778 	request.ip_type = cs0->hw_ip;
779 	request.ring = queue_idx;
780 	request.number_of_ibs = 1;
781 	request.ibs = &cs0->ib;
782 	request.resources = bo_list;
783 	request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
784 
785 	if (initial_preamble_cs) {
786 		request.ibs = ibs;
787 		request.number_of_ibs = 2;
788 		ibs[1] = cs0->ib;
789 		ibs[0] = ((struct radv_amdgpu_cs*)initial_preamble_cs)->ib;
790 	}
791 
792 	r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
793 	if (r) {
794 		if (r == -ENOMEM)
795 			fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
796 		else
797 			fprintf(stderr, "amdgpu: The CS has been rejected, "
798 					"see dmesg for more information.\n");
799 	}
800 
801 	if (bo_list)
802 		amdgpu_bo_list_destroy(bo_list);
803 
804 	if (fence)
805 		radv_amdgpu_request_to_fence(ctx, fence, &request);
806 
807 	radv_assign_last_submit(ctx, &request);
808 
809 	return r;
810 }
811 
radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx * _ctx,int queue_idx,struct radv_winsys_sem_info * sem_info,struct radeon_winsys_cs ** cs_array,unsigned cs_count,struct radeon_winsys_cs * initial_preamble_cs,struct radeon_winsys_cs * continue_preamble_cs,struct radeon_winsys_fence * _fence)812 static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
813 						 int queue_idx,
814 						 struct radv_winsys_sem_info *sem_info,
815 						 struct radeon_winsys_cs **cs_array,
816 						 unsigned cs_count,
817 						 struct radeon_winsys_cs *initial_preamble_cs,
818 						 struct radeon_winsys_cs *continue_preamble_cs,
819 						 struct radeon_winsys_fence *_fence)
820 {
821 	int r;
822 	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
823 	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
824 	amdgpu_bo_list_handle bo_list;
825 	struct amdgpu_cs_request request;
826 	bool emit_signal_sem = sem_info->cs_emit_signal;
827 	assert(cs_count);
828 
829 	for (unsigned i = 0; i < cs_count;) {
830 		struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
831 		struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
832 		struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
833 		unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs,
834 		                    cs_count - i);
835 
836 		memset(&request, 0, sizeof(request));
837 
838 		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, 0,
839 		                               preamble_cs, &bo_list);
840 		if (r) {
841 			fprintf(stderr, "amdgpu: buffer list creation failed "
842 					"for the fallback submission (%d)\n", r);
843 			return r;
844 		}
845 
846 		request.ip_type = cs0->hw_ip;
847 		request.ring = queue_idx;
848 		request.resources = bo_list;
849 		request.number_of_ibs = cnt + !!preamble_cs;
850 		request.ibs = ibs;
851 		request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
852 
853 		if (preamble_cs) {
854 			ibs[0] = radv_amdgpu_cs(preamble_cs)->ib;
855 		}
856 
857 		for (unsigned j = 0; j < cnt; ++j) {
858 			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
859 			ibs[j + !!preamble_cs] = cs->ib;
860 
861 			if (cs->is_chained) {
862 				*cs->ib_size_ptr -= 4;
863 				cs->is_chained = false;
864 			}
865 		}
866 
867 		sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
868 		r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
869 		if (r) {
870 			if (r == -ENOMEM)
871 				fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
872 			else
873 				fprintf(stderr, "amdgpu: The CS has been rejected, "
874 						"see dmesg for more information.\n");
875 		}
876 
877 		if (bo_list)
878 			amdgpu_bo_list_destroy(bo_list);
879 
880 		if (r)
881 			return r;
882 
883 		i += cnt;
884 	}
885 	if (fence)
886 		radv_amdgpu_request_to_fence(ctx, fence, &request);
887 
888 	radv_assign_last_submit(ctx, &request);
889 
890 	return 0;
891 }
892 
radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx * _ctx,int queue_idx,struct radv_winsys_sem_info * sem_info,struct radeon_winsys_cs ** cs_array,unsigned cs_count,struct radeon_winsys_cs * initial_preamble_cs,struct radeon_winsys_cs * continue_preamble_cs,struct radeon_winsys_fence * _fence)893 static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
894 					       int queue_idx,
895 					       struct radv_winsys_sem_info *sem_info,
896 					       struct radeon_winsys_cs **cs_array,
897 					       unsigned cs_count,
898 					       struct radeon_winsys_cs *initial_preamble_cs,
899 					       struct radeon_winsys_cs *continue_preamble_cs,
900 					       struct radeon_winsys_fence *_fence)
901 {
902 	int r;
903 	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
904 	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
905 	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
906 	struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws;
907 	amdgpu_bo_list_handle bo_list;
908 	struct amdgpu_cs_request request;
909 	uint32_t pad_word = 0xffff1000U;
910 	bool emit_signal_sem = sem_info->cs_emit_signal;
911 
912 	if (radv_amdgpu_winsys(ws)->info.chip_class == SI)
913 		pad_word = 0x80000000;
914 
915 	assert(cs_count);
916 
917 	for (unsigned i = 0; i < cs_count;) {
918 		struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0};
919 		unsigned number_of_ibs = 1;
920 		struct radeon_winsys_bo *bos[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0};
921 		struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
922 		struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
923 		uint32_t *ptr;
924 		unsigned cnt = 0;
925 		unsigned size = 0;
926 		unsigned pad_words = 0;
927 
928 		if (cs->num_old_cs_buffers > 0) {
929 			/* Special path when the maximum size in dwords has
930 			 * been reached because we need to handle more than one
931 			 * IB per submit.
932 			 */
933 			unsigned new_cs_count = cs->num_old_cs_buffers + 1;
934 			struct radeon_winsys_cs *new_cs_array[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
935 			unsigned idx = 0;
936 
937 			for (unsigned j = 0; j < cs->num_old_cs_buffers; j++)
938 				new_cs_array[idx++] = &cs->old_cs_buffers[j];
939 			new_cs_array[idx++] = cs_array[i];
940 
941 			for (unsigned j = 0; j < new_cs_count; j++) {
942 				struct radeon_winsys_cs *rcs = new_cs_array[j];
943 				bool needs_preamble = preamble_cs && j == 0;
944 				unsigned size = 0;
945 
946 				if (needs_preamble)
947 					size += preamble_cs->cdw;
948 				size += rcs->cdw;
949 
950 				assert(size < 0xffff8);
951 
952 				while (!size || (size & 7)) {
953 					size++;
954 					pad_words++;
955 				}
956 
957 				bos[j] = ws->buffer_create(ws, 4 * size, 4096,
958 							   RADEON_DOMAIN_GTT,
959 							   RADEON_FLAG_CPU_ACCESS |
960 							   RADEON_FLAG_NO_INTERPROCESS_SHARING |
961 							   RADEON_FLAG_READ_ONLY);
962 				ptr = ws->buffer_map(bos[j]);
963 
964 				if (needs_preamble) {
965 					memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
966 					ptr += preamble_cs->cdw;
967 				}
968 
969 				memcpy(ptr, rcs->buf, 4 * rcs->cdw);
970 				ptr += rcs->cdw;
971 
972 				for (unsigned k = 0; k < pad_words; ++k)
973 					*ptr++ = pad_word;
974 
975 				ibs[j].size = size;
976 				ibs[j].ib_mc_address = radv_buffer_get_va(bos[j]);
977 			}
978 
979 			number_of_ibs = new_cs_count;
980 			cnt++;
981 		} else {
982 			if (preamble_cs)
983 				size += preamble_cs->cdw;
984 
985 			while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
986 				size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
987 				++cnt;
988 			}
989 
990 			while (!size || (size & 7)) {
991 				size++;
992 				pad_words++;
993 			}
994 			assert(cnt);
995 
996 			bos[0] = ws->buffer_create(ws, 4 * size, 4096,
997 						   RADEON_DOMAIN_GTT,
998 						   RADEON_FLAG_CPU_ACCESS |
999 						   RADEON_FLAG_NO_INTERPROCESS_SHARING |
1000 						   RADEON_FLAG_READ_ONLY);
1001 			ptr = ws->buffer_map(bos[0]);
1002 
1003 			if (preamble_cs) {
1004 				memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
1005 				ptr += preamble_cs->cdw;
1006 			}
1007 
1008 			for (unsigned j = 0; j < cnt; ++j) {
1009 				struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
1010 				memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
1011 				ptr += cs->base.cdw;
1012 
1013 			}
1014 
1015 			for (unsigned j = 0; j < pad_words; ++j)
1016 				*ptr++ = pad_word;
1017 
1018 			ibs[0].size = size;
1019 			ibs[0].ib_mc_address = radv_buffer_get_va(bos[0]);
1020 		}
1021 
1022 		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
1023 			                       (struct radv_amdgpu_winsys_bo **)bos,
1024 					       number_of_ibs, preamble_cs,
1025 					       &bo_list);
1026 		if (r) {
1027 			fprintf(stderr, "amdgpu: buffer list creation failed "
1028 					"for the sysmem submission (%d)\n", r);
1029 			return r;
1030 		}
1031 
1032 		memset(&request, 0, sizeof(request));
1033 
1034 		request.ip_type = cs0->hw_ip;
1035 		request.ring = queue_idx;
1036 		request.resources = bo_list;
1037 		request.number_of_ibs = number_of_ibs;
1038 		request.ibs = ibs;
1039 		request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
1040 
1041 		sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
1042 		r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
1043 		if (r) {
1044 			if (r == -ENOMEM)
1045 				fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
1046 			else
1047 				fprintf(stderr, "amdgpu: The CS has been rejected, "
1048 						"see dmesg for more information.\n");
1049 		}
1050 
1051 		if (bo_list)
1052 			amdgpu_bo_list_destroy(bo_list);
1053 
1054 		for (unsigned j = 0; j < number_of_ibs; j++) {
1055 			ws->buffer_destroy(bos[j]);
1056 			if (r)
1057 				return r;
1058 		}
1059 
1060 		i += cnt;
1061 	}
1062 	if (fence)
1063 		radv_amdgpu_request_to_fence(ctx, fence, &request);
1064 
1065 	radv_assign_last_submit(ctx, &request);
1066 
1067 	return 0;
1068 }
1069 
radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx * _ctx,int queue_idx,struct radeon_winsys_cs ** cs_array,unsigned cs_count,struct radeon_winsys_cs * initial_preamble_cs,struct radeon_winsys_cs * continue_preamble_cs,struct radv_winsys_sem_info * sem_info,bool can_patch,struct radeon_winsys_fence * _fence)1070 static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
1071 					int queue_idx,
1072 					struct radeon_winsys_cs **cs_array,
1073 					unsigned cs_count,
1074 					struct radeon_winsys_cs *initial_preamble_cs,
1075 					struct radeon_winsys_cs *continue_preamble_cs,
1076 					struct radv_winsys_sem_info *sem_info,
1077 					bool can_patch,
1078 					struct radeon_winsys_fence *_fence)
1079 {
1080 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
1081 	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
1082 	int ret;
1083 
1084 	assert(sem_info);
1085 	if (!cs->ws->use_ib_bos) {
1086 		ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, cs_array,
1087 							   cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
1088 	} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) {
1089 		ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, cs_array,
1090 							    cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
1091 	} else {
1092 		ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, sem_info, cs_array,
1093 							     cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
1094 	}
1095 
1096 	radv_amdgpu_signal_sems(ctx, cs->hw_ip, queue_idx, sem_info);
1097 	return ret;
1098 }
1099 
radv_amdgpu_winsys_get_cpu_addr(void * _cs,uint64_t addr)1100 static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
1101 {
1102 	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
1103 	void *ret = NULL;
1104 
1105 	if (!cs->ib_buffer)
1106 		return NULL;
1107 	for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
1108 		struct radv_amdgpu_winsys_bo *bo;
1109 
1110 		bo = (struct radv_amdgpu_winsys_bo*)
1111 		       (i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
1112 		if (addr >= bo->base.va && addr - bo->base.va < bo->size) {
1113 			if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
1114 				return (char *)ret + (addr - bo->base.va);
1115 		}
1116 	}
1117 	if(cs->ws->debug_all_bos) {
1118 		pthread_mutex_lock(&cs->ws->global_bo_list_lock);
1119 		list_for_each_entry(struct radv_amdgpu_winsys_bo, bo,
1120 		                    &cs->ws->global_bo_list, global_list_item) {
1121 			if (addr >= bo->base.va && addr - bo->base.va < bo->size) {
1122 				if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0) {
1123 					pthread_mutex_unlock(&cs->ws->global_bo_list_lock);
1124 					return (char *)ret + (addr - bo->base.va);
1125 				}
1126 			}
1127 		}
1128 		pthread_mutex_unlock(&cs->ws->global_bo_list_lock);
1129 	}
1130 	return ret;
1131 }
1132 
radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs * _cs,FILE * file,const int * trace_ids,int trace_id_count)1133 static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
1134                                        FILE* file,
1135                                        const int *trace_ids, int trace_id_count)
1136 {
1137 	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
1138 	void *ib = cs->base.buf;
1139 	int num_dw = cs->base.cdw;
1140 
1141 	if (cs->ws->use_ib_bos) {
1142 		ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address);
1143 		num_dw = cs->ib.size;
1144 	}
1145 	assert(ib);
1146 	ac_parse_ib(file, ib, num_dw, trace_ids, trace_id_count,  "main IB",
1147 		    cs->ws->info.chip_class, radv_amdgpu_winsys_get_cpu_addr, cs);
1148 }
1149 
radv_to_amdgpu_priority(enum radeon_ctx_priority radv_priority)1150 static uint32_t radv_to_amdgpu_priority(enum radeon_ctx_priority radv_priority)
1151 {
1152 	switch (radv_priority) {
1153 		case RADEON_CTX_PRIORITY_REALTIME:
1154 			return AMDGPU_CTX_PRIORITY_VERY_HIGH;
1155 		case RADEON_CTX_PRIORITY_HIGH:
1156 			return AMDGPU_CTX_PRIORITY_HIGH;
1157 		case RADEON_CTX_PRIORITY_MEDIUM:
1158 			return AMDGPU_CTX_PRIORITY_NORMAL;
1159 		case RADEON_CTX_PRIORITY_LOW:
1160 			return AMDGPU_CTX_PRIORITY_LOW;
1161 		default:
1162 			unreachable("Invalid context priority");
1163 	}
1164 }
1165 
radv_amdgpu_ctx_create(struct radeon_winsys * _ws,enum radeon_ctx_priority priority)1166 static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws,
1167 							enum radeon_ctx_priority priority)
1168 {
1169 	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
1170 	struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx);
1171 	uint32_t amdgpu_priority = radv_to_amdgpu_priority(priority);
1172 	int r;
1173 
1174 	if (!ctx)
1175 		return NULL;
1176 
1177 	r = amdgpu_cs_ctx_create2(ws->dev, amdgpu_priority, &ctx->ctx);
1178 	if (r) {
1179 		fprintf(stderr, "amdgpu: radv_amdgpu_cs_ctx_create2 failed. (%i)\n", r);
1180 		goto error_create;
1181 	}
1182 	ctx->ws = ws;
1183 
1184 	assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096);
1185 	ctx->fence_bo = ws->base.buffer_create(&ws->base, 4096, 8,
1186 	                                      RADEON_DOMAIN_GTT,
1187 	                                      RADEON_FLAG_CPU_ACCESS|
1188 					       RADEON_FLAG_NO_INTERPROCESS_SHARING);
1189 	if (ctx->fence_bo)
1190 		ctx->fence_map = (uint64_t*)ws->base.buffer_map(ctx->fence_bo);
1191 	if (ctx->fence_map)
1192 		memset(ctx->fence_map, 0, 4096);
1193 	return (struct radeon_winsys_ctx *)ctx;
1194 error_create:
1195 	FREE(ctx);
1196 	return NULL;
1197 }
1198 
radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx * rwctx)1199 static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
1200 {
1201 	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
1202 	ctx->ws->base.buffer_destroy(ctx->fence_bo);
1203 	amdgpu_cs_ctx_free(ctx->ctx);
1204 	FREE(ctx);
1205 }
1206 
radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx * rwctx,enum ring_type ring_type,int ring_index)1207 static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
1208                                       enum ring_type ring_type, int ring_index)
1209 {
1210 	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
1211 	int ip_type = ring_to_hw_ip(ring_type);
1212 
1213 	if (ctx->last_submission[ip_type][ring_index].fence.fence) {
1214 		uint32_t expired;
1215 		int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index].fence,
1216 		                                       1000000000ull, 0, &expired);
1217 
1218 		if (ret || !expired)
1219 			return false;
1220 	}
1221 
1222 	return true;
1223 }
1224 
radv_amdgpu_create_sem(struct radeon_winsys * _ws)1225 static struct radeon_winsys_sem *radv_amdgpu_create_sem(struct radeon_winsys *_ws)
1226 {
1227 	struct amdgpu_cs_fence *sem = CALLOC_STRUCT(amdgpu_cs_fence);
1228 	if (!sem)
1229 		return NULL;
1230 
1231 	return (struct radeon_winsys_sem *)sem;
1232 }
1233 
/* Free a semaphore allocated by radv_amdgpu_create_sem(). */
static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem)
{
	FREE((struct amdgpu_cs_fence *)_sem);
}
1239 
radv_amdgpu_signal_sems(struct radv_amdgpu_ctx * ctx,uint32_t ip_type,uint32_t ring,struct radv_winsys_sem_info * sem_info)1240 static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx,
1241 				   uint32_t ip_type,
1242 				   uint32_t ring,
1243 				   struct radv_winsys_sem_info *sem_info)
1244 {
1245 	for (unsigned i = 0; i < sem_info->signal.sem_count; i++) {
1246 		struct amdgpu_cs_fence *sem = (struct amdgpu_cs_fence *)(sem_info->signal.sem)[i];
1247 
1248 		if (sem->context)
1249 			return -EINVAL;
1250 
1251 		*sem = ctx->last_submission[ip_type][ring].fence;
1252 	}
1253 	return 0;
1254 }
1255 
radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts * counts,struct drm_amdgpu_cs_chunk * chunk,int chunk_id)1256 static struct drm_amdgpu_cs_chunk_sem *radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts,
1257 									  struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
1258 {
1259 	struct drm_amdgpu_cs_chunk_sem *syncobj = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * counts->syncobj_count);
1260 	if (!syncobj)
1261 		return NULL;
1262 
1263 	for (unsigned i = 0; i < counts->syncobj_count; i++) {
1264 		struct drm_amdgpu_cs_chunk_sem *sem = &syncobj[i];
1265 		sem->handle = counts->syncobj[i];
1266 	}
1267 
1268 	chunk->chunk_id = chunk_id;
1269 	chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * counts->syncobj_count;
1270 	chunk->chunk_data = (uint64_t)(uintptr_t)syncobj;
1271 	return syncobj;
1272 }
1273 
/*
 * Translate an amdgpu_cs_request plus semaphore information into raw CS
 * chunks and submit them with amdgpu_cs_submit_raw().
 *
 * Chunks are emitted in this order: one IB chunk per request IB, an
 * optional user fence chunk, an optional "syncobj in" chunk, an optional
 * dependencies chunk for the legacy wait semaphores, and an optional
 * "syncobj out" chunk.
 *
 * Side effects visible to the caller:
 *  - sem_info->cs_emit_wait is cleared once the wait chunks were built;
 *  - the context of every wait semaphore turned into a dependency is
 *    reset to NULL;
 *  - request->seq_no is passed to amdgpu_cs_submit_raw() as an output.
 *
 * Returns the result of amdgpu_cs_submit_raw(), or -ENOMEM when one of the
 * heap-allocated semaphore arrays cannot be allocated.
 */
static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
				 struct amdgpu_cs_request *request,
				 struct radv_winsys_sem_info *sem_info)
{
	int r;
	int num_chunks;
	int size;
	bool user_fence;
	struct drm_amdgpu_cs_chunk *chunks;
	struct drm_amdgpu_cs_chunk_data *chunk_data;
	struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
	struct drm_amdgpu_cs_chunk_sem *wait_syncobj = NULL, *signal_syncobj = NULL;
	int i;
	struct amdgpu_cs_fence *sem;

	user_fence = (request->fence_info.handle != NULL);
	/* Room for the IB chunks, the fence chunk and the (up to three)
	 * semaphore chunks added below.
	 */
	size = request->number_of_ibs + (user_fence ? 2 : 1) + 3;

	chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);

	/* Only the IB and fence chunks keep their payload in chunk_data. */
	size = request->number_of_ibs + (user_fence ? 1 : 0);

	chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);

	num_chunks = request->number_of_ibs;
	for (i = 0; i < request->number_of_ibs; i++) {
		struct amdgpu_cs_ib_info *ib;
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		ib = &request->ibs[i];

		chunk_data[i].ib_data._pad = 0;
		chunk_data[i].ib_data.va_start = ib->ib_mc_address;
		/* ib->size is in dwords. */
		chunk_data[i].ib_data.ib_bytes = ib->size * 4;
		chunk_data[i].ib_data.ip_type = request->ip_type;
		chunk_data[i].ib_data.ip_instance = request->ip_instance;
		chunk_data[i].ib_data.ring = request->ring;
		chunk_data[i].ib_data.flags = ib->flags;
	}

	if (user_fence) {
		/* The fence chunk directly follows the IB chunks. */
		i = num_chunks++;

		chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		amdgpu_cs_chunk_fence_info_to_data(&request->fence_info,
						   &chunk_data[i]);
	}

	if (sem_info->wait.syncobj_count && sem_info->cs_emit_wait) {
		wait_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->wait,
								  &chunks[num_chunks],
								  AMDGPU_CHUNK_ID_SYNCOBJ_IN);
		if (!wait_syncobj) {
			r = -ENOMEM;
			goto error_out;
		}
		num_chunks++;

		/* Without legacy wait semaphores the block below is skipped,
		 * so the wait flag has to be cleared here.
		 */
		if (sem_info->wait.sem_count == 0)
			sem_info->cs_emit_wait = false;

	}

	if (sem_info->wait.sem_count && sem_info->cs_emit_wait) {
		sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_info->wait.sem_count);
		if (!sem_dependencies) {
			r = -ENOMEM;
			goto error_out;
		}
		int sem_count = 0;
		for (unsigned j = 0; j < sem_info->wait.sem_count; j++) {
			sem = (struct amdgpu_cs_fence *)sem_info->wait.sem[j];
			/* Semaphores without a context carry no fence to wait on. */
			if (!sem->context)
				continue;
			struct drm_amdgpu_cs_chunk_dep *dep = &sem_dependencies[sem_count++];

			amdgpu_cs_chunk_fence_to_dep(sem, dep);

			/* Mark the semaphore as consumed. */
			sem->context = NULL;
		}
		i = num_chunks++;

		/* dependencies chunk */
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 * sem_count;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;

		sem_info->cs_emit_wait = false;
	}

	if (sem_info->signal.syncobj_count && sem_info->cs_emit_signal) {
		signal_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->signal,
								    &chunks[num_chunks],
								    AMDGPU_CHUNK_ID_SYNCOBJ_OUT);
		if (!signal_syncobj) {
			r = -ENOMEM;
			goto error_out;
		}
		num_chunks++;
	}

	r = amdgpu_cs_submit_raw(ctx->ws->dev,
				 ctx->ctx,
				 request->resources,
				 num_chunks,
				 chunks,
				 &request->seq_no);
error_out:
	/* Shared exit path: the heap arrays are only needed for the ioctl;
	 * pointers that were never allocated are still NULL here.
	 */
	free(sem_dependencies);
	free(wait_syncobj);
	free(signal_syncobj);
	return r;
}
1392 
radv_amdgpu_create_syncobj(struct radeon_winsys * _ws,uint32_t * handle)1393 static int radv_amdgpu_create_syncobj(struct radeon_winsys *_ws,
1394 				      uint32_t *handle)
1395 {
1396 	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
1397 	return amdgpu_cs_create_syncobj(ws->dev, handle);
1398 }
1399 
radv_amdgpu_destroy_syncobj(struct radeon_winsys * _ws,uint32_t handle)1400 static void radv_amdgpu_destroy_syncobj(struct radeon_winsys *_ws,
1401 				    uint32_t handle)
1402 {
1403 	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
1404 	amdgpu_cs_destroy_syncobj(ws->dev, handle);
1405 }
1406 
radv_amdgpu_reset_syncobj(struct radeon_winsys * _ws,uint32_t handle)1407 static void radv_amdgpu_reset_syncobj(struct radeon_winsys *_ws,
1408 				    uint32_t handle)
1409 {
1410 	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
1411 	amdgpu_cs_syncobj_reset(ws->dev, &handle, 1);
1412 }
1413 
radv_amdgpu_signal_syncobj(struct radeon_winsys * _ws,uint32_t handle)1414 static void radv_amdgpu_signal_syncobj(struct radeon_winsys *_ws,
1415 				    uint32_t handle)
1416 {
1417 	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
1418 	amdgpu_cs_syncobj_signal(ws->dev, &handle, 1);
1419 }
1420 
radv_amdgpu_wait_syncobj(struct radeon_winsys * _ws,uint32_t handle,uint64_t timeout)1421 static bool radv_amdgpu_wait_syncobj(struct radeon_winsys *_ws,
1422 				    uint32_t handle, uint64_t timeout)
1423 {
1424 	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
1425 	uint32_t tmp;
1426 
1427 	/* The timeouts are signed, while vulkan timeouts are unsigned. */
1428 	timeout = MIN2(timeout, INT64_MAX);
1429 
1430 	int ret = amdgpu_cs_syncobj_wait(ws->dev, &handle, 1, timeout,
1431 					 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
1432 					 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
1433 					 &tmp);
1434 	if (ret == 0) {
1435 		return true;
1436 	} else if (ret == -1 && errno == ETIME) {
1437 		return false;
1438 	} else {
1439 		fprintf(stderr, "amdgpu: radv_amdgpu_wait_syncobj failed!\nerrno: %d\n", errno);
1440 		return false;
1441 	}
1442 }
1443 
radv_amdgpu_export_syncobj(struct radeon_winsys * _ws,uint32_t syncobj,int * fd)1444 static int radv_amdgpu_export_syncobj(struct radeon_winsys *_ws,
1445 				      uint32_t syncobj,
1446 				      int *fd)
1447 {
1448 	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
1449 
1450 	return amdgpu_cs_export_syncobj(ws->dev, syncobj, fd);
1451 }
1452 
radv_amdgpu_import_syncobj(struct radeon_winsys * _ws,int fd,uint32_t * syncobj)1453 static int radv_amdgpu_import_syncobj(struct radeon_winsys *_ws,
1454 				      int fd,
1455 				      uint32_t *syncobj)
1456 {
1457 	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
1458 
1459 	return amdgpu_cs_import_syncobj(ws->dev, fd, syncobj);
1460 }
1461 
1462 
radv_amdgpu_export_syncobj_to_sync_file(struct radeon_winsys * _ws,uint32_t syncobj,int * fd)1463 static int radv_amdgpu_export_syncobj_to_sync_file(struct radeon_winsys *_ws,
1464                                                    uint32_t syncobj,
1465                                                    int *fd)
1466 {
1467 	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
1468 
1469 	return amdgpu_cs_syncobj_export_sync_file(ws->dev, syncobj, fd);
1470 }
1471 
radv_amdgpu_import_syncobj_from_sync_file(struct radeon_winsys * _ws,uint32_t syncobj,int fd)1472 static int radv_amdgpu_import_syncobj_from_sync_file(struct radeon_winsys *_ws,
1473                                                      uint32_t syncobj,
1474                                                      int fd)
1475 {
1476 	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
1477 
1478 	return amdgpu_cs_syncobj_import_sync_file(ws->dev, syncobj, fd);
1479 }
1480 
radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys * ws)1481 void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
1482 {
1483 	ws->base.ctx_create = radv_amdgpu_ctx_create;
1484 	ws->base.ctx_destroy = radv_amdgpu_ctx_destroy;
1485 	ws->base.ctx_wait_idle = radv_amdgpu_ctx_wait_idle;
1486 	ws->base.cs_create = radv_amdgpu_cs_create;
1487 	ws->base.cs_destroy = radv_amdgpu_cs_destroy;
1488 	ws->base.cs_grow = radv_amdgpu_cs_grow;
1489 	ws->base.cs_finalize = radv_amdgpu_cs_finalize;
1490 	ws->base.cs_reset = radv_amdgpu_cs_reset;
1491 	ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
1492 	ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
1493 	ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
1494 	ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
1495 	ws->base.create_fence = radv_amdgpu_create_fence;
1496 	ws->base.destroy_fence = radv_amdgpu_destroy_fence;
1497 	ws->base.create_sem = radv_amdgpu_create_sem;
1498 	ws->base.destroy_sem = radv_amdgpu_destroy_sem;
1499 	ws->base.create_syncobj = radv_amdgpu_create_syncobj;
1500 	ws->base.destroy_syncobj = radv_amdgpu_destroy_syncobj;
1501 	ws->base.reset_syncobj = radv_amdgpu_reset_syncobj;
1502 	ws->base.signal_syncobj = radv_amdgpu_signal_syncobj;
1503 	ws->base.wait_syncobj = radv_amdgpu_wait_syncobj;
1504 	ws->base.export_syncobj = radv_amdgpu_export_syncobj;
1505 	ws->base.import_syncobj = radv_amdgpu_import_syncobj;
1506 	ws->base.export_syncobj_to_sync_file = radv_amdgpu_export_syncobj_to_sync_file;
1507 	ws->base.import_syncobj_from_sync_file = radv_amdgpu_import_syncobj_from_sync_file;
1508 	ws->base.fence_wait = radv_amdgpu_fence_wait;
1509 }
1510