/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "drm-uapi/v3d_drm.h"

#include "broadcom/clif/clif_dump.h"

#include <errno.h>
#include <time.h>

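/* Dumps the given CL submission in CLIF format to stderr when the
 * V3D_DEBUG_CL or V3D_DEBUG_CLIF debug flags are set, mapping every BO in
 * the job so its contents can be included in the dump.
 */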
static void
v3dv_clif_dump(struct v3dv_device *device,
               struct v3dv_job *job,
               struct drm_v3d_submit_cl *submit)
{
   if (!(V3D_DEBUG & (V3D_DEBUG_CL | V3D_DEBUG_CLIF)))
      return;

   struct clif_dump *clif = clif_dump_init(&device->devinfo,
                                           stderr,
                                           V3D_DEBUG & V3D_DEBUG_CL);

   set_foreach(job->bos, entry) {
      struct v3dv_bo *bo = (void *)entry->key;
      char *name = ralloc_asprintf(NULL, "%s_0x%x",
                                   bo->name, bo->offset);

      v3dv_bo_map(device, bo, bo->size);
      clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);

      ralloc_free(name);
   }

   clif_dump(clif, submit);

   clif_dump_destroy(clif);
}

static uint64_t
gettime_ns()
{
   struct timespec current;
   clock_gettime(CLOCK_MONOTONIC, &current);
   return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
}

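/* Converts a relative timeout into an absolute CLOCK_MONOTONIC time,
 * clamping the addition so it cannot overflow INT64_MAX, the largest
 * timeout a syncobj wait accepts.
 */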
static uint64_t
get_absolute_timeout(uint64_t timeout)
{
   uint64_t current_time = gettime_ns();
   uint64_t max_timeout = (uint64_t) INT64_MAX - current_time;

   timeout = MIN2(max_timeout, timeout);

   return (current_time + timeout);
}

static VkResult
queue_submit_job(struct v3dv_queue *queue,
                 struct v3dv_job *job,
                 bool do_sem_wait,
                 pthread_t *wait_thread);

/* Waits for active CPU wait threads spawned before the current thread to
 * complete and submit all their GPU jobs.
 */
static void
cpu_queue_wait_idle(struct v3dv_queue *queue)
{
   const pthread_t this_thread = pthread_self();

retry:
   mtx_lock(&queue->mutex);
   list_for_each_entry(struct v3dv_queue_submit_wait_info, info,
                       &queue->submit_wait_list, list_link) {
      for (uint32_t i = 0; i < info->wait_thread_count; i++) {
         if (info->wait_threads[i].finished)
            continue;

         /* Because we are testing this against the list of spawned threads
          * it will never match for the main thread, so when we call this from
          * the main thread we are effectively waiting for all active threads
          * to complete, and otherwise we are only waiting for work submitted
          * before the wait thread that called this (a wait thread should never
          * be waiting for work submitted after it).
          */
         if (info->wait_threads[i].thread == this_thread)
            goto done;

         /* Wait and try again */
         mtx_unlock(&queue->mutex);
         usleep(500); /* 0.5 ms */
         goto retry;
      }
   }

done:
   mtx_unlock(&queue->mutex);
}

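/* Waits on the device's last_job_sync syncobj. Every GPU job we submit sets
 * this syncobj as its out_sync, so waiting on it amounts to waiting for the
 * last GPU job submitted to complete.
 */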
static VkResult
gpu_queue_wait_idle(struct v3dv_queue *queue)
{
   struct v3dv_device *device = queue->device;

   mtx_lock(&device->mutex);
   uint32_t last_job_sync = device->last_job_sync;
   mtx_unlock(&device->mutex);

   int ret = drmSyncobjWait(device->render_fd,
                            &last_job_sync, 1, INT64_MAX, 0, NULL);
   if (ret)
      return VK_ERROR_DEVICE_LOST;

   return VK_SUCCESS;
}

VkResult
v3dv_QueueWaitIdle(VkQueue _queue)
{
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);

   /* Check that we don't have any wait threads running in the CPU first,
    * as these can spawn new GPU jobs.
    */
   cpu_queue_wait_idle(queue);

   /* Check we don't have any GPU jobs running */
   return gpu_queue_wait_idle(queue);
}

static VkResult
handle_reset_query_cpu_job(struct v3dv_job *job)
{
   /* We are about to reset query counters, so we need to make sure that
    * the GPU is not using them. The exception is timestamp queries, since
    * we handle those on the CPU.
    *
    * FIXME: we could avoid blocking the main thread for this if we used a
    * submission thread.
    */
   struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset;
   assert(info->pool);

   if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
      VkResult result = gpu_queue_wait_idle(&job->device->queue);
      if (result != VK_SUCCESS)
         return result;
   }

   for (uint32_t i = info->first; i < info->first + info->count; i++) {
      assert(i < info->pool->query_count);
      struct v3dv_query *query = &info->pool->queries[i];
      query->maybe_available = false;
      switch (info->pool->query_type) {
      case VK_QUERY_TYPE_OCCLUSION: {
         uint32_t *counter = (uint32_t *) query->bo->map;
         *counter = 0;
         break;
      }
      case VK_QUERY_TYPE_TIMESTAMP:
         query->value = 0;
         break;
      default:
         unreachable("Unsupported query type");
      }
   }

   return VK_SUCCESS;
}

static VkResult
handle_end_query_cpu_job(struct v3dv_job *job)
{
   struct v3dv_end_query_cpu_job_info *info = &job->cpu.query_end;
   assert(info->query < info->pool->query_count);
   struct v3dv_query *query = &info->pool->queries[info->query];
   query->maybe_available = true;

   return VK_SUCCESS;
}

static VkResult
handle_copy_query_results_cpu_job(struct v3dv_job *job)
{
   struct v3dv_copy_query_results_cpu_job_info *info =
      &job->cpu.query_copy_results;

   assert(info->dst && info->dst->mem && info->dst->mem->bo);
   struct v3dv_bo *bo = info->dst->mem->bo;

   /* Map the entire dst buffer for the CPU copy if needed */
   assert(!bo->map || bo->map_size == bo->size);
   if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* FIXME: if flags includes VK_QUERY_RESULT_WAIT_BIT this could trigger a
    * sync wait on the CPU for the corresponding GPU jobs to finish. We might
    * want to use a submission thread to avoid blocking the main thread.
    */
   v3dv_get_query_pool_results_cpu(job->device,
                                   info->pool,
                                   info->first,
                                   info->count,
                                   bo->map + info->dst->mem_offset,
                                   info->stride,
                                   info->flags);

   return VK_SUCCESS;
}

static VkResult
handle_set_event_cpu_job(struct v3dv_job *job, bool is_wait_thread)
{
   /* From the Vulkan 1.0 spec:
    *
    *    "When vkCmdSetEvent is submitted to a queue, it defines an execution
    *     dependency on commands that were submitted before it, and defines an
    *     event signal operation which sets the event to the signaled state.
    *     The first synchronization scope includes every command previously
    *     submitted to the same queue, including those in the same command
    *     buffer and batch".
    *
    * So we should wait for all prior work to be completed before signaling
    * the event. This includes all active CPU wait threads spawned for any
    * command buffer submitted *before* this.
    *
    * FIXME: we could avoid blocking the main thread for this if we used a
    * submission thread.
    */

   /* If we are calling this from a wait thread it will only wait for wait
    * threads spawned before it, otherwise it will wait for all active
    * threads to complete.
    */
   cpu_queue_wait_idle(&job->device->queue);

   VkResult result = gpu_queue_wait_idle(&job->device->queue);
   if (result != VK_SUCCESS)
      return result;

   struct v3dv_event_set_cpu_job_info *info = &job->cpu.event_set;
   p_atomic_set(&info->event->state, info->state);

   return VK_SUCCESS;
}

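/* Returns true iff all the events the given wait job depends on are
 * signaled.
 */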
static bool
check_wait_events_complete(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);

   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
   for (uint32_t i = 0; i < info->event_count; i++) {
      if (!p_atomic_read(&info->events[i]->state))
         return false;
   }
   return true;
}

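/* Marks the given wait thread as finished in the queue's submit wait list */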
static void
wait_thread_finish(struct v3dv_queue *queue, pthread_t thread)
{
   mtx_lock(&queue->mutex);
   list_for_each_entry(struct v3dv_queue_submit_wait_info, info,
                       &queue->submit_wait_list, list_link) {
      for (uint32_t i = 0; i < info->wait_thread_count; i++) {
         if (info->wait_threads[i].thread == thread) {
            info->wait_threads[i].finished = true;
            goto done;
         }
      }
   }

   unreachable("Failed to finish wait thread: not found");

done:
   mtx_unlock(&queue->mutex);
}

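/* Entry point for a command buffer wait thread: blocks until all the events
 * the wait job depends on are signaled and then continues submitting the
 * remaining jobs in the same command buffer.
 */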
static void *
event_wait_thread_func(void *_job)
{
   struct v3dv_job *job = (struct v3dv_job *) _job;
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;

   /* Wait for events to be signaled */
   const useconds_t wait_interval_ms = 1;
   while (!check_wait_events_complete(job))
      usleep(wait_interval_ms * 1000);

   /* Now continue submitting pending jobs for the same command buffer after
    * the wait job.
    */
   struct v3dv_queue *queue = &job->device->queue;
   list_for_each_entry_from(struct v3dv_job, pjob, job->list_link.next,
                            &job->cmd_buffer->jobs, list_link) {
      /* We don't want to spawn more than one wait thread per command buffer.
       * If this job also requires a wait for events, we will do the wait here.
       */
      VkResult result = queue_submit_job(queue, pjob, info->sem_wait, NULL);
      if (result == VK_NOT_READY) {
         while (!check_wait_events_complete(pjob)) {
            usleep(wait_interval_ms * 1000);
         }
         result = VK_SUCCESS;
      }

      if (result != VK_SUCCESS) {
         fprintf(stderr, "Wait thread job execution failed.\n");
         goto done;
      }
   }

done:
   wait_thread_finish(queue, pthread_self());
   return NULL;
}

static VkResult
spawn_event_wait_thread(struct v3dv_job *job, pthread_t *wait_thread)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
   assert(job->cmd_buffer);
   assert(wait_thread != NULL);

   if (pthread_create(wait_thread, NULL, event_wait_thread_func, job))
      return vk_error(job->device->instance, VK_ERROR_DEVICE_LOST);

   return VK_NOT_READY;
}

static VkResult
handle_wait_events_cpu_job(struct v3dv_job *job,
                           bool sem_wait,
                           pthread_t *wait_thread)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;

   /* If all events are signaled then we are done and can continue submitting
    * the rest of the command buffer normally.
    */
   if (check_wait_events_complete(job))
      return VK_SUCCESS;

   /* Otherwise, we put the rest of the command buffer on a wait thread until
    * all events are signaled. We only spawn a new thread on the first
    * wait job we see for a command buffer; any additional wait jobs in the
    * same command buffer will run in that same wait thread and will get here
    * with a NULL wait_thread pointer.
    *
    * Also, whether we spawn a wait thread or not, we always return
    * VK_NOT_READY (unless an error happened), so we stop trying to submit
    * any jobs in the same command buffer after the wait job. The wait thread
    * will attempt to submit them after the wait completes.
    */
   info->sem_wait = sem_wait;
   if (wait_thread)
      return spawn_event_wait_thread(job, wait_thread);
   else
      return VK_NOT_READY;
}

static VkResult
handle_copy_buffer_to_image_cpu_job(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE);
   struct v3dv_copy_buffer_to_image_cpu_job_info *info =
      &job->cpu.copy_buffer_to_image;

   /* Wait for all GPU work to finish first, since we may be accessing
    * the BOs involved in the operation.
    */
   v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));

   /* Map BOs */
   struct v3dv_bo *dst_bo = info->image->mem->bo;
   assert(!dst_bo->map || dst_bo->map_size == dst_bo->size);
   if (!dst_bo->map && !v3dv_bo_map(job->device, dst_bo, dst_bo->size))
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   void *dst_ptr = dst_bo->map;

   struct v3dv_bo *src_bo = info->buffer->mem->bo;
   assert(!src_bo->map || src_bo->map_size == src_bo->size);
   if (!src_bo->map && !v3dv_bo_map(job->device, src_bo, src_bo->size))
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   void *src_ptr = src_bo->map;

   const struct v3d_resource_slice *slice =
      &info->image->slices[info->mip_level];

   const struct pipe_box box = {
      info->image_offset.x, info->image_offset.y, info->base_layer,
      info->image_extent.width, info->image_extent.height, info->layer_count,
   };

   /* Copy each layer */
   for (uint32_t i = 0; i < info->layer_count; i++) {
      const uint32_t dst_offset =
         v3dv_layer_offset(info->image, info->mip_level, info->base_layer + i);
      const uint32_t src_offset =
         info->buffer->mem_offset + info->buffer_offset +
         info->buffer_layer_stride * i;
      v3d_store_tiled_image(
         dst_ptr + dst_offset, slice->stride,
         src_ptr + src_offset, info->buffer_stride,
         slice->tiling, info->image->cpp, slice->padded_height, &box);
   }

   return VK_SUCCESS;
}

static VkResult
handle_timestamp_query_cpu_job(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY);
   struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp;

   /* Wait for completion of all work queued before the timestamp query */
   v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));

   /* Compute timestamp */
   struct timespec t;
   clock_gettime(CLOCK_MONOTONIC, &t);
   assert(info->query < info->pool->query_count);
   struct v3dv_query *query = &info->pool->queries[info->query];
   query->maybe_available = true;
   query->value = t.tv_sec * 1000000000ull + t.tv_nsec;

   return VK_SUCCESS;
}

static VkResult
handle_csd_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
               bool do_sem_wait);

static VkResult
handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
                            struct v3dv_job *job,
                            bool do_sem_wait)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_CSD_INDIRECT);
   struct v3dv_csd_indirect_cpu_job_info *info = &job->cpu.csd_indirect;
   assert(info->csd_job);

   /* Make sure the GPU is no longer using the indirect buffer */
   assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
   const uint64_t infinite = 0xffffffffffffffffull;
   v3dv_bo_wait(queue->device, info->buffer->mem->bo, infinite);

   /* Map the indirect buffer and read the dispatch parameters */
   struct v3dv_bo *bo = info->buffer->mem->bo;
   if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   assert(bo->map);

   const uint32_t offset = info->buffer->mem_offset + info->offset;
   const uint32_t *group_counts = (uint32_t *) (bo->map + offset);
   if (group_counts[0] == 0 || group_counts[1] == 0 || group_counts[2] == 0)
      return VK_SUCCESS;

   if (memcmp(group_counts, info->csd_job->csd.wg_count,
              sizeof(info->csd_job->csd.wg_count)) != 0) {
      v3dv_cmd_buffer_rewrite_indirect_csd_job(info, group_counts);
   }

   return handle_csd_job(queue, info->csd_job, do_sem_wait);
}

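/* Signals a list of semaphores by importing into each of them a sync file
 * exported from the device's last_job_sync syncobj, so they are signaled
 * when the last submitted GPU job completes.
 */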
static VkResult
process_semaphores_to_signal(struct v3dv_device *device,
                             uint32_t count, const VkSemaphore *sems)
{
   if (count == 0)
      return VK_SUCCESS;

   int fd = -1;
   mtx_lock(&device->mutex);
   drmSyncobjExportSyncFile(device->render_fd, device->last_job_sync, &fd);
   mtx_unlock(&device->mutex);
   if (fd == -1)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < count; i++) {
      struct v3dv_semaphore *sem = v3dv_semaphore_from_handle(sems[i]);

      if (sem->fd >= 0)
         close(sem->fd);
      sem->fd = -1;

      int ret = drmSyncobjImportSyncFile(device->render_fd, sem->sync, fd);
      if (ret)
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

      sem->fd = fd;
   }

   return VK_SUCCESS;
}

static VkResult
process_fence_to_signal(struct v3dv_device *device, VkFence _fence)
{
   if (_fence == VK_NULL_HANDLE)
      return VK_SUCCESS;

   struct v3dv_fence *fence = v3dv_fence_from_handle(_fence);

   if (fence->fd >= 0)
      close(fence->fd);
   fence->fd = -1;

   int fd = -1;
   mtx_lock(&device->mutex);
   drmSyncobjExportSyncFile(device->render_fd, device->last_job_sync, &fd);
   mtx_unlock(&device->mutex);
   if (fd == -1)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   int ret = drmSyncobjImportSyncFile(device->render_fd, fence->sync, fd);
   if (ret)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   fence->fd = fd;

   return VK_SUCCESS;
}

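/* Submits a binning+render (CL) job to the GPU through the V3D submit_cl
 * interface.
 */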
static VkResult
handle_cl_job(struct v3dv_queue *queue,
              struct v3dv_job *job,
              bool do_sem_wait)
{
   struct v3dv_device *device = queue->device;

   struct drm_v3d_submit_cl submit;

   /* Sanity check: we should only flag a bcl sync on a job that needs to be
    * serialized.
    */
   assert(job->serialize || !job->needs_bcl_sync);

   /* We expect to have just one RCL per job, which should fit in just one
    * BO. Our BCL could chain multiple BOs together, though.
    */
   assert(list_length(&job->rcl.bo_list) == 1);
   assert(list_length(&job->bcl.bo_list) >= 1);
   struct v3dv_bo *bcl_first_bo =
      list_first_entry(&job->bcl.bo_list, struct v3dv_bo, list_link);
   submit.bcl_start = bcl_first_bo->offset;
   submit.bcl_end = job->bcl.bo->offset + v3dv_cl_offset(&job->bcl);
   submit.rcl_start = job->rcl.bo->offset;
   submit.rcl_end = job->rcl.bo->offset + v3dv_cl_offset(&job->rcl);

   submit.qma = job->tile_alloc->offset;
   submit.qms = job->tile_alloc->size;
   submit.qts = job->tile_state->offset;

   /* FIXME: we already know that we support cache flushing, as we only
    * support hw that supports it, but it would be better to just query the
    * DRM driver for this.
    */
   submit.flags = 0;
   if (job->tmu_dirty_rcl)
      submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;

   submit.bo_handle_count = job->bo_count;
   uint32_t *bo_handles =
      (uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit.bo_handle_count * 2));
   uint32_t bo_idx = 0;
   set_foreach(job->bos, entry) {
      struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
      bo_handles[bo_idx++] = bo->handle;
   }
   assert(bo_idx == submit.bo_handle_count);
   submit.bo_handles = (uintptr_t)(void *)bo_handles;

   /* We need a binning sync if we are waiting on a semaphore (do_sem_wait) or
    * if the job comes after a pipeline barrier that involves geometry stages
    * (needs_bcl_sync).
    *
    * We need a render sync if the job doesn't need a binning sync but has
    * still been flagged for serialization. It should be noted that RCL jobs
    * don't start until the previous RCL job has finished, so we don't really
    * need to add a fence for those; however, we might need to wait on a CSD or
    * TFU job, which are not automatically serialized with CL jobs.
    *
    * FIXME: for now, if we are asked to wait on any semaphores, we just wait
    * on the last job we submitted. In the future we might want to pass the
    * actual syncobj of the wait semaphores so we don't block on the last RCL
    * if we only need to wait for a previous CSD or TFU, for example, but
    * we would have to extend our kernel interface to support the case where
    * we have more than one semaphore to wait on.
    */
   const bool needs_bcl_sync = do_sem_wait || job->needs_bcl_sync;
   const bool needs_rcl_sync = job->serialize && !needs_bcl_sync;

   mtx_lock(&queue->device->mutex);
   submit.in_sync_bcl = needs_bcl_sync ? device->last_job_sync : 0;
   submit.in_sync_rcl = needs_rcl_sync ? device->last_job_sync : 0;
   submit.out_sync = device->last_job_sync;
   v3dv_clif_dump(device, job, &submit);
   int ret = v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_SUBMIT_CL, &submit);
   mtx_unlock(&queue->device->mutex);

   static bool warned = false;
   if (ret && !warned) {
      fprintf(stderr, "Draw call returned %s. Expect corruption.\n",
              strerror(errno));
      warned = true;
   }

   free(bo_handles);

   if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);

   return VK_SUCCESS;
}

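/* Submits a TFU (Texture Formatting Unit) job to the GPU */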
static VkResult
handle_tfu_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
               bool do_sem_wait)
{
   struct v3dv_device *device = queue->device;

   const bool needs_sync = do_sem_wait || job->serialize;

   mtx_lock(&device->mutex);
   job->tfu.in_sync = needs_sync ? device->last_job_sync : 0;
   job->tfu.out_sync = device->last_job_sync;
   int ret = v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_SUBMIT_TFU, &job->tfu);
   mtx_unlock(&device->mutex);

   if (ret != 0) {
      fprintf(stderr, "Failed to submit TFU job: %d\n", ret);
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
   }

   return VK_SUCCESS;
}

static VkResult
handle_csd_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
               bool do_sem_wait)
{
   struct v3dv_device *device = queue->device;

   struct drm_v3d_submit_csd *submit = &job->csd.submit;

   submit->bo_handle_count = job->bo_count;
   uint32_t *bo_handles =
      (uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit->bo_handle_count * 2));
   uint32_t bo_idx = 0;
   set_foreach(job->bos, entry) {
      struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
      bo_handles[bo_idx++] = bo->handle;
   }
   assert(bo_idx == submit->bo_handle_count);
   submit->bo_handles = (uintptr_t)(void *)bo_handles;

   const bool needs_sync = do_sem_wait || job->serialize;

   mtx_lock(&queue->device->mutex);
   submit->in_sync = needs_sync ? device->last_job_sync : 0;
   submit->out_sync = device->last_job_sync;
   int ret = v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_SUBMIT_CSD, submit);
   mtx_unlock(&queue->device->mutex);

   static bool warned = false;
   if (ret && !warned) {
      fprintf(stderr, "Compute dispatch returned %s. Expect corruption.\n",
              strerror(errno));
      warned = true;
   }

   free(bo_handles);

   if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);

   return VK_SUCCESS;
}

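/* Dispatches a job to the handler for its type. A return value of
 * VK_NOT_READY means the job spawned (or already runs inside) an event wait
 * thread that takes over submission of the remaining jobs in the same
 * command buffer.
 */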
static VkResult
queue_submit_job(struct v3dv_queue *queue,
                 struct v3dv_job *job,
                 bool do_sem_wait,
                 pthread_t *wait_thread)
{
   assert(job);

   switch (job->type) {
   case V3DV_JOB_TYPE_GPU_CL:
      return handle_cl_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_GPU_TFU:
      return handle_tfu_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_GPU_CSD:
      return handle_csd_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_CPU_RESET_QUERIES:
      return handle_reset_query_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_END_QUERY:
      return handle_end_query_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
      return handle_copy_query_results_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_SET_EVENT:
      return handle_set_event_cpu_job(job, wait_thread != NULL);
   case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
      return handle_wait_events_cpu_job(job, do_sem_wait, wait_thread);
   case V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE:
      return handle_copy_buffer_to_image_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
      return handle_csd_indirect_cpu_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY:
      return handle_timestamp_query_cpu_job(job);
   default:
      unreachable("Unhandled job type");
   }
}

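/* Emits a minimal 1x1 binning pass for the no-op job */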
static void
emit_noop_bin(struct v3dv_job *job)
{
   v3dv_job_start_frame(job, 1, 1, 1, 1, V3D_INTERNAL_BPP_32, false);
   v3dv_job_emit_binning_flush(job);
}

static void
emit_noop_render(struct v3dv_job *job)
{
   struct v3dv_cl *rcl = &job->rcl;
   v3dv_cl_ensure_space_with_branch(rcl, 200 + 1 * 256 *
                                    cl_packet_length(SUPERTILE_COORDINATES));

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
      config.early_z_disable = true;
      config.image_width_pixels = 1;
      config.image_height_pixels = 1;
      config.number_of_render_targets = 1;
      config.multisample_mode_4x = false;
      config.maximum_bpp_of_all_render_targets = V3D_INTERNAL_BPP_32;
   }

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
      rt.render_target_0_internal_bpp = V3D_INTERNAL_BPP_32;
      rt.render_target_0_internal_type = V3D_INTERNAL_TYPE_8;
      rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
   }

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
      clear.z_clear_value = 1.0f;
      clear.stencil_clear_value = 0;
   }

   cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
      init.use_auto_chained_tile_lists = true;
      init.size_of_first_block_in_chained_tile_lists =
         TILE_ALLOCATION_BLOCK_SIZE_64B;
   }

   cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
      list.address = v3dv_cl_address(job->tile_alloc, 0);
   }

   cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
      config.number_of_bin_tile_lists = 1;
      config.total_frame_width_in_tiles = 1;
      config.total_frame_height_in_tiles = 1;
      config.supertile_width_in_tiles = 1;
      config.supertile_height_in_tiles = 1;
      config.total_frame_width_in_supertiles = 1;
      config.total_frame_height_in_supertiles = 1;
   }

   struct v3dv_cl *icl = &job->indirect;
   v3dv_cl_ensure_space(icl, 200, 1);
   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(icl);

   cl_emit(icl, TILE_COORDINATES_IMPLICIT, coords);

   cl_emit(icl, END_OF_LOADS, end);

   cl_emit(icl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   cl_emit(icl, STORE_TILE_BUFFER_GENERAL, store) {
      store.buffer_to_store = NONE;
   }

   cl_emit(icl, END_OF_TILE_MARKER, end);

   cl_emit(icl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(icl);
   }

   cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
      coords.column_number_in_supertiles = 0;
      coords.row_number_in_supertiles = 0;
   }

   cl_emit(rcl, END_OF_RENDERING, end);
}

static VkResult
queue_create_noop_job(struct v3dv_queue *queue)
{
   struct v3dv_device *device = queue->device;
   queue->noop_job = vk_zalloc(&device->alloc, sizeof(struct v3dv_job), 8,
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!queue->noop_job)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   v3dv_job_init(queue->noop_job, V3DV_JOB_TYPE_GPU_CL, device, NULL, -1);

   emit_noop_bin(queue->noop_job);
   emit_noop_render(queue->noop_job);

   return VK_SUCCESS;
}

static VkResult
queue_submit_noop_job(struct v3dv_queue *queue, const VkSubmitInfo *pSubmit)
{
   /* VkQueue host access is externally synchronized, so we don't need any
    * locking here for the lazy creation of the no-op job.
    */
   if (!queue->noop_job) {
      VkResult result = queue_create_noop_job(queue);
      if (result != VK_SUCCESS)
         return result;
   }

   return queue_submit_job(queue, queue->noop_job,
                           pSubmit->waitSemaphoreCount > 0, NULL);
}

static VkResult
queue_submit_cmd_buffer(struct v3dv_queue *queue,
                        struct v3dv_cmd_buffer *cmd_buffer,
                        const VkSubmitInfo *pSubmit,
                        pthread_t *wait_thread)
{
   assert(cmd_buffer);
   assert(cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_EXECUTABLE);

   if (list_is_empty(&cmd_buffer->jobs))
      return queue_submit_noop_job(queue, pSubmit);

   list_for_each_entry_safe(struct v3dv_job, job,
                            &cmd_buffer->jobs, list_link) {
      VkResult result = queue_submit_job(queue, job,
                                         pSubmit->waitSemaphoreCount > 0,
                                         wait_thread);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

static void
add_wait_thread_to_list(struct v3dv_device *device,
                        pthread_t thread,
                        struct v3dv_queue_submit_wait_info **wait_info)
{
   /* If this is the first time we spawn a wait thread for this queue
    * submission, create a v3dv_queue_submit_wait_info to track this and
    * any other threads in the same submission and add it to the global list
    * in the queue.
    */
   if (*wait_info == NULL) {
      *wait_info =
         vk_zalloc(&device->alloc, sizeof(struct v3dv_queue_submit_wait_info), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      (*wait_info)->device = device;
   }

   /* And add the thread to the list of wait threads for this submission */
   const uint32_t thread_idx = (*wait_info)->wait_thread_count;
   assert(thread_idx < 16);
   (*wait_info)->wait_threads[thread_idx].thread = thread;
   (*wait_info)->wait_threads[thread_idx].finished = false;
   (*wait_info)->wait_thread_count++;
}

static void
add_signal_semaphores_to_wait_list(struct v3dv_device *device,
                                   const VkSubmitInfo *pSubmit,
                                   struct v3dv_queue_submit_wait_info *wait_info)
{
   assert(wait_info);

   if (pSubmit->signalSemaphoreCount == 0)
      return;

   /* FIXME: We put all the semaphores in a list and we signal all of them
    * together from the submit master thread when the last wait thread in the
    * submit completes. We could do better though: group the semaphores per
    * submit and signal them as soon as all wait threads for a particular
    * submit complete. Not sure if the extra work would be worth it though,
    * since we only spawn wait threads for event waits, and only when the
    * event is set from the host after the queue submission.
    */

   /* Check the size of the current semaphore list */
   const uint32_t prev_count = wait_info->signal_semaphore_count;
   const uint32_t prev_alloc_size = prev_count * sizeof(VkSemaphore);
   VkSemaphore *prev_list = wait_info->signal_semaphores;

   /* Resize the list to hold the additional semaphores */
   const uint32_t extra_alloc_size =
      pSubmit->signalSemaphoreCount * sizeof(VkSemaphore);
   wait_info->signal_semaphore_count += pSubmit->signalSemaphoreCount;
   wait_info->signal_semaphores =
      vk_alloc(&device->alloc, prev_alloc_size + extra_alloc_size, 8,
               VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);

   /* Copy the old list to the new allocation and free the old list */
   if (prev_count > 0) {
      memcpy(wait_info->signal_semaphores, prev_list, prev_alloc_size);
      vk_free(&device->alloc, prev_list);
   }

   /* Add the new semaphores to the list */
   memcpy(wait_info->signal_semaphores + prev_count,
          pSubmit->pSignalSemaphores, extra_alloc_size);
}

static VkResult
queue_submit_cmd_buffer_batch(struct v3dv_queue *queue,
                              const VkSubmitInfo *pSubmit,
                              struct v3dv_queue_submit_wait_info **wait_info)
{
   VkResult result = VK_SUCCESS;
   bool has_wait_threads = false;

   /* Even if we don't have any actual work to submit we still need to wait
    * on the wait semaphores and signal the signal semaphores and fence, so
    * in this scenario we just submit a trivial no-op job so we don't have
    * to do anything special. It should not be a common case anyway.
    */
   if (pSubmit->commandBufferCount == 0) {
      result = queue_submit_noop_job(queue, pSubmit);
   } else {
      for (uint32_t i = 0; i < pSubmit->commandBufferCount; i++) {
         pthread_t wait_thread;
         struct v3dv_cmd_buffer *cmd_buffer =
            v3dv_cmd_buffer_from_handle(pSubmit->pCommandBuffers[i]);
         result = queue_submit_cmd_buffer(queue, cmd_buffer, pSubmit,
                                          &wait_thread);

         /* We get VK_NOT_READY if we had to spawn a wait thread for the
          * command buffer. In that scenario, we want to continue submitting
          * any pending command buffers in the batch, but we don't want to
          * process any signal semaphores for the batch until we know we have
          * submitted every job for every command buffer in the batch.
          */
         if (result == VK_NOT_READY) {
            result = VK_SUCCESS;
            add_wait_thread_to_list(queue->device, wait_thread, wait_info);
            has_wait_threads = true;
         }

         if (result != VK_SUCCESS)
            break;
      }
   }

   if (result != VK_SUCCESS)
      return result;

   /* If we had to spawn any wait threads in this submit we need to wait for
    * all of them to complete before we can signal any semaphores.
    */
   if (!has_wait_threads) {
      return process_semaphores_to_signal(queue->device,
                                          pSubmit->signalSemaphoreCount,
                                          pSubmit->pSignalSemaphores);
   } else {
      assert(*wait_info);
      add_signal_semaphores_to_wait_list(queue->device, pSubmit, *wait_info);
      return VK_NOT_READY;
   }
}

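/* Entry point for the master wait thread of a queue submission: joins all
 * the wait threads spawned for the submission, then signals its semaphores
 * and fence and releases the wait info.
 */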
static void *
master_wait_thread_func(void *_wait_info)
{
   struct v3dv_queue_submit_wait_info *wait_info =
      (struct v3dv_queue_submit_wait_info *) _wait_info;

   struct v3dv_queue *queue = &wait_info->device->queue;

   /* Wait for all command buffer wait threads to complete */
   for (uint32_t i = 0; i < wait_info->wait_thread_count; i++) {
      int res = pthread_join(wait_info->wait_threads[i].thread, NULL);
      if (res != 0)
         fprintf(stderr, "Wait thread failed to join.\n");
   }

   /* Signal semaphores and fences */
   VkResult result;
   result = process_semaphores_to_signal(wait_info->device,
                                         wait_info->signal_semaphore_count,
                                         wait_info->signal_semaphores);
   if (result != VK_SUCCESS)
      fprintf(stderr, "Wait thread semaphore signaling failed.\n");

   result = process_fence_to_signal(wait_info->device, wait_info->fence);
   if (result != VK_SUCCESS)
      fprintf(stderr, "Wait thread fence signaling failed.\n");

   /* Release wait_info */
   mtx_lock(&queue->mutex);
   list_del(&wait_info->list_link);
   mtx_unlock(&queue->mutex);

   vk_free(&wait_info->device->alloc, wait_info->signal_semaphores);
   vk_free(&wait_info->device->alloc, wait_info);

   return NULL;
}

static VkResult
spawn_master_wait_thread(struct v3dv_queue *queue,
                         struct v3dv_queue_submit_wait_info *wait_info)
{
   VkResult result = VK_SUCCESS;

   mtx_lock(&queue->mutex);
   if (pthread_create(&wait_info->master_wait_thread, NULL,
                      master_wait_thread_func, wait_info)) {
      result = vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);
      goto done;
   }

   list_addtail(&wait_info->list_link, &queue->submit_wait_list);

done:
   mtx_unlock(&queue->mutex);
   return result;
}

VkResult
v3dv_QueueSubmit(VkQueue _queue,
                 uint32_t submitCount,
                 const VkSubmitInfo* pSubmits,
                 VkFence fence)
{
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);

   struct v3dv_queue_submit_wait_info *wait_info = NULL;

   VkResult result = VK_SUCCESS;
   for (uint32_t i = 0; i < submitCount; i++) {
      result = queue_submit_cmd_buffer_batch(queue, &pSubmits[i], &wait_info);
      if (result != VK_SUCCESS && result != VK_NOT_READY)
         goto done;
   }

   if (!wait_info) {
      assert(result != VK_NOT_READY);
      result = process_fence_to_signal(queue->device, fence);
      goto done;
   }

   /* We spawned wait threads, so we have to spawn a master thread for this
    * queue submission that waits for all other threads to complete and then
    * signals any semaphores and fences.
    */
   assert(wait_info);
   wait_info->fence = fence;
   result = spawn_master_wait_thread(queue, wait_info);

done:
   return result;
}

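/* Semaphores and fences are backed by DRM syncobjs; their payloads are moved
 * around by exporting and importing sync files (see
 * process_semaphores_to_signal() and process_fence_to_signal() above).
 */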
VkResult
v3dv_CreateSemaphore(VkDevice _device,
                     const VkSemaphoreCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkSemaphore *pSemaphore)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO);

   struct v3dv_semaphore *sem =
      vk_alloc2(&device->alloc, pAllocator, sizeof(struct v3dv_semaphore), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (sem == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   sem->fd = -1;

   int ret = drmSyncobjCreate(device->render_fd, 0, &sem->sync);
   if (ret) {
      vk_free2(&device->alloc, pAllocator, sem);
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   *pSemaphore = v3dv_semaphore_to_handle(sem);

   return VK_SUCCESS;
}

void
v3dv_DestroySemaphore(VkDevice _device,
                      VkSemaphore semaphore,
                      const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_semaphore, sem, semaphore);

   if (sem == NULL)
      return;

   drmSyncobjDestroy(device->render_fd, sem->sync);

   if (sem->fd != -1)
      close(sem->fd);

   vk_free2(&device->alloc, pAllocator, sem);
}

VkResult
v3dv_CreateFence(VkDevice _device,
                 const VkFenceCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkFence *pFence)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);

   struct v3dv_fence *fence =
      vk_alloc2(&device->alloc, pAllocator, sizeof(struct v3dv_fence), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (fence == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   unsigned flags = 0;
   if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
      flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
   int ret = drmSyncobjCreate(device->render_fd, flags, &fence->sync);
   if (ret) {
      vk_free2(&device->alloc, pAllocator, fence);
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   fence->fd = -1;

   *pFence = v3dv_fence_to_handle(fence);

   return VK_SUCCESS;
}

void
v3dv_DestroyFence(VkDevice _device,
                  VkFence _fence,
                  const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_fence, fence, _fence);

   if (fence == NULL)
      return;

   drmSyncobjDestroy(device->render_fd, fence->sync);

   if (fence->fd != -1)
      close(fence->fd);

   vk_free2(&device->alloc, pAllocator, fence);
}

VkResult
v3dv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_fence, fence, _fence);

   int ret = drmSyncobjWait(device->render_fd, &fence->sync, 1,
                            0, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL);
   if (ret == -ETIME)
      return VK_NOT_READY;
   else if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
   return VK_SUCCESS;
}

VkResult
v3dv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   uint32_t *syncobjs = vk_alloc(&device->alloc,
                                 sizeof(*syncobjs) * fenceCount, 8,
                                 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!syncobjs)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < fenceCount; i++) {
      struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]);
      syncobjs[i] = fence->sync;
   }

   int ret = drmSyncobjReset(device->render_fd, syncobjs, fenceCount);

   vk_free(&device->alloc, syncobjs);

   if (ret)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   return VK_SUCCESS;
}

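/* Note that the syncobj wait takes an absolute timeout, so we compute the
 * deadline once up front and retry the wait until it expires.
 */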
VkResult
v3dv_WaitForFences(VkDevice _device,
                   uint32_t fenceCount,
                   const VkFence *pFences,
                   VkBool32 waitAll,
                   uint64_t timeout)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   const uint64_t abs_timeout = get_absolute_timeout(timeout);

   uint32_t *syncobjs = vk_alloc(&device->alloc,
                                 sizeof(*syncobjs) * fenceCount, 8,
                                 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!syncobjs)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < fenceCount; i++) {
      struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]);
      syncobjs[i] = fence->sync;
   }

   unsigned flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   if (waitAll)
      flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;

   int ret;
   do {
      ret = drmSyncobjWait(device->render_fd, syncobjs, fenceCount,
                           abs_timeout, flags, NULL);
   } while (ret == -ETIME && gettime_ns() < abs_timeout);

   vk_free(&device->alloc, syncobjs);

   if (ret == -ETIME)
      return VK_TIMEOUT;
   else if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
   return VK_SUCCESS;
}

VkResult
v3dv_QueueBindSparse(VkQueue _queue,
                     uint32_t bindInfoCount,
                     const VkBindSparseInfo *pBindInfo,
                     VkFence fence)
{
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
   return vk_error(queue->device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
}