/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "drm-uapi/v3d_drm.h"

#include "broadcom/clif/clif_dump.h"

#include <errno.h>
#include <time.h>

static void
v3dv_clif_dump(struct v3dv_device *device,
               struct v3dv_job *job,
               struct drm_v3d_submit_cl *submit)
{
   if (!(V3D_DEBUG & (V3D_DEBUG_CL | V3D_DEBUG_CLIF)))
      return;

   struct clif_dump *clif = clif_dump_init(&device->devinfo,
                                           stderr,
                                           V3D_DEBUG & V3D_DEBUG_CL);

   set_foreach(job->bos, entry) {
      struct v3dv_bo *bo = (void *)entry->key;
      char *name = ralloc_asprintf(NULL, "%s_0x%x",
                                   bo->name, bo->offset);

      v3dv_bo_map(device, bo, bo->size);
      clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);

      ralloc_free(name);
   }

   clif_dump(clif, submit);

   clif_dump_destroy(clif);
}

static uint64_t
gettime_ns()
{
   struct timespec current;
   clock_gettime(CLOCK_MONOTONIC, &current);
   return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
}

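/* Converts a relative timeout into an absolute CLOCK_MONOTONIC deadline,
 * clamping it so that current_time + timeout cannot overflow the signed
 * 64-bit range expected by drmSyncobjWait(). For example, a timeout of
 * UINT64_MAX effectively becomes "wait forever" (a deadline of INT64_MAX).
 */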
static uint64_t
get_absolute_timeout(uint64_t timeout)
{
   uint64_t current_time = gettime_ns();
   uint64_t max_timeout = (uint64_t) INT64_MAX - current_time;

   timeout = MIN2(max_timeout, timeout);

   return (current_time + timeout);
}

static VkResult
queue_submit_job(struct v3dv_queue *queue,
                 struct v3dv_job *job,
                 bool do_sem_wait,
                 pthread_t *wait_thread);

/* Waits for active CPU wait threads spawned before the current thread to
 * complete and submit all their GPU jobs.
 */
static void
cpu_queue_wait_idle(struct v3dv_queue *queue)
{
   const pthread_t this_thread = pthread_self();

retry:
   mtx_lock(&queue->mutex);
   list_for_each_entry(struct v3dv_queue_submit_wait_info, info,
                       &queue->submit_wait_list, list_link) {
      for (uint32_t i = 0; i < info->wait_thread_count; i++) {
         if (info->wait_threads[i].finished)
            continue;

         /* Because we are testing this against the list of spawned threads
          * it will never match for the main thread, so when we call this from
          * the main thread we are effectively waiting for all active threads
          * to complete, and otherwise we are only waiting for work submitted
          * before the wait thread that called this (a wait thread should never
          * be waiting for work submitted after it).
          */
         if (info->wait_threads[i].thread == this_thread)
            goto done;

         /* Wait and try again */
         mtx_unlock(&queue->mutex);
         usleep(500); /* 0.5 ms */
         goto retry;
      }
   }

done:
   mtx_unlock(&queue->mutex);
}

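/* Waits on device->last_job_sync, the syncobj that every GPU submission
 * (CL, TFU, CSD) installs as its out_sync, so this returns once the most
 * recently submitted GPU job has completed.
 */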
static VkResult
gpu_queue_wait_idle(struct v3dv_queue *queue)
{
   struct v3dv_device *device = queue->device;

   mtx_lock(&device->mutex);
   uint32_t last_job_sync = device->last_job_sync;
   mtx_unlock(&device->mutex);

   int ret = drmSyncobjWait(device->render_fd,
                            &last_job_sync, 1, INT64_MAX, 0, NULL);
   if (ret)
      return VK_ERROR_DEVICE_LOST;

   return VK_SUCCESS;
}

VkResult
v3dv_QueueWaitIdle(VkQueue _queue)
{
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);

   /* Check that we don't have any wait threads running on the CPU first,
    * as these can spawn new GPU jobs.
    */
   cpu_queue_wait_idle(queue);

   /* Check we don't have any GPU jobs running */
   return gpu_queue_wait_idle(queue);
}

static VkResult
handle_reset_query_cpu_job(struct v3dv_job *job)
{
   /* We are about to reset query counters so we need to make sure that
    * the GPU is not using them. The exception is timestamp queries, since
    * we handle those on the CPU.
    *
    * FIXME: we could avoid blocking the main thread for this if we used a
    *        submission thread.
    */
   struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset;
   assert(info->pool);

   if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
      VkResult result = gpu_queue_wait_idle(&job->device->queue);
      if (result != VK_SUCCESS)
         return result;
   }

   for (uint32_t i = info->first; i < info->first + info->count; i++) {
      assert(i < info->pool->query_count);
      struct v3dv_query *query = &info->pool->queries[i];
      query->maybe_available = false;
      switch (info->pool->query_type) {
      case VK_QUERY_TYPE_OCCLUSION: {
         uint32_t *counter = (uint32_t *) query->bo->map;
         *counter = 0;
         break;
      }
      case VK_QUERY_TYPE_TIMESTAMP:
         query->value = 0;
         break;
      default:
         unreachable("Unsupported query type");
      }
   }

   return VK_SUCCESS;
}

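/* Ending a query from the CPU job queue only needs to flag the query as
 * "maybe available"; the actual result is produced by the GPU (occlusion)
 * or by the timestamp CPU job and is read back later when results are
 * retrieved.
 */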
static VkResult
handle_end_query_cpu_job(struct v3dv_job *job)
{
   struct v3dv_end_query_cpu_job_info *info = &job->cpu.query_end;
   assert(info->query < info->pool->query_count);
   struct v3dv_query *query = &info->pool->queries[info->query];
   query->maybe_available = true;

   return VK_SUCCESS;
}

static VkResult
handle_copy_query_results_cpu_job(struct v3dv_job *job)
{
   struct v3dv_copy_query_results_cpu_job_info *info =
      &job->cpu.query_copy_results;

   assert(info->dst && info->dst->mem && info->dst->mem->bo);
   struct v3dv_bo *bo = info->dst->mem->bo;

   /* Map the entire dst buffer for the CPU copy if needed */
   assert(!bo->map || bo->map_size == bo->size);
   if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* FIXME: if flags includes VK_QUERY_RESULT_WAIT_BIT this could trigger a
    * sync wait on the CPU for the corresponding GPU jobs to finish. We might
    * want to use a submission thread to avoid blocking on the main thread.
    */
   v3dv_get_query_pool_results_cpu(job->device,
                                   info->pool,
                                   info->first,
                                   info->count,
                                   bo->map + info->dst->mem_offset,
                                   info->stride,
                                   info->flags);

   return VK_SUCCESS;
}

static VkResult
handle_set_event_cpu_job(struct v3dv_job *job, bool is_wait_thread)
{
   /* From the Vulkan 1.0 spec:
    *
    *    "When vkCmdSetEvent is submitted to a queue, it defines an execution
    *     dependency on commands that were submitted before it, and defines an
    *     event signal operation which sets the event to the signaled state.
    *     The first synchronization scope includes every command previously
    *     submitted to the same queue, including those in the same command
    *     buffer and batch".
    *
    * So we should wait for all prior work to be completed before signaling
    * the event. This includes all active CPU wait threads spawned for any
    * command buffer submitted *before* this.
    *
    * FIXME: we could avoid blocking the main thread for this if we used a
    *        submission thread.
    */

   /* If we are calling this from a wait thread it will only wait for wait
    * threads spawned before it, otherwise it will wait for all active
    * threads to complete.
    */
   cpu_queue_wait_idle(&job->device->queue);

   VkResult result = gpu_queue_wait_idle(&job->device->queue);
   if (result != VK_SUCCESS)
      return result;

   struct v3dv_event_set_cpu_job_info *info = &job->cpu.event_set;
   p_atomic_set(&info->event->state, info->state);

   return VK_SUCCESS;
}

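/* Returns true if every event the wait job depends on has been signaled.
 * Event state may also be set from the host after the submission (see the
 * wait-thread logic below), so it is read atomically.
 */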
static bool
check_wait_events_complete(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);

   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
   for (uint32_t i = 0; i < info->event_count; i++) {
      if (!p_atomic_read(&info->events[i]->state))
         return false;
   }
   return true;
}

static void
wait_thread_finish(struct v3dv_queue *queue, pthread_t thread)
{
   mtx_lock(&queue->mutex);
   list_for_each_entry(struct v3dv_queue_submit_wait_info, info,
                       &queue->submit_wait_list, list_link) {
      for (uint32_t i = 0; i < info->wait_thread_count; i++) {
         if (info->wait_threads[i].thread == thread) {
            info->wait_threads[i].finished = true;
            goto done;
         }
      }
   }

   unreachable(!"Failed to finish wait thread: not found");

done:
   mtx_unlock(&queue->mutex);
}

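/* Entry point for a per-command-buffer event wait thread: it polls until all
 * events for the wait job are signaled, then submits the remaining jobs of
 * the same command buffer, and finally marks itself as finished so the queue
 * can track outstanding wait threads.
 */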
static void *
event_wait_thread_func(void *_job)
{
   struct v3dv_job *job = (struct v3dv_job *) _job;
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;

   /* Wait for events to be signaled */
   const useconds_t wait_interval_ms = 1;
   while (!check_wait_events_complete(job))
      usleep(wait_interval_ms * 1000);

   /* Now continue submitting pending jobs for the same command buffer after
    * the wait job.
    */
   struct v3dv_queue *queue = &job->device->queue;
   list_for_each_entry_from(struct v3dv_job, pjob, job->list_link.next,
                            &job->cmd_buffer->jobs, list_link) {
      /* We don't want to spawn more than one wait thread per command buffer.
       * If this job also requires a wait for events, we will do the wait here.
       */
      VkResult result = queue_submit_job(queue, pjob, info->sem_wait, NULL);
      if (result == VK_NOT_READY) {
         while (!check_wait_events_complete(pjob)) {
            usleep(wait_interval_ms * 1000);
         }
         result = VK_SUCCESS;
      }

      if (result != VK_SUCCESS) {
         fprintf(stderr, "Wait thread job execution failed.\n");
         goto done;
      }
   }

done:
   wait_thread_finish(queue, pthread_self());
   return NULL;
}

static VkResult
spawn_event_wait_thread(struct v3dv_job *job, pthread_t *wait_thread)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
   assert(job->cmd_buffer);
   assert(wait_thread != NULL);

   if (pthread_create(wait_thread, NULL, event_wait_thread_func, job))
      return vk_error(job->device->instance, VK_ERROR_DEVICE_LOST);

   return VK_NOT_READY;
}

static VkResult
handle_wait_events_cpu_job(struct v3dv_job *job,
                           bool sem_wait,
                           pthread_t *wait_thread)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;

   /* If all events are signaled then we are done and can continue submitting
    * the rest of the command buffer normally.
    */
   if (check_wait_events_complete(job))
      return VK_SUCCESS;

   /* Otherwise, we put the rest of the command buffer on a wait thread until
    * all events are signaled. We only spawn a new thread on the first
    * wait job we see for a command buffer, any additional wait jobs in the
    * same command buffer will run in that same wait thread and will get here
    * with a NULL wait_thread pointer.
    *
    * Also, whether we spawn a wait thread or not, we always return
    * VK_NOT_READY (unless an error happened), so we stop trying to submit
    * any jobs in the same command buffer after the wait job. The wait thread
    * will attempt to submit them after the wait completes.
    */
   info->sem_wait = sem_wait;
   if (wait_thread)
      return spawn_event_wait_thread(job, wait_thread);
   else
      return VK_NOT_READY;
}

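/* Implements a buffer-to-image copy entirely on the CPU: after draining the
 * GPU it maps both BOs and uses v3d_store_tiled_image() to write each layer
 * from the linear source buffer into the image slice according to the
 * slice's tiling layout.
 */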
static VkResult
handle_copy_buffer_to_image_cpu_job(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE);
   struct v3dv_copy_buffer_to_image_cpu_job_info *info =
      &job->cpu.copy_buffer_to_image;

   /* Wait for all GPU work to finish first, since we may be accessing
    * the BOs involved in the operation.
    */
   v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));

   /* Map BOs */
   struct v3dv_bo *dst_bo = info->image->mem->bo;
   assert(!dst_bo->map || dst_bo->map_size == dst_bo->size);
   if (!dst_bo->map && !v3dv_bo_map(job->device, dst_bo, dst_bo->size))
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   void *dst_ptr = dst_bo->map;

   struct v3dv_bo *src_bo = info->buffer->mem->bo;
   assert(!src_bo->map || src_bo->map_size == src_bo->size);
   if (!src_bo->map && !v3dv_bo_map(job->device, src_bo, src_bo->size))
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   void *src_ptr = src_bo->map;

   const struct v3d_resource_slice *slice =
      &info->image->slices[info->mip_level];

   const struct pipe_box box = {
      info->image_offset.x, info->image_offset.y, info->base_layer,
      info->image_extent.width, info->image_extent.height, info->layer_count,
   };

   /* Copy each layer */
   for (uint32_t i = 0; i < info->layer_count; i++) {
      const uint32_t dst_offset =
         v3dv_layer_offset(info->image, info->mip_level, info->base_layer + i);
      const uint32_t src_offset =
         info->buffer->mem_offset + info->buffer_offset +
         info->buffer_layer_stride * i;
      v3d_store_tiled_image(
         dst_ptr + dst_offset, slice->stride,
         src_ptr + src_offset, info->buffer_stride,
         slice->tiling, info->image->cpp, slice->padded_height, &box);
   }

   return VK_SUCCESS;
}

static VkResult
handle_timestamp_query_cpu_job(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY);
   struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp;

   /* Wait for completion of all work queued before the timestamp query */
   v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));

   /* Compute timestamp */
   struct timespec t;
   clock_gettime(CLOCK_MONOTONIC, &t);
   assert(info->query < info->pool->query_count);
   struct v3dv_query *query = &info->pool->queries[info->query];
   query->maybe_available = true;
   query->value = t.tv_sec * 1000000000ull + t.tv_nsec;

   return VK_SUCCESS;
}

static VkResult
handle_csd_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
               bool do_sem_wait);

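/* Handles an indirect compute dispatch: waits for the GPU to be done with
 * the indirect buffer, reads the workgroup counts from it on the CPU and,
 * if they differ from the counts the CSD job was recorded with, rewrites
 * the job before submitting it. A dispatch with any zero dimension is a
 * no-op and is skipped.
 */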
static VkResult
handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
                            struct v3dv_job *job,
                            bool do_sem_wait)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_CSD_INDIRECT);
   struct v3dv_csd_indirect_cpu_job_info *info = &job->cpu.csd_indirect;
   assert(info->csd_job);

   /* Make sure the GPU is no longer using the indirect buffer */
   assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
   const uint64_t infinite = 0xffffffffffffffffull;
   v3dv_bo_wait(queue->device, info->buffer->mem->bo, infinite);

   /* Map the indirect buffer and read the dispatch parameters */
   assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
   struct v3dv_bo *bo = info->buffer->mem->bo;
   if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   assert(bo->map);

   const uint32_t offset = info->buffer->mem_offset + info->offset;
   const uint32_t *group_counts = (uint32_t *) (bo->map + offset);
   if (group_counts[0] == 0 || group_counts[1] == 0 || group_counts[2] == 0)
      return VK_SUCCESS;

   if (memcmp(group_counts, info->csd_job->csd.wg_count,
              sizeof(info->csd_job->csd.wg_count)) != 0) {
      v3dv_cmd_buffer_rewrite_indirect_csd_job(info, group_counts);
   }

   handle_csd_job(queue, info->csd_job, do_sem_wait);

   return VK_SUCCESS;
}

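/* Signals the given semaphores by exporting a sync file from the syncobj of
 * the last GPU job submitted and importing it into each semaphore's syncobj,
 * so the semaphores become signaled when that job completes.
 */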
static VkResult
process_semaphores_to_signal(struct v3dv_device *device,
                             uint32_t count, const VkSemaphore *sems)
{
   if (count == 0)
      return VK_SUCCESS;

   int fd;
   mtx_lock(&device->mutex);
   drmSyncobjExportSyncFile(device->render_fd, device->last_job_sync, &fd);
   mtx_unlock(&device->mutex);
   if (fd == -1)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < count; i++) {
      struct v3dv_semaphore *sem = v3dv_semaphore_from_handle(sems[i]);

      if (sem->fd >= 0)
         close(sem->fd);
      sem->fd = -1;

      int ret = drmSyncobjImportSyncFile(device->render_fd, sem->sync, fd);
      if (ret)
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

      sem->fd = fd;
   }

   return VK_SUCCESS;
}

static VkResult
process_fence_to_signal(struct v3dv_device *device, VkFence _fence)
{
   if (_fence == VK_NULL_HANDLE)
      return VK_SUCCESS;

   struct v3dv_fence *fence = v3dv_fence_from_handle(_fence);

   if (fence->fd >= 0)
      close(fence->fd);
   fence->fd = -1;

   int fd;
   mtx_lock(&device->mutex);
   drmSyncobjExportSyncFile(device->render_fd, device->last_job_sync, &fd);
   mtx_unlock(&device->mutex);
   if (fd == -1)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   int ret = drmSyncobjImportSyncFile(device->render_fd, fence->sync, fd);
   if (ret)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   fence->fd = fd;

   return VK_SUCCESS;
}

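/* Submits a binning/render (CL) job to the kernel through
 * DRM_IOCTL_V3D_SUBMIT_CL, gathering the BO handles referenced by the job
 * and deciding whether the binner or the renderer needs to wait on
 * previously submitted work.
 */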
static VkResult
handle_cl_job(struct v3dv_queue *queue,
              struct v3dv_job *job,
              bool do_sem_wait)
{
   struct v3dv_device *device = queue->device;

   struct drm_v3d_submit_cl submit;

   /* Sanity check: we should only flag a bcl sync on a job that needs to be
    * serialized.
    */
   assert(job->serialize || !job->needs_bcl_sync);

   /* We expect to have just one RCL per job, which should fit in just one BO.
    * Our BCL, however, could chain multiple BOs together.
    */
   assert(list_length(&job->rcl.bo_list) == 1);
   assert(list_length(&job->bcl.bo_list) >= 1);
   struct v3dv_bo *bcl_first_bo =
      list_first_entry(&job->bcl.bo_list, struct v3dv_bo, list_link);
   submit.bcl_start = bcl_first_bo->offset;
   submit.bcl_end = job->bcl.bo->offset + v3dv_cl_offset(&job->bcl);
   submit.rcl_start = job->rcl.bo->offset;
   submit.rcl_end = job->rcl.bo->offset + v3dv_cl_offset(&job->rcl);

   submit.qma = job->tile_alloc->offset;
   submit.qms = job->tile_alloc->size;
   submit.qts = job->tile_state->offset;

   /* FIXME: we already know that we support cache flush, as we only support
    * hw that supports it, but it would be better to just ask the DRM driver.
    */
   submit.flags = 0;
   if (job->tmu_dirty_rcl)
      submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;

   submit.bo_handle_count = job->bo_count;
   uint32_t *bo_handles =
      (uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit.bo_handle_count * 2));
   uint32_t bo_idx = 0;
   set_foreach(job->bos, entry) {
      struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
      bo_handles[bo_idx++] = bo->handle;
   }
   assert(bo_idx == submit.bo_handle_count);
   submit.bo_handles = (uintptr_t)(void *)bo_handles;

   /* We need a binning sync if we are waiting on a semaphore (do_sem_wait) or
    * if the job comes after a pipeline barrier that involves geometry stages
    * (needs_bcl_sync).
    *
    * We need a render sync if the job doesn't need a binning sync but has
    * still been flagged for serialization. It should be noted that RCL jobs
    * don't start until the previous RCL job has finished so we don't really
    * need to add a fence for those, however, we might need to wait on a CSD or
    * TFU job, which are not automatically serialized with CL jobs.
    *
    * FIXME: for now, if we are asked to wait on any semaphores, we just wait
    * on the last job we submitted. In the future we might want to pass the
    * actual syncobj of the wait semaphores so we don't block on the last RCL
    * if we only need to wait for a previous CSD or TFU, for example, but
    * we would have to extend our kernel interface to support the case where
    * we have more than one semaphore to wait on.
    */
   const bool needs_bcl_sync = do_sem_wait || job->needs_bcl_sync;
   const bool needs_rcl_sync = job->serialize && !needs_bcl_sync;

   mtx_lock(&queue->device->mutex);
   submit.in_sync_bcl = needs_bcl_sync ? device->last_job_sync : 0;
   submit.in_sync_rcl = needs_rcl_sync ? device->last_job_sync : 0;
   submit.out_sync = device->last_job_sync;
   v3dv_clif_dump(device, job, &submit);
   int ret = v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_SUBMIT_CL, &submit);
   mtx_unlock(&queue->device->mutex);

   static bool warned = false;
   if (ret && !warned) {
      fprintf(stderr, "Draw call returned %s. Expect corruption.\n",
              strerror(errno));
      warned = true;
   }

   free(bo_handles);

   if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);

   return VK_SUCCESS;
}

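/* Submits a TFU (texture formatting unit) job, typically used by the driver
 * for copies and mipmap generation, through DRM_IOCTL_V3D_SUBMIT_TFU.
 */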
static VkResult
handle_tfu_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
               bool do_sem_wait)
{
   struct v3dv_device *device = queue->device;

   const bool needs_sync = do_sem_wait || job->serialize;

   mtx_lock(&device->mutex);
   job->tfu.in_sync = needs_sync ? device->last_job_sync : 0;
   job->tfu.out_sync = device->last_job_sync;
   int ret = v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_SUBMIT_TFU, &job->tfu);
   mtx_unlock(&device->mutex);

   if (ret != 0) {
      fprintf(stderr, "Failed to submit TFU job: %d\n", ret);
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
   }

   return VK_SUCCESS;
}

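/* Submits a compute (CSD) dispatch through DRM_IOCTL_V3D_SUBMIT_CSD,
 * following the same BO-handle gathering and last_job_sync in/out sync
 * scheme as handle_cl_job().
 */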
static VkResult
handle_csd_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
               bool do_sem_wait)
{
   struct v3dv_device *device = queue->device;

   struct drm_v3d_submit_csd *submit = &job->csd.submit;

   submit->bo_handle_count = job->bo_count;
   uint32_t *bo_handles =
      (uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit->bo_handle_count * 2));
   uint32_t bo_idx = 0;
   set_foreach(job->bos, entry) {
      struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
      bo_handles[bo_idx++] = bo->handle;
   }
   assert(bo_idx == submit->bo_handle_count);
   submit->bo_handles = (uintptr_t)(void *)bo_handles;

   const bool needs_sync = do_sem_wait || job->serialize;

   mtx_lock(&queue->device->mutex);
   submit->in_sync = needs_sync ? device->last_job_sync : 0;
   submit->out_sync = device->last_job_sync;
   int ret = v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_SUBMIT_CSD, submit);
   mtx_unlock(&queue->device->mutex);

   static bool warned = false;
   if (ret && !warned) {
      fprintf(stderr, "Compute dispatch returned %s. Expect corruption.\n",
              strerror(errno));
      warned = true;
   }

   free(bo_handles);

   if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);

   return VK_SUCCESS;
}

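/* Dispatches a single job to the handler that matches its type (GPU CL/TFU/
 * CSD or one of the CPU job types). Returns VK_NOT_READY when the job had to
 * be deferred to an event wait thread, in which case the caller must stop
 * submitting the remaining jobs of the same command buffer.
 */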
static VkResult
queue_submit_job(struct v3dv_queue *queue,
                 struct v3dv_job *job,
                 bool do_sem_wait,
                 pthread_t *wait_thread)
{
   assert(job);

   switch (job->type) {
   case V3DV_JOB_TYPE_GPU_CL:
      return handle_cl_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_GPU_TFU:
      return handle_tfu_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_GPU_CSD:
      return handle_csd_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_CPU_RESET_QUERIES:
      return handle_reset_query_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_END_QUERY:
      return handle_end_query_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
      return handle_copy_query_results_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_SET_EVENT:
      return handle_set_event_cpu_job(job, wait_thread != NULL);
   case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
      return handle_wait_events_cpu_job(job, do_sem_wait, wait_thread);
   case V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE:
      return handle_copy_buffer_to_image_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
      return handle_csd_indirect_cpu_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY:
      return handle_timestamp_query_cpu_job(job);
   default:
      unreachable("Unhandled job type");
   }
}

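/* The no-op job is a minimal 1x1 binning + render pass that is submitted
 * when a batch has no actual work (no command buffers, or empty ones) but
 * we still need a GPU submission to honor wait/signal semaphores and fences.
 */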
static void
emit_noop_bin(struct v3dv_job *job)
{
   v3dv_job_start_frame(job, 1, 1, 1, 1, V3D_INTERNAL_BPP_32, false);
   v3dv_job_emit_binning_flush(job);
}

static void
emit_noop_render(struct v3dv_job *job)
{
   struct v3dv_cl *rcl = &job->rcl;
   v3dv_cl_ensure_space_with_branch(rcl, 200 + 1 * 256 *
                                    cl_packet_length(SUPERTILE_COORDINATES));

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
      config.early_z_disable = true;
      config.image_width_pixels = 1;
      config.image_height_pixels = 1;
      config.number_of_render_targets = 1;
      config.multisample_mode_4x = false;
      config.maximum_bpp_of_all_render_targets = V3D_INTERNAL_BPP_32;
   }

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
      rt.render_target_0_internal_bpp = V3D_INTERNAL_BPP_32;
      rt.render_target_0_internal_type = V3D_INTERNAL_TYPE_8;
      rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
   }

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
      clear.z_clear_value = 1.0f;
      clear.stencil_clear_value = 0;
   };

   cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
      init.use_auto_chained_tile_lists = true;
      init.size_of_first_block_in_chained_tile_lists =
         TILE_ALLOCATION_BLOCK_SIZE_64B;
   }

   cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
      list.address = v3dv_cl_address(job->tile_alloc, 0);
   }

   cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
      config.number_of_bin_tile_lists = 1;
      config.total_frame_width_in_tiles = 1;
      config.total_frame_height_in_tiles = 1;
      config.supertile_width_in_tiles = 1;
      config.supertile_height_in_tiles = 1;
      config.total_frame_width_in_supertiles = 1;
      config.total_frame_height_in_supertiles = 1;
   }

   struct v3dv_cl *icl = &job->indirect;
   v3dv_cl_ensure_space(icl, 200, 1);
   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(icl);

   cl_emit(icl, TILE_COORDINATES_IMPLICIT, coords);

   cl_emit(icl, END_OF_LOADS, end);

   cl_emit(icl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   cl_emit(icl, STORE_TILE_BUFFER_GENERAL, store) {
      store.buffer_to_store = NONE;
   }

   cl_emit(icl, END_OF_TILE_MARKER, end);

   cl_emit(icl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(icl);
   }

   cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
      coords.column_number_in_supertiles = 0;
      coords.row_number_in_supertiles = 0;
   }

   cl_emit(rcl, END_OF_RENDERING, end);
}

static VkResult
queue_create_noop_job(struct v3dv_queue *queue)
{
   struct v3dv_device *device = queue->device;
   queue->noop_job = vk_zalloc(&device->alloc, sizeof(struct v3dv_job), 8,
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!queue->noop_job)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   v3dv_job_init(queue->noop_job, V3DV_JOB_TYPE_GPU_CL, device, NULL, -1);

   emit_noop_bin(queue->noop_job);
   emit_noop_render(queue->noop_job);

   return VK_SUCCESS;
}

static VkResult
queue_submit_noop_job(struct v3dv_queue *queue, const VkSubmitInfo *pSubmit)
{
   /* VkQueue host access is externally synchronized, so we don't need a lock
    * to lazily initialize queue->noop_job here.
    */
   if (!queue->noop_job) {
      VkResult result = queue_create_noop_job(queue);
      if (result != VK_SUCCESS)
         return result;
   }

   return queue_submit_job(queue, queue->noop_job,
                           pSubmit->waitSemaphoreCount > 0, NULL);
}

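/* Submits all jobs recorded in a command buffer, in order. Empty command
 * buffers still get a no-op job so that the semaphore waits and signals
 * attached to the submission are respected.
 */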
static VkResult
queue_submit_cmd_buffer(struct v3dv_queue *queue,
                        struct v3dv_cmd_buffer *cmd_buffer,
                        const VkSubmitInfo *pSubmit,
                        pthread_t *wait_thread)
{
   assert(cmd_buffer);
   assert(cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_EXECUTABLE);

   if (list_is_empty(&cmd_buffer->jobs))
      return queue_submit_noop_job(queue, pSubmit);

   list_for_each_entry_safe(struct v3dv_job, job,
                            &cmd_buffer->jobs, list_link) {
      VkResult result = queue_submit_job(queue, job,
                                         pSubmit->waitSemaphoreCount > 0,
                                         wait_thread);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

static void
add_wait_thread_to_list(struct v3dv_device *device,
                        pthread_t thread,
                        struct v3dv_queue_submit_wait_info **wait_info)
{
   /* If this is the first time we spawn a wait thread for this queue
    * submission, create a v3dv_queue_submit_wait_info to track this and
    * any other threads in the same submission and add it to the global list
    * in the queue.
    */
   if (*wait_info == NULL) {
      *wait_info =
         vk_zalloc(&device->alloc, sizeof(struct v3dv_queue_submit_wait_info), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      (*wait_info)->device = device;
   }

   /* And add the thread to the list of wait threads for this submission */
   const uint32_t thread_idx = (*wait_info)->wait_thread_count;
   assert(thread_idx < 16);
   (*wait_info)->wait_threads[thread_idx].thread = thread;
   (*wait_info)->wait_threads[thread_idx].finished = false;
   (*wait_info)->wait_thread_count++;
}

static void
add_signal_semaphores_to_wait_list(struct v3dv_device *device,
                                   const VkSubmitInfo *pSubmit,
                                   struct v3dv_queue_submit_wait_info *wait_info)
{
   assert(wait_info);

   if (pSubmit->signalSemaphoreCount == 0)
      return;

   /* FIXME: We put all the semaphores in a list and we signal all of them
    * together from the submit master thread when the last wait thread in the
    * submit completes. We could do better though: group the semaphores per
    * submit and signal them as soon as all wait threads for a particular
    * submit complete. Not sure if the extra work would be worth it though,
    * since we only spawn wait threads for event waits and only when the
    * event is set from the host after the queue submission.
    */

   /* Check the size of the current semaphore list */
   const uint32_t prev_count = wait_info->signal_semaphore_count;
   const uint32_t prev_alloc_size = prev_count * sizeof(VkSemaphore);
   VkSemaphore *prev_list = wait_info->signal_semaphores;

   /* Resize the list to hold the additional semaphores */
   const uint32_t extra_alloc_size =
      pSubmit->signalSemaphoreCount * sizeof(VkSemaphore);
   wait_info->signal_semaphore_count += pSubmit->signalSemaphoreCount;
   wait_info->signal_semaphores =
      vk_alloc(&device->alloc, prev_alloc_size + extra_alloc_size, 8,
               VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);

   /* Copy the old list to the new allocation and free the old list */
   if (prev_count > 0) {
      memcpy(wait_info->signal_semaphores, prev_list, prev_alloc_size);
      vk_free(&device->alloc, prev_list);
   }

   /* Add the new semaphores to the list */
   memcpy(wait_info->signal_semaphores + prev_count,
          pSubmit->pSignalSemaphores, extra_alloc_size);
}

static VkResult
queue_submit_cmd_buffer_batch(struct v3dv_queue *queue,
                              const VkSubmitInfo *pSubmit,
                              struct v3dv_queue_submit_wait_info **wait_info)
{
   VkResult result = VK_SUCCESS;
   bool has_wait_threads = false;

   /* Even if we don't have any actual work to submit we still need to wait
    * on the wait semaphores and signal the signal semaphores and fence, so
    * in this scenario we just submit a trivial no-op job so we don't have
    * to do anything special, it should not be a common case anyway.
    */
   if (pSubmit->commandBufferCount == 0) {
      result = queue_submit_noop_job(queue, pSubmit);
   } else {
      for (uint32_t i = 0; i < pSubmit->commandBufferCount; i++) {
         pthread_t wait_thread;
         struct v3dv_cmd_buffer *cmd_buffer =
            v3dv_cmd_buffer_from_handle(pSubmit->pCommandBuffers[i]);
         result = queue_submit_cmd_buffer(queue, cmd_buffer, pSubmit,
                                          &wait_thread);

         /* We get VK_NOT_READY if we had to spawn a wait thread for the
          * command buffer. In that scenario, we want to continue submitting
          * any pending command buffers in the batch, but we don't want to
          * process any signal semaphores for the batch until we know we have
          * submitted every job for every command buffer in the batch.
          */
         if (result == VK_NOT_READY) {
            result = VK_SUCCESS;
            add_wait_thread_to_list(queue->device, wait_thread, wait_info);
            has_wait_threads = true;
         }

         if (result != VK_SUCCESS)
            break;
      }
   }

   if (result != VK_SUCCESS)
      return result;

   /* If we had to spawn any wait threads in this submit we need to wait for
    * all of them to complete before we can signal any semaphores.
    */
   if (!has_wait_threads) {
      return process_semaphores_to_signal(queue->device,
                                          pSubmit->signalSemaphoreCount,
                                          pSubmit->pSignalSemaphores);
   } else {
      assert(*wait_info);
      add_signal_semaphores_to_wait_list(queue->device, pSubmit, *wait_info);
      return VK_NOT_READY;
   }
}

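/* Entry point for the per-submission master wait thread: it joins every
 * command buffer wait thread spawned for the submission, then signals the
 * accumulated semaphores and the fence, and finally removes and frees the
 * wait info.
 */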
static void *
master_wait_thread_func(void *_wait_info)
{
   struct v3dv_queue_submit_wait_info *wait_info =
      (struct v3dv_queue_submit_wait_info *) _wait_info;

   struct v3dv_queue *queue = &wait_info->device->queue;

   /* Wait for all command buffer wait threads to complete */
   for (uint32_t i = 0; i < wait_info->wait_thread_count; i++) {
      int res = pthread_join(wait_info->wait_threads[i].thread, NULL);
      if (res != 0)
         fprintf(stderr, "Wait thread failed to join.\n");
   }

   /* Signal semaphores and fences */
   VkResult result;
   result = process_semaphores_to_signal(wait_info->device,
                                         wait_info->signal_semaphore_count,
                                         wait_info->signal_semaphores);
   if (result != VK_SUCCESS)
      fprintf(stderr, "Wait thread semaphore signaling failed.\n");

   result = process_fence_to_signal(wait_info->device, wait_info->fence);
   if (result != VK_SUCCESS)
      fprintf(stderr, "Wait thread fence signaling failed.\n");

   /* Release wait_info */
   mtx_lock(&queue->mutex);
   list_del(&wait_info->list_link);
   mtx_unlock(&queue->mutex);

   vk_free(&wait_info->device->alloc, wait_info->signal_semaphores);
   vk_free(&wait_info->device->alloc, wait_info);

   return NULL;
}

static VkResult
spawn_master_wait_thread(struct v3dv_queue *queue,
                         struct v3dv_queue_submit_wait_info *wait_info)
{
   VkResult result = VK_SUCCESS;

   mtx_lock(&queue->mutex);
   if (pthread_create(&wait_info->master_wait_thread, NULL,
                      master_wait_thread_func, wait_info)) {
      result = vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);
      goto done;
   }

   list_addtail(&wait_info->list_link, &queue->submit_wait_list);

done:
   mtx_unlock(&queue->mutex);
   return result;
}

VkResult
v3dv_QueueSubmit(VkQueue _queue,
                 uint32_t submitCount,
                 const VkSubmitInfo* pSubmits,
                 VkFence fence)
{
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);

   struct v3dv_queue_submit_wait_info *wait_info = NULL;

   VkResult result = VK_SUCCESS;
   for (uint32_t i = 0; i < submitCount; i++) {
      result = queue_submit_cmd_buffer_batch(queue, &pSubmits[i], &wait_info);
      if (result != VK_SUCCESS && result != VK_NOT_READY)
         goto done;
   }

   if (!wait_info) {
      assert(result != VK_NOT_READY);
      result = process_fence_to_signal(queue->device, fence);
      goto done;
   }

   /* We spawned wait threads, so we have to spawn a master thread for this
    * queue submission that waits for all other threads to complete and then
    * signals any semaphores and fences.
    */
   assert(wait_info);
   wait_info->fence = fence;
   result = spawn_master_wait_thread(queue, wait_info);

done:
   return result;
}

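/* Semaphores and fences are both backed by DRM syncobjs on the render node.
 * The fd field caches the sync file that was last imported into the syncobj
 * when the object was signaled from a queue submission (see
 * process_semaphores_to_signal() and process_fence_to_signal() above).
 */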
VkResult
v3dv_CreateSemaphore(VkDevice _device,
                     const VkSemaphoreCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkSemaphore *pSemaphore)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO);

   struct v3dv_semaphore *sem =
      vk_alloc2(&device->alloc, pAllocator, sizeof(struct v3dv_semaphore), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (sem == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   sem->fd = -1;

   int ret = drmSyncobjCreate(device->render_fd, 0, &sem->sync);
   if (ret) {
      vk_free2(&device->alloc, pAllocator, sem);
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   *pSemaphore = v3dv_semaphore_to_handle(sem);

   return VK_SUCCESS;
}

void
v3dv_DestroySemaphore(VkDevice _device,
                      VkSemaphore semaphore,
                      const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_semaphore, sem, semaphore);

   if (sem == NULL)
      return;

   drmSyncobjDestroy(device->render_fd, sem->sync);

   if (sem->fd != -1)
      close(sem->fd);

   vk_free2(&device->alloc, pAllocator, sem);
}

VkResult
v3dv_CreateFence(VkDevice _device,
                 const VkFenceCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkFence *pFence)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);

   struct v3dv_fence *fence =
      vk_alloc2(&device->alloc, pAllocator, sizeof(struct v3dv_fence), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (fence == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   unsigned flags = 0;
   if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
      flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
   int ret = drmSyncobjCreate(device->render_fd, flags, &fence->sync);
   if (ret) {
      vk_free2(&device->alloc, pAllocator, fence);
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   fence->fd = -1;

   *pFence = v3dv_fence_to_handle(fence);

   return VK_SUCCESS;
}

void
v3dv_DestroyFence(VkDevice _device,
                  VkFence _fence,
                  const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_fence, fence, _fence);

   if (fence == NULL)
      return;

   drmSyncobjDestroy(device->render_fd, fence->sync);

   if (fence->fd != -1)
      close(fence->fd);

   vk_free2(&device->alloc, pAllocator, fence);
}

VkResult
v3dv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_fence, fence, _fence);

   int ret = drmSyncobjWait(device->render_fd, &fence->sync, 1,
                            0, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL);
   if (ret == -ETIME)
      return VK_NOT_READY;
   else if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
   return VK_SUCCESS;
}

VkResult
v3dv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   uint32_t *syncobjs = vk_alloc(&device->alloc,
                                 sizeof(*syncobjs) * fenceCount, 8,
                                 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!syncobjs)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < fenceCount; i++) {
      struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]);
      syncobjs[i] = fence->sync;
   }

   int ret = drmSyncobjReset(device->render_fd, syncobjs, fenceCount);

   vk_free(&device->alloc, syncobjs);

   if (ret)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   return VK_SUCCESS;
}

VkResult
v3dv_WaitForFences(VkDevice _device,
                   uint32_t fenceCount,
                   const VkFence *pFences,
                   VkBool32 waitAll,
                   uint64_t timeout)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   const uint64_t abs_timeout = get_absolute_timeout(timeout);

   uint32_t *syncobjs = vk_alloc(&device->alloc,
                                 sizeof(*syncobjs) * fenceCount, 8,
                                 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!syncobjs)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < fenceCount; i++) {
      struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]);
      syncobjs[i] = fence->sync;
   }

   unsigned flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   if (waitAll)
      flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;

   int ret;
   do {
      /* drmSyncobjWait() takes an absolute CLOCK_MONOTONIC deadline, so pass
       * the clamped absolute timeout computed above.
       */
      ret = drmSyncobjWait(device->render_fd, syncobjs, fenceCount,
                           abs_timeout, flags, NULL);
   } while (ret == -ETIME && gettime_ns() < abs_timeout);

   vk_free(&device->alloc, syncobjs);

   if (ret == -ETIME)
      return VK_TIMEOUT;
   else if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
   return VK_SUCCESS;
}

VkResult
v3dv_QueueBindSparse(VkQueue _queue,
                     uint32_t bindInfoCount,
                     const VkBindSparseInfo *pBindInfo,
                     VkFence fence)
{
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
   return vk_error(queue->device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
}