1 /*
2  * Copyright © 2021 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "vk_sync_timeline.h"
25 
26 #include <inttypes.h>
27 
28 #include "util/os_time.h"
29 #include "util/timespec.h"
30 
31 #include "vk_alloc.h"
32 #include "vk_device.h"
33 #include "vk_log.h"
34 
35 static struct vk_sync_timeline *
to_vk_sync_timeline(struct vk_sync * sync)36 to_vk_sync_timeline(struct vk_sync *sync)
37 {
38    assert(sync->type->init == vk_sync_timeline_init);
39 
40    return container_of(sync, struct vk_sync_timeline, sync);
41 }
42 
43 static void
vk_sync_timeline_type_validate(const struct vk_sync_timeline_type * ttype)44 vk_sync_timeline_type_validate(const struct vk_sync_timeline_type *ttype)
45 {
46    ASSERTED const enum vk_sync_features req_features =
47       VK_SYNC_FEATURE_BINARY |
48       VK_SYNC_FEATURE_GPU_WAIT |
49       VK_SYNC_FEATURE_GPU_MULTI_WAIT |
50       VK_SYNC_FEATURE_CPU_WAIT |
51       VK_SYNC_FEATURE_CPU_RESET;
52 
53    assert(!(req_features & ~ttype->point_sync_type->features));
54 }
55 
56 VkResult
vk_sync_timeline_init(struct vk_device * device,struct vk_sync * sync,uint64_t initial_value)57 vk_sync_timeline_init(struct vk_device *device,
58                       struct vk_sync *sync,
59                       uint64_t initial_value)
60 {
61    struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync);
62    int ret;
63 
64    ASSERTED const struct vk_sync_timeline_type *ttype =
65       container_of(timeline->sync.type, struct vk_sync_timeline_type, sync);
66    vk_sync_timeline_type_validate(ttype);
67 
68    ret = mtx_init(&timeline->mutex, mtx_plain);
69    if (ret != thrd_success)
70       return vk_errorf(device, VK_ERROR_UNKNOWN, "mtx_init failed");
71 
72    ret = cnd_init(&timeline->cond);
73    if (ret != thrd_success) {
74       mtx_destroy(&timeline->mutex);
75       return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_init failed");
76    }
77 
78    timeline->highest_past =
79       timeline->highest_pending = initial_value;
80    list_inithead(&timeline->pending_points);
81    list_inithead(&timeline->free_points);
82 
83    return VK_SUCCESS;
84 }
85 
86 static void
vk_sync_timeline_finish(struct vk_device * device,struct vk_sync * sync)87 vk_sync_timeline_finish(struct vk_device *device,
88                         struct vk_sync *sync)
89 {
90    struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync);
91 
92    list_for_each_entry_safe(struct vk_sync_timeline_point, point,
93                             &timeline->free_points, link) {
94       list_del(&point->link);
95       vk_sync_finish(device, &point->sync);
96       vk_free(&device->alloc, point);
97    }
98    list_for_each_entry_safe(struct vk_sync_timeline_point, point,
99                             &timeline->pending_points, link) {
100       list_del(&point->link);
101       vk_sync_finish(device, &point->sync);
102       vk_free(&device->alloc, point);
103    }
104 
105    cnd_destroy(&timeline->cond);
106    mtx_destroy(&timeline->mutex);
107 }
108 
109 static struct vk_sync_timeline_point *
vk_sync_timeline_first_point(struct vk_sync_timeline * timeline)110 vk_sync_timeline_first_point(struct vk_sync_timeline *timeline)
111 {
112    struct vk_sync_timeline_point *point =
113       list_first_entry(&timeline->pending_points,
114                        struct vk_sync_timeline_point, link);
115 
116    assert(point->value <= timeline->highest_pending);
117    assert(point->value > timeline->highest_past);
118 
119    return point;
120 }
121 
122 static VkResult
123 vk_sync_timeline_gc_locked(struct vk_device *device,
124                            struct vk_sync_timeline *timeline,
125                            bool drain);
126 
127 static VkResult
vk_sync_timeline_alloc_point_locked(struct vk_device * device,struct vk_sync_timeline * timeline,uint64_t value,struct vk_sync_timeline_point ** point_out)128 vk_sync_timeline_alloc_point_locked(struct vk_device *device,
129                                     struct vk_sync_timeline *timeline,
130                                     uint64_t value,
131                                     struct vk_sync_timeline_point **point_out)
132 {
133    struct vk_sync_timeline_point *point;
134    VkResult result;
135 
136    result = vk_sync_timeline_gc_locked(device, timeline, false);
137    if (unlikely(result != VK_SUCCESS))
138       return result;
139 
140    if (list_is_empty(&timeline->free_points)) {
141       const struct vk_sync_timeline_type *ttype =
142          container_of(timeline->sync.type, struct vk_sync_timeline_type, sync);
143       const struct vk_sync_type *point_sync_type = ttype->point_sync_type;
144 
145       size_t size = offsetof(struct vk_sync_timeline_point, sync) +
146                     point_sync_type->size;
147 
148       point = vk_zalloc(&device->alloc, size, 8,
149                         VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
150       if (!point)
151          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
152 
153       point->timeline = timeline;
154 
155       result = vk_sync_init(device, &point->sync, point_sync_type,
156                             0 /* flags */, 0 /* initial_value */);
157       if (unlikely(result != VK_SUCCESS)) {
158          vk_free(&device->alloc, point);
159          return result;
160       }
161    } else {
162       point = list_first_entry(&timeline->free_points,
163                                struct vk_sync_timeline_point, link);
164 
165       if (point->sync.type->reset) {
166          result = vk_sync_reset(device, &point->sync);
167          if (unlikely(result != VK_SUCCESS))
168             return result;
169       }
170 
171       list_del(&point->link);
172    }
173 
174    point->value = value;
175    *point_out = point;
176 
177    return VK_SUCCESS;
178 }
179 
180 VkResult
vk_sync_timeline_alloc_point(struct vk_device * device,struct vk_sync_timeline * timeline,uint64_t value,struct vk_sync_timeline_point ** point_out)181 vk_sync_timeline_alloc_point(struct vk_device *device,
182                              struct vk_sync_timeline *timeline,
183                              uint64_t value,
184                              struct vk_sync_timeline_point **point_out)
185 {
186    VkResult result;
187 
188    mtx_lock(&timeline->mutex);
189    result = vk_sync_timeline_alloc_point_locked(device, timeline, value, point_out);
190    mtx_unlock(&timeline->mutex);
191 
192    return result;
193 }
194 
195 static void
vk_sync_timeline_point_free_locked(struct vk_sync_timeline * timeline,struct vk_sync_timeline_point * point)196 vk_sync_timeline_point_free_locked(struct vk_sync_timeline *timeline,
197                                    struct vk_sync_timeline_point *point)
198 {
199    assert(point->refcount == 0 && !point->pending);
200    list_add(&point->link, &timeline->free_points);
201 }
202 
203 void
vk_sync_timeline_point_free(struct vk_device * device,struct vk_sync_timeline_point * point)204 vk_sync_timeline_point_free(struct vk_device *device,
205                             struct vk_sync_timeline_point *point)
206 {
207    struct vk_sync_timeline *timeline = point->timeline;
208 
209    mtx_lock(&timeline->mutex);
210    vk_sync_timeline_point_free_locked(timeline, point);
211    mtx_unlock(&timeline->mutex);
212 }
213 
214 static void
vk_sync_timeline_point_ref(struct vk_sync_timeline_point * point)215 vk_sync_timeline_point_ref(struct vk_sync_timeline_point *point)
216 {
217    point->refcount++;
218 }
219 
220 static void
vk_sync_timeline_point_unref(struct vk_sync_timeline * timeline,struct vk_sync_timeline_point * point)221 vk_sync_timeline_point_unref(struct vk_sync_timeline *timeline,
222                              struct vk_sync_timeline_point *point)
223 {
224    assert(point->refcount > 0);
225    point->refcount--;
226    if (point->refcount == 0 && !point->pending)
227       vk_sync_timeline_point_free_locked(timeline, point);
228 }
229 
230 static void
vk_sync_timeline_point_complete(struct vk_sync_timeline * timeline,struct vk_sync_timeline_point * point)231 vk_sync_timeline_point_complete(struct vk_sync_timeline *timeline,
232                                 struct vk_sync_timeline_point *point)
233 {
234    if (!point->pending)
235       return;
236 
237    assert(timeline->highest_past < point->value);
238    timeline->highest_past = point->value;
239 
240    point->pending = false;
241    list_del(&point->link);
242 
243    if (point->refcount == 0)
244       vk_sync_timeline_point_free_locked(timeline, point);
245 }
246 
247 static VkResult
vk_sync_timeline_gc_locked(struct vk_device * device,struct vk_sync_timeline * timeline,bool drain)248 vk_sync_timeline_gc_locked(struct vk_device *device,
249                            struct vk_sync_timeline *timeline,
250                            bool drain)
251 {
252    list_for_each_entry_safe(struct vk_sync_timeline_point, point,
253                             &timeline->pending_points, link) {
254       /* timeline->higest_pending is only incremented once submission has
255        * happened. If this point has a greater serial, it means the point
256        * hasn't been submitted yet.
257        */
258       if (point->value > timeline->highest_pending)
259          return VK_SUCCESS;
260 
261       /* If someone is waiting on this time point, consider it busy and don't
262        * try to recycle it. There's a slim possibility that it's no longer
263        * busy by the time we look at it but we would be recycling it out from
264        * under a waiter and that can lead to weird races.
265        *
266        * We walk the list in-order so if this time point is still busy so is
267        * every following time point
268        */
269       assert(point->refcount >= 0);
270       if (point->refcount > 0 && !drain)
271          return VK_SUCCESS;
272 
273       /* Garbage collect any signaled point. */
274       VkResult result = vk_sync_wait(device, &point->sync, 0,
275                                      VK_SYNC_WAIT_COMPLETE,
276                                      0 /* abs_timeout_ns */);
277       if (result == VK_TIMEOUT) {
278          /* We walk the list in-order so if this time point is still busy so
279           * is every following time point
280           */
281          return VK_SUCCESS;
282       } else if (result != VK_SUCCESS) {
283          return result;
284       }
285 
286       vk_sync_timeline_point_complete(timeline, point);
287    }
288 
289    return VK_SUCCESS;
290 }
291 
292 VkResult
vk_sync_timeline_point_install(struct vk_device * device,struct vk_sync_timeline_point * point)293 vk_sync_timeline_point_install(struct vk_device *device,
294                                struct vk_sync_timeline_point *point)
295 {
296    struct vk_sync_timeline *timeline = point->timeline;
297 
298    mtx_lock(&timeline->mutex);
299 
300    assert(point->value > timeline->highest_pending);
301    timeline->highest_pending = point->value;
302 
303    assert(point->refcount == 0);
304    point->pending = true;
305    list_addtail(&point->link, &timeline->pending_points);
306 
307    int ret = cnd_broadcast(&timeline->cond);
308 
309    mtx_unlock(&timeline->mutex);
310 
311    if (ret == thrd_error)
312       return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_broadcast failed");
313 
314    return VK_SUCCESS;
315 }
316 
317 static VkResult
vk_sync_timeline_get_point_locked(struct vk_device * device,struct vk_sync_timeline * timeline,uint64_t wait_value,struct vk_sync_timeline_point ** point_out)318 vk_sync_timeline_get_point_locked(struct vk_device *device,
319                                   struct vk_sync_timeline *timeline,
320                                   uint64_t wait_value,
321                                   struct vk_sync_timeline_point **point_out)
322 {
323    if (timeline->highest_past >= wait_value) {
324       /* Nothing to wait on */
325       *point_out = NULL;
326       return VK_SUCCESS;
327    }
328 
329    list_for_each_entry(struct vk_sync_timeline_point, point,
330                        &timeline->pending_points, link) {
331       if (point->value >= wait_value) {
332          vk_sync_timeline_point_ref(point);
333          *point_out = point;
334          return VK_SUCCESS;
335       }
336    }
337 
338    return VK_NOT_READY;
339 }
340 
341 VkResult
vk_sync_timeline_get_point(struct vk_device * device,struct vk_sync_timeline * timeline,uint64_t wait_value,struct vk_sync_timeline_point ** point_out)342 vk_sync_timeline_get_point(struct vk_device *device,
343                            struct vk_sync_timeline *timeline,
344                            uint64_t wait_value,
345                            struct vk_sync_timeline_point **point_out)
346 {
347    mtx_lock(&timeline->mutex);
348    VkResult result = vk_sync_timeline_get_point_locked(device, timeline,
349                                                   wait_value, point_out);
350    mtx_unlock(&timeline->mutex);
351 
352    return result;
353 }
354 
355 void
vk_sync_timeline_point_release(struct vk_device * device,struct vk_sync_timeline_point * point)356 vk_sync_timeline_point_release(struct vk_device *device,
357                                struct vk_sync_timeline_point *point)
358 {
359    struct vk_sync_timeline *timeline = point->timeline;
360 
361    mtx_lock(&timeline->mutex);
362    vk_sync_timeline_point_unref(timeline, point);
363    mtx_unlock(&timeline->mutex);
364 }
365 
366 static VkResult
vk_sync_timeline_signal_locked(struct vk_device * device,struct vk_sync_timeline * timeline,uint64_t value)367 vk_sync_timeline_signal_locked(struct vk_device *device,
368                                struct vk_sync_timeline *timeline,
369                                uint64_t value)
370 {
371    VkResult result = vk_sync_timeline_gc_locked(device, timeline, true);
372    if (unlikely(result != VK_SUCCESS))
373       return result;
374 
375    if (unlikely(value <= timeline->highest_past)) {
376       return vk_device_set_lost(device, "Timeline values must only ever "
377                                         "strictly increase.");
378    }
379 
380    assert(list_is_empty(&timeline->pending_points));
381    assert(timeline->highest_pending == timeline->highest_past);
382    timeline->highest_pending = timeline->highest_past = value;
383 
384    int ret = cnd_broadcast(&timeline->cond);
385    if (ret == thrd_error)
386       return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_broadcast failed");
387 
388    return VK_SUCCESS;
389 }
390 
391 static VkResult
vk_sync_timeline_signal(struct vk_device * device,struct vk_sync * sync,uint64_t value)392 vk_sync_timeline_signal(struct vk_device *device,
393                         struct vk_sync *sync,
394                         uint64_t value)
395 {
396    struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync);
397 
398    mtx_lock(&timeline->mutex);
399    VkResult result = vk_sync_timeline_signal_locked(device, timeline, value);
400    mtx_unlock(&timeline->mutex);
401 
402    return result;
403 }
404 
405 static VkResult
vk_sync_timeline_get_value(struct vk_device * device,struct vk_sync * sync,uint64_t * value)406 vk_sync_timeline_get_value(struct vk_device *device,
407                            struct vk_sync *sync,
408                            uint64_t *value)
409 {
410    struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync);
411 
412    mtx_lock(&timeline->mutex);
413    VkResult result = vk_sync_timeline_gc_locked(device, timeline, true);
414    mtx_unlock(&timeline->mutex);
415 
416    if (result != VK_SUCCESS)
417       return result;
418 
419    *value = timeline->highest_past;
420 
421    return VK_SUCCESS;
422 }
423 
424 static VkResult
vk_sync_timeline_wait_locked(struct vk_device * device,struct vk_sync_timeline * timeline,uint64_t wait_value,enum vk_sync_wait_flags wait_flags,uint64_t abs_timeout_ns)425 vk_sync_timeline_wait_locked(struct vk_device *device,
426                              struct vk_sync_timeline *timeline,
427                              uint64_t wait_value,
428                              enum vk_sync_wait_flags wait_flags,
429                              uint64_t abs_timeout_ns)
430 {
431    /* Wait on the queue_submit condition variable until the timeline has a
432     * time point pending that's at least as high as wait_value.
433     */
434    uint64_t now_ns = os_time_get_nano();
435    while (timeline->highest_pending < wait_value) {
436       if (now_ns >= abs_timeout_ns)
437          return VK_TIMEOUT;
438 
439       int ret;
440       if (abs_timeout_ns >= INT64_MAX) {
441          /* Common infinite wait case */
442          ret = cnd_wait(&timeline->cond, &timeline->mutex);
443       } else {
444          /* This is really annoying.  The C11 threads API uses CLOCK_REALTIME
445           * while all our absolute timeouts are in CLOCK_MONOTONIC.  Best
446           * thing we can do is to convert and hope the system admin doesn't
447           * change the time out from under us.
448           */
449          uint64_t rel_timeout_ns = abs_timeout_ns - now_ns;
450 
451          struct timespec now_ts, abs_timeout_ts;
452          timespec_get(&now_ts, TIME_UTC);
453          if (timespec_add_nsec(&abs_timeout_ts, &now_ts, rel_timeout_ns)) {
454             /* Overflowed; may as well be infinite */
455             ret = cnd_wait(&timeline->cond, &timeline->mutex);
456          } else {
457             ret = cnd_timedwait(&timeline->cond, &timeline->mutex,
458                                 &abs_timeout_ts);
459          }
460       }
461       if (ret == thrd_error)
462          return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_timedwait failed");
463 
464       /* We don't trust the timeout condition on cnd_timedwait() because of
465        * the potential clock issues caused by using CLOCK_REALTIME.  Instead,
466        * update now_ns, go back to the top of the loop, and re-check.
467        */
468       now_ns = os_time_get_nano();
469    }
470 
471    if (wait_flags & VK_SYNC_WAIT_PENDING)
472       return VK_SUCCESS;
473 
474    VkResult result = vk_sync_timeline_gc_locked(device, timeline, false);
475    if (result != VK_SUCCESS)
476       return result;
477 
478    while (timeline->highest_past < wait_value) {
479       struct vk_sync_timeline_point *point = vk_sync_timeline_first_point(timeline);
480 
481       /* Drop the lock while we wait. */
482       vk_sync_timeline_point_ref(point);
483       mtx_unlock(&timeline->mutex);
484 
485       result = vk_sync_wait(device, &point->sync, 0,
486                             VK_SYNC_WAIT_COMPLETE,
487                             abs_timeout_ns);
488 
489       /* Pick the mutex back up */
490       mtx_lock(&timeline->mutex);
491       vk_sync_timeline_point_unref(timeline, point);
492 
493       /* This covers both VK_TIMEOUT and VK_ERROR_DEVICE_LOST */
494       if (result != VK_SUCCESS)
495          return result;
496 
497       vk_sync_timeline_point_complete(timeline, point);
498    }
499 
500    return VK_SUCCESS;
501 }
502 
503 static VkResult
vk_sync_timeline_wait(struct vk_device * device,struct vk_sync * sync,uint64_t wait_value,enum vk_sync_wait_flags wait_flags,uint64_t abs_timeout_ns)504 vk_sync_timeline_wait(struct vk_device *device,
505                       struct vk_sync *sync,
506                       uint64_t wait_value,
507                       enum vk_sync_wait_flags wait_flags,
508                       uint64_t abs_timeout_ns)
509 {
510    struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync);
511 
512    mtx_lock(&timeline->mutex);
513    VkResult result = vk_sync_timeline_wait_locked(device, timeline,
514                                              wait_value, wait_flags,
515                                              abs_timeout_ns);
516    mtx_unlock(&timeline->mutex);
517 
518    return result;
519 }
520 
521 struct vk_sync_timeline_type
vk_sync_timeline_get_type(const struct vk_sync_type * point_sync_type)522 vk_sync_timeline_get_type(const struct vk_sync_type *point_sync_type)
523 {
524    return (struct vk_sync_timeline_type) {
525       .sync = {
526          .size = sizeof(struct vk_sync_timeline),
527          .features = VK_SYNC_FEATURE_TIMELINE |
528                      VK_SYNC_FEATURE_GPU_WAIT |
529                      VK_SYNC_FEATURE_CPU_WAIT |
530                      VK_SYNC_FEATURE_CPU_SIGNAL |
531                      VK_SYNC_FEATURE_WAIT_ANY |
532                      VK_SYNC_FEATURE_WAIT_PENDING,
533          .init = vk_sync_timeline_init,
534          .finish = vk_sync_timeline_finish,
535          .signal = vk_sync_timeline_signal,
536          .get_value = vk_sync_timeline_get_value,
537          .wait = vk_sync_timeline_wait,
538       },
539       .point_sync_type = point_sync_type,
540    };
541 }
542