1 /*
2  * Copyright © 2020 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 #ifndef VK_DEVICE_H
24 #define VK_DEVICE_H
25 
26 #include "rmv/vk_rmv_common.h"
27 #include "vk_dispatch_table.h"
28 #include "vk_extensions.h"
29 #include "vk_object.h"
30 #include "vk_physical_device_features.h"
31 
32 #include "util/list.h"
33 #include "util/simple_mtx.h"
34 #include "util/u_atomic.h"
35 
36 #ifdef __cplusplus
37 extern "C" {
38 #endif
39 
40 struct vk_command_buffer_ops;
41 struct vk_sync;
42 
/** Strategy used to hand queue submissions to `vk_queue::driver_submit` */
enum vk_queue_submit_mode {
   /** Submits happen immediately
    *
    * `vkQueueSubmit()` and `vkQueueBindSparse()` call
    * `vk_queue::driver_submit` directly for all submits and the last call to
    * `vk_queue::driver_submit` will have completed by the time
    * `vkQueueSubmit()` or `vkQueueBindSparse()` return.
    */
   VK_QUEUE_SUBMIT_MODE_IMMEDIATE,

   /** Submits may be deferred until a future `vk_queue_flush()`
    *
    * Submits are added to the queue and `vk_queue_flush()` is called.
    * However, any submits with unsatisfied dependencies will be left on the
    * queue until a future `vk_queue_flush()` call.  This is used for
    * implementing emulated timeline semaphores without threading.
    */
   VK_QUEUE_SUBMIT_MODE_DEFERRED,

   /** Submits will be added to the queue and handled later by a thread
    *
    * This places additional requirements on the vk_sync types used by the
    * driver:
    *
    *    1. All `vk_sync` types which support `VK_SYNC_FEATURE_GPU_WAIT` also
    *       support `VK_SYNC_FEATURE_WAIT_PENDING` so that the threads can
    *       sort out when a given submit has all its dependencies resolved.
    *
    *    2. All binary `vk_sync` types which support `VK_SYNC_FEATURE_GPU_WAIT`
    *       also support `VK_SYNC_FEATURE_CPU_RESET` so we can reset
    *       semaphores after waiting on them.
    *
    *    3. All vk_sync types used as permanent payloads of semaphores support
    *       `vk_sync_type::move` so that it can move the pending signal into a
    *       temporary vk_sync and reset the semaphore.
    *
    * This is required for shared timeline semaphores where we need to handle
    * wait-before-signal by threading in the driver if we ever see an
    * unresolved dependency.
    */
   VK_QUEUE_SUBMIT_MODE_THREADED,

   /** Threaded but only if we need it to resolve dependencies
    *
    * This imposes all the same requirements on `vk_sync` types as
    * `VK_QUEUE_SUBMIT_MODE_THREADED`.
    */
   VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND,
};
92 
/** Base struct for VkDevice */
struct vk_device {
   struct vk_object_base base;

   /** Allocator used to create this device
    *
    * This is used as a fall-back for when a NULL pAllocator is passed into a
    * device-level create function such as vkCreateImage().
    */
   VkAllocationCallbacks alloc;

   /** Pointer to the physical device */
   struct vk_physical_device *physical;

   /** Table of enabled extensions */
   struct vk_device_extension_table enabled_extensions;

   /** Table of enabled features */
   struct vk_features enabled_features;

   /** Device-level dispatch table */
   struct vk_device_dispatch_table dispatch_table;

   /** Command dispatch table
    *
    * This is used for emulated secondary command buffer support.  To use
    * emulated (trace/replay) secondary command buffers:
    *
    *  1. Provide your "real" command buffer dispatch table here.  Because
    *     this doesn't get populated by vk_device_init(), the driver will have
    *     to add the vk_common entrypoints to this table itself.
    *
    *  2. Add vk_enqueue_unless_primary_device_entrypoint_table to your device
    *     level dispatch table.
    */
   const struct vk_device_dispatch_table *command_dispatch_table;

   /** Command buffer vtable when using the common command pool */
   const struct vk_command_buffer_ops *command_buffer_ops;

   /** Driver provided callback for capturing traces
    *
    * Triggers for this callback are:
    *    - Keyboard input (F12)
    *    - Creation of a trigger file
    *    - Reaching the trace frame
    */
   VkResult (*capture_trace)(VkQueue queue);

   /* Bookkeeping for the capture_trace triggers listed above.
    * NOTE(review): presumably current_frame is the frame counter compared
    * against the trace frame, trace_hotkey_trigger latches the hotkey
    * trigger, and trace_mtx serializes access to that state -- confirm
    * against the code that invokes capture_trace.
    */
   uint32_t current_frame;
   bool trace_hotkey_trigger;
   simple_mtx_t trace_mtx;

   /* For VK_EXT_private_data */
   uint32_t private_data_next_index;

   /* NOTE(review): presumably the list of queues created on this device --
    * confirm against the queue init/finish code.
    */
   struct list_head queues;

   /* Device-lost state.  Do not access directly; use vk_device_is_lost(),
    * vk_device_is_lost_no_report() and vk_device_set_lost().
    *
    * `lost` is read atomically by vk_device_is_lost_no_report(); the device
    * counts as lost when it is greater than zero.  `reported` is checked by
    * vk_device_is_lost() to decide whether _vk_device_report_lost() still
    * needs to be called.
    */
   struct {
      int lost;
      bool reported;
   } _lost;

   /** Checks the status of this device
    *
    * This is expected to return either VK_SUCCESS or VK_ERROR_DEVICE_LOST.
    * It is called before vk_queue::driver_submit and after every non-trivial
    * wait operation to ensure the device is still around.  This gives the
    * driver a hook to ask the kernel if its device is still valid.  If the
    * kernel says the device has been lost, it MUST call vk_device_set_lost().
    *
    * This function may be called from any thread at any time.
    */
   VkResult (*check_status)(struct vk_device *device);

   /** Creates a vk_sync that wraps a memory object
    *
    * This is always a one-shot object so it need not track any additional
    * state.  Since it's intended for synchronizing between processes using
    * implicit synchronization mechanisms, no such tracking would be valid
    * anyway.
    *
    * If `signal_memory` is set, the resulting vk_sync will be used to signal
    * the memory object from a queue via vk_queue_submit::signals.  The common
    * code guarantees that, by the time vkQueueSubmit() returns, the signal
    * operation has been submitted to the kernel via the driver's
    * vk_queue::driver_submit hook.  This means that any vkQueueSubmit() call
    * which needs implicit synchronization may block.
    *
    * If `signal_memory` is not set, it can be assumed that memory object
    * already has a signal operation pending from some other process and we
    * need only wait on it.
    */
   VkResult (*create_sync_for_memory)(struct vk_device *device,
                                      VkDeviceMemory memory,
                                      bool signal_memory,
                                      struct vk_sync **sync_out);

   /* Set by vk_device_set_drm_fd() */
   int drm_fd;

   /** An enum describing how timeline semaphores work */
   enum vk_device_timeline_mode {
      /** Timeline semaphores are not supported */
      VK_DEVICE_TIMELINE_MODE_NONE,

      /** Timeline semaphores are emulated with vk_timeline
       *
       * In this mode, timeline semaphores are emulated using vk_timeline
       * which is a collection of binary semaphores, one per time point.
       * These timeline semaphores cannot be shared because the data structure
       * exists entirely in userspace.  These timelines are virtually
       * invisible to the driver; all it sees are the binary vk_syncs, one per
       * time point.
       *
       * To handle wait-before-signal, we place all vk_queue_submits in the
       * queue's submit list in vkQueueSubmit() and call vk_device_flush() at
       * key points such as the end of vkQueueSubmit() and vkSemaphoreSignal().
       * This ensures that, as soon as a given submit's dependencies are fully
       * resolvable, it gets submitted to the driver.
       */
      VK_DEVICE_TIMELINE_MODE_EMULATED,

      /** Timeline semaphores are a kernel-assisted emulation
       *
       * In this mode, timeline semaphores are still technically an emulation
       * in the sense that they don't support wait-before-signal natively.
       * Instead, all GPU-waitable objects support a CPU wait-for-pending
       * operation which lets the userspace driver wait until a given event
       * on the (possibly shared) vk_sync is pending.  The event is "pending"
       * if a job has been submitted to the kernel (possibly from a different
       * process) which will signal it.  In vkQueueSubmit(), we use this wait
       * mode to detect waits which are not yet pending and, the first time we
       * do, spawn a thread to manage the queue.  That thread waits for each
       * submit's waits to all be pending before submitting to the driver
       * queue.
       *
       * We have to be a bit more careful about a few things in this mode.
       * In particular, we can never assume that any given wait operation is
       * pending.  For instance, when we go to export a sync file from a
       * binary semaphore, we need to first wait for it to be pending.  The
       * spec guarantees that the vast majority of these waits return almost
       * immediately, but we do need to insert them for correctness.
       */
      VK_DEVICE_TIMELINE_MODE_ASSISTED,

      /** Timeline semaphores are 100% native
       *
       * In this mode, wait-before-signal is natively supported by the
       * underlying timeline implementation.  We can submit-and-forget and
       * assume that dependencies will get resolved for us by the kernel.
       * Currently, this isn't supported by any Linux primitives.
       */
      VK_DEVICE_TIMELINE_MODE_NATIVE,
   } timeline_mode;

   /** Per-device submit mode
    *
    * This represents the device-wide submit strategy which may be different
    * from the per-queue submit mode.  See vk_queue.submit.mode for more
    * details.
    */
   enum vk_queue_submit_mode submit_mode;

   /* NOTE(review): presumably per-device state for memory tracing; the type
    * comes from rmv/vk_rmv_common.h -- confirm there.
    */
   struct vk_memory_trace_data memory_trace_data;

#ifdef ANDROID
   /* Only present in Android builds.
    * NOTE(review): presumably swapchain_private_mtx guards the
    * swapchain_private table -- confirm against its users.
    */
   mtx_t swapchain_private_mtx;
   struct hash_table *swapchain_private;
#endif
};
264 
/* Handle-cast boilerplate tying the VkDevice handle type to struct vk_device
 * through its `base` member.  What this expands to is decided by the
 * VK_DEFINE_HANDLE_CASTS macro, which is defined outside this header
 * (presumably in vk_object.h -- confirm there).
 */
VK_DEFINE_HANDLE_CASTS(vk_device, base, VkDevice,
                       VK_OBJECT_TYPE_DEVICE);
267 
/** Initialize a vk_device
 *
 * Along with initializing the data structures in `vk_device`, this function
 * checks that every extension specified by
 * `VkDeviceCreateInfo::ppEnabledExtensionNames` is actually supported by
 * the physical device and returns `VK_ERROR_EXTENSION_NOT_PRESENT` if an
 * unsupported extension is requested.  It also checks all the feature structs
 * chained into the `pCreateInfo->pNext` chain against the features returned
 * by `vkGetPhysicalDeviceFeatures2` and returns
 * `VK_ERROR_FEATURE_NOT_PRESENT` if an unsupported feature is requested.
 *
 * The declaration is tagged MUST_CHECK: callers should not ignore the
 * returned VkResult.
 *
 * @param[out] device               The device to initialize
 * @param[in]  physical_device      The physical device
 * @param[in]  dispatch_table       Device-level dispatch table
 * @param[in]  pCreateInfo          VkDeviceCreateInfo pointer passed to
 *                                  `vkCreateDevice()`
 * @param[in]  alloc                Allocation callbacks passed to
 *                                  `vkCreateDevice()`
 */
VkResult MUST_CHECK
vk_device_init(struct vk_device *device,
               struct vk_physical_device *physical_device,
               const struct vk_device_dispatch_table *dispatch_table,
               const VkDeviceCreateInfo *pCreateInfo,
               const VkAllocationCallbacks *alloc);
293 
294 static inline void
vk_device_set_drm_fd(struct vk_device * device,int drm_fd)295 vk_device_set_drm_fd(struct vk_device *device, int drm_fd)
296 {
297    device->drm_fd = drm_fd;
298 }
299 
/** Tears down a vk_device
 *
 * @param[in,out] device            The device to tear down
 */
void
vk_device_finish(struct vk_device *device);

/** Enables threaded submit on this device
 *
 * This doesn't ensure that threaded submit will be used.  It just disables
 * the deferred submit option for emulated timeline semaphores and forces them
 * to always use the threaded path.  It also does some checks that the vk_sync
 * types used by the driver work for threaded submit.
 *
 * This must be called before any queues are created.
 *
 * @param[in,out] device            The device to switch to threaded submit
 */
void vk_device_enable_threaded_submit(struct vk_device *device);
317 
318 static inline bool
vk_device_supports_threaded_submit(const struct vk_device * device)319 vk_device_supports_threaded_submit(const struct vk_device *device)
320 {
321    return device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED ||
322           device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND;
323 }
324 
/* Called at key points such as the end of vkQueueSubmit() and
 * vkSemaphoreSignal() when timeline semaphores are emulated, so that queued
 * submits get handed to the driver as soon as their dependencies are
 * resolvable.  See VK_DEVICE_TIMELINE_MODE_EMULATED.
 */
VkResult vk_device_flush(struct vk_device *device);

/* Implementation behind the vk_device_set_lost() macro; call the macro
 * instead.  `file` and `line` identify the call site and `msg` is a
 * printf-style format string (see PRINTFLIKE(4, 5)) followed by its
 * arguments.
 * NOTE(review): presumably always returns VK_ERROR_DEVICE_LOST so callers
 * can `return vk_device_set_lost(...)` -- confirm in the implementation.
 */
VkResult PRINTFLIKE(4, 5)
_vk_device_set_lost(struct vk_device *device,
                    const char *file, int line,
                    const char *msg, ...);

/* Marks `device` as lost, forwarding the caller's __FILE__ and __LINE__
 * along with a printf-style message.  Drivers MUST call this when their
 * check_status hook learns that the device has been lost.
 */
#define vk_device_set_lost(device, ...) \
   _vk_device_set_lost(device, __FILE__, __LINE__, __VA_ARGS__)

/* Invoked by vk_device_is_lost() when the device is lost but
 * `_lost.reported` is not yet set.
 */
void _vk_device_report_lost(struct vk_device *device);
336 
337 static inline bool
vk_device_is_lost_no_report(struct vk_device * device)338 vk_device_is_lost_no_report(struct vk_device *device)
339 {
340    return p_atomic_read(&device->_lost.lost) > 0;
341 }
342 
343 static inline bool
vk_device_is_lost(struct vk_device * device)344 vk_device_is_lost(struct vk_device *device)
345 {
346    int lost = vk_device_is_lost_no_report(device);
347    if (unlikely(lost && !device->_lost.reported))
348       _vk_device_report_lost(device);
349    return lost;
350 }
351 
352 static inline VkResult
vk_device_check_status(struct vk_device * device)353 vk_device_check_status(struct vk_device *device)
354 {
355    if (vk_device_is_lost(device))
356       return VK_ERROR_DEVICE_LOST;
357 
358    if (!device->check_status)
359       return VK_SUCCESS;
360 
361    VkResult result = device->check_status(device);
362 
363    assert(result == VK_SUCCESS || result == VK_ERROR_DEVICE_LOST);
364    if (result == VK_ERROR_DEVICE_LOST)
365       assert(vk_device_is_lost_no_report(device));
366 
367    return result;
368 }
369 
370 #ifndef _WIN32
371 
/* Reads the clock identified by `clock_id` and returns it as a single 64-bit
 * value.  Only declared on non-Windows builds.
 * NOTE(review): presumably a clock_gettime() wrapper returning nanoseconds --
 * confirm in the implementation.
 */
uint64_t
vk_clock_gettime(clockid_t clock_id);
374 
/** Computes the maximum deviation for a set of clock samples
 *
 * @param[in] begin                 Interval-clock reading taken before the
 *                                  other clocks were sampled
 * @param[in] end                   Interval-clock reading taken after the
 *                                  other clocks were sampled
 * @param[in] max_clock_period      Longest period of any sampled clock, in
 *                                  the same unit as `begin` and `end`
 *
 * @return `(end - begin + 1) + max_clock_period`, computed in unsigned
 *         64-bit arithmetic
 */
static inline uint64_t
vk_time_max_deviation(uint64_t begin, uint64_t end, uint64_t max_clock_period)
{
   /*
    * The maximum deviation is the sum of the interval over which we
    * perform the sampling and the maximum period of any sampled
    * clock. That's because the maximum skew between any two sampled
    * clock edges is when the sampled clock with the largest period is
    * sampled at the end of that period but right at the beginning of the
    * sampling interval and some other clock is sampled right at the
    * beginning of its sampling period and right at the end of the
    * sampling interval. Let's assume the GPU has the longest clock
    * period and that the application is sampling GPU and monotonic:
    *
    *                               s                 e
    *                   w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
    *  Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *                               g
    *            0         1         2         3
    *  GPU       -----_____-----_____-----_____-----_____
    *
    *                                                m
    *                                      x y z 0 1 2 3 4 5 6 7 8 9 a b c
    *  Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *  Interval                     <----------------->
    *  Deviation           <-------------------------->
    *
    *          s  = read(raw)       2
    *          g  = read(GPU)       1
    *          m  = read(monotonic) 2
    *          e  = read(raw)       b
    *
    * We round the sample interval up by one tick to cover sampling error
    * in the interval clock
    */
   const uint64_t sample_interval = end - begin + 1;

   return sample_interval + max_clock_period;
}
417 
418 #endif //!_WIN32
419 
/* Looks up the entrypoint called `name` for `device`.
 * NOTE(review): presumably returns NULL when the name is unknown or not
 * enabled on this device -- confirm in the implementation.
 */
PFN_vkVoidFunction
vk_device_get_proc_addr(const struct vk_device *device,
                        const char *name);
423 
/* Helpers relating an extension feature struct (`ext`) to the Vulkan 1.1,
 * 1.2 and 1.3 core feature structs.
 * NOTE(review): presumably each one fills `ext` from `core` when
 * `ext->sType` names a feature struct promoted into that core version and
 * returns true if it handled the struct -- confirm in the implementation.
 */
bool vk_get_physical_device_core_1_1_feature_ext(struct VkBaseOutStructure *ext,
                                                 const VkPhysicalDeviceVulkan11Features *core);
bool vk_get_physical_device_core_1_2_feature_ext(struct VkBaseOutStructure *ext,
                                                 const VkPhysicalDeviceVulkan12Features *core);
bool vk_get_physical_device_core_1_3_feature_ext(struct VkBaseOutStructure *ext,
                                                 const VkPhysicalDeviceVulkan13Features *core);

/* Same as the feature helpers above, but for property structs.
 * NOTE(review): same presumed contract -- confirm in the implementation.
 */
bool vk_get_physical_device_core_1_1_property_ext(struct VkBaseOutStructure *ext,
                                                     const VkPhysicalDeviceVulkan11Properties *core);
bool vk_get_physical_device_core_1_2_property_ext(struct VkBaseOutStructure *ext,
                                                     const VkPhysicalDeviceVulkan12Properties *core);
bool vk_get_physical_device_core_1_3_property_ext(struct VkBaseOutStructure *ext,
                                                     const VkPhysicalDeviceVulkan13Properties *core);
437 
438 #ifdef __cplusplus
439 }
440 #endif
441 
442 #endif /* VK_DEVICE_H */
443