1 /*
2  * Copyright © 2021 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "vk_alloc.h"
25 #include "vk_command_buffer.h"
26 #include "vk_common_entrypoints.h"
27 #include "vk_device.h"
28 #include "vk_queue.h"
29 #include "vk_util.h"
30 #include "../wsi/wsi_common.h"
31 
32 VKAPI_ATTR void VKAPI_CALL
vk_common_CmdWriteTimestamp(VkCommandBuffer commandBuffer,VkPipelineStageFlagBits pipelineStage,VkQueryPool queryPool,uint32_t query)33 vk_common_CmdWriteTimestamp(
34    VkCommandBuffer                             commandBuffer,
35    VkPipelineStageFlagBits                     pipelineStage,
36    VkQueryPool                                 queryPool,
37    uint32_t                                    query)
38 {
39    VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
40    struct vk_device *device = cmd_buffer->base.device;
41 
42    device->dispatch_table.CmdWriteTimestamp2KHR(commandBuffer,
43                                                 (VkPipelineStageFlags2) pipelineStage,
44                                                 queryPool,
45                                                 query);
46 }
47 
48 static VkMemoryBarrier2
upgrade_memory_barrier(const VkMemoryBarrier * barrier,VkPipelineStageFlags2 src_stage_mask2,VkPipelineStageFlags2 dst_stage_mask2)49 upgrade_memory_barrier(const VkMemoryBarrier *barrier,
50                        VkPipelineStageFlags2 src_stage_mask2,
51                        VkPipelineStageFlags2 dst_stage_mask2)
52 {
53    return (VkMemoryBarrier2) {
54       .sType         = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
55       .pNext         = barrier->pNext,
56       .srcStageMask  = src_stage_mask2,
57       .srcAccessMask = (VkAccessFlags2) barrier->srcAccessMask,
58       .dstStageMask  = dst_stage_mask2,
59       .dstAccessMask = (VkAccessFlags2) barrier->dstAccessMask,
60    };
61 }
62 
63 static VkBufferMemoryBarrier2
upgrade_buffer_memory_barrier(const VkBufferMemoryBarrier * barrier,VkPipelineStageFlags2 src_stage_mask2,VkPipelineStageFlags2 dst_stage_mask2)64 upgrade_buffer_memory_barrier(const VkBufferMemoryBarrier *barrier,
65                               VkPipelineStageFlags2 src_stage_mask2,
66                               VkPipelineStageFlags2 dst_stage_mask2)
67 {
68    return (VkBufferMemoryBarrier2) {
69       .sType                = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
70       .pNext                = barrier->pNext,
71       .srcStageMask         = src_stage_mask2,
72       .srcAccessMask        = (VkAccessFlags2) barrier->srcAccessMask,
73       .dstStageMask         = dst_stage_mask2,
74       .dstAccessMask        = (VkAccessFlags2) barrier->dstAccessMask,
75       .srcQueueFamilyIndex  = barrier->srcQueueFamilyIndex,
76       .dstQueueFamilyIndex  = barrier->dstQueueFamilyIndex,
77       .buffer               = barrier->buffer,
78       .offset               = barrier->offset,
79       .size                 = barrier->size,
80    };
81 }
82 
83 static VkImageMemoryBarrier2
upgrade_image_memory_barrier(const VkImageMemoryBarrier * barrier,VkPipelineStageFlags2 src_stage_mask2,VkPipelineStageFlags2 dst_stage_mask2)84 upgrade_image_memory_barrier(const VkImageMemoryBarrier *barrier,
85                              VkPipelineStageFlags2 src_stage_mask2,
86                              VkPipelineStageFlags2 dst_stage_mask2)
87 {
88    return (VkImageMemoryBarrier2) {
89       .sType                = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
90       .pNext                = barrier->pNext,
91       .srcStageMask         = src_stage_mask2,
92       .srcAccessMask        = (VkAccessFlags2) barrier->srcAccessMask,
93       .dstStageMask         = dst_stage_mask2,
94       .dstAccessMask        = (VkAccessFlags2) barrier->dstAccessMask,
95       .oldLayout            = barrier->oldLayout,
96       .newLayout            = barrier->newLayout,
97       .srcQueueFamilyIndex  = barrier->srcQueueFamilyIndex,
98       .dstQueueFamilyIndex  = barrier->dstQueueFamilyIndex,
99       .image                = barrier->image,
100       .subresourceRange     = barrier->subresourceRange,
101    };
102 }
103 
104 VKAPI_ATTR void VKAPI_CALL
vk_common_CmdPipelineBarrier(VkCommandBuffer commandBuffer,VkPipelineStageFlags srcStageMask,VkPipelineStageFlags dstStageMask,VkDependencyFlags dependencyFlags,uint32_t memoryBarrierCount,const VkMemoryBarrier * pMemoryBarriers,uint32_t bufferMemoryBarrierCount,const VkBufferMemoryBarrier * pBufferMemoryBarriers,uint32_t imageMemoryBarrierCount,const VkImageMemoryBarrier * pImageMemoryBarriers)105 vk_common_CmdPipelineBarrier(
106     VkCommandBuffer                             commandBuffer,
107     VkPipelineStageFlags                        srcStageMask,
108     VkPipelineStageFlags                        dstStageMask,
109     VkDependencyFlags                           dependencyFlags,
110     uint32_t                                    memoryBarrierCount,
111     const VkMemoryBarrier*                      pMemoryBarriers,
112     uint32_t                                    bufferMemoryBarrierCount,
113     const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
114     uint32_t                                    imageMemoryBarrierCount,
115     const VkImageMemoryBarrier*                 pImageMemoryBarriers)
116 {
117    VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
118    struct vk_device *device = cmd_buffer->base.device;
119 
120    STACK_ARRAY(VkMemoryBarrier2, memory_barriers, memoryBarrierCount);
121    STACK_ARRAY(VkBufferMemoryBarrier2, buffer_barriers, bufferMemoryBarrierCount);
122    STACK_ARRAY(VkImageMemoryBarrier2, image_barriers, imageMemoryBarrierCount);
123 
124    VkPipelineStageFlags2 src_stage_mask2 = (VkPipelineStageFlags2) srcStageMask;
125    VkPipelineStageFlags2 dst_stage_mask2 = (VkPipelineStageFlags2) dstStageMask;
126 
127    for (uint32_t i = 0; i < memoryBarrierCount; i++) {
128       memory_barriers[i] = upgrade_memory_barrier(&pMemoryBarriers[i],
129                                                   src_stage_mask2,
130                                                   dst_stage_mask2);
131    }
132    for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
133       buffer_barriers[i] = upgrade_buffer_memory_barrier(&pBufferMemoryBarriers[i],
134                                                          src_stage_mask2,
135                                                          dst_stage_mask2);
136    }
137    for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
138       image_barriers[i] = upgrade_image_memory_barrier(&pImageMemoryBarriers[i],
139                                                        src_stage_mask2,
140                                                        dst_stage_mask2);
141    }
142 
143    VkDependencyInfo dep_info = {
144       .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
145       .memoryBarrierCount = memoryBarrierCount,
146       .pMemoryBarriers = memory_barriers,
147       .bufferMemoryBarrierCount = bufferMemoryBarrierCount,
148       .pBufferMemoryBarriers = buffer_barriers,
149       .imageMemoryBarrierCount = imageMemoryBarrierCount,
150       .pImageMemoryBarriers = image_barriers,
151    };
152 
153    device->dispatch_table.CmdPipelineBarrier2KHR(commandBuffer, &dep_info);
154 
155    STACK_ARRAY_FINISH(memory_barriers);
156    STACK_ARRAY_FINISH(buffer_barriers);
157    STACK_ARRAY_FINISH(image_barriers);
158 }
159 
160 VKAPI_ATTR void VKAPI_CALL
vk_common_CmdSetEvent(VkCommandBuffer commandBuffer,VkEvent event,VkPipelineStageFlags stageMask)161 vk_common_CmdSetEvent(
162     VkCommandBuffer                             commandBuffer,
163     VkEvent                                     event,
164     VkPipelineStageFlags                        stageMask)
165 {
166    VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
167    struct vk_device *device = cmd_buffer->base.device;
168 
169    VkMemoryBarrier2 mem_barrier = {
170       .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
171       .srcStageMask = (VkPipelineStageFlags2) stageMask,
172       .dstStageMask = (VkPipelineStageFlags2) stageMask,
173    };
174    VkDependencyInfo dep_info = {
175       .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
176       .memoryBarrierCount = 1,
177       .pMemoryBarriers = &mem_barrier,
178    };
179 
180    device->dispatch_table.CmdSetEvent2KHR(commandBuffer, event, &dep_info);
181 }
182 
183 VKAPI_ATTR void VKAPI_CALL
vk_common_CmdResetEvent(VkCommandBuffer commandBuffer,VkEvent event,VkPipelineStageFlags stageMask)184 vk_common_CmdResetEvent(
185     VkCommandBuffer                             commandBuffer,
186     VkEvent                                     event,
187     VkPipelineStageFlags                        stageMask)
188 {
189    VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
190    struct vk_device *device = cmd_buffer->base.device;
191 
192    device->dispatch_table.CmdResetEvent2KHR(commandBuffer,
193                                             event,
194                                             (VkPipelineStageFlags2) stageMask);
195 }
196 
197 VKAPI_ATTR void VKAPI_CALL
vk_common_CmdWaitEvents(VkCommandBuffer commandBuffer,uint32_t eventCount,const VkEvent * pEvents,VkPipelineStageFlags srcStageMask,VkPipelineStageFlags destStageMask,uint32_t memoryBarrierCount,const VkMemoryBarrier * pMemoryBarriers,uint32_t bufferMemoryBarrierCount,const VkBufferMemoryBarrier * pBufferMemoryBarriers,uint32_t imageMemoryBarrierCount,const VkImageMemoryBarrier * pImageMemoryBarriers)198 vk_common_CmdWaitEvents(
199     VkCommandBuffer                             commandBuffer,
200     uint32_t                                    eventCount,
201     const VkEvent*                              pEvents,
202     VkPipelineStageFlags                        srcStageMask,
203     VkPipelineStageFlags                        destStageMask,
204     uint32_t                                    memoryBarrierCount,
205     const VkMemoryBarrier*                      pMemoryBarriers,
206     uint32_t                                    bufferMemoryBarrierCount,
207     const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
208     uint32_t                                    imageMemoryBarrierCount,
209     const VkImageMemoryBarrier*                 pImageMemoryBarriers)
210 {
211    VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
212    struct vk_device *device = cmd_buffer->base.device;
213 
214    STACK_ARRAY(VkDependencyInfo, deps, eventCount);
215 
216    /* Note that dstStageMask and srcStageMask in the CmdWaitEvent2() call
217     * are the same.  This is to match the CmdSetEvent2() call from
218     * vk_common_CmdSetEvent().  The actual src->dst stage barrier will
219     * happen as part of the CmdPipelineBarrier() call below.
220     */
221    VkMemoryBarrier2 stage_barrier = {
222       .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
223       .srcStageMask = srcStageMask,
224       .dstStageMask = srcStageMask,
225    };
226 
227    for (uint32_t i = 0; i < eventCount; i++) {
228       deps[i] = (VkDependencyInfo) {
229          .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
230          .memoryBarrierCount = 1,
231          .pMemoryBarriers = &stage_barrier,
232       };
233    }
234    device->dispatch_table.CmdWaitEvents2KHR(commandBuffer, eventCount, pEvents, deps);
235 
236    STACK_ARRAY_FINISH(deps);
237 
238    /* Setting dependency to 0 because :
239     *
240     *    - For BY_REGION_BIT and VIEW_LOCAL_BIT, events are not allowed inside a
241     *      render pass so these don't apply.
242     *
243     *    - For DEVICE_GROUP_BIT, we have the following bit of spec text:
244     *
245     *        "Semaphore and event dependencies are device-local and only
246     *         execute on the one physical device that performs the
247     *         dependency."
248     */
249    const VkDependencyFlags dep_flags = 0;
250 
251    device->dispatch_table.CmdPipelineBarrier(commandBuffer,
252                                              srcStageMask, destStageMask,
253                                              dep_flags,
254                                              memoryBarrierCount, pMemoryBarriers,
255                                              bufferMemoryBarrierCount, pBufferMemoryBarriers,
256                                              imageMemoryBarrierCount, pImageMemoryBarriers);
257 }
258 
259 VKAPI_ATTR void VKAPI_CALL
vk_common_CmdWriteBufferMarkerAMD(VkCommandBuffer commandBuffer,VkPipelineStageFlagBits pipelineStage,VkBuffer dstBuffer,VkDeviceSize dstOffset,uint32_t marker)260 vk_common_CmdWriteBufferMarkerAMD(
261     VkCommandBuffer                             commandBuffer,
262     VkPipelineStageFlagBits                     pipelineStage,
263     VkBuffer                                    dstBuffer,
264     VkDeviceSize                                dstOffset,
265     uint32_t                                    marker)
266 {
267    VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
268    struct vk_device *device = cmd_buffer->base.device;
269 
270    device->dispatch_table.CmdWriteBufferMarker2AMD(commandBuffer,
271                                                    (VkPipelineStageFlags2) pipelineStage,
272                                                    dstBuffer,
273                                                    dstOffset,
274                                                    marker);
275 }
276 
VKAPI_ATTR void VKAPI_CALL
vk_common_GetQueueCheckpointDataNV(
    VkQueue                                     queue,
    uint32_t*                                   pCheckpointDataCount,
    VkCheckpointDataNV*                         pCheckpointData)
{
   /* Deliberate stub: this common synchronization2 layer does not emulate
    * VK_NV_device_diagnostic_checkpoints.  A driver advertising that
    * extension must provide its own implementation of this entrypoint.
    */
   unreachable("Entrypoint not implemented");
}
285 
/* Emulate the Vulkan 1.0 vkQueueSubmit() on top of vkQueueSubmit2():
 * legacy VkSubmitInfo structs (and the pNext extensions this layer knows
 * about) are repacked into VkSubmitInfo2 with per-entry semaphore and
 * command-buffer info structs.
 */
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_QueueSubmit(
    VkQueue                                     _queue,
    uint32_t                                    submitCount,
    const VkSubmitInfo*                         pSubmits,
    VkFence                                     fence)
{
   VK_FROM_HANDLE(vk_queue, queue, _queue);
   struct vk_device *device = queue->base.device;

   STACK_ARRAY(VkSubmitInfo2, submit_info_2, submitCount);
   STACK_ARRAY(VkPerformanceQuerySubmitInfoKHR, perf_query_submit_info, submitCount);
   STACK_ARRAY(struct wsi_memory_signal_submit_info, wsi_mem_submit_info, submitCount);

   /* First pass: total the per-submit counts so one flat array per kind
    * can hold the converted infos for every submit.
    */
   uint32_t n_wait_semaphores = 0;
   uint32_t n_command_buffers = 0;
   uint32_t n_signal_semaphores = 0;
   for (uint32_t s = 0; s < submitCount; s++) {
      n_wait_semaphores += pSubmits[s].waitSemaphoreCount;
      n_command_buffers += pSubmits[s].commandBufferCount;
      n_signal_semaphores += pSubmits[s].signalSemaphoreCount;
   }

   STACK_ARRAY(VkSemaphoreSubmitInfo, wait_semaphores, n_wait_semaphores);
   STACK_ARRAY(VkCommandBufferSubmitInfo, command_buffers, n_command_buffers);
   STACK_ARRAY(VkSemaphoreSubmitInfo, signal_semaphores, n_signal_semaphores);

   /* Reused below as running offsets into the flat arrays. */
   n_wait_semaphores = 0;
   n_command_buffers = 0;
   n_signal_semaphores = 0;

   for (uint32_t s = 0; s < submitCount; s++) {
      const VkTimelineSemaphoreSubmitInfo *timeline_info =
         vk_find_struct_const(pSubmits[s].pNext,
                              TIMELINE_SEMAPHORE_SUBMIT_INFO);
      const uint64_t *wait_values = NULL;
      const uint64_t *signal_values = NULL;

      if (timeline_info && timeline_info->waitSemaphoreValueCount) {
         /* From the Vulkan 1.3.204 spec:
          *
          *    VUID-VkSubmitInfo-pNext-03240
          *
          *    "If the pNext chain of this structure includes a VkTimelineSemaphoreSubmitInfo structure
          *    and any element of pWaitSemaphores was created with a VkSemaphoreType of
          *    VK_SEMAPHORE_TYPE_TIMELINE, then its waitSemaphoreValueCount member must equal
          *    waitSemaphoreCount"
          */
         assert(timeline_info->waitSemaphoreValueCount == pSubmits[s].waitSemaphoreCount);
         wait_values = timeline_info->pWaitSemaphoreValues;
      }

      if (timeline_info && timeline_info->signalSemaphoreValueCount) {
         /* From the Vulkan 1.3.204 spec:
          *
          *    VUID-VkSubmitInfo-pNext-03241
          *
          *    "If the pNext chain of this structure includes a VkTimelineSemaphoreSubmitInfo structure
          *    and any element of pSignalSemaphores was created with a VkSemaphoreType of
          *    VK_SEMAPHORE_TYPE_TIMELINE, then its signalSemaphoreValueCount member must equal
          *    signalSemaphoreCount"
          */
         assert(timeline_info->signalSemaphoreValueCount == pSubmits[s].signalSemaphoreCount);
         signal_values = timeline_info->pSignalSemaphoreValues;
      }

      const VkDeviceGroupSubmitInfo *group_info =
         vk_find_struct_const(pSubmits[s].pNext, DEVICE_GROUP_SUBMIT_INFO);

      /* Timeline values and device indices default to 0 when their
       * optional pNext structs are absent.
       */
      for (uint32_t i = 0; i < pSubmits[s].waitSemaphoreCount; i++) {
         wait_semaphores[n_wait_semaphores + i] = (VkSemaphoreSubmitInfo) {
            .sType       = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore   = pSubmits[s].pWaitSemaphores[i],
            .value       = wait_values ? wait_values[i] : 0,
            .stageMask   = pSubmits[s].pWaitDstStageMask[i],
            .deviceIndex = group_info ? group_info->pWaitSemaphoreDeviceIndices[i] : 0,
         };
      }
      for (uint32_t i = 0; i < pSubmits[s].commandBufferCount; i++) {
         command_buffers[n_command_buffers + i] = (VkCommandBufferSubmitInfo) {
            .sType         = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
            .commandBuffer = pSubmits[s].pCommandBuffers[i],
            .deviceMask    = group_info ? group_info->pCommandBufferDeviceMasks[i] : 0,
         };
      }
      for (uint32_t i = 0; i < pSubmits[s].signalSemaphoreCount; i++) {
         /* Legacy VkSubmitInfo has no per-signal stage mask; signals
          * happen when all commands complete, i.e. ALL_COMMANDS.
          */
         signal_semaphores[n_signal_semaphores + i] = (VkSemaphoreSubmitInfo) {
            .sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = pSubmits[s].pSignalSemaphores[i],
            .value     = signal_values ? signal_values[i] : 0,
            .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
            .deviceIndex = group_info ? group_info->pSignalSemaphoreDeviceIndices[i] : 0,
         };
      }

      const VkProtectedSubmitInfo *protected_info =
         vk_find_struct_const(pSubmits[s].pNext, PROTECTED_SUBMIT_INFO);

      submit_info_2[s] = (VkSubmitInfo2) {
         .sType                    = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
         .flags                    = ((protected_info && protected_info->protectedSubmit) ?
                                      VK_SUBMIT_PROTECTED_BIT : 0),
         .waitSemaphoreInfoCount   = pSubmits[s].waitSemaphoreCount,
         .pWaitSemaphoreInfos      = &wait_semaphores[n_wait_semaphores],
         .commandBufferInfoCount   = pSubmits[s].commandBufferCount,
         .pCommandBufferInfos      = &command_buffers[n_command_buffers],
         .signalSemaphoreInfoCount = pSubmits[s].signalSemaphoreCount,
         .pSignalSemaphoreInfos    = &signal_semaphores[n_signal_semaphores],
      };

      /* Pass-through pNext structs are copied so their pNext pointers can
       * be rewritten to chain onto the new VkSubmitInfo2 without touching
       * the application's (const) input.
       */
      const VkPerformanceQuerySubmitInfoKHR *query_info =
         vk_find_struct_const(pSubmits[s].pNext,
                              PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
      if (query_info) {
         perf_query_submit_info[s] = *query_info;
         perf_query_submit_info[s].pNext = NULL;
         __vk_append_struct(&submit_info_2[s], &perf_query_submit_info[s]);
      }

      const struct wsi_memory_signal_submit_info *mem_signal_info =
         vk_find_struct_const(pSubmits[s].pNext,
                              WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA);
      if (mem_signal_info) {
         wsi_mem_submit_info[s] = *mem_signal_info;
         wsi_mem_submit_info[s].pNext = NULL;
         __vk_append_struct(&submit_info_2[s], &wsi_mem_submit_info[s]);
      }

      /* Advance the running offsets past this submit's entries. */
      n_wait_semaphores += pSubmits[s].waitSemaphoreCount;
      n_command_buffers += pSubmits[s].commandBufferCount;
      n_signal_semaphores += pSubmits[s].signalSemaphoreCount;
   }

   VkResult result = device->dispatch_table.QueueSubmit2KHR(_queue,
                                                            submitCount,
                                                            submit_info_2,
                                                            fence);

   STACK_ARRAY_FINISH(wait_semaphores);
   STACK_ARRAY_FINISH(command_buffers);
   STACK_ARRAY_FINISH(signal_semaphores);
   STACK_ARRAY_FINISH(submit_info_2);
   STACK_ARRAY_FINISH(perf_query_submit_info);
   STACK_ARRAY_FINISH(wsi_mem_submit_info);

   return result;
}
433