1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25  * DEALINGS IN THE SOFTWARE.
26  */
27 #include "tu_private.h"
28 
29 #include "vk_util.h"
30 #include "vk_format.h"
31 
32 /* Return true if we have to fallback to sysmem rendering because the
33  * dependency can't be satisfied with tiled rendering.
34  */
35 
36 static bool
dep_invalid_for_gmem(const VkSubpassDependency2 * dep)37 dep_invalid_for_gmem(const VkSubpassDependency2 *dep)
38 {
39    /* External dependencies don't matter here. */
40    if (dep->srcSubpass == VK_SUBPASS_EXTERNAL ||
41        dep->dstSubpass == VK_SUBPASS_EXTERNAL)
42       return false;
43 
44    /* We can conceptually break down the process of rewriting a sysmem
45     * renderpass into a gmem one into two parts:
46     *
47     * 1. Split each draw and multisample resolve into N copies, one for each
48     * bin. (If hardware binning, add one more copy where the FS is disabled
49     * for the binning pass). This is always allowed because the vertex stage
50     * is allowed to run an arbitrary number of times and there are no extra
51     * ordering constraints within a draw.
52     * 2. Take the last copy of the second-to-last draw and slide it down to
53     * before the last copy of the last draw. Repeat for each earlier draw
54     * until the draw pass for the last bin is complete, then repeat for each
55     * earlier bin until we finish with the first bin.
56     *
57     * During this rearranging process, we can't slide draws past each other in
58     * a way that breaks the subpass dependencies. For each draw, we must slide
59     * it past (copies of) the rest of the draws in the renderpass. We can
60     * slide a draw past another if there isn't a dependency between them, or
61     * if the dependenc(ies) are dependencies between framebuffer-space stages
62     * only with the BY_REGION bit set. Note that this includes
63     * self-dependencies, since these may result in pipeline barriers that also
64     * break the rearranging process.
65     */
66 
67    /* This is straight from the Vulkan 1.2 spec, section 6.1.4 "Framebuffer
68     * Region Dependencies":
69     */
70    const VkPipelineStageFlags framebuffer_space_stages =
71       VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
72       VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
73       VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
74       VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
75 
76    return
77       (dep->srcStageMask & ~framebuffer_space_stages) ||
78       (dep->dstStageMask & ~framebuffer_space_stages) ||
79       !(dep->dependencyFlags & VK_DEPENDENCY_BY_REGION_BIT);
80 }
81 
82 static void
tu_render_pass_add_subpass_dep(struct tu_render_pass * pass,const VkSubpassDependency2 * dep)83 tu_render_pass_add_subpass_dep(struct tu_render_pass *pass,
84                                const VkSubpassDependency2 *dep)
85 {
86    uint32_t src = dep->srcSubpass;
87    uint32_t dst = dep->dstSubpass;
88 
89    if (dep_invalid_for_gmem(dep))
90       pass->gmem_pixels = 0;
91 
92    /* Ignore subpass self-dependencies as they allow the app to call
93     * vkCmdPipelineBarrier() inside the render pass and the driver should only
94     * do the barrier when called, not when starting the render pass.
95     */
96    if (src == dst)
97       return;
98 
99    struct tu_subpass_barrier *src_barrier;
100    if (src == VK_SUBPASS_EXTERNAL) {
101       src_barrier = &pass->subpasses[0].start_barrier;
102    } else if (src == pass->subpass_count - 1) {
103       src_barrier = &pass->end_barrier;
104    } else {
105       src_barrier = &pass->subpasses[src + 1].start_barrier;
106    }
107 
108    struct tu_subpass_barrier *dst_barrier;
109    if (dst == VK_SUBPASS_EXTERNAL) {
110       dst_barrier = &pass->end_barrier;
111    } else {
112       dst_barrier = &pass->subpasses[dst].start_barrier;
113    }
114 
115    if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
116       src_barrier->src_stage_mask |= dep->srcStageMask;
117    src_barrier->src_access_mask |= dep->srcAccessMask;
118    dst_barrier->dst_access_mask |= dep->dstAccessMask;
119 }
120 
121 /* We currently only care about undefined layouts, because we have to
122  * flush/invalidate CCU for those. PREINITIALIZED is the same thing as
123  * UNDEFINED for anything not linear tiled, but we don't know yet whether the
124  * images used are tiled, so just assume they are.
125  */
126 
127 static bool
layout_undefined(VkImageLayout layout)128 layout_undefined(VkImageLayout layout)
129 {
130    return layout == VK_IMAGE_LAYOUT_UNDEFINED ||
131           layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
132 }
133 
134 /* This implements the following bit of spec text:
135  *
136  *    If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
137  *    first subpass that uses an attachment, then an implicit subpass
138  *    dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
139  *    used in. The implicit subpass dependency only exists if there
140  *    exists an automatic layout transition away from initialLayout.
141  *    The subpass dependency operates as if defined with the
142  *    following parameters:
143  *
144  *    VkSubpassDependency implicitDependency = {
145  *        .srcSubpass = VK_SUBPASS_EXTERNAL;
146  *        .dstSubpass = firstSubpass; // First subpass attachment is used in
147  *        .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
148  *        .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
149  *        .srcAccessMask = 0;
150  *        .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
151  *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
152  *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
153  *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
154  *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
155  *        .dependencyFlags = 0;
156  *    };
157  *
158  *    Similarly, if there is no subpass dependency from the last subpass
159  *    that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
160  *    subpass dependency exists from the last subpass it is used in to
161  *    VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
162  *    if there exists an automatic layout transition into finalLayout.
163  *    The subpass dependency operates as if defined with the following
164  *    parameters:
165  *
166  *    VkSubpassDependency implicitDependency = {
167  *        .srcSubpass = lastSubpass; // Last subpass attachment is used in
168  *        .dstSubpass = VK_SUBPASS_EXTERNAL;
169  *        .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
170  *        .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
171  *        .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
172  *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
173  *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
174  *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
175  *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
176  *        .dstAccessMask = 0;
177  *        .dependencyFlags = 0;
178  *    };
179  *
180  * Note: currently this is the only use we have for layout transitions,
181  * besides needing to invalidate CCU at the beginning, so we also flag
182  * transitions from UNDEFINED here.
183  */
184 static void
tu_render_pass_add_implicit_deps(struct tu_render_pass * pass,const VkRenderPassCreateInfo2 * info)185 tu_render_pass_add_implicit_deps(struct tu_render_pass *pass,
186                                  const VkRenderPassCreateInfo2 *info)
187 {
188    const VkAttachmentDescription2* att = info->pAttachments;
189    bool has_external_src[info->subpassCount];
190    bool has_external_dst[info->subpassCount];
191    bool att_used[pass->attachment_count];
192 
193    memset(has_external_src, 0, sizeof(has_external_src));
194    memset(has_external_dst, 0, sizeof(has_external_dst));
195 
196    for (uint32_t i = 0; i < info->dependencyCount; i++) {
197       uint32_t src = info->pDependencies[i].srcSubpass;
198       uint32_t dst = info->pDependencies[i].dstSubpass;
199 
200       if (src == dst)
201          continue;
202 
203       if (src == VK_SUBPASS_EXTERNAL)
204          has_external_src[dst] = true;
205       if (dst == VK_SUBPASS_EXTERNAL)
206          has_external_dst[src] = true;
207    }
208 
209    memset(att_used, 0, sizeof(att_used));
210 
211    for (unsigned i = 0; i < info->subpassCount; i++) {
212       if (!has_external_src[i])
213          continue;
214 
215       const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
216       bool src_implicit_dep = false;
217 
218       for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
219          uint32_t a = subpass->pInputAttachments[j].attachment;
220          if (a == VK_ATTACHMENT_UNUSED)
221             continue;
222          if (att[a].initialLayout != subpass->pInputAttachments[j].layout && !att_used[a])
223             src_implicit_dep = true;
224          att_used[a] = true;
225       }
226 
227       for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
228          uint32_t a = subpass->pColorAttachments[j].attachment;
229          if (a == VK_ATTACHMENT_UNUSED)
230             continue;
231          if (att[a].initialLayout != subpass->pColorAttachments[j].layout && !att_used[a])
232             src_implicit_dep = true;
233          att_used[a] = true;
234       }
235 
236       if (subpass->pResolveAttachments) {
237          for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
238             uint32_t a = subpass->pResolveAttachments[j].attachment;
239             if (a == VK_ATTACHMENT_UNUSED)
240                continue;
241             if (att[a].initialLayout != subpass->pResolveAttachments[j].layout && !att_used[a])
242                src_implicit_dep = true;
243             att_used[a] = true;
244          }
245       }
246 
247       if (src_implicit_dep) {
248          tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) {
249             .srcSubpass = VK_SUBPASS_EXTERNAL,
250             .dstSubpass = i,
251             .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
252             .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
253             .srcAccessMask = 0,
254             .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
255                              VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
256                              VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
257                              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
258                              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
259             .dependencyFlags = 0,
260          });
261       }
262    }
263 
264    memset(att_used, 0, sizeof(att_used));
265 
266    for (int i = info->subpassCount - 1; i >= 0; i--) {
267       if (!has_external_dst[i])
268          continue;
269 
270       const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
271       bool dst_implicit_dep = false;
272 
273       for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
274          uint32_t a = subpass->pInputAttachments[j].attachment;
275          if (a == VK_ATTACHMENT_UNUSED)
276             continue;
277          if (att[a].finalLayout != subpass->pInputAttachments[j].layout && !att_used[a])
278             dst_implicit_dep = true;
279          att_used[a] = true;
280       }
281 
282       for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
283          uint32_t a = subpass->pColorAttachments[j].attachment;
284          if (a == VK_ATTACHMENT_UNUSED)
285             continue;
286          if (att[a].finalLayout != subpass->pColorAttachments[j].layout && !att_used[a])
287             dst_implicit_dep = true;
288          att_used[a] = true;
289       }
290 
291       if (subpass->pResolveAttachments) {
292          for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
293             uint32_t a = subpass->pResolveAttachments[j].attachment;
294             if (a == VK_ATTACHMENT_UNUSED)
295                continue;
296             if (att[a].finalLayout != subpass->pResolveAttachments[j].layout && !att_used[a])
297                dst_implicit_dep = true;
298             att_used[a] = true;
299          }
300       }
301 
302       if (dst_implicit_dep) {
303          tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) {
304             .srcSubpass = i,
305             .dstSubpass = VK_SUBPASS_EXTERNAL,
306             .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
307             .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
308             .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
309                              VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
310                              VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
311                              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
312                              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
313             .dstAccessMask = 0,
314             .dependencyFlags = 0,
315          });
316       }
317    }
318 
319    /* Handle UNDEFINED transitions, similar to the handling in tu_barrier().
320     * Assume that if an attachment has an initial layout of UNDEFINED, it gets
321     * transitioned eventually.
322     */
323    for (unsigned i = 0; i < info->attachmentCount; i++) {
324       if (layout_undefined(att[i].initialLayout)) {
325          if (vk_format_is_depth_or_stencil(att[i].format)) {
326             pass->subpasses[0].start_barrier.incoherent_ccu_depth = true;
327          } else {
328             pass->subpasses[0].start_barrier.incoherent_ccu_color = true;
329          }
330       }
331    }
332 }
333 
/* Record the sample count used by a subpass.
 *
 * Asserts that the subpass either has no sample count yet (0) or already
 * has the same one, then stores the new value.
 */
static void
update_samples(struct tu_subpass *subpass, VkSampleCountFlagBits samples)
{
   assert(subpass->samples == 0 || subpass->samples == samples);

   subpass->samples = samples;
}
340 
341 static void
tu_render_pass_gmem_config(struct tu_render_pass * pass,const struct tu_physical_device * phys_dev)342 tu_render_pass_gmem_config(struct tu_render_pass *pass,
343                            const struct tu_physical_device *phys_dev)
344 {
345    uint32_t block_align_shift = 3; /* log2(gmem_align/(tile_align_w*tile_align_h)) */
346    uint32_t tile_align_w = phys_dev->info.tile_align_w;
347    uint32_t gmem_align = (1 << block_align_shift) * tile_align_w * phys_dev->info.tile_align_h;
348 
349    /* calculate total bytes per pixel */
350    uint32_t cpp_total = 0;
351    for (uint32_t i = 0; i < pass->attachment_count; i++) {
352       struct tu_render_pass_attachment *att = &pass->attachments[i];
353       bool cpp1 = (att->cpp == 1);
354       if (att->gmem_offset >= 0) {
355          cpp_total += att->cpp;
356 
357          /* take into account the separate stencil: */
358          if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
359             cpp1 = (att->samples == 1);
360             cpp_total += att->samples;
361          }
362 
363          /* texture pitch must be aligned to 64, use a tile_align_w that is
364           * a multiple of 64 for cpp==1 attachment to work as input attachment
365           */
366          if (cpp1 && tile_align_w % 64 != 0) {
367             tile_align_w *= 2;
368             block_align_shift -= 1;
369          }
370       }
371    }
372 
373    pass->tile_align_w = tile_align_w;
374 
375    /* no gmem attachments */
376    if (cpp_total == 0) {
377       /* any value non-zero value so tiling config works with no attachments */
378       pass->gmem_pixels = 1024*1024;
379       return;
380    }
381 
382    /* TODO: using ccu_offset_gmem so that BLIT_OP_SCALE resolve path
383     * doesn't break things. maybe there is a better solution?
384     * TODO: this algorithm isn't optimal
385     * for example, two attachments with cpp = {1, 4}
386     * result:  nblocks = {12, 52}, pixels = 196608
387     * optimal: nblocks = {13, 51}, pixels = 208896
388     */
389    uint32_t gmem_blocks = phys_dev->info.a6xx.ccu_offset_gmem / gmem_align;
390    uint32_t offset = 0, pixels = ~0u, i;
391    for (i = 0; i < pass->attachment_count; i++) {
392       struct tu_render_pass_attachment *att = &pass->attachments[i];
393       if (att->gmem_offset < 0)
394          continue;
395 
396       att->gmem_offset = offset;
397 
398       uint32_t align = MAX2(1, att->cpp >> block_align_shift);
399       uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);
400 
401       if (nblocks > gmem_blocks)
402          break;
403 
404       gmem_blocks -= nblocks;
405       cpp_total -= att->cpp;
406       offset += nblocks * gmem_align;
407       pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);
408 
409       /* repeat the same for separate stencil */
410       if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
411          att->gmem_offset_stencil = offset;
412 
413          /* note: for s8_uint, block align is always 1 */
414          uint32_t nblocks = gmem_blocks * att->samples / cpp_total;
415          if (nblocks > gmem_blocks)
416             break;
417 
418          gmem_blocks -= nblocks;
419          cpp_total -= att->samples;
420          offset += nblocks * gmem_align;
421          pixels = MIN2(pixels, nblocks * gmem_align / att->samples);
422       }
423    }
424 
425    /* if the loop didn't complete then the gmem config is impossible */
426    if (i == pass->attachment_count)
427       pass->gmem_pixels = pixels;
428 }
429 
430 static void
attachment_set_ops(struct tu_render_pass_attachment * att,VkAttachmentLoadOp load_op,VkAttachmentLoadOp stencil_load_op,VkAttachmentStoreOp store_op,VkAttachmentStoreOp stencil_store_op)431 attachment_set_ops(struct tu_render_pass_attachment *att,
432                    VkAttachmentLoadOp load_op,
433                    VkAttachmentLoadOp stencil_load_op,
434                    VkAttachmentStoreOp store_op,
435                    VkAttachmentStoreOp stencil_store_op)
436 {
437    /* load/store ops */
438    att->clear_mask =
439       (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
440    att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
441    att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);
442 
443    bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
444    bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
445    bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);
446 
447    switch (att->format) {
448    case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */
449       if (att->clear_mask)
450          att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
451       if (stencil_clear)
452          att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
453       if (stencil_load)
454          att->load = true;
455       if (stencil_store)
456          att->store = true;
457       break;
458    case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
459       att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
460       att->load = stencil_load;
461       att->store = stencil_store;
462       break;
463    case VK_FORMAT_D32_SFLOAT_S8_UINT: /* separate stencil */
464       if (att->clear_mask)
465          att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
466       if (stencil_clear)
467          att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
468       if (stencil_load)
469          att->load_stencil = true;
470       if (stencil_store)
471          att->store_stencil = true;
472       break;
473    default:
474       break;
475    }
476 }
477 
478 VkResult
tu_CreateRenderPass2(VkDevice _device,const VkRenderPassCreateInfo2KHR * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkRenderPass * pRenderPass)479 tu_CreateRenderPass2(VkDevice _device,
480                      const VkRenderPassCreateInfo2KHR *pCreateInfo,
481                      const VkAllocationCallbacks *pAllocator,
482                      VkRenderPass *pRenderPass)
483 {
484    TU_FROM_HANDLE(tu_device, device, _device);
485    struct tu_render_pass *pass;
486    size_t size;
487    size_t attachments_offset;
488 
489    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR);
490 
491    size = sizeof(*pass);
492    size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
493    attachments_offset = size;
494    size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
495 
496    pass = vk_object_zalloc(&device->vk, pAllocator, size,
497                            VK_OBJECT_TYPE_RENDER_PASS);
498    if (pass == NULL)
499       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
500 
501    pass->attachment_count = pCreateInfo->attachmentCount;
502    pass->subpass_count = pCreateInfo->subpassCount;
503    pass->attachments = (void *) pass + attachments_offset;
504 
505    for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
506       struct tu_render_pass_attachment *att = &pass->attachments[i];
507 
508       att->format = pCreateInfo->pAttachments[i].format;
509       att->samples = pCreateInfo->pAttachments[i].samples;
510       /* for d32s8, cpp is for the depth image, and
511        * att->samples will be used as the cpp for the stencil image
512        */
513       if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
514          att->cpp = 4 * att->samples;
515       else
516          att->cpp = vk_format_get_blocksize(att->format) * att->samples;
517       att->gmem_offset = -1;
518 
519       attachment_set_ops(att,
520                          pCreateInfo->pAttachments[i].loadOp,
521                          pCreateInfo->pAttachments[i].stencilLoadOp,
522                          pCreateInfo->pAttachments[i].storeOp,
523                          pCreateInfo->pAttachments[i].stencilStoreOp);
524    }
525    uint32_t subpass_attachment_count = 0;
526    struct tu_subpass_attachment *p;
527    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
528       const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
529 
530       subpass_attachment_count +=
531          desc->inputAttachmentCount + desc->colorAttachmentCount +
532          (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
533    }
534 
535    if (subpass_attachment_count) {
536       pass->subpass_attachments = vk_alloc2(
537          &device->vk.alloc, pAllocator,
538          subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
539          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
540       if (pass->subpass_attachments == NULL) {
541          vk_object_free(&device->vk, pAllocator, pass);
542          return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
543       }
544    } else
545       pass->subpass_attachments = NULL;
546 
547    p = pass->subpass_attachments;
548    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
549       const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
550       struct tu_subpass *subpass = &pass->subpasses[i];
551 
552       subpass->input_count = desc->inputAttachmentCount;
553       subpass->color_count = desc->colorAttachmentCount;
554       subpass->samples = 0;
555       subpass->srgb_cntl = 0;
556 
557       subpass->multiview_mask = desc->viewMask;
558 
559       if (desc->inputAttachmentCount > 0) {
560          subpass->input_attachments = p;
561          p += desc->inputAttachmentCount;
562 
563          for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
564             uint32_t a = desc->pInputAttachments[j].attachment;
565             subpass->input_attachments[j].attachment = a;
566             if (a != VK_ATTACHMENT_UNUSED)
567                pass->attachments[a].gmem_offset = 0;
568          }
569       }
570 
571       if (desc->colorAttachmentCount > 0) {
572          subpass->color_attachments = p;
573          p += desc->colorAttachmentCount;
574 
575          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
576             uint32_t a = desc->pColorAttachments[j].attachment;
577             subpass->color_attachments[j].attachment = a;
578 
579             if (a != VK_ATTACHMENT_UNUSED) {
580                pass->attachments[a].gmem_offset = 0;
581                update_samples(subpass, pCreateInfo->pAttachments[a].samples);
582 
583                if (vk_format_is_srgb(pass->attachments[a].format))
584                   subpass->srgb_cntl |= 1 << j;
585 
586                pass->attachments[a].clear_views |= subpass->multiview_mask;
587             }
588          }
589       }
590 
591       subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
592       if (desc->pResolveAttachments) {
593          p += desc->colorAttachmentCount;
594          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
595             subpass->resolve_attachments[j].attachment =
596                   desc->pResolveAttachments[j].attachment;
597          }
598       }
599 
600 
601       uint32_t a = desc->pDepthStencilAttachment ?
602          desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
603       subpass->depth_stencil_attachment.attachment = a;
604       if (a != VK_ATTACHMENT_UNUSED) {
605             pass->attachments[a].gmem_offset = 0;
606             update_samples(subpass, pCreateInfo->pAttachments[a].samples);
607       }
608 
609       subpass->samples = subpass->samples ?: 1;
610    }
611 
612    /* disable unused attachments */
613    for (uint32_t i = 0; i < pass->attachment_count; i++) {
614       struct tu_render_pass_attachment *att = &pass->attachments[i];
615       if (att->gmem_offset < 0) {
616          att->clear_mask = 0;
617          att->load = false;
618       }
619    }
620 
621    /* From the VK_KHR_multiview spec:
622     *
623     *    Multiview is all-or-nothing for a render pass - that is, either all
624     *    subpasses must have a non-zero view mask (though some subpasses may
625     *    have only one view) or all must be zero.
626     *
627     * This means we only have to check one of the view masks.
628     */
629    if (pCreateInfo->pSubpasses[0].viewMask) {
630       /* It seems multiview must use sysmem rendering. */
631       pass->gmem_pixels = 0;
632    } else {
633       tu_render_pass_gmem_config(pass, device->physical_device);
634    }
635 
636    for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
637       tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
638    }
639 
640    tu_render_pass_add_implicit_deps(pass, pCreateInfo);
641 
642    *pRenderPass = tu_render_pass_to_handle(pass);
643 
644    return VK_SUCCESS;
645 }
646 
647 void
tu_DestroyRenderPass(VkDevice _device,VkRenderPass _pass,const VkAllocationCallbacks * pAllocator)648 tu_DestroyRenderPass(VkDevice _device,
649                      VkRenderPass _pass,
650                      const VkAllocationCallbacks *pAllocator)
651 {
652    TU_FROM_HANDLE(tu_device, device, _device);
653    TU_FROM_HANDLE(tu_render_pass, pass, _pass);
654 
655    if (!_pass)
656       return;
657 
658    vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
659    vk_object_free(&device->vk, pAllocator, pass);
660 }
661 
662 void
tu_GetRenderAreaGranularity(VkDevice _device,VkRenderPass renderPass,VkExtent2D * pGranularity)663 tu_GetRenderAreaGranularity(VkDevice _device,
664                             VkRenderPass renderPass,
665                             VkExtent2D *pGranularity)
666 {
667    TU_FROM_HANDLE(tu_device, device, _device);
668    pGranularity->width = device->physical_device->info.gmem_align_w;
669    pGranularity->height = device->physical_device->info.gmem_align_h;
670 }
671