1 // Copyright 2022 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "CompressedImageInfo.h"
16 
17 #include "aemu/base/ArraySize.h"
18 #include "vulkan/VkFormatUtils.h"
19 #include "vulkan/emulated_textures/shaders/DecompressionShaders.h"
20 #include "vulkan/VkFormatUtils.h"
21 #include "vulkan/vk_enum_string_helper.h"
22 
23 namespace gfxstream {
24 namespace vk {
25 namespace {
26 
27 using emugl::ABORT_REASON_OTHER;
28 using emugl::FatalError;
29 
// Returns x / y, rounded up. E.g. ceil_div(7, 2) == 4
// Written as x / y + remainder-carry rather than (x + y - 1) / y, so it cannot
// overflow for x close to UINT32_MAX.
inline constexpr uint32_t ceil_div(uint32_t x, uint32_t y) { return x / y + (x % y != 0 ? 1 : 0); }
33 
createDefaultImageView(VulkanDispatch * vk,VkDevice device,VkImage image,VkFormat format,VkImageType imageType,uint32_t mipLevel,uint32_t layerCount)34 VkImageView createDefaultImageView(VulkanDispatch* vk, VkDevice device, VkImage image,
35                                    VkFormat format, VkImageType imageType, uint32_t mipLevel,
36                                    uint32_t layerCount) {
37     VkImageViewCreateInfo imageViewInfo = {
38         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
39         .image = image,
40         .format = format,
41         .components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
42                        VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY},
43         .subresourceRange =
44             {
45                 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
46                 .baseMipLevel = mipLevel,
47                 .levelCount = 1,
48                 .baseArrayLayer = 0,
49                 .layerCount = layerCount,
50             },
51     };
52 
53     switch (imageType) {
54         case VK_IMAGE_TYPE_1D:
55             imageViewInfo.viewType = VK_IMAGE_VIEW_TYPE_1D_ARRAY;
56             break;
57         case VK_IMAGE_TYPE_2D:
58             imageViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
59             break;
60         case VK_IMAGE_TYPE_3D:
61             imageViewInfo.viewType = VK_IMAGE_VIEW_TYPE_3D;
62             break;
63         default:
64             imageViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
65             break;
66     }
67     VkImageView imageView;
68     VkResult result = vk->vkCreateImageView(device, &imageViewInfo, nullptr, &imageView);
69     if (result != VK_SUCCESS) {
70         WARN("GPU decompression: createDefaultImageView failed: %d", result);
71         return VK_NULL_HANDLE;
72     }
73     return imageView;
74 }
75 
getBlockSize(VkFormat format)76 VkExtent2D getBlockSize(VkFormat format) {
77     switch (format) {
78         case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
79         case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
80         case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
81         case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
82         case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
83         case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
84         case VK_FORMAT_EAC_R11_UNORM_BLOCK:
85         case VK_FORMAT_EAC_R11_SNORM_BLOCK:
86         case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
87         case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
88             return {4, 4};
89         case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
90         case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
91             return {4, 4};
92         case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
93         case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
94             return {5, 4};
95         case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
96         case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
97             return {5, 5};
98         case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
99         case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
100             return {6, 5};
101         case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
102         case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
103             return {6, 6};
104         case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
105         case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
106             return {8, 5};
107         case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
108         case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
109             return {8, 6};
110         case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
111         case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
112             return {8, 8};
113         case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
114         case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
115             return {10, 5};
116         case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
117         case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
118             return {10, 6};
119         case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
120         case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
121             return {10, 8};
122         case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
123         case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
124             return {10, 10};
125         case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
126         case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
127             return {12, 10};
128         case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
129         case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
130             return {12, 12};
131         default:
132             return {1, 1};
133     }
134 }
135 
isReadableImageLayout(VkImageLayout layout)136 bool isReadableImageLayout(VkImageLayout layout) {
137     switch (layout) {
138         case VK_IMAGE_LAYOUT_GENERAL:
139         case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
140         case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
141         case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL_KHR:
142             return true;
143         default:
144             return false;
145     }
146 }
147 
isWritableImageLayout(VkImageLayout layout)148 bool isWritableImageLayout(VkImageLayout layout) {
149     switch (layout) {
150         case VK_IMAGE_LAYOUT_GENERAL:
151         case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
152             return true;
153         default:
154             return false;
155     }
156 }
157 
158 // Returns whether a given memory barrier puts the image in a layout where it can be read from.
imageWillBecomeReadable(const VkImageMemoryBarrier & barrier)159 bool imageWillBecomeReadable(const VkImageMemoryBarrier& barrier) {
160     bool fromReadable = isReadableImageLayout(barrier.oldLayout);
161     bool toReadable = isReadableImageLayout(barrier.newLayout);
162     bool toWritable = isWritableImageLayout(barrier.newLayout);
163 
164     // TODO(gregschlom) This doesn't take into account that the GENERAL layout is both readable and
165     //  writable, so this warning could incorrectly trigger some times.
166     if (fromReadable && toWritable) {
167         WARN(
168             "Compressed image is being transitioned from readable (%s) to writable (%s). This may "
169             "lead to unexpected results.",
170             string_VkImageLayout(barrier.oldLayout), string_VkImageLayout(barrier.newLayout));
171     }
172 
173     // If we're transitioning from UNDEFINED, the image content is undefined, so don't try to
174     // decompress it.
175     if (barrier.oldLayout == VK_IMAGE_LAYOUT_UNDEFINED) return false;
176 
177     // TODO(gregschlom): Address the corner case of GENERAL, which is both readable and writable.
178     // For example, the image could be transitioned only once, from UNDEFINED to GENERAL.
179     // Currently, there is no way to perform decompression in this case.
180 
181     return toReadable;
182 }
183 
isCompressedFormat(VkFormat format)184 bool isCompressedFormat(VkFormat format) {
185     return gfxstream::vk::isAstc(format) || gfxstream::vk::isEtc2(format) ||
186            gfxstream::vk::isBc(format);
187 }
188 
189 // Returns the format that the shader uses to write the output image
getShaderFormat(VkFormat outputFormat)190 VkFormat getShaderFormat(VkFormat outputFormat) {
191     switch (outputFormat) {
192         case VK_FORMAT_R16_UNORM:
193         case VK_FORMAT_R16_SNORM:
194         case VK_FORMAT_R16G16_UNORM:
195         case VK_FORMAT_R16G16_SNORM:
196             return outputFormat;
197         case VK_FORMAT_BC3_UNORM_BLOCK:
198         case VK_FORMAT_BC3_SRGB_BLOCK:
199             return VK_FORMAT_R32G32B32A32_UINT;
200         default:
201             return VK_FORMAT_R8G8B8A8_UINT;
202     }
203 }
204 
205 // Returns the next memory offset on a given alignment.
206 // Will divide by zero if alignment is zero.
nextAlignedOffset(VkDeviceSize offset,VkDeviceSize alignment)207 VkDeviceSize nextAlignedOffset(VkDeviceSize offset, VkDeviceSize alignment) {
208     return ceil_div(offset, alignment) * alignment;
209 }
210 
211 // Check that the alignment is valid:
212 // - sets the alignment to 1 if it's 0
213 // - aborts if it's not a power of 2
checkValidAlignment(VkDeviceSize & n)214 void checkValidAlignment(VkDeviceSize& n) {
215     if (n == 0) {
216         n = 1;
217         return;
218     }
219 
220     // Check that the alignment is a power of 2
221     // http://www.graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2
222     if ((n & (n - 1))) {
223         GFXSTREAM_ABORT(FatalError(ABORT_REASON_OTHER)) << "vkGetImageMemoryRequirements returned non-power-of-two alignment: " + std::to_string(n);
224     }
225 }
226 
227 }  // namespace
228 
CompressedImageInfo(VkDevice device)229 CompressedImageInfo::CompressedImageInfo(VkDevice device) : mDevice(device) {}
230 
CompressedImageInfo(VkDevice device,const VkImageCreateInfo & createInfo,GpuDecompressionPipelineManager * pipelineManager)231 CompressedImageInfo::CompressedImageInfo(VkDevice device, const VkImageCreateInfo& createInfo,
232                                          GpuDecompressionPipelineManager* pipelineManager)
233     : mCompressedFormat(createInfo.format),
234       mOutputFormat(getOutputFormat(mCompressedFormat)),
235       mCompressedMipmapsFormat(getCompressedMipmapsFormat(mCompressedFormat)),
236       mImageType(createInfo.imageType),
237       mMipLevels(createInfo.mipLevels),
238       mExtent(createInfo.extent),
239       mBlock(getBlockSize(mCompressedFormat)),
240       mLayerCount(createInfo.arrayLayers),
241       mDevice(device),
242       mPipelineManager(pipelineManager) {}
243 
// static
// Maps a compressed format to the uncompressed (or transcoded) format of the
// emulated output image. Formats we don't emulate map to themselves.
VkFormat CompressedImageInfo::getOutputFormat(VkFormat compFmt) {
    switch (compFmt) {
        // ETC2 RGB/RGBA variants decompress to plain 8-bit RGBA.
        case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
            return VK_FORMAT_R8G8B8A8_UNORM;
        case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
            return VK_FORMAT_R8G8B8A8_SRGB;
        // EAC single/dual channel variants decompress to 16-bit channels.
        case VK_FORMAT_EAC_R11_UNORM_BLOCK:
            return VK_FORMAT_R16_UNORM;
        case VK_FORMAT_EAC_R11_SNORM_BLOCK:
            return VK_FORMAT_R16_SNORM;
        case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
            return VK_FORMAT_R16G16_UNORM;
        case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
            return VK_FORMAT_R16G16_SNORM;
        // ASTC either transcodes to BC3 (when the NewBc3 decoder is selected)
        // or fully decompresses to 8-bit RGBA.
        case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
        case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
        case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
        case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
        case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
        case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
        case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
            return GpuDecompressionPipelineManager::astcDecoder() == AstcDecoder::NewBc3
                       ? VK_FORMAT_BC3_UNORM_BLOCK
                       : VK_FORMAT_R8G8B8A8_UNORM;
        case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
        case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
        case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
        case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
        case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
        case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
        case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
            return GpuDecompressionPipelineManager::astcDecoder() == AstcDecoder::NewBc3
                       ? VK_FORMAT_BC3_SRGB_BLOCK
                       : VK_FORMAT_R8G8B8A8_SRGB;
        default:
            return compFmt;
    }
}
301 
// static
// Maps a compressed format to the uint format used to store its raw blocks in
// the per-mip "compressed mipmap" images: one uint texel per compressed block,
// with a texel size matching the block's bit size.
VkFormat CompressedImageInfo::getCompressedMipmapsFormat(VkFormat compFmt) {
    switch (compFmt) {
        // 64-bit blocks stored as four 16-bit uints.
        case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
            return VK_FORMAT_R16G16B16A16_UINT;
        // 64-bit blocks stored as two 32-bit uints.
        case VK_FORMAT_EAC_R11_UNORM_BLOCK:
        case VK_FORMAT_EAC_R11_SNORM_BLOCK:
            return VK_FORMAT_R32G32_UINT;
        // 128-bit blocks (ETC2 RGBA8, EAC RG11, all ASTC) stored as four
        // 32-bit uints.
        case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
        case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
        case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
        case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
        case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
        case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
        case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
        case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
        case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
        case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
        case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
        case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
        case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
        case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
        case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
        case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
        case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
            return VK_FORMAT_R32G32B32A32_UINT;
        default:
            return compFmt;
    }
}
350 
351 // static
needEmulatedAlpha(VkFormat format)352 bool CompressedImageInfo::needEmulatedAlpha(VkFormat format) {
353     switch (format) {
354         case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
355         case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
356             return true;
357         default:
358             return false;
359     }
360 }
361 
isEtc2() const362 bool CompressedImageInfo::isEtc2() const { return gfxstream::vk::isEtc2(mCompressedFormat); }
363 
isAstc() const364 bool CompressedImageInfo::isAstc() const { return gfxstream::vk::isAstc(mCompressedFormat); }
365 
getOutputCreateInfo(const VkImageCreateInfo & createInfo) const366 VkImageCreateInfo CompressedImageInfo::getOutputCreateInfo(
367     const VkImageCreateInfo& createInfo) const {
368     VkImageCreateInfo result = createInfo;
369     result.format = mOutputFormat;
370 
371     result.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
372                     // Needed for ASTC->BC3 transcoding so that we can create a BC3 image with
373                     // VK_IMAGE_USAGE_STORAGE_BIT
374                     VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
375 
376     if (!isCompressedFormat(mOutputFormat)) {
377         // Need to clear this flag since the application might have specified it, but it's invalid
378         // on non-compressed formats
379         result.flags &= ~VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT;
380     } else {
381         // Need to set this flag so that we can cast the output image into a non-compressed format
382         // so that the decompression shader can write to it.
383         result.flags |= VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT;
384     }
385 
386     result.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
387     return result;
388 }
389 
createCompressedMipmapImages(VulkanDispatch * vk,const VkImageCreateInfo & createInfo)390 void CompressedImageInfo::createCompressedMipmapImages(VulkanDispatch* vk,
391                                                        const VkImageCreateInfo& createInfo) {
392     if (!mCompressedMipmaps.empty()) {
393         return;
394     }
395 
396     VkImageCreateInfo createInfoCopy = createInfo;
397     createInfoCopy.format = mCompressedMipmapsFormat;
398     // Note: if you change the flags here, you must also change both versions of
399     // on_vkGetPhysicalDeviceImageFormatProperties in VkDecoderGlobalState
400     // TODO(gregschlom): Remove duplicated logic.
401     createInfoCopy.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
402     createInfoCopy.flags &= ~VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT;
403     createInfoCopy.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
404     createInfoCopy.mipLevels = 1;
405 
406     mCompressedMipmaps.resize(mMipLevels);
407     for (uint32_t i = 0; i < mMipLevels; ++i) {
408         createInfoCopy.extent = compressedMipmapExtent(i);
409         vk->vkCreateImage(mDevice, &createInfoCopy, nullptr, &mCompressedMipmaps[i]);
410     }
411 
412     // Compute the memory requirements for all the images (output image + compressed mipmaps)
413 
414     vk->vkGetImageMemoryRequirements(mDevice, mOutputImage, &mMemoryRequirements);
415     checkValidAlignment(mMemoryRequirements.alignment);
416     std::vector<VkMemoryRequirements> mipmapsMemReqs(mMipLevels);
417     for (size_t i = 0; i < mMipLevels; ++i) {
418         vk->vkGetImageMemoryRequirements(mDevice, mCompressedMipmaps[i], &mipmapsMemReqs[i]);
419         checkValidAlignment(mipmapsMemReqs[i].alignment);
420     }
421 
422     for (const auto& r : mipmapsMemReqs) {
423         // What we want here is the least common multiple of all the alignments. However, since
424         // alignments are always powers of 2, the lcm is simply the largest value.
425         if (r.alignment > mMemoryRequirements.alignment) {
426             mMemoryRequirements.alignment = r.alignment;
427         }
428         mMemoryRequirements.memoryTypeBits &= r.memoryTypeBits;
429     }
430 
431     // At this point, we have the following:
432     //   - mMemoryRequirements.size is the size of the output image
433     //   - mMemoryRequirements.alignment is the least common multiple of all alignments
434     //   - mMemoryRequirements.memoryTypeBits is the intersection of all the memoryTypeBits
435     // Now, compute the offsets of each mipmap image as well as the total memory size we need.
436     mMipmapOffsets.resize(mMipLevels);
437     for (size_t i = 0; i < mMipLevels; ++i) {
438         // This works because the alignment we request is the lcm of all alignments
439         mMipmapOffsets[i] =
440             nextAlignedOffset(mMemoryRequirements.size, mipmapsMemReqs[i].alignment);
441         mMemoryRequirements.size = mMipmapOffsets[i] + mipmapsMemReqs[i].size;
442     }
443 }
444 
initAstcCpuDecompression(VulkanDispatch * vk,VkPhysicalDevice physicalDevice)445 void CompressedImageInfo::initAstcCpuDecompression(VulkanDispatch* vk,
446                                                    VkPhysicalDevice physicalDevice) {
447     mAstcTexture = std::make_unique<AstcTexture>(vk, mDevice, physicalDevice, mExtent, mBlock.width,
448                                                  mBlock.height, &AstcCpuDecompressor::get());
449 }
450 
decompressIfNeeded(VulkanDispatch * vk,VkCommandBuffer commandBuffer,VkPipelineStageFlags srcStageMask,VkPipelineStageFlags dstStageMask,const VkImageMemoryBarrier & targetBarrier,std::vector<VkImageMemoryBarrier> & outputBarriers)451 bool CompressedImageInfo::decompressIfNeeded(VulkanDispatch* vk, VkCommandBuffer commandBuffer,
452                                              VkPipelineStageFlags srcStageMask,
453                                              VkPipelineStageFlags dstStageMask,
454                                              const VkImageMemoryBarrier& targetBarrier,
455                                              std::vector<VkImageMemoryBarrier>& outputBarriers) {
456     std::vector<VkImageMemoryBarrier> imageBarriers = getImageBarriers(targetBarrier);
457 
458     if (!imageWillBecomeReadable(targetBarrier)) {
459         // We're not going to read from the image, no need to decompress it.
460         // Apply the target barrier to the compressed mipmaps and the decompressed image.
461         outputBarriers.insert(outputBarriers.end(), imageBarriers.begin(), imageBarriers.end());
462         return false;
463     }
464 
465     VkResult result = initializeDecompressionPipeline(vk, mDevice);
466     if (result != VK_SUCCESS) {
467         WARN("Failed to initialize pipeline for texture decompression");
468         return false;
469     }
470 
471     // Transition the layout of all the compressed mipmaps so that the shader can read from them.
472     for (auto& barrier : imageBarriers) {
473         barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
474         barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
475     }
476 
477     // Transition the layout of the output image so that we can write to it.
478     imageBarriers.back().srcAccessMask = 0;
479     imageBarriers.back().oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
480     imageBarriers.back().dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
481     imageBarriers.back().newLayout = VK_IMAGE_LAYOUT_GENERAL;
482 
483     // Do the layout transitions
484     vk->vkCmdPipelineBarrier(commandBuffer, srcStageMask, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
485                              0, nullptr, 0, nullptr, imageBarriers.size(), imageBarriers.data());
486 
487     // Run the decompression shader
488     decompress(vk, commandBuffer, getImageSubresourceRange(targetBarrier.subresourceRange));
489 
490     // Finally, transition the layout of all images to match the target barrier.
491     for (auto& barrier : imageBarriers) {
492         barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
493         barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
494         barrier.dstAccessMask = targetBarrier.dstAccessMask;
495         barrier.newLayout = targetBarrier.newLayout;
496     }
497     // (adjust the last barrier since it's for the output image)
498     imageBarriers.back().srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
499 
500     // Do the layout transitions
501     vk->vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, dstStageMask, 0,
502                              0, nullptr, 0, nullptr, imageBarriers.size(), imageBarriers.data());
503 
504     return true;
505 }
506 
decompressOnCpu(VkCommandBuffer commandBuffer,uint8_t * srcAstcData,size_t astcDataSize,VkImage dstImage,VkImageLayout dstImageLayout,uint32_t regionCount,const VkBufferImageCopy * pRegions,const VkDecoderContext & context)507 void CompressedImageInfo::decompressOnCpu(VkCommandBuffer commandBuffer, uint8_t* srcAstcData,
508                                           size_t astcDataSize, VkImage dstImage,
509                                           VkImageLayout dstImageLayout, uint32_t regionCount,
510                                           const VkBufferImageCopy* pRegions,
511                                           const VkDecoderContext& context) {
512     mAstcTexture->on_vkCmdCopyBufferToImage(commandBuffer, srcAstcData, astcDataSize, dstImage,
513                                             dstImageLayout, regionCount, pRegions, context);
514 }
515 
decompressOnCpu(VkCommandBuffer commandBuffer,uint8_t * srcAstcData,size_t astcDataSize,const VkCopyBufferToImageInfo2 * pCopyBufferToImageInfo,const VkDecoderContext & context)516 void CompressedImageInfo::decompressOnCpu(VkCommandBuffer commandBuffer, uint8_t* srcAstcData, size_t astcDataSize,
517                          const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo, const VkDecoderContext& context) {
518     mAstcTexture->on_vkCmdCopyBufferToImage2(commandBuffer, srcAstcData, astcDataSize, pCopyBufferToImageInfo, context);
519 }
520 
getMemoryRequirements() const521 VkMemoryRequirements CompressedImageInfo::getMemoryRequirements() const {
522     return mMemoryRequirements;
523 }
524 
bindCompressedMipmapsMemory(VulkanDispatch * vk,VkDeviceMemory memory,VkDeviceSize memoryOffset)525 VkResult CompressedImageInfo::bindCompressedMipmapsMemory(VulkanDispatch* vk, VkDeviceMemory memory,
526                                                           VkDeviceSize memoryOffset) {
527     VkResult result = VK_SUCCESS;
528     for (size_t i = 0; i < mCompressedMipmaps.size(); i++) {
529         VkResult res = vk->vkBindImageMemory(mDevice, mCompressedMipmaps[i], memory,
530                                              memoryOffset + mMipmapOffsets[i]);
531         if (res != VK_SUCCESS) result = res;
532     }
533     return result;
534 }
535 
getBufferImageCopy(const VkBufferImageCopy & origRegion) const536 VkBufferImageCopy CompressedImageInfo::getBufferImageCopy(
537     const VkBufferImageCopy& origRegion) const {
538     VkBufferImageCopy region = origRegion;
539     uint32_t mipLevel = region.imageSubresource.mipLevel;
540     region.imageSubresource.mipLevel = 0;
541     region.bufferRowLength /= mBlock.width;
542     region.bufferImageHeight /= mBlock.height;
543     region.imageOffset.x /= mBlock.width;
544     region.imageOffset.y /= mBlock.height;
545     region.imageExtent = compressedMipmapPortion(region.imageExtent, mipLevel);
546     return region;
547 }
548 
getBufferImageCopy(const VkBufferImageCopy2 & origRegion) const549 VkBufferImageCopy2 CompressedImageInfo::getBufferImageCopy(
550     const VkBufferImageCopy2& origRegion) const {
551     VkBufferImageCopy2 region = origRegion;
552     uint32_t mipLevel = region.imageSubresource.mipLevel;
553     region.imageSubresource.mipLevel = 0;
554     region.bufferRowLength /= mBlock.width;
555     region.bufferImageHeight /= mBlock.height;
556     region.imageOffset.x /= mBlock.width;
557     region.imageOffset.y /= mBlock.height;
558     region.imageExtent = compressedMipmapPortion(region.imageExtent, mipLevel);
559     return region;
560 }
561 
562 // static
getCompressedMipmapsImageCopy(const VkImageCopy & origRegion,const CompressedImageInfo & srcImg,const CompressedImageInfo & dstImg,bool needEmulatedSrc,bool needEmulatedDst)563 VkImageCopy CompressedImageInfo::getCompressedMipmapsImageCopy(const VkImageCopy& origRegion,
564                                                                const CompressedImageInfo& srcImg,
565                                                                const CompressedImageInfo& dstImg,
566                                                                bool needEmulatedSrc,
567                                                                bool needEmulatedDst) {
568     VkImageCopy region = origRegion;
569     if (needEmulatedSrc) {
570         uint32_t mipLevel = region.srcSubresource.mipLevel;
571         region.srcSubresource.mipLevel = 0;
572         region.srcOffset.x /= srcImg.mBlock.width;
573         region.srcOffset.y /= srcImg.mBlock.height;
574         region.extent = srcImg.compressedMipmapPortion(region.extent, mipLevel);
575     }
576     if (needEmulatedDst) {
577         region.dstSubresource.mipLevel = 0;
578         region.dstOffset.x /= dstImg.mBlock.width;
579         region.dstOffset.y /= dstImg.mBlock.height;
580     }
581     return region;
582 }
583 
getCompressedMipmapsImageCopy(const VkImageCopy2 & origRegion,const CompressedImageInfo & srcImg,const CompressedImageInfo & dstImg,bool needEmulatedSrc,bool needEmulatedDst)584 VkImageCopy2 CompressedImageInfo::getCompressedMipmapsImageCopy(const VkImageCopy2& origRegion,
585                                                                 const CompressedImageInfo& srcImg,
586                                                                 const CompressedImageInfo& dstImg,
587                                                                 bool needEmulatedSrc,
588                                                                 bool needEmulatedDst) {
589     VkImageCopy2 region = origRegion;
590     if (needEmulatedSrc) {
591         uint32_t mipLevel = region.srcSubresource.mipLevel;
592         region.srcSubresource.mipLevel = 0;
593         region.srcOffset.x /= srcImg.mBlock.width;
594         region.srcOffset.y /= srcImg.mBlock.height;
595         region.extent = srcImg.compressedMipmapPortion(region.extent, mipLevel);
596     }
597     if (needEmulatedDst) {
598         region.dstSubresource.mipLevel = 0;
599         region.dstOffset.x /= dstImg.mBlock.width;
600         region.dstOffset.y /= dstImg.mBlock.height;
601     }
602     return region;
603 }
604 
destroy(VulkanDispatch * vk)605 void CompressedImageInfo::destroy(VulkanDispatch* vk) {
606     for (const auto& image : mCompressedMipmaps) {
607         vk->vkDestroyImage(mDevice, image, nullptr);
608     }
609     vk->vkDestroyDescriptorPool(mDevice, mDecompDescriptorPool, nullptr);
610     for (const auto& imageView : mCompressedMipmapsImageViews) {
611         vk->vkDestroyImageView(mDevice, imageView, nullptr);
612     }
613     for (const auto& imageView : mOutputImageViews) {
614         vk->vkDestroyImageView(mDevice, imageView, nullptr);
615     }
616     vk->vkDestroyImage(mDevice, mOutputImage, nullptr);
617 }
618 
getImageBarriers(const VkImageMemoryBarrier & srcBarrier)619 std::vector<VkImageMemoryBarrier> CompressedImageInfo::getImageBarriers(
620     const VkImageMemoryBarrier& srcBarrier) {
621     const VkImageSubresourceRange range = getImageSubresourceRange(srcBarrier.subresourceRange);
622 
623     std::vector<VkImageMemoryBarrier> imageBarriers;
624     imageBarriers.reserve(range.levelCount + 1);
625 
626     // Add the barriers for the compressed mipmaps
627     VkImageMemoryBarrier mipmapBarrier = srcBarrier;
628     mipmapBarrier.subresourceRange.baseMipLevel = 0;
629     mipmapBarrier.subresourceRange.levelCount = 1;
630     imageBarriers.insert(imageBarriers.begin(), range.levelCount, mipmapBarrier);
631     for (uint32_t j = 0; j < range.levelCount; j++) {
632         imageBarriers[j].image = mCompressedMipmaps[range.baseMipLevel + j];
633     }
634 
635     // Add a barrier for the output image
636     imageBarriers.push_back(srcBarrier);
637     imageBarriers.back().image = mOutputImage;
638 
639     return imageBarriers;
640 }
641 
getImageSubresourceRange(const VkImageSubresourceRange & range) const642 VkImageSubresourceRange CompressedImageInfo::getImageSubresourceRange(
643     const VkImageSubresourceRange& range) const {
644     VkImageSubresourceRange result = range;
645     if (result.levelCount == VK_REMAINING_MIP_LEVELS) {
646         result.levelCount = mMipLevels - range.baseMipLevel;
647     }
648     if (result.layerCount == VK_REMAINING_ARRAY_LAYERS) {
649         result.layerCount = mLayerCount - range.baseArrayLayer;
650     }
651     return result;
652 }
653 
initializeDecompressionPipeline(VulkanDispatch * vk,VkDevice device)654 VkResult CompressedImageInfo::initializeDecompressionPipeline(VulkanDispatch* vk, VkDevice device) {
655     if (mDecompPipelineInitialized) {
656         return VK_SUCCESS;
657     }
658 
659     mDecompPipeline = mPipelineManager->get(mCompressedFormat, mImageType);
660     if (mDecompPipeline == nullptr) {
661         ERR("Failed to initialize GPU decompression pipeline");
662         return VK_ERROR_INITIALIZATION_FAILED;
663     }
664 
665     VkDescriptorPoolSize poolSize = {
666         .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
667         .descriptorCount = 2 * mMipLevels,
668     };
669     VkDescriptorPoolCreateInfo dsPoolInfo = {
670         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
671         .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
672         .maxSets = mMipLevels,
673         .poolSizeCount = 1,
674         .pPoolSizes = &poolSize,
675     };
676     VkResult result =
677         vk->vkCreateDescriptorPool(device, &dsPoolInfo, nullptr, &mDecompDescriptorPool);
678     if (result != VK_SUCCESS) {
679         ERR("GPU decompression error. vkCreateDescriptorPool failed: %d", result);
680         return result;
681     }
682 
683     std::vector<VkDescriptorSetLayout> layouts(mMipLevels, mDecompPipeline->descriptorSetLayout());
684 
685     VkDescriptorSetAllocateInfo dsInfo = {
686         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
687         .descriptorPool = mDecompDescriptorPool,
688         .descriptorSetCount = mMipLevels,
689         .pSetLayouts = layouts.data(),
690     };
691     mDecompDescriptorSets.resize(mMipLevels);
692     result = vk->vkAllocateDescriptorSets(device, &dsInfo, mDecompDescriptorSets.data());
693     if (result != VK_SUCCESS) {
694         ERR("GPU decompression error. vkAllocateDescriptorSets failed: %d", result);
695         return result;
696     }
697 
698     VkFormat shaderFormat = getShaderFormat(mOutputFormat);
699     mCompressedMipmapsImageViews.resize(mMipLevels);
700     mOutputImageViews.resize(mMipLevels);
701 
702     VkDescriptorImageInfo compressedMipmapsDescriptorImageInfo = {.imageLayout =
703                                                                       VK_IMAGE_LAYOUT_GENERAL};
704     VkDescriptorImageInfo mDecompDescriptorImageInfo = {.imageLayout = VK_IMAGE_LAYOUT_GENERAL};
705     VkWriteDescriptorSet writeDescriptorSets[2] = {
706         {
707             .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
708             .dstBinding = 0,
709             .descriptorCount = 1,
710             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
711             .pImageInfo = &compressedMipmapsDescriptorImageInfo,
712         },
713         {
714             .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
715             .dstBinding = 1,
716             .descriptorCount = 1,
717             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
718             .pImageInfo = &mDecompDescriptorImageInfo,
719         }};
720 
721     for (uint32_t i = 0; i < mMipLevels; i++) {
722         mCompressedMipmapsImageViews[i] =
723             createDefaultImageView(vk, device, mCompressedMipmaps[i], mCompressedMipmapsFormat,
724                                    mImageType, 0, mLayerCount);
725         mOutputImageViews[i] = createDefaultImageView(vk, device, mOutputImage, shaderFormat,
726                                                       mImageType, i, mLayerCount);
727         compressedMipmapsDescriptorImageInfo.imageView = mCompressedMipmapsImageViews[i];
728         mDecompDescriptorImageInfo.imageView = mOutputImageViews[i];
729         writeDescriptorSets[0].dstSet = mDecompDescriptorSets[i];
730         writeDescriptorSets[1].dstSet = mDecompDescriptorSets[i];
731         vk->vkUpdateDescriptorSets(device, 2, writeDescriptorSets, 0, nullptr);
732     }
733 
734     mDecompPipelineInitialized = true;
735     return VK_SUCCESS;
736 }
737 
decompress(VulkanDispatch * vk,VkCommandBuffer commandBuffer,const VkImageSubresourceRange & range)738 void CompressedImageInfo::decompress(VulkanDispatch* vk, VkCommandBuffer commandBuffer,
739                                      const VkImageSubresourceRange& range) {
740     vk->vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
741                           mDecompPipeline->pipeline());
742     uint32_t dispatchZ = mExtent.depth == 1 ? range.layerCount : mExtent.depth;
743     bool perPixel = false;  // Whether the shader operates per compressed block or per pixel
744     if (isEtc2()) {
745         const Etc2PushConstant pushConstant = {
746             .compFormat = (uint32_t)mCompressedFormat,
747             .baseLayer = mExtent.depth == 1 ? range.baseArrayLayer : 0};
748         vk->vkCmdPushConstants(commandBuffer, mDecompPipeline->pipelineLayout(),
749                                VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pushConstant), &pushConstant);
750     } else if (isAstc()) {
751         uint32_t smallBlock = false;
752         switch (mCompressedFormat) {
753             case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
754             case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
755             case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
756             case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
757             case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
758             case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
759             case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
760             case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
761                 smallBlock = true;
762                 break;
763             default:
764                 break;
765         }
766         const AstcPushConstant pushConstant = {
767             .blockSize = {mBlock.width, mBlock.height},
768             .baseLayer = mExtent.depth == 1 ? range.baseArrayLayer : 0,
769             .smallBlock = smallBlock};
770         vk->vkCmdPushConstants(commandBuffer, mDecompPipeline->pipelineLayout(),
771                                VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pushConstant), &pushConstant);
772         // The old shader is per-block, the new shaders are per-pixel
773         perPixel = GpuDecompressionPipelineManager::astcDecoder() != AstcDecoder::Old;
774     }
775     for (uint32_t i = range.baseMipLevel; i < range.baseMipLevel + range.levelCount; i++) {
776         vk->vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
777                                     mDecompPipeline->pipelineLayout(), 0, 1,
778                                     mDecompDescriptorSets.data() + i, 0, nullptr);
779         VkExtent3D extent = perPixel ? mipmapExtent(i) : compressedMipmapExtent(i);
780         vk->vkCmdDispatch(commandBuffer, ceil_div(extent.width, 8), ceil_div(extent.height, 8),
781                           dispatchZ);
782     }
783 }
784 
mipmapExtent(uint32_t level) const785 VkExtent3D CompressedImageInfo::mipmapExtent(uint32_t level) const {
786     return {
787         .width = std::max<uint32_t>(mExtent.width >> level, 1),
788         .height = std::max<uint32_t>(mExtent.height >> level, 1),
789         .depth = std::max<uint32_t>(mExtent.depth >> level, 1),
790     };
791 }
792 
compressedMipmapExtent(uint32_t level) const793 VkExtent3D CompressedImageInfo::compressedMipmapExtent(uint32_t level) const {
794     VkExtent3D result = mipmapExtent(level);
795     result.width = ceil_div(result.width, mBlock.width);
796     result.height = ceil_div(result.height, mBlock.height);
797     return result;
798 }
799 
VkExtent3D CompressedImageInfo::compressedMipmapPortion(const VkExtent3D& origExtent,
                                                        uint32_t level) const {
    // Converts a pixel-sized extent into block units, clamping width/height to
    // the actual (block-unit) size of the given mip level.
    const VkExtent3D levelExtent = compressedMipmapExtent(level);
    VkExtent3D portion;
    portion.width = std::min(ceil_div(origExtent.width, mBlock.width), levelExtent.width);
    portion.height = std::min(ceil_div(origExtent.height, mBlock.height), levelExtent.height);
    // TODO(gregschlom): this is correct for 2DArrays, but incorrect for 3D images. We should
    // take the image type into account to do the right thing here. See also
    // https://android-review.git.corp.google.com/c/device/generic/vulkan-cereal/+/2458549/comment/cfc7480f_912dd378/
    portion.depth = origExtent.depth;
    return portion;
}
812 
813 }  // namespace vk
814 }  // namespace gfxstream
815