1 // Copyright 2022 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "CompressedImageInfo.h"
16
17 #include "aemu/base/ArraySize.h"
18 #include "vulkan/VkFormatUtils.h"
19 #include "vulkan/emulated_textures/shaders/DecompressionShaders.h"
20 #include "vulkan/VkFormatUtils.h"
21 #include "vulkan/vk_enum_string_helper.h"
22
23 namespace gfxstream {
24 namespace vk {
25 namespace {
26
27 using emugl::ABORT_REASON_OTHER;
28 using emugl::FatalError;
29
// Returns x / y, rounded up. E.g. ceil_div(7, 2) == 4
// Implemented as quotient + remainder check so that the intermediate value cannot
// overflow: the previous (x + y - 1) form wrapped around for large x (e.g.
// ceil_div(UINT32_MAX, 2) returned 0).
inline constexpr uint32_t ceil_div(uint32_t x, uint32_t y) { return x / y + (x % y != 0 ? 1 : 0); }
33
createDefaultImageView(VulkanDispatch * vk,VkDevice device,VkImage image,VkFormat format,VkImageType imageType,uint32_t mipLevel,uint32_t layerCount)34 VkImageView createDefaultImageView(VulkanDispatch* vk, VkDevice device, VkImage image,
35 VkFormat format, VkImageType imageType, uint32_t mipLevel,
36 uint32_t layerCount) {
37 VkImageViewCreateInfo imageViewInfo = {
38 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
39 .image = image,
40 .format = format,
41 .components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
42 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY},
43 .subresourceRange =
44 {
45 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
46 .baseMipLevel = mipLevel,
47 .levelCount = 1,
48 .baseArrayLayer = 0,
49 .layerCount = layerCount,
50 },
51 };
52
53 switch (imageType) {
54 case VK_IMAGE_TYPE_1D:
55 imageViewInfo.viewType = VK_IMAGE_VIEW_TYPE_1D_ARRAY;
56 break;
57 case VK_IMAGE_TYPE_2D:
58 imageViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
59 break;
60 case VK_IMAGE_TYPE_3D:
61 imageViewInfo.viewType = VK_IMAGE_VIEW_TYPE_3D;
62 break;
63 default:
64 imageViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
65 break;
66 }
67 VkImageView imageView;
68 VkResult result = vk->vkCreateImageView(device, &imageViewInfo, nullptr, &imageView);
69 if (result != VK_SUCCESS) {
70 WARN("GPU decompression: createDefaultImageView failed: %d", result);
71 return VK_NULL_HANDLE;
72 }
73 return imageView;
74 }
75
getBlockSize(VkFormat format)76 VkExtent2D getBlockSize(VkFormat format) {
77 switch (format) {
78 case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
79 case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
80 case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
81 case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
82 case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
83 case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
84 case VK_FORMAT_EAC_R11_UNORM_BLOCK:
85 case VK_FORMAT_EAC_R11_SNORM_BLOCK:
86 case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
87 case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
88 return {4, 4};
89 case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
90 case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
91 return {4, 4};
92 case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
93 case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
94 return {5, 4};
95 case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
96 case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
97 return {5, 5};
98 case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
99 case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
100 return {6, 5};
101 case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
102 case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
103 return {6, 6};
104 case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
105 case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
106 return {8, 5};
107 case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
108 case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
109 return {8, 6};
110 case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
111 case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
112 return {8, 8};
113 case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
114 case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
115 return {10, 5};
116 case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
117 case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
118 return {10, 6};
119 case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
120 case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
121 return {10, 8};
122 case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
123 case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
124 return {10, 10};
125 case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
126 case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
127 return {12, 10};
128 case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
129 case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
130 return {12, 12};
131 default:
132 return {1, 1};
133 }
134 }
135
isReadableImageLayout(VkImageLayout layout)136 bool isReadableImageLayout(VkImageLayout layout) {
137 switch (layout) {
138 case VK_IMAGE_LAYOUT_GENERAL:
139 case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
140 case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
141 case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL_KHR:
142 return true;
143 default:
144 return false;
145 }
146 }
147
isWritableImageLayout(VkImageLayout layout)148 bool isWritableImageLayout(VkImageLayout layout) {
149 switch (layout) {
150 case VK_IMAGE_LAYOUT_GENERAL:
151 case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
152 return true;
153 default:
154 return false;
155 }
156 }
157
158 // Returns whether a given memory barrier puts the image in a layout where it can be read from.
imageWillBecomeReadable(const VkImageMemoryBarrier & barrier)159 bool imageWillBecomeReadable(const VkImageMemoryBarrier& barrier) {
160 bool fromReadable = isReadableImageLayout(barrier.oldLayout);
161 bool toReadable = isReadableImageLayout(barrier.newLayout);
162 bool toWritable = isWritableImageLayout(barrier.newLayout);
163
164 // TODO(gregschlom) This doesn't take into account that the GENERAL layout is both readable and
165 // writable, so this warning could incorrectly trigger some times.
166 if (fromReadable && toWritable) {
167 WARN(
168 "Compressed image is being transitioned from readable (%s) to writable (%s). This may "
169 "lead to unexpected results.",
170 string_VkImageLayout(barrier.oldLayout), string_VkImageLayout(barrier.newLayout));
171 }
172
173 // If we're transitioning from UNDEFINED, the image content is undefined, so don't try to
174 // decompress it.
175 if (barrier.oldLayout == VK_IMAGE_LAYOUT_UNDEFINED) return false;
176
177 // TODO(gregschlom): Address the corner case of GENERAL, which is both readable and writable.
178 // For example, the image could be transitioned only once, from UNDEFINED to GENERAL.
179 // Currently, there is no way to perform decompression in this case.
180
181 return toReadable;
182 }
183
isCompressedFormat(VkFormat format)184 bool isCompressedFormat(VkFormat format) {
185 return gfxstream::vk::isAstc(format) || gfxstream::vk::isEtc2(format) ||
186 gfxstream::vk::isBc(format);
187 }
188
189 // Returns the format that the shader uses to write the output image
getShaderFormat(VkFormat outputFormat)190 VkFormat getShaderFormat(VkFormat outputFormat) {
191 switch (outputFormat) {
192 case VK_FORMAT_R16_UNORM:
193 case VK_FORMAT_R16_SNORM:
194 case VK_FORMAT_R16G16_UNORM:
195 case VK_FORMAT_R16G16_SNORM:
196 return outputFormat;
197 case VK_FORMAT_BC3_UNORM_BLOCK:
198 case VK_FORMAT_BC3_SRGB_BLOCK:
199 return VK_FORMAT_R32G32B32A32_UINT;
200 default:
201 return VK_FORMAT_R8G8B8A8_UINT;
202 }
203 }
204
205 // Returns the next memory offset on a given alignment.
206 // Will divide by zero if alignment is zero.
nextAlignedOffset(VkDeviceSize offset,VkDeviceSize alignment)207 VkDeviceSize nextAlignedOffset(VkDeviceSize offset, VkDeviceSize alignment) {
208 return ceil_div(offset, alignment) * alignment;
209 }
210
211 // Check that the alignment is valid:
212 // - sets the alignment to 1 if it's 0
213 // - aborts if it's not a power of 2
checkValidAlignment(VkDeviceSize & n)214 void checkValidAlignment(VkDeviceSize& n) {
215 if (n == 0) {
216 n = 1;
217 return;
218 }
219
220 // Check that the alignment is a power of 2
221 // http://www.graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2
222 if ((n & (n - 1))) {
223 GFXSTREAM_ABORT(FatalError(ABORT_REASON_OTHER)) << "vkGetImageMemoryRequirements returned non-power-of-two alignment: " + std::to_string(n);
224 }
225 }
226
227 } // namespace
228
// Minimal constructor: associates the info with a device only; no compressed-format
// emulation state is derived.
CompressedImageInfo::CompressedImageInfo(VkDevice device) : mDevice(device) {}
230
// Full constructor: derives, from the guest-requested create info, the decompressed
// output format, the uint format used to hold the raw compressed mip data, and the
// compression block footprint.
CompressedImageInfo::CompressedImageInfo(VkDevice device, const VkImageCreateInfo& createInfo,
                                         GpuDecompressionPipelineManager* pipelineManager)
    : mCompressedFormat(createInfo.format),
      mOutputFormat(getOutputFormat(mCompressedFormat)),
      mCompressedMipmapsFormat(getCompressedMipmapsFormat(mCompressedFormat)),
      mImageType(createInfo.imageType),
      mMipLevels(createInfo.mipLevels),
      mExtent(createInfo.extent),
      mBlock(getBlockSize(mCompressedFormat)),
      mLayerCount(createInfo.arrayLayers),
      mDevice(device),
      mPipelineManager(pipelineManager) {}
243
// static
// Maps a guest-visible compressed format to the format of the decompressed image the host
// actually samples from. Formats not emulated here are returned unchanged.
VkFormat CompressedImageInfo::getOutputFormat(VkFormat compFmt) {
    switch (compFmt) {
        // ETC2 RGB/RGBA variants decompress to 8-bit RGBA (UNORM or SRGB to match).
        case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
            return VK_FORMAT_R8G8B8A8_UNORM;
        case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
            return VK_FORMAT_R8G8B8A8_SRGB;
        // EAC single/dual channel decompresses to 16-bit normalized channels.
        case VK_FORMAT_EAC_R11_UNORM_BLOCK:
            return VK_FORMAT_R16_UNORM;
        case VK_FORMAT_EAC_R11_SNORM_BLOCK:
            return VK_FORMAT_R16_SNORM;
        case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
            return VK_FORMAT_R16G16_UNORM;
        case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
            return VK_FORMAT_R16G16_SNORM;
        // ASTC: either transcoded to BC3 (when the new BC3 decoder is active) or fully
        // decompressed to 8-bit RGBA.
        case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
        case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
        case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
        case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
        case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
        case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
        case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
            return GpuDecompressionPipelineManager::astcDecoder() == AstcDecoder::NewBc3
                       ? VK_FORMAT_BC3_UNORM_BLOCK
                       : VK_FORMAT_R8G8B8A8_UNORM;
        case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
        case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
        case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
        case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
        case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
        case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
        case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
            return GpuDecompressionPipelineManager::astcDecoder() == AstcDecoder::NewBc3
                       ? VK_FORMAT_BC3_SRGB_BLOCK
                       : VK_FORMAT_R8G8B8A8_SRGB;
        default:
            return compFmt;
    }
}
301
// static
// Maps a compressed format to the uint format used for the "compressed mipmaps" images that
// hold the raw block data the decompression shader reads. The chosen texel size matches the
// compressed block size (64-bit blocks -> 64-bit texels, 128-bit blocks -> 128-bit texels),
// so one texel carries one compressed block. Unrecognized formats pass through unchanged.
VkFormat CompressedImageInfo::getCompressedMipmapsFormat(VkFormat compFmt) {
    switch (compFmt) {
        case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
            return VK_FORMAT_R16G16B16A16_UINT;
        case VK_FORMAT_EAC_R11_UNORM_BLOCK:
        case VK_FORMAT_EAC_R11_SNORM_BLOCK:
            return VK_FORMAT_R32G32_UINT;
        case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
        case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
        case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
        case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
        case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
        case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
        case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
        case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
        case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
        case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
        case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
        case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
        case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
        case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
        case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
        case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
        case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
        case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
        case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
        case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
            return VK_FORMAT_R32G32B32A32_UINT;
        default:
            return compFmt;
    }
}
350
351 // static
needEmulatedAlpha(VkFormat format)352 bool CompressedImageInfo::needEmulatedAlpha(VkFormat format) {
353 switch (format) {
354 case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
355 case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
356 return true;
357 default:
358 return false;
359 }
360 }
361
// True if the guest-visible format of this image is an ETC2/EAC format.
bool CompressedImageInfo::isEtc2() const { return gfxstream::vk::isEtc2(mCompressedFormat); }
363
// True if the guest-visible format of this image is an ASTC format.
bool CompressedImageInfo::isAstc() const { return gfxstream::vk::isAstc(mCompressedFormat); }
365
// Builds the create info for the decompressed output image: same as the guest's request
// but with the decompressed format and the flags/usage the decompression shader requires.
VkImageCreateInfo CompressedImageInfo::getOutputCreateInfo(
    const VkImageCreateInfo& createInfo) const {
    VkImageCreateInfo outputInfo = createInfo;
    outputInfo.format = mOutputFormat;

    // MUTABLE_FORMAT lets us create differently-formatted views of the output image.
    // EXTENDED_USAGE is needed for ASTC->BC3 transcoding so that we can create a BC3 image
    // with VK_IMAGE_USAGE_STORAGE_BIT.
    outputInfo.flags |=
        VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;

    if (isCompressedFormat(mOutputFormat)) {
        // Need to set this flag so that we can cast the output image into a non-compressed
        // format so that the decompression shader can write to it.
        outputInfo.flags |= VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT;
    } else {
        // Need to clear this flag since the application might have specified it, but it's
        // invalid on non-compressed formats.
        outputInfo.flags &= ~VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT;
    }

    // The shader writes the output image through a storage-image binding.
    outputInfo.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
    return outputInfo;
}
389
// Creates one single-level image per mip to hold the raw compressed block data, then folds
// the memory requirements of the output image and all mip images into mMemoryRequirements
// and computes each mip image's offset within that combined allocation. Idempotent: a
// second call is a no-op once the mip images exist.
void CompressedImageInfo::createCompressedMipmapImages(VulkanDispatch* vk,
                                                       const VkImageCreateInfo& createInfo) {
    if (!mCompressedMipmaps.empty()) {
        return;
    }

    VkImageCreateInfo createInfoCopy = createInfo;
    createInfoCopy.format = mCompressedMipmapsFormat;
    // Note: if you change the flags here, you must also change both versions of
    // on_vkGetPhysicalDeviceImageFormatProperties in VkDecoderGlobalState
    // TODO(gregschlom): Remove duplicated logic.
    createInfoCopy.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
    createInfoCopy.flags &= ~VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT;
    createInfoCopy.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
    createInfoCopy.mipLevels = 1;

    // One image per mip level, each sized in compressed blocks rather than texels.
    // NOTE(review): vkCreateImage results are not checked here; a failed creation leaves a
    // null handle in mCompressedMipmaps — confirm whether that is handled downstream.
    mCompressedMipmaps.resize(mMipLevels);
    for (uint32_t i = 0; i < mMipLevels; ++i) {
        createInfoCopy.extent = compressedMipmapExtent(i);
        vk->vkCreateImage(mDevice, &createInfoCopy, nullptr, &mCompressedMipmaps[i]);
    }

    // Compute the memory requirements for all the images (output image + compressed mipmaps)

    vk->vkGetImageMemoryRequirements(mDevice, mOutputImage, &mMemoryRequirements);
    checkValidAlignment(mMemoryRequirements.alignment);
    std::vector<VkMemoryRequirements> mipmapsMemReqs(mMipLevels);
    for (size_t i = 0; i < mMipLevels; ++i) {
        vk->vkGetImageMemoryRequirements(mDevice, mCompressedMipmaps[i], &mipmapsMemReqs[i]);
        checkValidAlignment(mipmapsMemReqs[i].alignment);
    }

    for (const auto& r : mipmapsMemReqs) {
        // What we want here is the least common multiple of all the alignments. However, since
        // alignments are always powers of 2, the lcm is simply the largest value.
        if (r.alignment > mMemoryRequirements.alignment) {
            mMemoryRequirements.alignment = r.alignment;
        }
        mMemoryRequirements.memoryTypeBits &= r.memoryTypeBits;
    }

    // At this point, we have the following:
    //  - mMemoryRequirements.size is the size of the output image
    //  - mMemoryRequirements.alignment is the least common multiple of all alignments
    //  - mMemoryRequirements.memoryTypeBits is the intersection of all the memoryTypeBits
    // Now, compute the offsets of each mipmap image as well as the total memory size we need.
    mMipmapOffsets.resize(mMipLevels);
    for (size_t i = 0; i < mMipLevels; ++i) {
        // This works because the alignment we request is the lcm of all alignments
        mMipmapOffsets[i] =
            nextAlignedOffset(mMemoryRequirements.size, mipmapsMemReqs[i].alignment);
        mMemoryRequirements.size = mMipmapOffsets[i] + mipmapsMemReqs[i].size;
    }
}
444
// Sets up the CPU-side ASTC decompression path for this image, wrapping the shared
// AstcCpuDecompressor singleton with this image's extent and block dimensions.
void CompressedImageInfo::initAstcCpuDecompression(VulkanDispatch* vk,
                                                   VkPhysicalDevice physicalDevice) {
    mAstcTexture = std::make_unique<AstcTexture>(vk, mDevice, physicalDevice, mExtent, mBlock.width,
                                                 mBlock.height, &AstcCpuDecompressor::get());
}
450
// Called when the guest records a layout-transition barrier on this image. If the new layout
// means the image is about to be read, runs the GPU decompression shader now (reading the
// compressed mipmaps, writing the output image), bracketed by the required layout
// transitions. Otherwise just forwards equivalent barriers for the emulation images via
// outputBarriers. Returns true if decompression commands were recorded.
bool CompressedImageInfo::decompressIfNeeded(VulkanDispatch* vk, VkCommandBuffer commandBuffer,
                                             VkPipelineStageFlags srcStageMask,
                                             VkPipelineStageFlags dstStageMask,
                                             const VkImageMemoryBarrier& targetBarrier,
                                             std::vector<VkImageMemoryBarrier>& outputBarriers) {
    // One barrier per affected compressed mipmap, plus a final one for the output image.
    std::vector<VkImageMemoryBarrier> imageBarriers = getImageBarriers(targetBarrier);

    if (!imageWillBecomeReadable(targetBarrier)) {
        // We're not going to read from the image, no need to decompress it.
        // Apply the target barrier to the compressed mipmaps and the decompressed image.
        outputBarriers.insert(outputBarriers.end(), imageBarriers.begin(), imageBarriers.end());
        return false;
    }

    VkResult result = initializeDecompressionPipeline(vk, mDevice);
    if (result != VK_SUCCESS) {
        WARN("Failed to initialize pipeline for texture decompression");
        return false;
    }

    // Transition the layout of all the compressed mipmaps so that the shader can read from them.
    for (auto& barrier : imageBarriers) {
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
    }

    // Transition the layout of the output image so that we can write to it.
    // (the last barrier in the list is the output image's barrier; its previous content is
    // irrelevant, hence oldLayout = UNDEFINED)
    imageBarriers.back().srcAccessMask = 0;
    imageBarriers.back().oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    imageBarriers.back().dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
    imageBarriers.back().newLayout = VK_IMAGE_LAYOUT_GENERAL;

    // Do the layout transitions
    vk->vkCmdPipelineBarrier(commandBuffer, srcStageMask, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
                             0, nullptr, 0, nullptr, imageBarriers.size(), imageBarriers.data());

    // Run the decompression shader
    decompress(vk, commandBuffer, getImageSubresourceRange(targetBarrier.subresourceRange));

    // Finally, transition the layout of all images to match the target barrier.
    for (auto& barrier : imageBarriers) {
        barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
        barrier.dstAccessMask = targetBarrier.dstAccessMask;
        barrier.newLayout = targetBarrier.newLayout;
    }
    // (adjust the last barrier since it's for the output image)
    imageBarriers.back().srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;

    // Do the layout transitions
    vk->vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, dstStageMask, 0,
                             0, nullptr, 0, nullptr, imageBarriers.size(), imageBarriers.data());

    return true;
}
506
// CPU-side ASTC decompression for vkCmdCopyBufferToImage: forwards the guest's raw ASTC
// data to the AstcTexture helper, which decodes it and uploads the decoded texels.
// Requires initAstcCpuDecompression() to have been called (mAstcTexture must be set).
void CompressedImageInfo::decompressOnCpu(VkCommandBuffer commandBuffer, uint8_t* srcAstcData,
                                          size_t astcDataSize, VkImage dstImage,
                                          VkImageLayout dstImageLayout, uint32_t regionCount,
                                          const VkBufferImageCopy* pRegions,
                                          const VkDecoderContext& context) {
    mAstcTexture->on_vkCmdCopyBufferToImage(commandBuffer, srcAstcData, astcDataSize, dstImage,
                                            dstImageLayout, regionCount, pRegions, context);
}
515
// CPU-side ASTC decompression for the vkCmdCopyBufferToImage2 path; same contract as the
// VkBufferImageCopy overload above.
void CompressedImageInfo::decompressOnCpu(VkCommandBuffer commandBuffer, uint8_t* srcAstcData, size_t astcDataSize,
                                          const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo, const VkDecoderContext& context) {
    mAstcTexture->on_vkCmdCopyBufferToImage2(commandBuffer, srcAstcData, astcDataSize, pCopyBufferToImageInfo, context);
}
520
// Returns the combined memory requirements (output image + all compressed mipmap images)
// computed by createCompressedMipmapImages().
VkMemoryRequirements CompressedImageInfo::getMemoryRequirements() const {
    return mMemoryRequirements;
}
524
bindCompressedMipmapsMemory(VulkanDispatch * vk,VkDeviceMemory memory,VkDeviceSize memoryOffset)525 VkResult CompressedImageInfo::bindCompressedMipmapsMemory(VulkanDispatch* vk, VkDeviceMemory memory,
526 VkDeviceSize memoryOffset) {
527 VkResult result = VK_SUCCESS;
528 for (size_t i = 0; i < mCompressedMipmaps.size(); i++) {
529 VkResult res = vk->vkBindImageMemory(mDevice, mCompressedMipmaps[i], memory,
530 memoryOffset + mMipmapOffsets[i]);
531 if (res != VK_SUCCESS) result = res;
532 }
533 return result;
534 }
535
// Rewrites a guest buffer-to-image copy region so it targets the (single-level) compressed
// mipmap image: texel coordinates become block coordinates, and the mip level moves into
// the choice of target image (so the region's level is reset to 0).
VkBufferImageCopy CompressedImageInfo::getBufferImageCopy(
    const VkBufferImageCopy& origRegion) const {
    VkBufferImageCopy blockRegion = origRegion;
    const uint32_t sourceMipLevel = blockRegion.imageSubresource.mipLevel;
    blockRegion.imageSubresource.mipLevel = 0;
    blockRegion.bufferRowLength /= mBlock.width;
    blockRegion.bufferImageHeight /= mBlock.height;
    blockRegion.imageOffset.x /= mBlock.width;
    blockRegion.imageOffset.y /= mBlock.height;
    blockRegion.imageExtent = compressedMipmapPortion(blockRegion.imageExtent, sourceMipLevel);
    return blockRegion;
}
548
// VkBufferImageCopy2 variant of the region rewrite above: converts texel coordinates to
// block coordinates for the single-level compressed mipmap image.
VkBufferImageCopy2 CompressedImageInfo::getBufferImageCopy(
    const VkBufferImageCopy2& origRegion) const {
    VkBufferImageCopy2 blockRegion = origRegion;
    const uint32_t sourceMipLevel = blockRegion.imageSubresource.mipLevel;
    blockRegion.imageSubresource.mipLevel = 0;
    blockRegion.bufferRowLength /= mBlock.width;
    blockRegion.bufferImageHeight /= mBlock.height;
    blockRegion.imageOffset.x /= mBlock.width;
    blockRegion.imageOffset.y /= mBlock.height;
    blockRegion.imageExtent = compressedMipmapPortion(blockRegion.imageExtent, sourceMipLevel);
    return blockRegion;
}
561
562 // static
getCompressedMipmapsImageCopy(const VkImageCopy & origRegion,const CompressedImageInfo & srcImg,const CompressedImageInfo & dstImg,bool needEmulatedSrc,bool needEmulatedDst)563 VkImageCopy CompressedImageInfo::getCompressedMipmapsImageCopy(const VkImageCopy& origRegion,
564 const CompressedImageInfo& srcImg,
565 const CompressedImageInfo& dstImg,
566 bool needEmulatedSrc,
567 bool needEmulatedDst) {
568 VkImageCopy region = origRegion;
569 if (needEmulatedSrc) {
570 uint32_t mipLevel = region.srcSubresource.mipLevel;
571 region.srcSubresource.mipLevel = 0;
572 region.srcOffset.x /= srcImg.mBlock.width;
573 region.srcOffset.y /= srcImg.mBlock.height;
574 region.extent = srcImg.compressedMipmapPortion(region.extent, mipLevel);
575 }
576 if (needEmulatedDst) {
577 region.dstSubresource.mipLevel = 0;
578 region.dstOffset.x /= dstImg.mBlock.width;
579 region.dstOffset.y /= dstImg.mBlock.height;
580 }
581 return region;
582 }
583
// VkImageCopy2 variant of the region rewrite above; identical block-coordinate conversion
// for emulated source and/or destination images.
VkImageCopy2 CompressedImageInfo::getCompressedMipmapsImageCopy(const VkImageCopy2& origRegion,
                                                                const CompressedImageInfo& srcImg,
                                                                const CompressedImageInfo& dstImg,
                                                                bool needEmulatedSrc,
                                                                bool needEmulatedDst) {
    VkImageCopy2 blockRegion = origRegion;
    if (needEmulatedSrc) {
        const uint32_t srcLevel = blockRegion.srcSubresource.mipLevel;
        blockRegion.srcSubresource.mipLevel = 0;
        blockRegion.srcOffset.x /= srcImg.mBlock.width;
        blockRegion.srcOffset.y /= srcImg.mBlock.height;
        blockRegion.extent = srcImg.compressedMipmapPortion(blockRegion.extent, srcLevel);
    }
    if (needEmulatedDst) {
        blockRegion.dstSubresource.mipLevel = 0;
        blockRegion.dstOffset.x /= dstImg.mBlock.width;
        blockRegion.dstOffset.y /= dstImg.mBlock.height;
    }
    return blockRegion;
}
604
destroy(VulkanDispatch * vk)605 void CompressedImageInfo::destroy(VulkanDispatch* vk) {
606 for (const auto& image : mCompressedMipmaps) {
607 vk->vkDestroyImage(mDevice, image, nullptr);
608 }
609 vk->vkDestroyDescriptorPool(mDevice, mDecompDescriptorPool, nullptr);
610 for (const auto& imageView : mCompressedMipmapsImageViews) {
611 vk->vkDestroyImageView(mDevice, imageView, nullptr);
612 }
613 for (const auto& imageView : mOutputImageViews) {
614 vk->vkDestroyImageView(mDevice, imageView, nullptr);
615 }
616 vk->vkDestroyImage(mDevice, mOutputImage, nullptr);
617 }
618
getImageBarriers(const VkImageMemoryBarrier & srcBarrier)619 std::vector<VkImageMemoryBarrier> CompressedImageInfo::getImageBarriers(
620 const VkImageMemoryBarrier& srcBarrier) {
621 const VkImageSubresourceRange range = getImageSubresourceRange(srcBarrier.subresourceRange);
622
623 std::vector<VkImageMemoryBarrier> imageBarriers;
624 imageBarriers.reserve(range.levelCount + 1);
625
626 // Add the barriers for the compressed mipmaps
627 VkImageMemoryBarrier mipmapBarrier = srcBarrier;
628 mipmapBarrier.subresourceRange.baseMipLevel = 0;
629 mipmapBarrier.subresourceRange.levelCount = 1;
630 imageBarriers.insert(imageBarriers.begin(), range.levelCount, mipmapBarrier);
631 for (uint32_t j = 0; j < range.levelCount; j++) {
632 imageBarriers[j].image = mCompressedMipmaps[range.baseMipLevel + j];
633 }
634
635 // Add a barrier for the output image
636 imageBarriers.push_back(srcBarrier);
637 imageBarriers.back().image = mOutputImage;
638
639 return imageBarriers;
640 }
641
// Resolves VK_REMAINING_MIP_LEVELS / VK_REMAINING_ARRAY_LAYERS in a subresource range to
// the concrete counts for this image.
VkImageSubresourceRange CompressedImageInfo::getImageSubresourceRange(
    const VkImageSubresourceRange& range) const {
    VkImageSubresourceRange resolved = range;
    if (resolved.levelCount == VK_REMAINING_MIP_LEVELS) {
        resolved.levelCount = mMipLevels - resolved.baseMipLevel;
    }
    if (resolved.layerCount == VK_REMAINING_ARRAY_LAYERS) {
        resolved.layerCount = mLayerCount - resolved.baseArrayLayer;
    }
    return resolved;
}
653
// Lazily sets up everything the decompression shader needs for this image: fetches the
// shared compute pipeline for (format, image type), creates a descriptor pool and one
// descriptor set per mip level, creates per-level image views (compressed input at
// binding 0, output at binding 1), and writes the descriptor sets. Idempotent after the
// first successful call.
VkResult CompressedImageInfo::initializeDecompressionPipeline(VulkanDispatch* vk, VkDevice device) {
    if (mDecompPipelineInitialized) {
        return VK_SUCCESS;
    }

    // Pipelines are cached/shared across images by the manager.
    mDecompPipeline = mPipelineManager->get(mCompressedFormat, mImageType);
    if (mDecompPipeline == nullptr) {
        ERR("Failed to initialize GPU decompression pipeline");
        return VK_ERROR_INITIALIZATION_FAILED;
    }

    // Each of the mMipLevels descriptor sets holds two storage images (input + output).
    VkDescriptorPoolSize poolSize = {
        .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
        .descriptorCount = 2 * mMipLevels,
    };
    VkDescriptorPoolCreateInfo dsPoolInfo = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
        .maxSets = mMipLevels,
        .poolSizeCount = 1,
        .pPoolSizes = &poolSize,
    };
    VkResult result =
        vk->vkCreateDescriptorPool(device, &dsPoolInfo, nullptr, &mDecompDescriptorPool);
    if (result != VK_SUCCESS) {
        ERR("GPU decompression error. vkCreateDescriptorPool failed: %d", result);
        return result;
    }

    // All sets share the pipeline's single descriptor set layout.
    std::vector<VkDescriptorSetLayout> layouts(mMipLevels, mDecompPipeline->descriptorSetLayout());

    VkDescriptorSetAllocateInfo dsInfo = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        .descriptorPool = mDecompDescriptorPool,
        .descriptorSetCount = mMipLevels,
        .pSetLayouts = layouts.data(),
    };
    mDecompDescriptorSets.resize(mMipLevels);
    result = vk->vkAllocateDescriptorSets(device, &dsInfo, mDecompDescriptorSets.data());
    if (result != VK_SUCCESS) {
        ERR("GPU decompression error. vkAllocateDescriptorSets failed: %d", result);
        return result;
    }

    // The shader writes through a view of the output image using the uint "shader format",
    // not the output image's own (possibly compressed) format.
    VkFormat shaderFormat = getShaderFormat(mOutputFormat);
    mCompressedMipmapsImageViews.resize(mMipLevels);
    mOutputImageViews.resize(mMipLevels);

    // The two VkDescriptorImageInfo structs are reused across loop iterations; only the
    // imageView field changes before each vkUpdateDescriptorSets call.
    VkDescriptorImageInfo compressedMipmapsDescriptorImageInfo = {.imageLayout =
                                                                      VK_IMAGE_LAYOUT_GENERAL};
    VkDescriptorImageInfo mDecompDescriptorImageInfo = {.imageLayout = VK_IMAGE_LAYOUT_GENERAL};
    VkWriteDescriptorSet writeDescriptorSets[2] = {
        {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstBinding = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .pImageInfo = &compressedMipmapsDescriptorImageInfo,
        },
        {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstBinding = 1,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .pImageInfo = &mDecompDescriptorImageInfo,
        }};

    for (uint32_t i = 0; i < mMipLevels; i++) {
        // Input: the per-level compressed mipmap image (single level, so mipLevel = 0).
        mCompressedMipmapsImageViews[i] =
            createDefaultImageView(vk, device, mCompressedMipmaps[i], mCompressedMipmapsFormat,
                                   mImageType, 0, mLayerCount);
        // Output: level i of the output image, viewed with the shader's write format.
        mOutputImageViews[i] = createDefaultImageView(vk, device, mOutputImage, shaderFormat,
                                                      mImageType, i, mLayerCount);
        compressedMipmapsDescriptorImageInfo.imageView = mCompressedMipmapsImageViews[i];
        mDecompDescriptorImageInfo.imageView = mOutputImageViews[i];
        writeDescriptorSets[0].dstSet = mDecompDescriptorSets[i];
        writeDescriptorSets[1].dstSet = mDecompDescriptorSets[i];
        vk->vkUpdateDescriptorSets(device, 2, writeDescriptorSets, 0, nullptr);
    }

    mDecompPipelineInitialized = true;
    return VK_SUCCESS;
}
737
// Records the compute dispatches that decompress the requested mip levels / layers into the
// output image. Assumes the compressed mipmaps are in a shader-readable layout and the
// output image is shader-writable (decompressIfNeeded sets this up).
void CompressedImageInfo::decompress(VulkanDispatch* vk, VkCommandBuffer commandBuffer,
                                     const VkImageSubresourceRange& range) {
    vk->vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                          mDecompPipeline->pipeline());
    // For 3D images the Z dimension of the dispatch covers depth; otherwise it covers the
    // requested array layers.
    uint32_t dispatchZ = mExtent.depth == 1 ? range.layerCount : mExtent.depth;
    bool perPixel = false;  // Whether the shader operates per compressed block or per pixel
    if (isEtc2()) {
        const Etc2PushConstant pushConstant = {
            .compFormat = (uint32_t)mCompressedFormat,
            .baseLayer = mExtent.depth == 1 ? range.baseArrayLayer : 0};
        vk->vkCmdPushConstants(commandBuffer, mDecompPipeline->pipelineLayout(),
                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pushConstant), &pushConstant);
    } else if (isAstc()) {
        // "Small" blocks (both dimensions <= 6x5) take a different path in the shader.
        // NOTE(review): stored as uint32_t, presumably to match the push-constant layout —
        // confirm against AstcPushConstant's declaration.
        uint32_t smallBlock = false;
        switch (mCompressedFormat) {
            case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
            case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
            case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
            case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
            case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
            case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
            case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
            case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
                smallBlock = true;
                break;
            default:
                break;
        }
        const AstcPushConstant pushConstant = {
            .blockSize = {mBlock.width, mBlock.height},
            .baseLayer = mExtent.depth == 1 ? range.baseArrayLayer : 0,
            .smallBlock = smallBlock};
        vk->vkCmdPushConstants(commandBuffer, mDecompPipeline->pipelineLayout(),
                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pushConstant), &pushConstant);
        // The old shader is per-block, the new shaders are per-pixel
        perPixel = GpuDecompressionPipelineManager::astcDecoder() != AstcDecoder::Old;
    }
    // One dispatch per mip level, sized in texels (per-pixel shaders) or in compressed
    // blocks (per-block shaders), with 8x8 workgroup granularity.
    for (uint32_t i = range.baseMipLevel; i < range.baseMipLevel + range.levelCount; i++) {
        vk->vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                    mDecompPipeline->pipelineLayout(), 0, 1,
                                    mDecompDescriptorSets.data() + i, 0, nullptr);
        VkExtent3D extent = perPixel ? mipmapExtent(i) : compressedMipmapExtent(i);
        vk->vkCmdDispatch(commandBuffer, ceil_div(extent.width, 8), ceil_div(extent.height, 8),
                          dispatchZ);
    }
}
784
mipmapExtent(uint32_t level) const785 VkExtent3D CompressedImageInfo::mipmapExtent(uint32_t level) const {
786 return {
787 .width = std::max<uint32_t>(mExtent.width >> level, 1),
788 .height = std::max<uint32_t>(mExtent.height >> level, 1),
789 .depth = std::max<uint32_t>(mExtent.depth >> level, 1),
790 };
791 }
792
compressedMipmapExtent(uint32_t level) const793 VkExtent3D CompressedImageInfo::compressedMipmapExtent(uint32_t level) const {
794 VkExtent3D result = mipmapExtent(level);
795 result.width = ceil_div(result.width, mBlock.width);
796 result.height = ceil_div(result.height, mBlock.height);
797 return result;
798 }
799
// Converts a texel-space extent within mip level `level` into block units, clamped to the
// level's total block extent.
VkExtent3D CompressedImageInfo::compressedMipmapPortion(const VkExtent3D& origExtent,
                                                        uint32_t level) const {
    VkExtent3D maxExtent = compressedMipmapExtent(level);
    return {
        .width = std::min(ceil_div(origExtent.width, mBlock.width), maxExtent.width),
        .height = std::min(ceil_div(origExtent.height, mBlock.height), maxExtent.height),
        // TODO(gregschlom): this is correct for 2DArrays, but incorrect for 3D images. We should
        // take the image type into account to do the right thing here. See also
        // https://android-review.git.corp.google.com/c/device/generic/vulkan-cereal/+/2458549/comment/cfc7480f_912dd378/
        .depth = origExtent.depth,
    };
}
812
813 } // namespace vk
814 } // namespace gfxstream
815