1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2016 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file vktSparseResourcesImageSparseResidency.cpp
21 * \brief Sparse partially resident images tests
22 *//*--------------------------------------------------------------------*/
23
24 #include "vktSparseResourcesBufferSparseBinding.hpp"
25 #include "vktSparseResourcesTestsUtil.hpp"
26 #include "vktSparseResourcesBase.hpp"
27 #include "vktTestCaseUtil.hpp"
28
29 #include "vkDefs.hpp"
30 #include "vkRef.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPlatform.hpp"
33 #include "vkPrograms.hpp"
34 #include "vkMemUtil.hpp"
35 #include "vkBuilderUtil.hpp"
36 #include "vkImageUtil.hpp"
37 #include "vkQueryUtil.hpp"
38 #include "vkTypeUtil.hpp"
39
40 #include "deUniquePtr.hpp"
41 #include "deStringUtil.hpp"
42
43 #include <string>
44 #include <vector>
45
46 using namespace vk;
47
48 namespace vkt
49 {
50 namespace sparse
51 {
52 namespace
53 {
54
getCoordStr(const ImageType imageType,const std::string & x,const std::string & y,const std::string & z)55 const std::string getCoordStr (const ImageType imageType,
56 const std::string& x,
57 const std::string& y,
58 const std::string& z)
59 {
60 switch (imageType)
61 {
62 case IMAGE_TYPE_1D:
63 case IMAGE_TYPE_BUFFER:
64 return x;
65
66 case IMAGE_TYPE_1D_ARRAY:
67 case IMAGE_TYPE_2D:
68 return "ivec2(" + x + "," + y + ")";
69
70 case IMAGE_TYPE_2D_ARRAY:
71 case IMAGE_TYPE_3D:
72 case IMAGE_TYPE_CUBE:
73 case IMAGE_TYPE_CUBE_ARRAY:
74 return "ivec3(" + x + "," + y + "," + z + ")";
75
76 default:
77 DE_ASSERT(false);
78 return "";
79 }
80 }
81
getNumUsedChannels(const tcu::TextureFormat & format)82 deUint32 getNumUsedChannels (const tcu::TextureFormat& format)
83 {
84 switch (format.order)
85 {
86 case tcu::TextureFormat::R: return 1;
87 case tcu::TextureFormat::A: return 1;
88 case tcu::TextureFormat::I: return 1;
89 case tcu::TextureFormat::L: return 1;
90 case tcu::TextureFormat::LA: return 2;
91 case tcu::TextureFormat::RG: return 2;
92 case tcu::TextureFormat::RA: return 2;
93 case tcu::TextureFormat::RGB: return 3;
94 case tcu::TextureFormat::RGBA: return 4;
95 case tcu::TextureFormat::ARGB: return 4;
96 case tcu::TextureFormat::BGR: return 3;
97 case tcu::TextureFormat::BGRA: return 4;
98 case tcu::TextureFormat::sR: return 1;
99 case tcu::TextureFormat::sRG: return 2;
100 case tcu::TextureFormat::sRGB: return 3;
101 case tcu::TextureFormat::sRGBA: return 4;
102 case tcu::TextureFormat::sBGR: return 3;
103 case tcu::TextureFormat::sBGRA: return 4;
104 case tcu::TextureFormat::D: return 1;
105 case tcu::TextureFormat::S: return 1;
106 case tcu::TextureFormat::DS: return 2;
107 default:
108 DE_ASSERT(DE_FALSE);
109 return 0;
110 }
111 }
112
alignedDivide(const VkExtent3D & extent,const VkExtent3D & divisor)113 tcu::UVec3 alignedDivide (const VkExtent3D& extent, const VkExtent3D& divisor)
114 {
115 tcu::UVec3 result;
116
117 result.x() = extent.width / divisor.width + ((extent.width % divisor.width) ? 1u : 0u);
118 result.y() = extent.height / divisor.height + ((extent.height % divisor.height) ? 1u : 0u);
119 result.z() = extent.depth / divisor.depth + ((extent.depth % divisor.depth) ? 1u : 0u);
120
121 return result;
122 }
123
// Picks a compute work-group size for the given dispatch grid, clamped to the
// minimum guaranteed Vulkan limits (128 invocations total, 128x128x64 per axis).
// Later axes are limited by the invocation budget already spent on earlier ones.
tcu::UVec3 computeWorkGroupSize (const tcu::UVec3& gridSize)
{
	const deUint32		maxInvocations	= 128u;
	const tcu::UVec3	maxAxisSize		= tcu::UVec3(128u, 128u, 64u);

	const deUint32 sizeX = std::min(std::min(gridSize.x(), maxAxisSize.x()), maxInvocations);
	const deUint32 sizeY = std::min(std::min(gridSize.y(), maxAxisSize.y()), maxInvocations / sizeX);
	const deUint32 sizeZ = std::min(std::min(gridSize.z(), maxAxisSize.z()), maxInvocations / (sizeX * sizeY));

	return tcu::UVec3(sizeX, sizeY, sizeZ);
}
135
// Test case for sparse partially-resident images: generates the compute shader
// that fills the image (initPrograms) and creates the instance that performs
// the sparse binding and verification (createInstance).
class ImageSparseResidencyCase : public TestCase
{
public:
					ImageSparseResidencyCase	(tcu::TestContext&			testCtx,
												 const std::string&			name,
												 const std::string&			description,
												 const ImageType			imageType,
												 const tcu::UVec3&			imageSize,
												 const tcu::TextureFormat&	format,
												 const glu::GLSLVersion		glslVersion);

	void			initPrograms				(SourceCollections&			sourceCollections) const;
	TestInstance*	createInstance				(Context&					context) const;

private:
	const ImageType				m_imageType;	// Dimensionality/layering of the tested image
	const tcu::UVec3			m_imageSize;	// Full image size in texels (incl. layers)
	const tcu::TextureFormat	m_format;		// Texel format of the tested image
	const glu::GLSLVersion		m_glslVersion;	// GLSL version used for the fill shader
};
156
// Stores the test parameters; no work is done until createInstance/initPrograms.
ImageSparseResidencyCase::ImageSparseResidencyCase (tcu::TestContext&			testCtx,
													const std::string&			name,
													const std::string&			description,
													const ImageType				imageType,
													const tcu::UVec3&			imageSize,
													const tcu::TextureFormat&	format,
													const glu::GLSLVersion		glslVersion)
	: TestCase				(testCtx, name, description)
	, m_imageType			(imageType)
	, m_imageSize			(imageSize)
	, m_format				(format)
	, m_glslVersion			(glslVersion)
{
}
171
172
initPrograms(SourceCollections & sourceCollections) const173 void ImageSparseResidencyCase::initPrograms (SourceCollections& sourceCollections) const
174 {
175 // Create compute program
176 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
177 const std::string imageTypeStr = getShaderImageType(m_format, m_imageType);
178 const std::string formatQualifierStr = getShaderImageFormatQualifier(m_format);
179 const std::string formatDataStr = getShaderImageDataType(m_format);
180 const tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
181 const tcu::UVec3 workGroupSize = computeWorkGroupSize(gridSize);
182
183 std::ostringstream src;
184 src << versionDecl << "\n"
185 << "layout (local_size_x = " << workGroupSize.x() << ", local_size_y = " << workGroupSize.y() << ", local_size_z = " << workGroupSize.z() << ") in; \n"
186 << "layout (binding = 0, " << formatQualifierStr << ") writeonly uniform highp " << imageTypeStr << " u_image;\n"
187 << "void main (void)\n"
188 << "{\n"
189 << " if( gl_GlobalInvocationID.x < " << gridSize.x() << " ) \n"
190 << " if( gl_GlobalInvocationID.y < " << gridSize.y() << " ) \n"
191 << " if( gl_GlobalInvocationID.z < " << gridSize.z() << " ) \n"
192 << " {\n"
193 << " imageStore(u_image, " << getCoordStr(m_imageType, "gl_GlobalInvocationID.x", "gl_GlobalInvocationID.y", "gl_GlobalInvocationID.z") << ","
194 << formatDataStr << "( int(gl_GlobalInvocationID.x) % 127, int(gl_GlobalInvocationID.y) % 127, int(gl_GlobalInvocationID.z) % 127, 1));\n"
195 << " }\n"
196 << "}\n";
197
198 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
199 }
200
// Test instance performing the actual sparse-residency run: binds memory to a
// subset of the image's sparse blocks, fills the image from a compute shader
// and verifies the readback in iterate().
class ImageSparseResidencyInstance : public SparseResourcesBaseInstance
{
public:
					ImageSparseResidencyInstance(Context&					context,
												 const ImageType			imageType,
												 const tcu::UVec3&			imageSize,
												 const tcu::TextureFormat&	format);

	tcu::TestStatus	iterate						(void);

private:
	const ImageType				m_imageType;	// Dimensionality/layering of the tested image
	const tcu::UVec3			m_imageSize;	// Full image size in texels (incl. layers)
	const tcu::TextureFormat	m_format;		// Texel format of the tested image
};
216
// Stores the test parameters; all Vulkan work happens in iterate().
ImageSparseResidencyInstance::ImageSparseResidencyInstance (Context&					context,
															const ImageType				imageType,
															const tcu::UVec3&			imageSize,
															const tcu::TextureFormat&	format)
	: SparseResourcesBaseInstance	(context)
	, m_imageType					(imageType)
	, m_imageSize					(imageSize)
	, m_format						(format)
{
}
227
iterate(void)228 tcu::TestStatus ImageSparseResidencyInstance::iterate (void)
229 {
230 const InstanceInterface& instance = m_context.getInstanceInterface();
231 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
232 const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
233 const VkPhysicalDeviceFeatures deviceFeatures = getPhysicalDeviceFeatures(instance, physicalDevice);
234
235 switch (mapImageType(m_imageType))
236 {
237 case VK_IMAGE_TYPE_2D:
238 {
239 if (deviceFeatures.sparseResidencyImage2D == false)
240 return tcu::TestStatus(QP_TEST_RESULT_NOT_SUPPORTED, "Sparse residency for 2D Image not supported");
241 }
242 break;
243 case VK_IMAGE_TYPE_3D:
244 {
245 if (deviceFeatures.sparseResidencyImage3D == false)
246 return tcu::TestStatus(QP_TEST_RESULT_NOT_SUPPORTED, "Sparse residency for 3D Image not supported");
247
248 }
249 break;
250 default:
251 return tcu::TestStatus(QP_TEST_RESULT_NOT_SUPPORTED, "Not supported image type");
252 };
253
254 // Check if the image format supports sparse operations
255 const std::vector<VkSparseImageFormatProperties> sparseImageFormatPropVec =
256 getPhysicalDeviceSparseImageFormatProperties(instance, physicalDevice, mapTextureFormat(m_format), mapImageType(m_imageType),
257 VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT, VK_IMAGE_TILING_OPTIMAL);
258
259 if (sparseImageFormatPropVec.size() == 0)
260 {
261 return tcu::TestStatus(QP_TEST_RESULT_NOT_SUPPORTED, "The image format does not support sparse operations");
262 }
263
264 const VkPhysicalDeviceProperties deviceProperties = getPhysicalDeviceProperties(instance, physicalDevice);
265
266 if (isImageSizeSupported(m_imageType, m_imageSize, deviceProperties.limits) == false)
267 {
268 return tcu::TestStatus(QP_TEST_RESULT_NOT_SUPPORTED, "Image size not supported for device");
269 }
270
271 QueueRequirementsVec queueRequirements;
272 queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
273 queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
274
275 // Create logical device supporting both sparse and compute queues
276 if (!createDeviceSupportingQueues(queueRequirements))
277 {
278 return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Could not create device supporting sparse and compute queue");
279 }
280
281 const VkPhysicalDeviceMemoryProperties deviceMemoryProperties = getPhysicalDeviceMemoryProperties(instance, physicalDevice);
282
283 // Create memory allocator for logical device
284 const de::UniquePtr<Allocator> allocator(new SimpleAllocator(deviceInterface, *m_logicalDevice, deviceMemoryProperties));
285
286 // Create queue supporting sparse binding operations
287 const Queue& sparseQueue = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
288
289 // Create queue supporting compute and transfer operations
290 const Queue& computeQueue = getQueue(VK_QUEUE_COMPUTE_BIT, 0);
291
292 VkImageCreateInfo imageCreateInfo;
293
294 imageCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; //VkStructureType sType;
295 imageCreateInfo.pNext = DE_NULL; //const void* pNext;
296 imageCreateInfo.flags = VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT; //VkImageCreateFlags flags;
297 imageCreateInfo.imageType = mapImageType(m_imageType); //VkImageType imageType;
298 imageCreateInfo.format = mapTextureFormat(m_format); //VkFormat format;
299 imageCreateInfo.extent = makeExtent3D(getLayerSize(m_imageType, m_imageSize)); //VkExtent3D extent;
300 imageCreateInfo.mipLevels = 1u; //deUint32 mipLevels;
301 imageCreateInfo.arrayLayers = getNumLayers(m_imageType, m_imageSize); //deUint32 arrayLayers;
302 imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; //VkSampleCountFlagBits samples;
303 imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; //VkImageTiling tiling;
304 imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //VkImageLayout initialLayout;
305 imageCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
306 VK_IMAGE_USAGE_STORAGE_BIT; //VkImageUsageFlags usage;
307 imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; //VkSharingMode sharingMode;
308 imageCreateInfo.queueFamilyIndexCount = 0u; //deUint32 queueFamilyIndexCount;
309 imageCreateInfo.pQueueFamilyIndices = DE_NULL; //const deUint32* pQueueFamilyIndices;
310
311 if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
312 {
313 imageCreateInfo.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
314 }
315
316 const deUint32 queueFamilyIndices[] = { sparseQueue.queueFamilyIndex, computeQueue.queueFamilyIndex };
317
318 if (sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex)
319 {
320 imageCreateInfo.sharingMode = VK_SHARING_MODE_CONCURRENT; //VkSharingMode sharingMode;
321 imageCreateInfo.queueFamilyIndexCount = 2u; //deUint32 queueFamilyIndexCount;
322 imageCreateInfo.pQueueFamilyIndices = queueFamilyIndices; //const deUint32* pQueueFamilyIndices;
323 }
324
325 // Create sparse image
326 const Unique<VkImage> sparseImage(createImage(deviceInterface, *m_logicalDevice, &imageCreateInfo));
327
328 // Get image general memory requirements
329 const VkMemoryRequirements imageMemoryRequirements = getImageMemoryRequirements(deviceInterface, *m_logicalDevice, *sparseImage);
330
331 if (imageMemoryRequirements.size > deviceProperties.limits.sparseAddressSpaceSize)
332 {
333 return tcu::TestStatus(QP_TEST_RESULT_NOT_SUPPORTED, "Required memory size for sparse resource exceeds device limits");
334 }
335
336 DE_ASSERT((imageMemoryRequirements.size % imageMemoryRequirements.alignment) == 0);
337
338 // Get image sparse memory requirements
339 deUint32 sparseMemoryReqCount = 0;
340
341 deviceInterface.getImageSparseMemoryRequirements(*m_logicalDevice, *sparseImage, &sparseMemoryReqCount, DE_NULL);
342
343 DE_ASSERT(sparseMemoryReqCount != 0);
344
345 std::vector<VkSparseImageMemoryRequirements> sparseImageMemoryRequirements;
346 sparseImageMemoryRequirements.resize(sparseMemoryReqCount);
347
348 deviceInterface.getImageSparseMemoryRequirements(*m_logicalDevice, *sparseImage, &sparseMemoryReqCount, &sparseImageMemoryRequirements[0]);
349
350 // Make sure the image type includes color aspect
351 deUint32 colorAspectIndex = NO_MATCH_FOUND;
352
353 for (deUint32 memoryReqNdx = 0; memoryReqNdx < sparseMemoryReqCount; ++memoryReqNdx)
354 {
355 if (sparseImageMemoryRequirements[memoryReqNdx].formatProperties.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
356 {
357 colorAspectIndex = memoryReqNdx;
358 break;
359 }
360 }
361
362 if (colorAspectIndex == NO_MATCH_FOUND)
363 {
364 return tcu::TestStatus(QP_TEST_RESULT_NOT_SUPPORTED, "Not supported image aspect - the test supports currently only VK_IMAGE_ASPECT_COLOR_BIT");
365 }
366
367 const VkSparseImageMemoryRequirements aspectRequirements = sparseImageMemoryRequirements[colorAspectIndex];
368 const VkImageAspectFlags aspectMask = aspectRequirements.formatProperties.aspectMask;
369 const VkExtent3D imageGranularity = aspectRequirements.formatProperties.imageGranularity;
370
371 DE_ASSERT((aspectRequirements.imageMipTailSize % imageMemoryRequirements.alignment) == 0);
372
373 typedef de::SharedPtr< Unique<VkDeviceMemory> > DeviceMemoryUniquePtr;
374
375 std::vector<VkSparseImageMemoryBind> imageResidencyMemoryBinds;
376 std::vector<VkSparseMemoryBind> imageMipTailMemoryBinds;
377 std::vector<DeviceMemoryUniquePtr> deviceMemUniquePtrVec;
378 const deUint32 memoryType = findMatchingMemoryType(deviceMemoryProperties, imageMemoryRequirements, MemoryRequirement::Any);
379
380 if (memoryType == NO_MATCH_FOUND)
381 {
382 return tcu::TestStatus(QP_TEST_RESULT_FAIL, "No matching memory type found");
383 }
384
385 // Bind device memory for each aspect
386 for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
387 {
388 for (deUint32 mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
389 {
390 const VkExtent3D mipExtent = mipLevelExtents(imageCreateInfo.extent, mipLevelNdx);
391 const tcu::UVec3 numSparseBinds = alignedDivide(mipExtent, imageGranularity);
392 const tcu::UVec3 lastBlockExtent = tcu::UVec3(mipExtent.width % imageGranularity.width ? mipExtent.width % imageGranularity.width : imageGranularity.width,
393 mipExtent.height % imageGranularity.height ? mipExtent.height % imageGranularity.height : imageGranularity.height,
394 mipExtent.depth % imageGranularity.depth ? mipExtent.depth % imageGranularity.depth : imageGranularity.depth );
395
396 for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
397 for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
398 for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
399 {
400 const deUint32 linearIndex = x + y*numSparseBinds.x() + z*numSparseBinds.x()*numSparseBinds.y() + layerNdx*numSparseBinds.x()*numSparseBinds.y()*numSparseBinds.z();
401
402 if (linearIndex % 2 == 1)
403 {
404 continue;
405 }
406
407 const VkMemoryAllocateInfo allocInfo =
408 {
409 VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType;
410 DE_NULL, // const void* pNext;
411 imageMemoryRequirements.alignment, // VkDeviceSize allocationSize;
412 memoryType, // deUint32 memoryTypeIndex;
413 };
414
415 VkDeviceMemory deviceMemory = 0;
416 VK_CHECK(deviceInterface.allocateMemory(*m_logicalDevice, &allocInfo, DE_NULL, &deviceMemory));
417
418 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(deviceMemory), Deleter<VkDeviceMemory>(deviceInterface, *m_logicalDevice, DE_NULL))));
419
420 VkOffset3D offset;
421 offset.x = x*imageGranularity.width;
422 offset.y = y*imageGranularity.height;
423 offset.z = z*imageGranularity.depth;
424
425 VkExtent3D extent;
426 extent.width = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : imageGranularity.width;
427 extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : imageGranularity.height;
428 extent.depth = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : imageGranularity.depth;
429
430 VkSparseImageMemoryBind imageMemoryBind;
431 imageMemoryBind.subresource.aspectMask = aspectMask;
432 imageMemoryBind.subresource.mipLevel = mipLevelNdx;
433 imageMemoryBind.subresource.arrayLayer = layerNdx;
434 imageMemoryBind.memory = deviceMemory;
435 imageMemoryBind.memoryOffset = 0u;
436 imageMemoryBind.flags = 0u;
437 imageMemoryBind.offset = offset;
438 imageMemoryBind.extent = extent;
439
440 imageResidencyMemoryBinds.push_back(imageMemoryBind);
441 }
442 }
443
444 if (!(aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
445 {
446 const VkMemoryAllocateInfo allocInfo =
447 {
448 VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType;
449 DE_NULL, // const void* pNext;
450 aspectRequirements.imageMipTailSize, // VkDeviceSize allocationSize;
451 memoryType, // deUint32 memoryTypeIndex;
452 };
453
454 VkDeviceMemory deviceMemory = 0;
455 VK_CHECK(deviceInterface.allocateMemory(*m_logicalDevice, &allocInfo, DE_NULL, &deviceMemory));
456
457 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(deviceMemory), Deleter<VkDeviceMemory>(deviceInterface, *m_logicalDevice, DE_NULL))));
458
459 VkSparseMemoryBind imageMipTailMemoryBind;
460
461 imageMipTailMemoryBind.resourceOffset = aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride;
462 imageMipTailMemoryBind.size = aspectRequirements.imageMipTailSize;
463 imageMipTailMemoryBind.memory = deviceMemory;
464 imageMipTailMemoryBind.memoryOffset = 0u;
465 imageMipTailMemoryBind.flags = 0u;
466
467 imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
468 }
469 }
470
471 if ((aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
472 {
473 const VkMemoryAllocateInfo allocInfo =
474 {
475 VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType;
476 DE_NULL, // const void* pNext;
477 aspectRequirements.imageMipTailSize, // VkDeviceSize allocationSize;
478 memoryType, // deUint32 memoryTypeIndex;
479 };
480
481 VkDeviceMemory deviceMemory = 0;
482 VK_CHECK(deviceInterface.allocateMemory(*m_logicalDevice, &allocInfo, DE_NULL, &deviceMemory));
483
484 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(deviceMemory), Deleter<VkDeviceMemory>(deviceInterface, *m_logicalDevice, DE_NULL))));
485
486 VkSparseMemoryBind imageMipTailMemoryBind;
487
488 imageMipTailMemoryBind.resourceOffset = aspectRequirements.imageMipTailOffset;
489 imageMipTailMemoryBind.size = aspectRequirements.imageMipTailSize;
490 imageMipTailMemoryBind.memory = deviceMemory;
491 imageMipTailMemoryBind.memoryOffset = 0u;
492 imageMipTailMemoryBind.flags = 0u;
493
494 imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
495 }
496
497 const Unique<VkSemaphore> imageMemoryBindSemaphore(makeSemaphore(deviceInterface, *m_logicalDevice));
498
499 VkBindSparseInfo bindSparseInfo =
500 {
501 VK_STRUCTURE_TYPE_BIND_SPARSE_INFO, //VkStructureType sType;
502 DE_NULL, //const void* pNext;
503 0u, //deUint32 waitSemaphoreCount;
504 DE_NULL, //const VkSemaphore* pWaitSemaphores;
505 0u, //deUint32 bufferBindCount;
506 DE_NULL, //const VkSparseBufferMemoryBindInfo* pBufferBinds;
507 0u, //deUint32 imageOpaqueBindCount;
508 DE_NULL, //const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
509 0u, //deUint32 imageBindCount;
510 DE_NULL, //const VkSparseImageMemoryBindInfo* pImageBinds;
511 1u, //deUint32 signalSemaphoreCount;
512 &imageMemoryBindSemaphore.get() //const VkSemaphore* pSignalSemaphores;
513 };
514
515 VkSparseImageMemoryBindInfo imageResidencyBindInfo;
516 VkSparseImageOpaqueMemoryBindInfo imageMipTailBindInfo;
517
518 if (imageResidencyMemoryBinds.size() > 0)
519 {
520 imageResidencyBindInfo.image = *sparseImage;
521 imageResidencyBindInfo.bindCount = static_cast<deUint32>(imageResidencyMemoryBinds.size());
522 imageResidencyBindInfo.pBinds = &imageResidencyMemoryBinds[0];
523
524 bindSparseInfo.imageBindCount = 1u;
525 bindSparseInfo.pImageBinds = &imageResidencyBindInfo;
526 }
527
528 if (imageMipTailMemoryBinds.size() > 0)
529 {
530 imageMipTailBindInfo.image = *sparseImage;
531 imageMipTailBindInfo.bindCount = static_cast<deUint32>(imageMipTailMemoryBinds.size());
532 imageMipTailBindInfo.pBinds = &imageMipTailMemoryBinds[0];
533
534 bindSparseInfo.imageOpaqueBindCount = 1u;
535 bindSparseInfo.pImageOpaqueBinds = &imageMipTailBindInfo;
536 }
537
538 // Submit sparse bind commands for execution
539 VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
540
541 // Create command buffer for compute and transfer oparations
542 const Unique<VkCommandPool> commandPool(makeCommandPool(deviceInterface, *m_logicalDevice, computeQueue.queueFamilyIndex));
543 const Unique<VkCommandBuffer> commandBuffer(makeCommandBuffer(deviceInterface, *m_logicalDevice, *commandPool));
544
545 // Start recording commands
546 beginCommandBuffer(deviceInterface, *commandBuffer);
547
548 // Create descriptor set layout
549 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
550 DescriptorSetLayoutBuilder()
551 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
552 .build(deviceInterface, *m_logicalDevice));
553
554 // Create and bind compute pipeline
555 const Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, *m_logicalDevice, m_context.getBinaryCollection().get("comp"), DE_NULL));
556 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, *m_logicalDevice, *descriptorSetLayout));
557 const Unique<VkPipeline> computePipeline(makeComputePipeline(deviceInterface, *m_logicalDevice, *pipelineLayout, *shaderModule));
558
559 deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
560
561 // Create and bind descriptor set
562 const Unique<VkDescriptorPool> descriptorPool(
563 DescriptorPoolBuilder()
564 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u)
565 .build(deviceInterface, *m_logicalDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
566
567 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(deviceInterface, *m_logicalDevice, *descriptorPool, *descriptorSetLayout));
568
569 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
570 const Unique<VkImageView> imageView(makeImageView(deviceInterface, *m_logicalDevice, *sparseImage, mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange));
571 const VkDescriptorImageInfo sparseImageInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
572
573 DescriptorSetUpdateBuilder()
574 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &sparseImageInfo)
575 .update(deviceInterface, *m_logicalDevice);
576
577 deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
578
579 const VkImageMemoryBarrier sparseImageLayoutChangeBarrier
580 = makeImageMemoryBarrier(
581 0u, VK_ACCESS_SHADER_WRITE_BIT,
582 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
583 *sparseImage, subresourceRange);
584
585 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &sparseImageLayoutChangeBarrier);
586
587 const tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
588 const tcu::UVec3 workGroupSize = computeWorkGroupSize(gridSize);
589
590 const deUint32 xWorkGroupCount = gridSize.x() / workGroupSize.x() + (gridSize.x() % workGroupSize.x() ? 1u : 0u);
591 const deUint32 yWorkGroupCount = gridSize.y() / workGroupSize.y() + (gridSize.y() % workGroupSize.y() ? 1u : 0u);
592 const deUint32 zWorkGroupCount = gridSize.z() / workGroupSize.z() + (gridSize.z() % workGroupSize.z() ? 1u : 0u);
593
594 const tcu::UVec3 maxComputeWorkGroupCount = tcu::UVec3(65535u, 65535u, 65535u);
595
596 if (maxComputeWorkGroupCount.x() < xWorkGroupCount ||
597 maxComputeWorkGroupCount.y() < yWorkGroupCount ||
598 maxComputeWorkGroupCount.z() < zWorkGroupCount)
599 {
600 return tcu::TestStatus(QP_TEST_RESULT_NOT_SUPPORTED, "Image size is not supported");
601 }
602
603 deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
604
605 const VkImageMemoryBarrier sparseImageTrasferBarrier
606 = makeImageMemoryBarrier(
607 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
608 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
609 *sparseImage, subresourceRange);
610
611 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &sparseImageTrasferBarrier);
612
613 const deUint32 imageSizeInBytes = getNumPixels(m_imageType, m_imageSize) * tcu::getPixelSize(m_format);
614 const VkBufferCreateInfo outputBufferCreateInfo = makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
615
616 const de::UniquePtr<Buffer> outputBuffer(new Buffer(deviceInterface, *m_logicalDevice, *allocator, outputBufferCreateInfo, MemoryRequirement::HostVisible));
617
618 const VkBufferImageCopy bufferImageCopy = makeBufferImageCopy(imageCreateInfo.extent, imageCreateInfo.arrayLayers);
619
620 deviceInterface.cmdCopyImageToBuffer(*commandBuffer, *sparseImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, outputBuffer->get(), 1u, &bufferImageCopy);
621
622 const VkBufferMemoryBarrier outputBufferHostReadBarrier
623 = makeBufferMemoryBarrier(
624 VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT,
625 outputBuffer->get(), 0u, imageSizeInBytes);
626
627 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferHostReadBarrier, 0u, DE_NULL);
628
629 // End recording commands
630 endCommandBuffer(deviceInterface, *commandBuffer);
631
632 // The stage at which execution is going to wait for finish of sparse binding operations
633 const VkPipelineStageFlags stageBits[] = { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };
634
635 // Submit commands for execution and wait for completion
636 submitCommandsAndWait(deviceInterface, *m_logicalDevice, computeQueue.queueHandle, *commandBuffer, 1u, &imageMemoryBindSemaphore.get(), stageBits);
637
638 // Retrieve data from buffer to host memory
639 const Allocation& allocation = outputBuffer->getAllocation();
640
641 invalidateMappedMemoryRange(deviceInterface, *m_logicalDevice, allocation.getMemory(), allocation.getOffset(), imageSizeInBytes);
642
643 const deUint8* outputData = static_cast<const deUint8*>(allocation.getHostPtr());
644 tcu::TestStatus testStatus = tcu::TestStatus::pass("Passed");
645
646 const tcu::ConstPixelBufferAccess pixelBuffer = tcu::ConstPixelBufferAccess(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputData);
647
648 // Validate results
649 if( aspectRequirements.imageMipTailFirstLod > 0u )
650 {
651 const VkExtent3D mipExtent = mipLevelExtents(imageCreateInfo.extent, 0u);
652 const tcu::UVec3 numSparseBinds = alignedDivide(mipExtent, imageGranularity);
653 const tcu::UVec3 lastBlockExtent = tcu::UVec3( mipExtent.width % imageGranularity.width ? mipExtent.width % imageGranularity.width : imageGranularity.width,
654 mipExtent.height % imageGranularity.height ? mipExtent.height % imageGranularity.height : imageGranularity.height,
655 mipExtent.depth % imageGranularity.depth ? mipExtent.depth % imageGranularity.depth : imageGranularity.depth);
656
657 for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
658 {
659 for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
660 for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
661 for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
662 {
663 VkExtent3D offset;
664 offset.width = x*imageGranularity.width;
665 offset.height = y*imageGranularity.height;
666 offset.depth = z*imageGranularity.depth + layerNdx*numSparseBinds.z()*imageGranularity.depth;
667
668 VkExtent3D extent;
669 extent.width = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : imageGranularity.width;
670 extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : imageGranularity.height;
671 extent.depth = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : imageGranularity.depth;
672
673 const deUint32 linearIndex = x + y*numSparseBinds.x() + z*numSparseBinds.x()*numSparseBinds.y() + layerNdx*numSparseBinds.x()*numSparseBinds.y()*numSparseBinds.z();
674
675 if (linearIndex % 2 == 0)
676 {
677 for (deUint32 offsetZ = offset.depth; offsetZ < offset.depth + extent.depth; ++offsetZ)
678 for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
679 for (deUint32 offsetX = offset.width; offsetX < offset.width + extent.width; ++offsetX)
680 {
681 const tcu::UVec4 referenceValue = tcu::UVec4(offsetX % 127u, offsetY % 127u, offsetZ % 127u, 1u);
682 const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
683
684 if (memcmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format)))
685 {
686 testStatus = tcu::TestStatus::fail("Failed");
687 goto verificationFinished;
688 }
689 }
690 }
691 else
692 {
693 if (deviceProperties.sparseProperties.residencyNonResidentStrict)
694 {
695 for (deUint32 offsetZ = offset.depth; offsetZ < offset.depth + extent.depth; ++offsetZ)
696 for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
697 for (deUint32 offsetX = offset.width; offsetX < offset.width + extent.width; ++offsetX)
698 {
699 const tcu::UVec4 referenceValue = tcu::UVec4(0u, 0u, 0u, 0u);
700 const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
701
702 if (memcmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format)))
703 {
704 testStatus = tcu::TestStatus::fail("Failed");
705 goto verificationFinished;
706 }
707 }
708 }
709 }
710 }
711 }
712 }
713 else
714 {
715 const VkExtent3D mipExtent = mipLevelExtents(imageCreateInfo.extent, 0u);
716
717 for (deUint32 offsetZ = 0u; offsetZ < mipExtent.depth * imageCreateInfo.arrayLayers; ++offsetZ)
718 for (deUint32 offsetY = 0u; offsetY < mipExtent.height; ++offsetY)
719 for (deUint32 offsetX = 0u; offsetX < mipExtent.width; ++offsetX)
720 {
721 const tcu::UVec4 referenceValue = tcu::UVec4(offsetX % 127u, offsetY % 127u, offsetZ % 127u, 1u);
722 const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
723
724 if (memcmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format)))
725 {
726 testStatus = tcu::TestStatus::fail("Failed");
727 goto verificationFinished;
728 }
729 }
730 }
731
732 verificationFinished:
733
734 // Wait for sparse queue to become idle
735 deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
736
737 return testStatus;
738 }
739
createInstance(Context & context) const740 TestInstance* ImageSparseResidencyCase::createInstance (Context& context) const
741 {
742 return new ImageSparseResidencyInstance(context, m_imageType, m_imageSize, m_format);
743 }
744
745 } // anonymous ns
746
createImageSparseResidencyTests(tcu::TestContext & testCtx)747 tcu::TestCaseGroup* createImageSparseResidencyTests (tcu::TestContext& testCtx)
748 {
749 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "image_sparse_residency", "Buffer Sparse Residency"));
750
751 static const deUint32 sizeCountPerImageType = 3u;
752
753 struct ImageParameters
754 {
755 ImageType imageType;
756 tcu::UVec3 imageSizes[sizeCountPerImageType];
757 };
758
759 static const ImageParameters imageParametersArray[] =
760 {
761 { IMAGE_TYPE_2D, { tcu::UVec3(512u, 256u, 1u), tcu::UVec3(1024u, 128u, 1u), tcu::UVec3(11u, 137u, 1u) } },
762 { IMAGE_TYPE_2D_ARRAY, { tcu::UVec3(512u, 256u, 6u), tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u, 137u, 3u) } },
763 { IMAGE_TYPE_CUBE, { tcu::UVec3(512u, 256u, 1u), tcu::UVec3(1024u, 128u, 1u), tcu::UVec3(11u, 137u, 1u) } },
764 { IMAGE_TYPE_CUBE_ARRAY, { tcu::UVec3(512u, 256u, 6u), tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u, 137u, 3u) } },
765 { IMAGE_TYPE_3D, { tcu::UVec3(512u, 256u, 16u), tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u, 137u, 3u) } }
766 };
767
768 static const tcu::TextureFormat formats[] =
769 {
770 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
771 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT16),
772 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT8),
773 tcu::TextureFormat(tcu::TextureFormat::RG, tcu::TextureFormat::SIGNED_INT32),
774 tcu::TextureFormat(tcu::TextureFormat::RG, tcu::TextureFormat::SIGNED_INT16),
775 tcu::TextureFormat(tcu::TextureFormat::RG, tcu::TextureFormat::SIGNED_INT8),
776 tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT32),
777 tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT16),
778 tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT8)
779 };
780
781 for (deInt32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray); ++imageTypeNdx)
782 {
783 const ImageType imageType = imageParametersArray[imageTypeNdx].imageType;
784 de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
785
786 for (deInt32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); ++formatNdx)
787 {
788 const tcu::TextureFormat& format = formats[formatNdx];
789 de::MovePtr<tcu::TestCaseGroup> formatGroup(new tcu::TestCaseGroup(testCtx, getShaderImageFormatQualifier(format).c_str(), ""));
790
791 for (deInt32 imageSizeNdx = 0; imageSizeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray[imageTypeNdx].imageSizes); ++imageSizeNdx)
792 {
793 const tcu::UVec3 imageSize = imageParametersArray[imageTypeNdx].imageSizes[imageSizeNdx];
794
795 std::ostringstream stream;
796 stream << imageSize.x() << "_" << imageSize.y() << "_" << imageSize.z();
797
798 formatGroup->addChild(new ImageSparseResidencyCase(testCtx, stream.str(), "", imageType, imageSize, format, glu::GLSL_VERSION_440));
799 }
800 imageTypeGroup->addChild(formatGroup.release());
801 }
802 testGroup->addChild(imageTypeGroup.release());
803 }
804
805 return testGroup.release();
806 }
807
808 } // sparse
809 } // vkt
810