/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file  vktSparseResourcesImageSparseResidency.cpp
 * \brief Sparse partially resident images tests
 *//*--------------------------------------------------------------------*/

#include "vktSparseResourcesBufferSparseBinding.hpp"
#include "vktSparseResourcesTestsUtil.hpp"
#include "vktSparseResourcesBase.hpp"
#include "vktTestCaseUtil.hpp"

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkImageUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"

#include "deUniquePtr.hpp"
#include "deStringUtil.hpp"

#include <string>
#include <vector>

using namespace vk;

namespace vkt
{
namespace sparse
{
namespace
{

const std::string getCoordStr (const ImageType    imageType,
                               const std::string& x,
                               const std::string& y,
                               const std::string& z)
{
    switch (imageType)
    {
        case IMAGE_TYPE_1D:
        case IMAGE_TYPE_BUFFER:
            return x;

        case IMAGE_TYPE_1D_ARRAY:
        case IMAGE_TYPE_2D:
            return "ivec2(" + x + "," + y + ")";

        case IMAGE_TYPE_2D_ARRAY:
        case IMAGE_TYPE_3D:
        case IMAGE_TYPE_CUBE:
        case IMAGE_TYPE_CUBE_ARRAY:
            return "ivec3(" + x + "," + y + "," + z + ")";

        default:
            DE_ASSERT(false);
            return "";
    }
}

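// Pick a work group size that covers the grid while staying within 128 total invocations and the
// (128, 128, 64) per-dimension sizes, which match the minimum guaranteed Vulkan compute limits.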
tcu::UVec3 computeWorkGroupSize (const tcu::UVec3& gridSize)
{
    const deUint32   maxComputeWorkGroupInvocations = 128u;
    const tcu::UVec3 maxComputeWorkGroupSize        = tcu::UVec3(128u, 128u, 64u);

    const deUint32 xWorkGroupSize = std::min(std::min(gridSize.x(), maxComputeWorkGroupSize.x()), maxComputeWorkGroupInvocations);
    const deUint32 yWorkGroupSize = std::min(std::min(gridSize.y(), maxComputeWorkGroupSize.y()), maxComputeWorkGroupInvocations / xWorkGroupSize);
    const deUint32 zWorkGroupSize = std::min(std::min(gridSize.z(), maxComputeWorkGroupSize.z()), maxComputeWorkGroupInvocations / (xWorkGroupSize * yWorkGroupSize));

    return tcu::UVec3(xWorkGroupSize, yWorkGroupSize, zWorkGroupSize);
}

class ImageSparseResidencyCase : public TestCase
{
public:
    ImageSparseResidencyCase (tcu::TestContext&         testCtx,
                              const std::string&        name,
                              const std::string&        description,
                              const ImageType           imageType,
                              const tcu::UVec3&         imageSize,
                              const tcu::TextureFormat& format,
                              const glu::GLSLVersion    glslVersion,
                              const bool                useDeviceGroups);

    void          initPrograms   (SourceCollections& sourceCollections) const;
    TestInstance* createInstance (Context& context) const;

private:
    const bool                m_useDeviceGroups;
    const ImageType           m_imageType;
    const tcu::UVec3          m_imageSize;
    const tcu::TextureFormat  m_format;
    const glu::GLSLVersion    m_glslVersion;
};

ImageSparseResidencyCase::ImageSparseResidencyCase (tcu::TestContext&         testCtx,
                                                    const std::string&        name,
                                                    const std::string&        description,
                                                    const ImageType           imageType,
                                                    const tcu::UVec3&         imageSize,
                                                    const tcu::TextureFormat& format,
                                                    const glu::GLSLVersion    glslVersion,
                                                    const bool                useDeviceGroups)
    : TestCase          (testCtx, name, description)
    , m_useDeviceGroups (useDeviceGroups)
    , m_imageType       (imageType)
    , m_imageSize       (imageSize)
    , m_format          (format)
    , m_glslVersion     (glslVersion)
{
}

void ImageSparseResidencyCase::initPrograms (SourceCollections& sourceCollections) const
{
    // Create compute program
    const char* const versionDecl        = glu::getGLSLVersionDeclaration(m_glslVersion);
    const std::string imageTypeStr       = getShaderImageType(m_format, m_imageType);
    const std::string formatQualifierStr = getShaderImageFormatQualifier(m_format);
    const std::string formatDataStr      = getShaderImageDataType(m_format);
    const tcu::UVec3  gridSize           = getShaderGridSize(m_imageType, m_imageSize);
    const tcu::UVec3  workGroupSize      = computeWorkGroupSize(gridSize);

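    // The shader writes a deterministic per-texel pattern (x % 127, y % 127, z % 127, 1); the host-side
    // validation in the test instance recomputes this pattern and compares it against the copied-back data.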
    std::ostringstream src;
    src << versionDecl << "\n"
        << "layout (local_size_x = " << workGroupSize.x() << ", local_size_y = " << workGroupSize.y() << ", local_size_z = " << workGroupSize.z() << ") in; \n"
        << "layout (binding = 0, " << formatQualifierStr << ") writeonly uniform highp " << imageTypeStr << " u_image;\n"
        << "void main (void)\n"
        << "{\n"
        << "    if( gl_GlobalInvocationID.x < " << gridSize.x() << " ) \n"
        << "    if( gl_GlobalInvocationID.y < " << gridSize.y() << " ) \n"
        << "    if( gl_GlobalInvocationID.z < " << gridSize.z() << " ) \n"
        << "    {\n"
        << "        imageStore(u_image, " << getCoordStr(m_imageType, "gl_GlobalInvocationID.x", "gl_GlobalInvocationID.y", "gl_GlobalInvocationID.z") << ","
        << formatDataStr << "( int(gl_GlobalInvocationID.x) % 127, int(gl_GlobalInvocationID.y) % 127, int(gl_GlobalInvocationID.z) % 127, 1));\n"
        << "    }\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

class ImageSparseResidencyInstance : public SparseResourcesBaseInstance
{
public:
    ImageSparseResidencyInstance (Context&                  context,
                                  const ImageType           imageType,
                                  const tcu::UVec3&         imageSize,
                                  const tcu::TextureFormat& format,
                                  const bool                useDeviceGroups);

    tcu::TestStatus iterate (void);

private:
    const bool                m_useDeviceGroups;
    const ImageType           m_imageType;
    const tcu::UVec3          m_imageSize;
    const tcu::TextureFormat  m_format;
};

ImageSparseResidencyInstance::ImageSparseResidencyInstance (Context&                  context,
                                                            const ImageType           imageType,
                                                            const tcu::UVec3&         imageSize,
                                                            const tcu::TextureFormat& format,
                                                            const bool                useDeviceGroups)
    : SparseResourcesBaseInstance (context, useDeviceGroups)
    , m_useDeviceGroups           (useDeviceGroups)
    , m_imageType                 (imageType)
    , m_imageSize                 (imageSize)
    , m_format                    (format)
{
}

tcu::TestStatus ImageSparseResidencyInstance::iterate (void)
{
    const InstanceInterface& instance = m_context.getInstanceInterface();

    {
        // Create logical device supporting both sparse and compute queues
        QueueRequirementsVec queueRequirements;
        queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
        queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));

        createDeviceSupportingQueues(queueRequirements);
    }

    VkImageCreateInfo               imageCreateInfo;
    VkSparseImageMemoryRequirements aspectRequirements;
    VkExtent3D                      imageGranularity;
    std::vector<DeviceMemorySp>     deviceMemUniquePtrVec;

    const DeviceInterface& deviceInterface = getDeviceInterface();
    const Queue&           sparseQueue     = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
    const Queue&           computeQueue    = getQueue(VK_QUEUE_COMPUTE_BIT, 0);

    // Go through all physical devices
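    // With device groups enabled, the image is a resource of firstDeviceID while its backing memory is
    // allocated from secondDeviceID (the next device in the group), so the sparse binding exercises peer
    // memory access; without device groups both IDs resolve to the same, single device.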
    for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
    {
        const deUint32 firstDeviceID  = physDevID;
        const deUint32 secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;

        const VkPhysicalDevice           physicalDevice           = getPhysicalDevice(firstDeviceID);
        const VkPhysicalDeviceProperties physicalDeviceProperties = getPhysicalDeviceProperties(instance, physicalDevice);

        // Check if image size does not exceed device limits
        if (!isImageSizeSupported(instance, physicalDevice, m_imageType, m_imageSize))
            TCU_THROW(NotSupportedError, "Image size not supported for device");

        // Check if device supports sparse operations for image type
        if (!checkSparseSupportForImageType(instance, physicalDevice, m_imageType))
            TCU_THROW(NotSupportedError, "Sparse residency for image type is not supported");

        imageCreateInfo.sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
        imageCreateInfo.pNext                 = DE_NULL;
        imageCreateInfo.flags                 = VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
        imageCreateInfo.imageType             = mapImageType(m_imageType);
        imageCreateInfo.format                = mapTextureFormat(m_format);
        imageCreateInfo.extent                = makeExtent3D(getLayerSize(m_imageType, m_imageSize));
        imageCreateInfo.mipLevels             = 1u;
        imageCreateInfo.arrayLayers           = getNumLayers(m_imageType, m_imageSize);
        imageCreateInfo.samples               = VK_SAMPLE_COUNT_1_BIT;
        imageCreateInfo.tiling                = VK_IMAGE_TILING_OPTIMAL;
        imageCreateInfo.initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED;
        imageCreateInfo.usage                 = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                                                VK_IMAGE_USAGE_STORAGE_BIT;
        imageCreateInfo.sharingMode           = VK_SHARING_MODE_EXCLUSIVE;
        imageCreateInfo.queueFamilyIndexCount = 0u;
        imageCreateInfo.pQueueFamilyIndices   = DE_NULL;

        if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
        {
            imageCreateInfo.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
        }

        // Check if device supports sparse operations for image format
        if (!checkSparseSupportForImageFormat(instance, physicalDevice, imageCreateInfo))
            TCU_THROW(NotSupportedError, "The image format does not support sparse operations");

        // Create sparse image
        const Unique<VkImage> sparseImage(createImage(deviceInterface, getDevice(), &imageCreateInfo));

        // Create sparse image memory bind semaphore
        const Unique<VkSemaphore> imageMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));

        {
            // Get image general memory requirements
            const VkMemoryRequirements imageMemoryRequirements = getImageMemoryRequirements(deviceInterface, getDevice(), *sparseImage);

            if (imageMemoryRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
                TCU_THROW(NotSupportedError, "Required memory size for sparse resource exceeds device limits");

            DE_ASSERT((imageMemoryRequirements.size % imageMemoryRequirements.alignment) == 0);

            // Get sparse image sparse memory requirements
            const std::vector<VkSparseImageMemoryRequirements> sparseMemoryRequirements = getImageSparseMemoryRequirements(deviceInterface, getDevice(), *sparseImage);

            DE_ASSERT(sparseMemoryRequirements.size() != 0);

            const deUint32 colorAspectIndex    = getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_COLOR_BIT);
            const deUint32 metadataAspectIndex = getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_METADATA_BIT);

            if (colorAspectIndex == NO_MATCH_FOUND)
                TCU_THROW(NotSupportedError, "Not supported image aspect - the test supports currently only VK_IMAGE_ASPECT_COLOR_BIT");

            aspectRequirements = sparseMemoryRequirements[colorAspectIndex];
            imageGranularity   = aspectRequirements.formatProperties.imageGranularity;

            const VkImageAspectFlags aspectMask = aspectRequirements.formatProperties.aspectMask;

            DE_ASSERT((aspectRequirements.imageMipTailSize % imageMemoryRequirements.alignment) == 0);

            std::vector<VkSparseImageMemoryBind> imageResidencyMemoryBinds;
            std::vector<VkSparseMemoryBind>      imageMipTailMemoryBinds;

            const deUint32 memoryType = findMatchingMemoryType(instance, getPhysicalDevice(secondDeviceID), imageMemoryRequirements, MemoryRequirement::Any);

            if (memoryType == NO_MATCH_FOUND)
                return tcu::TestStatus::fail("No matching memory type found");

            if (firstDeviceID != secondDeviceID)
            {
                VkPeerMemoryFeatureFlags peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
                const deUint32           heapIndex              = getHeapIndexForMemoryType(instance, getPhysicalDevice(secondDeviceID), memoryType);
                deviceInterface.getDeviceGroupPeerMemoryFeatures(getDevice(), heapIndex, firstDeviceID, secondDeviceID, &peerMemoryFeatureFlags);

                if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT)    == 0) ||
                    ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT) == 0))
                {
                    TCU_THROW(NotSupportedError, "Peer memory does not support COPY_SRC and GENERIC_DST");
                }
            }

            // Bind device memory for each aspect
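            // Only blocks with an even linear index get a memory binding below; odd-indexed blocks are
            // deliberately left unbound so the image is only partially resident.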
            for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
            {
                for (deUint32 mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
                {
                    const VkImageSubresource subresource     = { aspectMask, mipLevelNdx, layerNdx };
                    const VkExtent3D         mipExtent       = mipLevelExtents(imageCreateInfo.extent, mipLevelNdx);
                    const tcu::UVec3         numSparseBinds  = alignedDivide(mipExtent, imageGranularity);
                    const tcu::UVec3         lastBlockExtent = tcu::UVec3(mipExtent.width  % imageGranularity.width  ? mipExtent.width  % imageGranularity.width  : imageGranularity.width,
                                                                          mipExtent.height % imageGranularity.height ? mipExtent.height % imageGranularity.height : imageGranularity.height,
                                                                          mipExtent.depth  % imageGranularity.depth  ? mipExtent.depth  % imageGranularity.depth  : imageGranularity.depth);
                    for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
                    for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
                    for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
                    {
                        const deUint32 linearIndex = x + y*numSparseBinds.x() + z*numSparseBinds.x()*numSparseBinds.y() + layerNdx*numSparseBinds.x()*numSparseBinds.y()*numSparseBinds.z();

                        if (linearIndex % 2u == 1u)
                        {
                            continue;
                        }

                        VkOffset3D offset;
                        offset.x = x*imageGranularity.width;
                        offset.y = y*imageGranularity.height;
                        offset.z = z*imageGranularity.depth;

                        VkExtent3D extent;
                        extent.width  = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : imageGranularity.width;
                        extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : imageGranularity.height;
                        extent.depth  = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : imageGranularity.depth;

                        const VkSparseImageMemoryBind imageMemoryBind = makeSparseImageMemoryBind(deviceInterface, getDevice(),
                            imageMemoryRequirements.alignment, memoryType, subresource, offset, extent);

                        deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

                        imageResidencyMemoryBinds.push_back(imageMemoryBind);
                    }
                }

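                // With a single mip level, imageMipTailFirstLod == 0 means the whole level lives in the
                // mip tail; unless the format reports SINGLE_MIPTAIL, each array layer gets its own mip
                // tail region bound at imageMipTailOffset + layerNdx * imageMipTailStride.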
                if (!(aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
                {
                    const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
                        aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);

                    deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

                    imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
                }

                // Metadata
                if (metadataAspectIndex != NO_MATCH_FOUND)
                {
                    const VkSparseImageMemoryRequirements metadataAspectRequirements = sparseMemoryRequirements[metadataAspectIndex];

                    if (!(metadataAspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT))
                    {
                        const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
                            metadataAspectRequirements.imageMipTailSize, memoryType,
                            metadataAspectRequirements.imageMipTailOffset + layerNdx * metadataAspectRequirements.imageMipTailStride,
                            VK_SPARSE_MEMORY_BIND_METADATA_BIT);

                        deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

                        imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
                    }
                }
            }

            if ((aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
            {
                const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
                    aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset);

                deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

                imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
            }

            // Metadata
            if (metadataAspectIndex != NO_MATCH_FOUND)
            {
                const VkSparseImageMemoryRequirements metadataAspectRequirements = sparseMemoryRequirements[metadataAspectIndex];

                if ((metadataAspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT))
                {
                    const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
                        metadataAspectRequirements.imageMipTailSize, memoryType, metadataAspectRequirements.imageMipTailOffset,
                        VK_SPARSE_MEMORY_BIND_METADATA_BIT);

                    deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

                    imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
                }
            }

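            // queueBindSparse below signals imageMemoryBindSemaphore; the compute/transfer submission
            // later in this function waits on it at the compute-shader stage before touching the image.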
            const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo =
            {
                VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO_KHR,    //VkStructureType sType;
                DE_NULL,                                                //const void* pNext;
                firstDeviceID,                                          //deUint32 resourceDeviceIndex;
                secondDeviceID,                                         //deUint32 memoryDeviceIndex;
            };

            VkBindSparseInfo bindSparseInfo =
            {
                VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,                     //VkStructureType sType;
                m_useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL,  //const void* pNext;
                0u,                                                     //deUint32 waitSemaphoreCount;
                DE_NULL,                                                //const VkSemaphore* pWaitSemaphores;
                0u,                                                     //deUint32 bufferBindCount;
                DE_NULL,                                                //const VkSparseBufferMemoryBindInfo* pBufferBinds;
                0u,                                                     //deUint32 imageOpaqueBindCount;
                DE_NULL,                                                //const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
                0u,                                                     //deUint32 imageBindCount;
                DE_NULL,                                                //const VkSparseImageMemoryBindInfo* pImageBinds;
                1u,                                                     //deUint32 signalSemaphoreCount;
                &imageMemoryBindSemaphore.get()                         //const VkSemaphore* pSignalSemaphores;
            };

            VkSparseImageMemoryBindInfo       imageResidencyBindInfo;
            VkSparseImageOpaqueMemoryBindInfo imageMipTailBindInfo;

            if (imageResidencyMemoryBinds.size() > 0)
            {
                imageResidencyBindInfo.image     = *sparseImage;
                imageResidencyBindInfo.bindCount = static_cast<deUint32>(imageResidencyMemoryBinds.size());
                imageResidencyBindInfo.pBinds    = &imageResidencyMemoryBinds[0];

                bindSparseInfo.imageBindCount = 1u;
                bindSparseInfo.pImageBinds    = &imageResidencyBindInfo;
            }

            if (imageMipTailMemoryBinds.size() > 0)
            {
                imageMipTailBindInfo.image     = *sparseImage;
                imageMipTailBindInfo.bindCount = static_cast<deUint32>(imageMipTailMemoryBinds.size());
                imageMipTailBindInfo.pBinds    = &imageMipTailMemoryBinds[0];

                bindSparseInfo.imageOpaqueBindCount = 1u;
                bindSparseInfo.pImageOpaqueBinds    = &imageMipTailBindInfo;
            }

            // Submit sparse bind commands for execution
            VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
        }

        // Create command buffer for compute and transfer operations
        const Unique<VkCommandPool>   commandPool  (makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
        const Unique<VkCommandBuffer> commandBuffer(allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

        // Start recording commands
        beginCommandBuffer(deviceInterface, *commandBuffer);

        // Create descriptor set layout
        const Unique<VkDescriptorSetLayout> descriptorSetLayout(
            DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(deviceInterface, getDevice()));

        // Create and bind compute pipeline
        const Unique<VkShaderModule>   shaderModule   (createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
        const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
        const Unique<VkPipeline>       computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));

        deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

        // Create and bind descriptor set
        const Unique<VkDescriptorPool> descriptorPool(
            DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u)
            .build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

        const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));

        const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
        const Unique<VkImageView>     imageView       (makeImageView(deviceInterface, getDevice(), *sparseImage, mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange));
        const VkDescriptorImageInfo   sparseImageInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);

        DescriptorSetUpdateBuilder()
            .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &sparseImageInfo)
            .update(deviceInterface, getDevice());

        deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

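        // Transition the whole image to GENERAL for storage writes; when the sparse and compute queues
        // come from different families, the barrier also transfers ownership from the sparse queue family
        // to the compute queue family.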
        {
            const VkImageMemoryBarrier sparseImageLayoutChangeBarrier = makeImageMemoryBarrier
            (
                0u,
                VK_ACCESS_SHADER_WRITE_BIT,
                VK_IMAGE_LAYOUT_UNDEFINED,
                VK_IMAGE_LAYOUT_GENERAL,
                *sparseImage,
                subresourceRange,
                sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? sparseQueue.queueFamilyIndex  : VK_QUEUE_FAMILY_IGNORED,
                sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? computeQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED
            );

            deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &sparseImageLayoutChangeBarrier);
        }

        const tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);

        {
            const tcu::UVec3 workGroupSize = computeWorkGroupSize(gridSize);

            const deUint32 xWorkGroupCount = gridSize.x() / workGroupSize.x() + (gridSize.x() % workGroupSize.x() ? 1u : 0u);
            const deUint32 yWorkGroupCount = gridSize.y() / workGroupSize.y() + (gridSize.y() % workGroupSize.y() ? 1u : 0u);
            const deUint32 zWorkGroupCount = gridSize.z() / workGroupSize.z() + (gridSize.z() % workGroupSize.z() ? 1u : 0u);

            const tcu::UVec3 maxComputeWorkGroupCount = tcu::UVec3(65535u, 65535u, 65535u);

            if (maxComputeWorkGroupCount.x() < xWorkGroupCount ||
                maxComputeWorkGroupCount.y() < yWorkGroupCount ||
                maxComputeWorkGroupCount.z() < zWorkGroupCount)
            {
                TCU_THROW(NotSupportedError, "Image size is not supported");
            }

            deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
        }

        {
            const VkImageMemoryBarrier sparseImageTransferBarrier = makeImageMemoryBarrier
            (
                VK_ACCESS_SHADER_WRITE_BIT,
                VK_ACCESS_TRANSFER_READ_BIT,
                VK_IMAGE_LAYOUT_GENERAL,
                VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                *sparseImage,
                subresourceRange
            );

            deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &sparseImageTransferBarrier);
        }

        const deUint32                  imageSizeInBytes       = getNumPixels(m_imageType, m_imageSize) * tcu::getPixelSize(m_format);
        const VkBufferCreateInfo        outputBufferCreateInfo = makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
        const Unique<VkBuffer>          outputBuffer           (createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
        const de::UniquePtr<Allocation> outputBufferAlloc      (bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));

        {
            const VkBufferImageCopy bufferImageCopy = makeBufferImageCopy(imageCreateInfo.extent, imageCreateInfo.arrayLayers);

            deviceInterface.cmdCopyImageToBuffer(*commandBuffer, *sparseImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &bufferImageCopy);
        }

        {
            const VkBufferMemoryBarrier outputBufferHostReadBarrier = makeBufferMemoryBarrier
            (
                VK_ACCESS_TRANSFER_WRITE_BIT,
                VK_ACCESS_HOST_READ_BIT,
                *outputBuffer,
                0u,
                imageSizeInBytes
            );

            deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferHostReadBarrier, 0u, DE_NULL);
        }

        // End recording commands
        endCommandBuffer(deviceInterface, *commandBuffer);

        // The stage at which execution is going to wait for finish of sparse binding operations
        const VkPipelineStageFlags stageBits[] = { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };

        // Submit commands for execution and wait for completion
        submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &imageMemoryBindSemaphore.get(), stageBits,
            0, DE_NULL, m_useDeviceGroups, firstDeviceID);

        // Retrieve data from buffer to host memory
        invalidateAlloc(deviceInterface, getDevice(), *outputBufferAlloc);

        const deUint8*                    outputData  = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());
        const tcu::ConstPixelBufferAccess pixelBuffer = tcu::ConstPixelBufferAccess(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputData);

        // Wait for sparse queue to become idle
        //vsk fails:
        deviceInterface.queueWaitIdle(sparseQueue.queueHandle);

        // Validate results
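        // If the mip tail starts above LOD 0, level 0 was bound block-by-block above: even-indexed blocks
        // must contain the shader pattern, while odd-indexed (unbound) blocks must read back as zero when
        // residencyNonResidentStrict is reported. Otherwise level 0 lies entirely in the fully bound mip
        // tail and every texel must match the pattern.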
        if (aspectRequirements.imageMipTailFirstLod > 0u)
        {
            const VkExtent3D mipExtent       = mipLevelExtents(imageCreateInfo.extent, 0u);
            const tcu::UVec3 numSparseBinds  = alignedDivide(mipExtent, imageGranularity);
            const tcu::UVec3 lastBlockExtent = tcu::UVec3(mipExtent.width  % imageGranularity.width  ? mipExtent.width  % imageGranularity.width  : imageGranularity.width,
                                                          mipExtent.height % imageGranularity.height ? mipExtent.height % imageGranularity.height : imageGranularity.height,
                                                          mipExtent.depth  % imageGranularity.depth  ? mipExtent.depth  % imageGranularity.depth  : imageGranularity.depth);

            for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
            {
                for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
                for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
                for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
                {
                    VkExtent3D offset;
                    offset.width  = x*imageGranularity.width;
                    offset.height = y*imageGranularity.height;
                    offset.depth  = z*imageGranularity.depth + layerNdx*numSparseBinds.z()*imageGranularity.depth;

                    VkExtent3D extent;
                    extent.width  = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : imageGranularity.width;
                    extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : imageGranularity.height;
                    extent.depth  = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : imageGranularity.depth;

                    const deUint32 linearIndex = x + y*numSparseBinds.x() + z*numSparseBinds.x()*numSparseBinds.y() + layerNdx*numSparseBinds.x()*numSparseBinds.y()*numSparseBinds.z();

                    if (linearIndex % 2u == 0u)
                    {
                        for (deUint32 offsetZ = offset.depth;  offsetZ < offset.depth  + extent.depth;  ++offsetZ)
                        for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
                        for (deUint32 offsetX = offset.width;  offsetX < offset.width  + extent.width;  ++offsetX)
                        {
                            const tcu::UVec4 referenceValue = tcu::UVec4(offsetX % 127u, offsetY % 127u, offsetZ % 127u, 1u);
                            const tcu::UVec4 outputValue    = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);

                            if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
                                return tcu::TestStatus::fail("Failed");
                        }
                    }
                    else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
                    {
                        for (deUint32 offsetZ = offset.depth;  offsetZ < offset.depth  + extent.depth;  ++offsetZ)
                        for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
                        for (deUint32 offsetX = offset.width;  offsetX < offset.width  + extent.width;  ++offsetX)
                        {
                            const tcu::UVec4 referenceValue = tcu::UVec4(0u, 0u, 0u, 0u);
                            const tcu::UVec4 outputValue    = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);

                            if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
                                return tcu::TestStatus::fail("Failed");
                        }
                    }
                }
            }
        }
        else
        {
            const VkExtent3D mipExtent = mipLevelExtents(imageCreateInfo.extent, 0u);

            for (deUint32 offsetZ = 0u; offsetZ < mipExtent.depth * imageCreateInfo.arrayLayers; ++offsetZ)
            for (deUint32 offsetY = 0u; offsetY < mipExtent.height; ++offsetY)
            for (deUint32 offsetX = 0u; offsetX < mipExtent.width; ++offsetX)
            {
                const tcu::UVec4 referenceValue = tcu::UVec4(offsetX % 127u, offsetY % 127u, offsetZ % 127u, 1u);
                const tcu::UVec4 outputValue    = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);

                if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
                    return tcu::TestStatus::fail("Failed");
            }
        }
    }

    return tcu::TestStatus::pass("Passed");
}

TestInstance* ImageSparseResidencyCase::createInstance (Context& context) const
{
    return new ImageSparseResidencyInstance(context, m_imageType, m_imageSize, m_format, m_useDeviceGroups);
}

} // anonymous ns

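// Builds the test hierarchy: one group per image type, one sub-group per format qualifier, and one case
// per image size, optionally using device groups.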
tcu::TestCaseGroup* createImageSparseResidencyTestsCommon (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup> testGroup, const bool useDeviceGroup = false)
{
    static const deUint32 sizeCountPerImageType = 3u;

    struct ImageParameters
    {
        ImageType  imageType;
        tcu::UVec3 imageSizes[sizeCountPerImageType];
    };

    static const ImageParameters imageParametersArray[] =
    {
        { IMAGE_TYPE_2D,         { tcu::UVec3(512u, 256u, 1u),  tcu::UVec3(1024u, 128u, 1u), tcu::UVec3(11u,  137u, 1u) } },
        { IMAGE_TYPE_2D_ARRAY,   { tcu::UVec3(512u, 256u, 6u),  tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u,  137u, 3u) } },
        { IMAGE_TYPE_CUBE,       { tcu::UVec3(256u, 256u, 1u),  tcu::UVec3(128u,  128u, 1u), tcu::UVec3(137u, 137u, 1u) } },
        { IMAGE_TYPE_CUBE_ARRAY, { tcu::UVec3(256u, 256u, 6u),  tcu::UVec3(128u,  128u, 8u), tcu::UVec3(137u, 137u, 3u) } },
        { IMAGE_TYPE_3D,         { tcu::UVec3(512u, 256u, 16u), tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u,  137u, 3u) } }
    };

    static const tcu::TextureFormat formats[] =
    {
        tcu::TextureFormat(tcu::TextureFormat::R,    tcu::TextureFormat::SIGNED_INT32),
        tcu::TextureFormat(tcu::TextureFormat::R,    tcu::TextureFormat::SIGNED_INT16),
        tcu::TextureFormat(tcu::TextureFormat::R,    tcu::TextureFormat::SIGNED_INT8),
        tcu::TextureFormat(tcu::TextureFormat::RG,   tcu::TextureFormat::SIGNED_INT32),
        tcu::TextureFormat(tcu::TextureFormat::RG,   tcu::TextureFormat::SIGNED_INT16),
        tcu::TextureFormat(tcu::TextureFormat::RG,   tcu::TextureFormat::SIGNED_INT8),
        tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT32),
        tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT16),
        tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT8)
    };

    for (deInt32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray); ++imageTypeNdx)
    {
        const ImageType                 imageType = imageParametersArray[imageTypeNdx].imageType;
        de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));

        for (deInt32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); ++formatNdx)
        {
            const tcu::TextureFormat&       format = formats[formatNdx];
            de::MovePtr<tcu::TestCaseGroup> formatGroup(new tcu::TestCaseGroup(testCtx, getShaderImageFormatQualifier(format).c_str(), ""));

            for (deInt32 imageSizeNdx = 0; imageSizeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray[imageTypeNdx].imageSizes); ++imageSizeNdx)
            {
                const tcu::UVec3 imageSize = imageParametersArray[imageTypeNdx].imageSizes[imageSizeNdx];

                std::ostringstream stream;
                stream << imageSize.x() << "_" << imageSize.y() << "_" << imageSize.z();

                formatGroup->addChild(new ImageSparseResidencyCase(testCtx, stream.str(), "", imageType, imageSize, format, glu::GLSL_VERSION_440, useDeviceGroup));
            }
            imageTypeGroup->addChild(formatGroup.release());
        }
        testGroup->addChild(imageTypeGroup.release());
    }

    return testGroup.release();
}

tcu::TestCaseGroup* createImageSparseResidencyTests (tcu::TestContext& testCtx)
{
    de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "image_sparse_residency", "Image Sparse Residency"));
    return createImageSparseResidencyTestsCommon(testCtx, testGroup);
}

tcu::TestCaseGroup* createDeviceGroupImageSparseResidencyTests (tcu::TestContext& testCtx)
{
    de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "device_group_image_sparse_residency", "Image Sparse Residency"));
    return createImageSparseResidencyTestsCommon(testCtx, testGroup, true);
}

} // sparse
} // vkt