/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file  vktSparseResourcesImageSparseResidency.cpp
 * \brief Sparse partially resident image tests
 *//*--------------------------------------------------------------------*/

#include "vktSparseResourcesImageSparseResidency.hpp"
#include "vktSparseResourcesTestsUtil.hpp"
#include "vktSparseResourcesBase.hpp"
#include "vktTestCaseUtil.hpp"

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkImageUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"

#include "deUniquePtr.hpp"
#include "deStringUtil.hpp"

#include <string>
#include <vector>

using namespace vk;

namespace vkt
{
namespace sparse
{
namespace
{

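// Builds the GLSL coordinate expression used by imageStore() for the given
// image dimensionality: a plain int for 1D/buffer images, an ivec2 for 2D and
// 1D-array images, and an ivec3 for 3D, 2D-array and cube(-array) images.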
const std::string getCoordStr	(const ImageType	imageType,
								 const std::string&	x,
								 const std::string&	y,
								 const std::string&	z)
{
	switch (imageType)
	{
		case IMAGE_TYPE_1D:
		case IMAGE_TYPE_BUFFER:
			return x;

		case IMAGE_TYPE_1D_ARRAY:
		case IMAGE_TYPE_2D:
			return "ivec2(" + x + "," + y + ")";

		case IMAGE_TYPE_2D_ARRAY:
		case IMAGE_TYPE_3D:
		case IMAGE_TYPE_CUBE:
		case IMAGE_TYPE_CUBE_ARRAY:
			return "ivec3(" + x + "," + y + "," + z + ")";

		default:
			DE_ASSERT(false);
			return "";
	}
}

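// Chooses a compute work group size that covers the grid while staying within
// limits every Vulkan implementation is required to support: at least 128
// invocations per work group and a per-dimension size of at least
// (128, 128, 64). Each dimension is additionally capped so that the product
// of all three never exceeds the 128-invocation budget.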
tcu::UVec3 computeWorkGroupSize (const tcu::UVec3& gridSize)
{
	const deUint32		maxComputeWorkGroupInvocations	= 128u;
	const tcu::UVec3	maxComputeWorkGroupSize			= tcu::UVec3(128u, 128u, 64u);

	const deUint32 xWorkGroupSize = std::min(std::min(gridSize.x(), maxComputeWorkGroupSize.x()), maxComputeWorkGroupInvocations);
	const deUint32 yWorkGroupSize = std::min(std::min(gridSize.y(), maxComputeWorkGroupSize.y()), maxComputeWorkGroupInvocations /  xWorkGroupSize);
	const deUint32 zWorkGroupSize = std::min(std::min(gridSize.z(), maxComputeWorkGroupSize.z()), maxComputeWorkGroupInvocations / (xWorkGroupSize*yWorkGroupSize));

	return tcu::UVec3(xWorkGroupSize, yWorkGroupSize, zWorkGroupSize);
}

class ImageSparseResidencyCase : public TestCase
{
public:
					ImageSparseResidencyCase	(tcu::TestContext&			testCtx,
												 const std::string&			name,
												 const std::string&			description,
												 const ImageType			imageType,
												 const tcu::UVec3&			imageSize,
												 const tcu::TextureFormat&	format,
												 const glu::GLSLVersion		glslVersion,
												 const bool					useDeviceGroups);

	void			initPrograms				(SourceCollections&			sourceCollections) const;
	TestInstance*	createInstance				(Context&					context) const;

private:
	const bool					m_useDeviceGroups;
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;
	const glu::GLSLVersion		m_glslVersion;
};

ImageSparseResidencyCase::ImageSparseResidencyCase (tcu::TestContext&			testCtx,
													const std::string&			name,
													const std::string&			description,
													const ImageType				imageType,
													const tcu::UVec3&			imageSize,
													const tcu::TextureFormat&	format,
													const glu::GLSLVersion		glslVersion,
													const bool					useDeviceGroups)
	: TestCase				(testCtx, name, description)
	, m_useDeviceGroups		(useDeviceGroups)
	, m_imageType			(imageType)
	, m_imageSize			(imageSize)
	, m_format				(format)
	, m_glslVersion			(glslVersion)
{
}

void ImageSparseResidencyCase::initPrograms (SourceCollections& sourceCollections) const
{
	// Create compute program
	const char* const versionDecl			= glu::getGLSLVersionDeclaration(m_glslVersion);
	const std::string imageTypeStr			= getShaderImageType(m_format, m_imageType);
	const std::string formatQualifierStr	= getShaderImageFormatQualifier(m_format);
	const std::string formatDataStr			= getShaderImageDataType(m_format);
	const tcu::UVec3  gridSize				= getShaderGridSize(m_imageType, m_imageSize);
	const tcu::UVec3  workGroupSize			= computeWorkGroupSize(gridSize);

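	// The generated shader writes a deterministic pattern to every texel:
	// (x % 127, y % 127, z % 127, 1) for global invocation (x, y, z). The
	// bounds checks are needed because the dispatched grid is rounded up to
	// whole work groups and may exceed the image size. The same pattern is
	// recomputed on the host below to validate the image contents.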
	std::ostringstream src;
	src << versionDecl << "\n"
		<< "layout (local_size_x = " << workGroupSize.x() << ", local_size_y = " << workGroupSize.y() << ", local_size_z = " << workGroupSize.z() << ") in;\n"
		<< "layout (binding = 0, " << formatQualifierStr << ") writeonly uniform highp " << imageTypeStr << " u_image;\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	if (gl_GlobalInvocationID.x < " << gridSize.x() << ")\n"
		<< "	if (gl_GlobalInvocationID.y < " << gridSize.y() << ")\n"
		<< "	if (gl_GlobalInvocationID.z < " << gridSize.z() << ")\n"
		<< "	{\n"
		<< "		imageStore(u_image, " << getCoordStr(m_imageType, "gl_GlobalInvocationID.x", "gl_GlobalInvocationID.y", "gl_GlobalInvocationID.z") << ","
		<< formatDataStr << "( int(gl_GlobalInvocationID.x) % 127, int(gl_GlobalInvocationID.y) % 127, int(gl_GlobalInvocationID.z) % 127, 1));\n"
		<< "	}\n"
		<< "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

class ImageSparseResidencyInstance : public SparseResourcesBaseInstance
{
public:
					ImageSparseResidencyInstance(Context&					context,
												 const ImageType			imageType,
												 const tcu::UVec3&			imageSize,
												 const tcu::TextureFormat&	format,
												 const bool					useDeviceGroups);

	tcu::TestStatus	iterate						(void);

private:
	const bool					m_useDeviceGroups;
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;
};

ImageSparseResidencyInstance::ImageSparseResidencyInstance (Context&					context,
															const ImageType				imageType,
															const tcu::UVec3&			imageSize,
															const tcu::TextureFormat&	format,
															const bool					useDeviceGroups)
	: SparseResourcesBaseInstance	(context, useDeviceGroups)
	, m_useDeviceGroups				(useDeviceGroups)
	, m_imageType					(imageType)
	, m_imageSize					(imageSize)
	, m_format						(format)
{
}

tcu::TestStatus ImageSparseResidencyInstance::iterate (void)
{
	const InstanceInterface&			instance = m_context.getInstanceInterface();

	{
		// Create logical device supporting both sparse and compute queues
		QueueRequirementsVec queueRequirements;
		queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
		queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));

		createDeviceSupportingQueues(queueRequirements);
	}

	VkImageCreateInfo					imageCreateInfo;
	VkSparseImageMemoryRequirements		aspectRequirements;
	VkExtent3D							imageGranularity;
	std::vector<DeviceMemorySp>			deviceMemUniquePtrVec;

	const DeviceInterface&	deviceInterface	= getDeviceInterface();
	const Queue&			sparseQueue		= getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
	const Queue&			computeQueue	= getQueue(VK_QUEUE_COMPUTE_BIT, 0);

	// Go through all physical devices
	for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
	{
		const deUint32						firstDeviceID				= physDevID;
		const deUint32						secondDeviceID				= (firstDeviceID + 1) % m_numPhysicalDevices;

		const VkPhysicalDevice				physicalDevice				= getPhysicalDevice(firstDeviceID);
		const VkPhysicalDeviceProperties	physicalDeviceProperties	= getPhysicalDeviceProperties(instance, physicalDevice);

		// Check that the image size does not exceed device limits
		if (!isImageSizeSupported(instance, physicalDevice, m_imageType, m_imageSize))
			TCU_THROW(NotSupportedError, "Image size not supported for device");

		// Check that the device supports sparse operations for the image type
		if (!checkSparseSupportForImageType(instance, physicalDevice, m_imageType))
			TCU_THROW(NotSupportedError, "Sparse residency for image type is not supported");

		imageCreateInfo.sType					= VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
		imageCreateInfo.pNext					= DE_NULL;
		imageCreateInfo.flags					= VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
		imageCreateInfo.imageType				= mapImageType(m_imageType);
		imageCreateInfo.format					= mapTextureFormat(m_format);
		imageCreateInfo.extent					= makeExtent3D(getLayerSize(m_imageType, m_imageSize));
		imageCreateInfo.mipLevels				= 1u;
		imageCreateInfo.arrayLayers				= getNumLayers(m_imageType, m_imageSize);
		imageCreateInfo.samples					= VK_SAMPLE_COUNT_1_BIT;
		imageCreateInfo.tiling					= VK_IMAGE_TILING_OPTIMAL;
		imageCreateInfo.initialLayout			= VK_IMAGE_LAYOUT_UNDEFINED;
		imageCreateInfo.usage					= VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
												  VK_IMAGE_USAGE_STORAGE_BIT;
		imageCreateInfo.sharingMode				= VK_SHARING_MODE_EXCLUSIVE;
		imageCreateInfo.queueFamilyIndexCount	= 0u;
		imageCreateInfo.pQueueFamilyIndices		= DE_NULL;

		if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
		{
			imageCreateInfo.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
		}

		// Check that the device supports sparse operations for the image format
		if (!checkSparseSupportForImageFormat(instance, physicalDevice, imageCreateInfo))
			TCU_THROW(NotSupportedError, "The image format does not support sparse operations");

		// Create sparse image
		const Unique<VkImage> sparseImage(createImage(deviceInterface, getDevice(), &imageCreateInfo));

		// Create sparse image memory bind semaphore
		const Unique<VkSemaphore> imageMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));

		{
			// Get image general memory requirements
			const VkMemoryRequirements imageMemoryRequirements = getImageMemoryRequirements(deviceInterface, getDevice(), *sparseImage);

			if (imageMemoryRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
				TCU_THROW(NotSupportedError, "Required memory size for sparse resource exceeds device limits");

			DE_ASSERT((imageMemoryRequirements.size % imageMemoryRequirements.alignment) == 0);

			// Get sparse image sparse memory requirements
			const std::vector<VkSparseImageMemoryRequirements> sparseMemoryRequirements = getImageSparseMemoryRequirements(deviceInterface, getDevice(), *sparseImage);

			DE_ASSERT(sparseMemoryRequirements.size() != 0);

			const deUint32 colorAspectIndex		= getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_COLOR_BIT);
			const deUint32 metadataAspectIndex	= getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_METADATA_BIT);

			if (colorAspectIndex == NO_MATCH_FOUND)
				TCU_THROW(NotSupportedError, "Unsupported image aspect: the test currently supports only VK_IMAGE_ASPECT_COLOR_BIT");

			aspectRequirements	= sparseMemoryRequirements[colorAspectIndex];
			imageGranularity	= aspectRequirements.formatProperties.imageGranularity;

			const VkImageAspectFlags aspectMask = aspectRequirements.formatProperties.aspectMask;

			DE_ASSERT((aspectRequirements.imageMipTailSize % imageMemoryRequirements.alignment) == 0);

			std::vector<VkSparseImageMemoryBind> imageResidencyMemoryBinds;
			std::vector<VkSparseMemoryBind>		 imageMipTailMemoryBinds;

			const deUint32						 memoryType = findMatchingMemoryType(instance, getPhysicalDevice(secondDeviceID), imageMemoryRequirements, MemoryRequirement::Any);

			if (memoryType == NO_MATCH_FOUND)
				return tcu::TestStatus::fail("No matching memory type found");

			if (firstDeviceID != secondDeviceID)
			{
				VkPeerMemoryFeatureFlags	peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
				const deUint32				heapIndex = getHeapIndexForMemoryType(instance, getPhysicalDevice(secondDeviceID), memoryType);
				deviceInterface.getDeviceGroupPeerMemoryFeatures(getDevice(), heapIndex, firstDeviceID, secondDeviceID, &peerMemoryFeatureFlags);

				if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT)    == 0) ||
					((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT) == 0))
				{
					TCU_THROW(NotSupportedError, "Peer memory does not support COPY_SRC and GENERIC_DST");
				}
			}

			// Bind device memory for each aspect
			for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
			{
				for (deUint32 mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
				{
					const VkImageSubresource subresource		= { aspectMask, mipLevelNdx, layerNdx };
					const VkExtent3D		 mipExtent			= mipLevelExtents(imageCreateInfo.extent, mipLevelNdx);
					const tcu::UVec3		 numSparseBinds		= alignedDivide(mipExtent, imageGranularity);
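					// Size of the trailing block in each dimension: if the mip
					// extent is not a multiple of the sparse image granularity,
					// the last block along that axis covers only the remainder.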
					const tcu::UVec3		 lastBlockExtent	= tcu::UVec3(mipExtent.width  % imageGranularity.width  ? mipExtent.width  % imageGranularity.width  : imageGranularity.width,
																			 mipExtent.height % imageGranularity.height ? mipExtent.height % imageGranularity.height : imageGranularity.height,
																			 mipExtent.depth  % imageGranularity.depth  ? mipExtent.depth  % imageGranularity.depth  : imageGranularity.depth);
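					// Bind memory only to blocks with an even linear index and
					// skip the odd ones, leaving them unbound. This creates the
					// partially resident layout that validation later relies on.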
					for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
					for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
					for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
					{
						const deUint32 linearIndex = x + y*numSparseBinds.x() + z*numSparseBinds.x()*numSparseBinds.y() + layerNdx*numSparseBinds.x()*numSparseBinds.y()*numSparseBinds.z();

						if (linearIndex % 2u == 1u)
						{
							continue;
						}

						VkOffset3D offset;
						offset.x = x*imageGranularity.width;
						offset.y = y*imageGranularity.height;
						offset.z = z*imageGranularity.depth;

						VkExtent3D extent;
						extent.width  = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : imageGranularity.width;
						extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : imageGranularity.height;
						extent.depth  = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : imageGranularity.depth;

						const VkSparseImageMemoryBind imageMemoryBind = makeSparseImageMemoryBind(deviceInterface, getDevice(),
							imageMemoryRequirements.alignment, memoryType, subresource, offset, extent);

						deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

						imageResidencyMemoryBinds.push_back(imageMemoryBind);
					}
				}

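				// When the mip tail is per-layer (no SINGLE_MIPTAIL flag), bind
				// its memory once per array layer at the layer's stride offset.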
				if (!(aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
				{
					const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
						aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);

					deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

					imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
				}

				// Metadata
				if (metadataAspectIndex != NO_MATCH_FOUND)
				{
					const VkSparseImageMemoryRequirements metadataAspectRequirements = sparseMemoryRequirements[metadataAspectIndex];

					if (!(metadataAspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT))
					{
						const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
							metadataAspectRequirements.imageMipTailSize, memoryType,
							metadataAspectRequirements.imageMipTailOffset + layerNdx * metadataAspectRequirements.imageMipTailStride,
							VK_SPARSE_MEMORY_BIND_METADATA_BIT);

						deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

						imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
					}
				}
			}

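			// When the mip tail is shared across layers (SINGLE_MIPTAIL flag),
			// a single opaque bind at imageMipTailOffset covers all layers.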
			if ((aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
			{
				const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
					aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset);

				deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

				imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
			}

			// Metadata
			if (metadataAspectIndex != NO_MATCH_FOUND)
			{
				const VkSparseImageMemoryRequirements metadataAspectRequirements = sparseMemoryRequirements[metadataAspectIndex];

				if ((metadataAspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT))
				{
					const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
						metadataAspectRequirements.imageMipTailSize, memoryType, metadataAspectRequirements.imageMipTailOffset,
						VK_SPARSE_MEMORY_BIND_METADATA_BIT);

					deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

					imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
				}
			}

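			// In device-group mode the image lives on firstDeviceID while its
			// backing memory is allocated on secondDeviceID, exercising peer
			// memory access across the group.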
			const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo =
			{
				VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO_KHR,	//VkStructureType							sType;
				DE_NULL,												//const void*								pNext;
				firstDeviceID,											//deUint32									resourceDeviceIndex;
				secondDeviceID,											//deUint32									memoryDeviceIndex;
			};

			VkBindSparseInfo bindSparseInfo =
			{
				VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,						//VkStructureType							sType;
				m_useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL,	//const void*								pNext;
				0u,														//deUint32									waitSemaphoreCount;
				DE_NULL,												//const VkSemaphore*						pWaitSemaphores;
				0u,														//deUint32									bufferBindCount;
				DE_NULL,												//const VkSparseBufferMemoryBindInfo*		pBufferBinds;
				0u,														//deUint32									imageOpaqueBindCount;
				DE_NULL,												//const VkSparseImageOpaqueMemoryBindInfo*	pImageOpaqueBinds;
				0u,														//deUint32									imageBindCount;
				DE_NULL,												//const VkSparseImageMemoryBindInfo*		pImageBinds;
				1u,														//deUint32									signalSemaphoreCount;
				&imageMemoryBindSemaphore.get()							//const VkSemaphore*						pSignalSemaphores;
			};

			VkSparseImageMemoryBindInfo		  imageResidencyBindInfo;
			VkSparseImageOpaqueMemoryBindInfo imageMipTailBindInfo;

			if (imageResidencyMemoryBinds.size() > 0)
			{
				imageResidencyBindInfo.image		= *sparseImage;
				imageResidencyBindInfo.bindCount	= static_cast<deUint32>(imageResidencyMemoryBinds.size());
				imageResidencyBindInfo.pBinds		= &imageResidencyMemoryBinds[0];

				bindSparseInfo.imageBindCount		= 1u;
				bindSparseInfo.pImageBinds			= &imageResidencyBindInfo;
			}

			if (imageMipTailMemoryBinds.size() > 0)
			{
				imageMipTailBindInfo.image			= *sparseImage;
				imageMipTailBindInfo.bindCount		= static_cast<deUint32>(imageMipTailMemoryBinds.size());
				imageMipTailBindInfo.pBinds			= &imageMipTailMemoryBinds[0];

				bindSparseInfo.imageOpaqueBindCount = 1u;
				bindSparseInfo.pImageOpaqueBinds	= &imageMipTailBindInfo;
			}

			// Submit sparse bind commands for execution
			VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
		}

		// Create command buffer for compute and transfer operations
		const Unique<VkCommandPool>	  commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
		const Unique<VkCommandBuffer> commandBuffer(allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

		// Start recording commands
		beginCommandBuffer(deviceInterface, *commandBuffer);

		// Create descriptor set layout
		const Unique<VkDescriptorSetLayout> descriptorSetLayout(
			DescriptorSetLayoutBuilder()
			.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
			.build(deviceInterface, getDevice()));

		// Create and bind compute pipeline
		const Unique<VkShaderModule>	shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
		const Unique<VkPipelineLayout>	pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
		const Unique<VkPipeline>		computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));

		deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

		// Create and bind descriptor set
		const Unique<VkDescriptorPool> descriptorPool(
			DescriptorPoolBuilder()
			.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u)
			.build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

		const Unique<VkDescriptorSet>	descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));

		const VkImageSubresourceRange	subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
		const Unique<VkImageView>		imageView(makeImageView(deviceInterface, getDevice(), *sparseImage, mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange));
		const VkDescriptorImageInfo		sparseImageInfo  = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);

		DescriptorSetUpdateBuilder()
			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &sparseImageInfo)
			.update(deviceInterface, getDevice());

		deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

		{
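			// Transition the image from UNDEFINED to GENERAL for shader writes.
			// If the sparse and compute queues come from different families, the
			// barrier also transfers queue family ownership to the compute queue.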
			const VkImageMemoryBarrier sparseImageLayoutChangeBarrier = makeImageMemoryBarrier
			(
				0u,
				VK_ACCESS_SHADER_WRITE_BIT,
				VK_IMAGE_LAYOUT_UNDEFINED,
				VK_IMAGE_LAYOUT_GENERAL,
				*sparseImage,
				subresourceRange,
				sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? sparseQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED,
				sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? computeQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED
			);

			deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &sparseImageLayoutChangeBarrier);
		}

		const tcu::UVec3  gridSize = getShaderGridSize(m_imageType, m_imageSize);

		{
			const tcu::UVec3  workGroupSize = computeWorkGroupSize(gridSize);

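			// Round the work group counts up so the whole grid is covered:
			// count = ceil(gridSize / workGroupSize) per dimension. The 65535
			// cap below is the minimum maxComputeWorkGroupCount every Vulkan
			// implementation is required to support.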
			const deUint32 xWorkGroupCount = gridSize.x() / workGroupSize.x() + (gridSize.x() % workGroupSize.x() ? 1u : 0u);
			const deUint32 yWorkGroupCount = gridSize.y() / workGroupSize.y() + (gridSize.y() % workGroupSize.y() ? 1u : 0u);
			const deUint32 zWorkGroupCount = gridSize.z() / workGroupSize.z() + (gridSize.z() % workGroupSize.z() ? 1u : 0u);

			const tcu::UVec3 maxComputeWorkGroupCount = tcu::UVec3(65535u, 65535u, 65535u);

			if (maxComputeWorkGroupCount.x() < xWorkGroupCount ||
				maxComputeWorkGroupCount.y() < yWorkGroupCount ||
				maxComputeWorkGroupCount.z() < zWorkGroupCount)
			{
				TCU_THROW(NotSupportedError, "Image size is not supported");
			}

			deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
		}

		{
			const VkImageMemoryBarrier sparseImageTransferBarrier = makeImageMemoryBarrier
			(
				VK_ACCESS_SHADER_WRITE_BIT,
				VK_ACCESS_TRANSFER_READ_BIT,
				VK_IMAGE_LAYOUT_GENERAL,
				VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
				*sparseImage,
				subresourceRange
			);

			deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &sparseImageTransferBarrier);
		}

		const deUint32					imageSizeInBytes		= getNumPixels(m_imageType, m_imageSize) * tcu::getPixelSize(m_format);
		const VkBufferCreateInfo		outputBufferCreateInfo	= makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
		const Unique<VkBuffer>			outputBuffer			(createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
		const de::UniquePtr<Allocation>	outputBufferAlloc		(bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));

		{
			const VkBufferImageCopy bufferImageCopy = makeBufferImageCopy(imageCreateInfo.extent, imageCreateInfo.arrayLayers);

			deviceInterface.cmdCopyImageToBuffer(*commandBuffer, *sparseImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &bufferImageCopy);
		}

		{
			const VkBufferMemoryBarrier outputBufferHostReadBarrier = makeBufferMemoryBarrier
			(
				VK_ACCESS_TRANSFER_WRITE_BIT,
				VK_ACCESS_HOST_READ_BIT,
				*outputBuffer,
				0u,
				imageSizeInBytes
			);

			deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferHostReadBarrier, 0u, DE_NULL);
		}

		// End recording commands
		endCommandBuffer(deviceInterface, *commandBuffer);

		// The stage at which execution waits for the sparse binding operations to finish
		const VkPipelineStageFlags stageBits[] = { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };

		// Submit commands for execution and wait for completion
		submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &imageMemoryBindSemaphore.get(), stageBits,
			0, DE_NULL, m_useDeviceGroups, firstDeviceID);

		// Retrieve data from buffer to host memory
		invalidateAlloc(deviceInterface, getDevice(), *outputBufferAlloc);

		const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());
		const tcu::ConstPixelBufferAccess pixelBuffer = tcu::ConstPixelBufferAccess(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputData);

		// Wait for sparse queue to become idle
		deviceInterface.queueWaitIdle(sparseQueue.queueHandle);

		// Validate results
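		// When the mip tail starts after LOD 0, level 0 was bound block by
		// block: blocks with an even linear index must contain the shader
		// pattern, while unbound (odd) blocks must read as zero on devices
		// reporting residencyNonResidentStrict. Otherwise the whole image sits
		// in the mip tail, is fully bound, and every texel is checked.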
		if (aspectRequirements.imageMipTailFirstLod > 0u)
		{
			const VkExtent3D		 mipExtent		 = mipLevelExtents(imageCreateInfo.extent, 0u);
			const tcu::UVec3		 numSparseBinds  = alignedDivide(mipExtent, imageGranularity);
			const tcu::UVec3		 lastBlockExtent = tcu::UVec3(	mipExtent.width  % imageGranularity.width  ? mipExtent.width  % imageGranularity.width  : imageGranularity.width,
																	mipExtent.height % imageGranularity.height ? mipExtent.height % imageGranularity.height : imageGranularity.height,
																	mipExtent.depth  % imageGranularity.depth  ? mipExtent.depth  % imageGranularity.depth  : imageGranularity.depth);

			for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
			{
				for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
				for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
				for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
				{
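					// Note: VkExtent3D is reused here as a 3D offset; its
					// width/height/depth members hold the x/y/z coordinates.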
					VkExtent3D offset;
					offset.width  = x*imageGranularity.width;
					offset.height = y*imageGranularity.height;
					offset.depth  = z*imageGranularity.depth + layerNdx*numSparseBinds.z()*imageGranularity.depth;

					VkExtent3D extent;
					extent.width  = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : imageGranularity.width;
					extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : imageGranularity.height;
					extent.depth  = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : imageGranularity.depth;

					const deUint32 linearIndex = x + y*numSparseBinds.x() + z*numSparseBinds.x()*numSparseBinds.y() + layerNdx*numSparseBinds.x()*numSparseBinds.y()*numSparseBinds.z();

					if (linearIndex % 2u == 0u)
					{
						for (deUint32 offsetZ = offset.depth;  offsetZ < offset.depth  + extent.depth;  ++offsetZ)
						for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
						for (deUint32 offsetX = offset.width;  offsetX < offset.width  + extent.width;  ++offsetX)
						{
							const tcu::UVec4 referenceValue = tcu::UVec4(offsetX % 127u, offsetY % 127u, offsetZ % 127u, 1u);
							const tcu::UVec4 outputValue	= pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);

							if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
								return tcu::TestStatus::fail("Failed");
						}
					}
					else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
					{
						for (deUint32 offsetZ = offset.depth;  offsetZ < offset.depth  + extent.depth;  ++offsetZ)
						for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
						for (deUint32 offsetX = offset.width;  offsetX < offset.width  + extent.width;  ++offsetX)
						{
							const tcu::UVec4 referenceValue = tcu::UVec4(0u, 0u, 0u, 0u);
							const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);

							if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
								return tcu::TestStatus::fail("Failed");
						}
					}
				}
			}
		}
		else
		{
			const VkExtent3D mipExtent = mipLevelExtents(imageCreateInfo.extent, 0u);

			for (deUint32 offsetZ = 0u; offsetZ < mipExtent.depth * imageCreateInfo.arrayLayers; ++offsetZ)
			for (deUint32 offsetY = 0u; offsetY < mipExtent.height; ++offsetY)
			for (deUint32 offsetX = 0u; offsetX < mipExtent.width;  ++offsetX)
			{
				const tcu::UVec4 referenceValue = tcu::UVec4(offsetX % 127u, offsetY % 127u, offsetZ % 127u, 1u);
				const tcu::UVec4 outputValue	= pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);

				if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
					return tcu::TestStatus::fail("Failed");
			}
		}
	}

	return tcu::TestStatus::pass("Passed");
}

TestInstance* ImageSparseResidencyCase::createInstance (Context& context) const
{
	return new ImageSparseResidencyInstance(context, m_imageType, m_imageSize, m_format, m_useDeviceGroups);
}

} // anonymous ns

tcu::TestCaseGroup* createImageSparseResidencyTestsCommon (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup> testGroup, const bool useDeviceGroup = false)
{
	static const deUint32 sizeCountPerImageType = 3u;

	struct ImageParameters
	{
		ImageType	imageType;
		tcu::UVec3	imageSizes[sizeCountPerImageType];
	};

	static const ImageParameters imageParametersArray[] =
	{
		{ IMAGE_TYPE_2D,		 { tcu::UVec3(512u, 256u, 1u),  tcu::UVec3(1024u, 128u, 1u), tcu::UVec3(11u,  137u, 1u) } },
		{ IMAGE_TYPE_2D_ARRAY,	 { tcu::UVec3(512u, 256u, 6u),	tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u,  137u, 3u) } },
		{ IMAGE_TYPE_CUBE,		 { tcu::UVec3(256u, 256u, 1u),	tcu::UVec3(128u,  128u, 1u), tcu::UVec3(137u, 137u, 1u) } },
		{ IMAGE_TYPE_CUBE_ARRAY, { tcu::UVec3(256u, 256u, 6u),	tcu::UVec3(128u,  128u, 8u), tcu::UVec3(137u, 137u, 3u) } },
		{ IMAGE_TYPE_3D,		 { tcu::UVec3(512u, 256u, 16u), tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u,  137u, 3u) } }
	};

	static const tcu::TextureFormat formats[] =
	{
		tcu::TextureFormat(tcu::TextureFormat::R,	 tcu::TextureFormat::SIGNED_INT32),
		tcu::TextureFormat(tcu::TextureFormat::R,	 tcu::TextureFormat::SIGNED_INT16),
		tcu::TextureFormat(tcu::TextureFormat::R,	 tcu::TextureFormat::SIGNED_INT8),
		tcu::TextureFormat(tcu::TextureFormat::RG,	 tcu::TextureFormat::SIGNED_INT32),
		tcu::TextureFormat(tcu::TextureFormat::RG,   tcu::TextureFormat::SIGNED_INT16),
		tcu::TextureFormat(tcu::TextureFormat::RG,   tcu::TextureFormat::SIGNED_INT8),
		tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT32),
		tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT16),
		tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT8)
	};

	for (deInt32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray); ++imageTypeNdx)
	{
		const ImageType					imageType = imageParametersArray[imageTypeNdx].imageType;
		de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));

		for (deInt32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); ++formatNdx)
		{
			const tcu::TextureFormat&		format = formats[formatNdx];
			de::MovePtr<tcu::TestCaseGroup> formatGroup(new tcu::TestCaseGroup(testCtx, getShaderImageFormatQualifier(format).c_str(), ""));

			for (deInt32 imageSizeNdx = 0; imageSizeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray[imageTypeNdx].imageSizes); ++imageSizeNdx)
			{
				const tcu::UVec3 imageSize = imageParametersArray[imageTypeNdx].imageSizes[imageSizeNdx];

				std::ostringstream stream;
				stream << imageSize.x() << "_" << imageSize.y() << "_" << imageSize.z();

				formatGroup->addChild(new ImageSparseResidencyCase(testCtx, stream.str(), "", imageType, imageSize, format, glu::GLSL_VERSION_440, useDeviceGroup));
			}
			imageTypeGroup->addChild(formatGroup.release());
		}
		testGroup->addChild(imageTypeGroup.release());
	}

	return testGroup.release();
}


tcu::TestCaseGroup* createImageSparseResidencyTests (tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "image_sparse_residency", "Image Sparse Residency"));
	return createImageSparseResidencyTestsCommon(testCtx, testGroup);
}

tcu::TestCaseGroup* createDeviceGroupImageSparseResidencyTests (tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "device_group_image_sparse_residency", "Image Sparse Residency"));
	return createImageSparseResidencyTestsCommon(testCtx, testGroup, true);
}

} // sparse
} // vkt