1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Sparse buffer tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktSparseResourcesBufferTests.hpp"
25 #include "vktTestCaseUtil.hpp"
26 #include "vktTestGroupUtil.hpp"
27 #include "vktSparseResourcesTestsUtil.hpp"
28 #include "vktSparseResourcesBase.hpp"
29 #include "vktSparseResourcesBufferSparseBinding.hpp"
30 #include "vktSparseResourcesBufferSparseResidency.hpp"
31 #include "vktSparseResourcesBufferMemoryAliasing.hpp"
32 
33 #include "vkRef.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vkPlatform.hpp"
36 #include "vkPrograms.hpp"
37 #include "vkMemUtil.hpp"
38 #include "vkBuilderUtil.hpp"
39 #include "vkQueryUtil.hpp"
40 #include "vkTypeUtil.hpp"
41 #include "vkCmdUtil.hpp"
42 #include "vkObjUtil.hpp"
43 
44 #include "tcuTestLog.hpp"
45 
46 #include "deUniquePtr.hpp"
47 #include "deSharedPtr.hpp"
48 #include "deMath.h"
49 
50 #include <string>
51 #include <vector>
52 #include <map>
53 
54 using namespace vk;
55 using de::MovePtr;
56 using de::UniquePtr;
57 using de::SharedPtr;
58 using tcu::Vec4;
59 using tcu::IVec2;
60 using tcu::IVec4;
61 
62 namespace vkt
63 {
64 namespace sparse
65 {
66 namespace
67 {
68 
//! Shared, copyable handle to a uniquely owned device memory allocation.
//! The extra SharedPtr layer makes the handle storable in std::vector (see SparseAllocation::allocations).
typedef SharedPtr<UniquePtr<Allocation> > AllocationSp;

//! Rendering constants.
enum
{
	RENDER_SIZE		= 128,				//!< framebuffer size in pixels
	GRID_SIZE		= RENDER_SIZE / 8,	//!< number of grid tiles in a row
};
76 
//! Individual feature bits a test case may request; OR them together into a TestFlags mask.
enum TestFlagBits
{
												//   sparseBinding is implied
	TEST_FLAG_ALIASED				= 1u << 0,	//!< sparseResidencyAliased
	TEST_FLAG_RESIDENCY				= 1u << 1,	//!< sparseResidencyBuffer
	TEST_FLAG_NON_RESIDENT_STRICT	= 1u << 2,	//!< residencyNonResidentStrict
	TEST_FLAG_ENABLE_DEVICE_GROUPS	= 1u << 3,	//!< device groups are enabled
};
//! Bitmask built from TestFlagBits values.
typedef deUint32 TestFlags;
86 
//! SparseAllocationBuilder output. Owns the allocated memory.
//! The VkSparseMemoryBind entries refer to memory held by `allocations`, so the binds
//! are only meaningful while this object is alive.
struct SparseAllocation
{
	deUint32							numResourceChunks;	//!< resource chunk cursor of the builder at build time (binds plus resource holes)
	VkDeviceSize						resourceSize;		//!< buffer size in bytes
	std::vector<AllocationSp>			allocations;		//!< actual allocated memory
	std::vector<VkSparseMemoryBind>		memoryBinds;		//!< memory binds backing the resource
	deUint32							memoryType;			//!< memory type (same for all allocations)
	deUint32							heapIndex;			//!< memory heap index
};
97 
//! Utility to lay out memory allocations for a sparse buffer, including holes and aliased regions.
//! Will allocate memory upon building.
//!
//! The builder keeps two cursors, both counted in chunks: one into the resource (buffer)
//! and one into the current memory allocation. The add*() calls move these cursors and
//! record binds; nothing is allocated until build() is called. All add*() calls return
//! *this so that they can be chained.
class SparseAllocationBuilder
{
public:
								SparseAllocationBuilder	(void);

	// \note "chunk" is the smallest (due to alignment) bindable amount of memory

	//! Leave numChunks of the current allocation unused: advances the memory cursor and grows the allocation.
	SparseAllocationBuilder&	addMemoryHole			(const deUint32 numChunks = 1u);
	//! Leave numChunks of the resource unbound: advances the resource cursor only.
	SparseAllocationBuilder&	addResourceHole			(const deUint32 numChunks = 1u);
	//! Bind numChunks at the current resource and memory positions; advances both cursors and grows the allocation.
	SparseAllocationBuilder&	addMemoryBind			(const deUint32 numChunks = 1u);
	//! Bind numChunks at the current resource position to chunks starting at chunkOffset of allocation allocationNdx.
	//! Only the resource cursor advances; the size of the referenced allocation is not changed.
	SparseAllocationBuilder&	addAliasedMemoryBind	(const deUint32 allocationNdx, const deUint32 chunkOffset, const deUint32 numChunks = 1u);
	//! Start a new memory allocation and reset the memory cursor. The current allocation must not be empty.
	SparseAllocationBuilder&	addMemoryAllocation		(void);

	//! Allocate the memory and translate the recorded chunk layout into VkSparseMemoryBind entries.
	MovePtr<SparseAllocation>	build					(const InstanceInterface&	instanceInterface,
														 const VkPhysicalDevice		physicalDevice,
														 const DeviceInterface&		vk,
														 const VkDevice				device,
														 Allocator&					allocator,
														 VkBufferCreateInfo			referenceCreateInfo,		//!< buffer size is ignored in this info
														 const VkDeviceSize			minChunkSize = 0ull) const;	//!< make sure chunks are at least this big

private:
	//! A recorded bind; all fields are expressed in chunks and converted to bytes in build().
	struct MemoryBind
	{
		deUint32	allocationNdx;		//!< index into the list of allocations
		deUint32	resourceChunkNdx;	//!< first bound chunk of the resource
		deUint32	memoryChunkNdx;		//!< first bound chunk within the allocation
		deUint32	numChunks;			//!< length of the bind
	};

	deUint32					m_allocationNdx;		//!< index of the allocation currently being laid out
	deUint32					m_resourceChunkNdx;		//!< resource cursor
	deUint32					m_memoryChunkNdx;		//!< memory cursor within the current allocation
	std::vector<MemoryBind>		m_memoryBinds;			//!< binds recorded so far
	std::vector<deUint32>		m_chunksPerAllocation;	//!< size of each allocation, in chunks

};
137 
SparseAllocationBuilder(void)138 SparseAllocationBuilder::SparseAllocationBuilder (void)
139 	: m_allocationNdx		(0)
140 	, m_resourceChunkNdx	(0)
141 	, m_memoryChunkNdx		(0)
142 {
143 	m_chunksPerAllocation.push_back(0);
144 }
145 
addMemoryHole(const deUint32 numChunks)146 SparseAllocationBuilder& SparseAllocationBuilder::addMemoryHole (const deUint32 numChunks)
147 {
148 	m_memoryChunkNdx						+= numChunks;
149 	m_chunksPerAllocation[m_allocationNdx]	+= numChunks;
150 
151 	return *this;
152 }
153 
addResourceHole(const deUint32 numChunks)154 SparseAllocationBuilder& SparseAllocationBuilder::addResourceHole (const deUint32 numChunks)
155 {
156 	m_resourceChunkNdx += numChunks;
157 
158 	return *this;
159 }
160 
addMemoryAllocation(void)161 SparseAllocationBuilder& SparseAllocationBuilder::addMemoryAllocation (void)
162 {
163 	DE_ASSERT(m_memoryChunkNdx != 0);	// doesn't make sense to have an empty allocation
164 
165 	m_allocationNdx  += 1;
166 	m_memoryChunkNdx  = 0;
167 	m_chunksPerAllocation.push_back(0);
168 
169 	return *this;
170 }
171 
addMemoryBind(const deUint32 numChunks)172 SparseAllocationBuilder& SparseAllocationBuilder::addMemoryBind (const deUint32 numChunks)
173 {
174 	const MemoryBind memoryBind =
175 	{
176 		m_allocationNdx,
177 		m_resourceChunkNdx,
178 		m_memoryChunkNdx,
179 		numChunks
180 	};
181 	m_memoryBinds.push_back(memoryBind);
182 
183 	m_resourceChunkNdx						+= numChunks;
184 	m_memoryChunkNdx						+= numChunks;
185 	m_chunksPerAllocation[m_allocationNdx]	+= numChunks;
186 
187 	return *this;
188 }
189 
addAliasedMemoryBind(const deUint32 allocationNdx,const deUint32 chunkOffset,const deUint32 numChunks)190 SparseAllocationBuilder& SparseAllocationBuilder::addAliasedMemoryBind	(const deUint32 allocationNdx, const deUint32 chunkOffset, const deUint32 numChunks)
191 {
192 	DE_ASSERT(allocationNdx <= m_allocationNdx);
193 
194 	const MemoryBind memoryBind =
195 	{
196 		allocationNdx,
197 		m_resourceChunkNdx,
198 		chunkOffset,
199 		numChunks
200 	};
201 	m_memoryBinds.push_back(memoryBind);
202 
203 	m_resourceChunkNdx += numChunks;
204 
205 	return *this;
206 }
207 
build(const InstanceInterface & instanceInterface,const VkPhysicalDevice physicalDevice,const DeviceInterface & vk,const VkDevice device,Allocator & allocator,VkBufferCreateInfo referenceCreateInfo,const VkDeviceSize minChunkSize) const208 MovePtr<SparseAllocation> SparseAllocationBuilder::build (const InstanceInterface&			instanceInterface,
209 														  const VkPhysicalDevice			physicalDevice,
210 														  const DeviceInterface&			vk,
211 														  const VkDevice					device,
212 														  Allocator&						allocator,
213 														  VkBufferCreateInfo				referenceCreateInfo,
214 														  const VkDeviceSize				minChunkSize) const
215 {
216 
217 	MovePtr<SparseAllocation>	sparseAllocation			(new SparseAllocation());
218 
219 								referenceCreateInfo.size	= sizeof(deUint32);
220 	const Unique<VkBuffer>		refBuffer					(createBuffer(vk, device, &referenceCreateInfo));
221 	const VkMemoryRequirements	memoryRequirements			= getBufferMemoryRequirements(vk, device, *refBuffer);
222 	const VkDeviceSize			chunkSize					= std::max(memoryRequirements.alignment, static_cast<VkDeviceSize>(deAlign64(minChunkSize, memoryRequirements.alignment)));
223 	const deUint32				memoryTypeNdx				= findMatchingMemoryType(instanceInterface, physicalDevice, memoryRequirements, MemoryRequirement::Any);
224 	VkMemoryAllocateInfo		allocInfo					=
225 	{
226 		VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,	//	VkStructureType			sType;
227 		DE_NULL,								//	const void*				pNext;
228 		memoryRequirements.size,				//	VkDeviceSize			allocationSize;
229 		memoryTypeNdx,							//	deUint32				memoryTypeIndex;
230 	};
231 
232 	for (std::vector<deUint32>::const_iterator numChunksIter = m_chunksPerAllocation.begin(); numChunksIter != m_chunksPerAllocation.end(); ++numChunksIter)
233 	{
234 		allocInfo.allocationSize = *numChunksIter * chunkSize;
235 		sparseAllocation->allocations.push_back(makeDeSharedPtr(allocator.allocate(allocInfo, (VkDeviceSize)0)));
236 	}
237 
238 	for (std::vector<MemoryBind>::const_iterator memBindIter = m_memoryBinds.begin(); memBindIter != m_memoryBinds.end(); ++memBindIter)
239 	{
240 		const Allocation&			alloc	= **sparseAllocation->allocations[memBindIter->allocationNdx];
241 		const VkSparseMemoryBind	bind	=
242 		{
243 			memBindIter->resourceChunkNdx * chunkSize,							// VkDeviceSize               resourceOffset;
244 			memBindIter->numChunks * chunkSize,									// VkDeviceSize               size;
245 			alloc.getMemory(),													// VkDeviceMemory             memory;
246 			alloc.getOffset() + memBindIter->memoryChunkNdx * chunkSize,		// VkDeviceSize               memoryOffset;
247 			(VkSparseMemoryBindFlags)0,											// VkSparseMemoryBindFlags    flags;
248 		};
249 		sparseAllocation->memoryBinds.push_back(bind);
250 		referenceCreateInfo.size = std::max(referenceCreateInfo.size, bind.resourceOffset + bind.size);
251 	}
252 
253 	sparseAllocation->resourceSize		= referenceCreateInfo.size;
254 	sparseAllocation->numResourceChunks = m_resourceChunkNdx;
255 	sparseAllocation->memoryType		= memoryTypeNdx;
256 	sparseAllocation->heapIndex			= getHeapIndexForMemoryType(instanceInterface, physicalDevice, memoryTypeNdx);
257 
258 	return sparseAllocation;
259 }
260 
makeImageCreateInfo(const VkFormat format,const IVec2 & size,const VkImageUsageFlags usage)261 VkImageCreateInfo makeImageCreateInfo (const VkFormat format, const IVec2& size, const VkImageUsageFlags usage)
262 {
263 	const VkImageCreateInfo imageParams =
264 	{
265 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,			// VkStructureType			sType;
266 		DE_NULL,										// const void*				pNext;
267 		(VkImageCreateFlags)0,							// VkImageCreateFlags		flags;
268 		VK_IMAGE_TYPE_2D,								// VkImageType				imageType;
269 		format,											// VkFormat					format;
270 		makeExtent3D(size.x(), size.y(), 1),			// VkExtent3D				extent;
271 		1u,												// deUint32					mipLevels;
272 		1u,												// deUint32					arrayLayers;
273 		VK_SAMPLE_COUNT_1_BIT,							// VkSampleCountFlagBits	samples;
274 		VK_IMAGE_TILING_OPTIMAL,						// VkImageTiling			tiling;
275 		usage,											// VkImageUsageFlags		usage;
276 		VK_SHARING_MODE_EXCLUSIVE,						// VkSharingMode			sharingMode;
277 		0u,												// deUint32					queueFamilyIndexCount;
278 		DE_NULL,										// const deUint32*			pQueueFamilyIndices;
279 		VK_IMAGE_LAYOUT_UNDEFINED,						// VkImageLayout			initialLayout;
280 	};
281 	return imageParams;
282 }
283 
makeGraphicsPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkRenderPass renderPass,const IVec2 renderSize,const VkPrimitiveTopology topology,const deUint32 stageCount,const VkPipelineShaderStageCreateInfo * pStages)284 Move<VkPipeline> makeGraphicsPipeline (const DeviceInterface&					vk,
285 									   const VkDevice							device,
286 									   const VkPipelineLayout					pipelineLayout,
287 									   const VkRenderPass						renderPass,
288 									   const IVec2								renderSize,
289 									   const VkPrimitiveTopology				topology,
290 									   const deUint32							stageCount,
291 									   const VkPipelineShaderStageCreateInfo*	pStages)
292 {
293 	const VkVertexInputBindingDescription vertexInputBindingDescription =
294 	{
295 		0u,								// uint32_t				binding;
296 		sizeof(Vec4),					// uint32_t				stride;
297 		VK_VERTEX_INPUT_RATE_VERTEX,	// VkVertexInputRate	inputRate;
298 	};
299 
300 	const VkVertexInputAttributeDescription vertexInputAttributeDescription =
301 	{
302 		0u,									// uint32_t			location;
303 		0u,									// uint32_t			binding;
304 		VK_FORMAT_R32G32B32A32_SFLOAT,		// VkFormat			format;
305 		0u,									// uint32_t			offset;
306 	};
307 
308 	const VkPipelineVertexInputStateCreateInfo vertexInputStateInfo =
309 	{
310 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,		// VkStructureType                             sType;
311 		DE_NULL,														// const void*                                 pNext;
312 		(VkPipelineVertexInputStateCreateFlags)0,						// VkPipelineVertexInputStateCreateFlags       flags;
313 		1u,																// uint32_t                                    vertexBindingDescriptionCount;
314 		&vertexInputBindingDescription,									// const VkVertexInputBindingDescription*      pVertexBindingDescriptions;
315 		1u,																// uint32_t                                    vertexAttributeDescriptionCount;
316 		&vertexInputAttributeDescription,								// const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions;
317 	};
318 
319 	const VkPipelineInputAssemblyStateCreateInfo pipelineInputAssemblyStateInfo =
320 	{
321 		VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType                             sType;
322 		DE_NULL,														// const void*                                 pNext;
323 		(VkPipelineInputAssemblyStateCreateFlags)0,						// VkPipelineInputAssemblyStateCreateFlags     flags;
324 		topology,														// VkPrimitiveTopology                         topology;
325 		VK_FALSE,														// VkBool32                                    primitiveRestartEnable;
326 	};
327 
328 	const VkViewport	viewport	= makeViewport(renderSize);
329 	const VkRect2D		scissor		= makeRect2D(renderSize);
330 
331 	const VkPipelineViewportStateCreateInfo pipelineViewportStateInfo =
332 	{
333 		VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,			// VkStructureType                             sType;
334 		DE_NULL,														// const void*                                 pNext;
335 		(VkPipelineViewportStateCreateFlags)0,							// VkPipelineViewportStateCreateFlags          flags;
336 		1u,																// uint32_t                                    viewportCount;
337 		&viewport,														// const VkViewport*                           pViewports;
338 		1u,																// uint32_t                                    scissorCount;
339 		&scissor,														// const VkRect2D*                             pScissors;
340 	};
341 
342 	const VkPipelineRasterizationStateCreateInfo pipelineRasterizationStateInfo =
343 	{
344 		VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,		// VkStructureType                          sType;
345 		DE_NULL,														// const void*                              pNext;
346 		(VkPipelineRasterizationStateCreateFlags)0,						// VkPipelineRasterizationStateCreateFlags  flags;
347 		VK_FALSE,														// VkBool32                                 depthClampEnable;
348 		VK_FALSE,														// VkBool32                                 rasterizerDiscardEnable;
349 		VK_POLYGON_MODE_FILL,											// VkPolygonMode							polygonMode;
350 		VK_CULL_MODE_NONE,												// VkCullModeFlags							cullMode;
351 		VK_FRONT_FACE_COUNTER_CLOCKWISE,								// VkFrontFace								frontFace;
352 		VK_FALSE,														// VkBool32									depthBiasEnable;
353 		0.0f,															// float									depthBiasConstantFactor;
354 		0.0f,															// float									depthBiasClamp;
355 		0.0f,															// float									depthBiasSlopeFactor;
356 		1.0f,															// float									lineWidth;
357 	};
358 
359 	const VkPipelineMultisampleStateCreateInfo pipelineMultisampleStateInfo =
360 	{
361 		VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,		// VkStructureType							sType;
362 		DE_NULL,														// const void*								pNext;
363 		(VkPipelineMultisampleStateCreateFlags)0,						// VkPipelineMultisampleStateCreateFlags	flags;
364 		VK_SAMPLE_COUNT_1_BIT,											// VkSampleCountFlagBits					rasterizationSamples;
365 		VK_FALSE,														// VkBool32									sampleShadingEnable;
366 		0.0f,															// float									minSampleShading;
367 		DE_NULL,														// const VkSampleMask*						pSampleMask;
368 		VK_FALSE,														// VkBool32									alphaToCoverageEnable;
369 		VK_FALSE														// VkBool32									alphaToOneEnable;
370 	};
371 
372 	const VkStencilOpState stencilOpState = makeStencilOpState(
373 		VK_STENCIL_OP_KEEP,				// stencil fail
374 		VK_STENCIL_OP_KEEP,				// depth & stencil pass
375 		VK_STENCIL_OP_KEEP,				// depth only fail
376 		VK_COMPARE_OP_ALWAYS,			// compare op
377 		0u,								// compare mask
378 		0u,								// write mask
379 		0u);							// reference
380 
381 	VkPipelineDepthStencilStateCreateInfo pipelineDepthStencilStateInfo =
382 	{
383 		VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,		// VkStructureType							sType;
384 		DE_NULL,														// const void*								pNext;
385 		(VkPipelineDepthStencilStateCreateFlags)0,						// VkPipelineDepthStencilStateCreateFlags	flags;
386 		VK_FALSE,														// VkBool32									depthTestEnable;
387 		VK_FALSE,														// VkBool32									depthWriteEnable;
388 		VK_COMPARE_OP_LESS,												// VkCompareOp								depthCompareOp;
389 		VK_FALSE,														// VkBool32									depthBoundsTestEnable;
390 		VK_FALSE,														// VkBool32									stencilTestEnable;
391 		stencilOpState,													// VkStencilOpState							front;
392 		stencilOpState,													// VkStencilOpState							back;
393 		0.0f,															// float									minDepthBounds;
394 		1.0f,															// float									maxDepthBounds;
395 	};
396 
397 	const VkColorComponentFlags					colorComponentsAll					= VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
398 	const VkPipelineColorBlendAttachmentState	pipelineColorBlendAttachmentState	=
399 	{
400 		VK_FALSE,						// VkBool32					blendEnable;
401 		VK_BLEND_FACTOR_ONE,			// VkBlendFactor			srcColorBlendFactor;
402 		VK_BLEND_FACTOR_ZERO,			// VkBlendFactor			dstColorBlendFactor;
403 		VK_BLEND_OP_ADD,				// VkBlendOp				colorBlendOp;
404 		VK_BLEND_FACTOR_ONE,			// VkBlendFactor			srcAlphaBlendFactor;
405 		VK_BLEND_FACTOR_ZERO,			// VkBlendFactor			dstAlphaBlendFactor;
406 		VK_BLEND_OP_ADD,				// VkBlendOp				alphaBlendOp;
407 		colorComponentsAll,				// VkColorComponentFlags	colorWriteMask;
408 	};
409 
410 	const VkPipelineColorBlendStateCreateInfo pipelineColorBlendStateInfo =
411 	{
412 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,		// VkStructureType								sType;
413 		DE_NULL,														// const void*									pNext;
414 		(VkPipelineColorBlendStateCreateFlags)0,						// VkPipelineColorBlendStateCreateFlags			flags;
415 		VK_FALSE,														// VkBool32										logicOpEnable;
416 		VK_LOGIC_OP_COPY,												// VkLogicOp									logicOp;
417 		1u,																// deUint32										attachmentCount;
418 		&pipelineColorBlendAttachmentState,								// const VkPipelineColorBlendAttachmentState*	pAttachments;
419 		{ 0.0f, 0.0f, 0.0f, 0.0f },										// float										blendConstants[4];
420 	};
421 
422 	const VkGraphicsPipelineCreateInfo graphicsPipelineInfo =
423 	{
424 		VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,	// VkStructureType									sType;
425 		DE_NULL,											// const void*										pNext;
426 		(VkPipelineCreateFlags)0,							// VkPipelineCreateFlags							flags;
427 		stageCount,											// deUint32											stageCount;
428 		pStages,											// const VkPipelineShaderStageCreateInfo*			pStages;
429 		&vertexInputStateInfo,								// const VkPipelineVertexInputStateCreateInfo*		pVertexInputState;
430 		&pipelineInputAssemblyStateInfo,					// const VkPipelineInputAssemblyStateCreateInfo*	pInputAssemblyState;
431 		DE_NULL,											// const VkPipelineTessellationStateCreateInfo*		pTessellationState;
432 		&pipelineViewportStateInfo,							// const VkPipelineViewportStateCreateInfo*			pViewportState;
433 		&pipelineRasterizationStateInfo,					// const VkPipelineRasterizationStateCreateInfo*	pRasterizationState;
434 		&pipelineMultisampleStateInfo,						// const VkPipelineMultisampleStateCreateInfo*		pMultisampleState;
435 		&pipelineDepthStencilStateInfo,						// const VkPipelineDepthStencilStateCreateInfo*		pDepthStencilState;
436 		&pipelineColorBlendStateInfo,						// const VkPipelineColorBlendStateCreateInfo*		pColorBlendState;
437 		DE_NULL,											// const VkPipelineDynamicStateCreateInfo*			pDynamicState;
438 		pipelineLayout,										// VkPipelineLayout									layout;
439 		renderPass,											// VkRenderPass										renderPass;
440 		0u,													// deUint32											subpass;
441 		DE_NULL,											// VkPipeline										basePipelineHandle;
442 		0,													// deInt32											basePipelineIndex;
443 	};
444 
445 	return createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineInfo);
446 }
447 
448 //! Return true if there are any red (or all zero) pixels in the image
imageHasErrorPixels(const tcu::ConstPixelBufferAccess image)449 bool imageHasErrorPixels (const tcu::ConstPixelBufferAccess image)
450 {
451 	const Vec4 errorColor	= Vec4(1.0f, 0.0f, 0.0f, 1.0f);
452 	const Vec4 blankColor	= Vec4();
453 
454 	for (int y = 0; y < image.getHeight(); ++y)
455 	for (int x = 0; x < image.getWidth(); ++x)
456 	{
457 		const Vec4 color = image.getPixel(x, y);
458 		if (color == errorColor || color == blankColor)
459 			return true;
460 	}
461 
462 	return false;
463 }
464 
465 class Renderer
466 {
467 public:
468 	typedef std::map<VkShaderStageFlagBits, const VkSpecializationInfo*>	SpecializationMap;
469 
470 	//! Use the delegate to bind descriptor sets, vertex buffers, etc. and make a draw call
471 	struct Delegate
472 	{
~Delegatevkt::sparse::__anonf4c2cf810111::Renderer::Delegate473 		virtual			~Delegate		(void) {}
474 		virtual void	rendererDraw	(const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const = 0;
475 	};
476 
Renderer(const DeviceInterface & vk,const VkDevice device,Allocator & allocator,const deUint32 queueFamilyIndex,const VkDescriptorSetLayout descriptorSetLayout,BinaryCollection & binaryCollection,const std::string & vertexName,const std::string & fragmentName,const VkBuffer colorBuffer,const IVec2 & renderSize,const VkFormat colorFormat,const Vec4 & clearColor,const VkPrimitiveTopology topology,SpecializationMap specMap=SpecializationMap ())477 	Renderer (const DeviceInterface&		vk,
478 			  const VkDevice				device,
479 			  Allocator&					allocator,
480 			  const deUint32				queueFamilyIndex,
481 			  const VkDescriptorSetLayout	descriptorSetLayout,	//!< may be NULL, if no descriptors are used
482 			  BinaryCollection&				binaryCollection,
483 			  const std::string&			vertexName,
484 			  const std::string&			fragmentName,
485 			  const VkBuffer				colorBuffer,
486 			  const IVec2&					renderSize,
487 			  const VkFormat				colorFormat,
488 			  const Vec4&					clearColor,
489 			  const VkPrimitiveTopology		topology,
490 			  SpecializationMap				specMap = SpecializationMap())
491 		: m_colorBuffer				(colorBuffer)
492 		, m_renderSize				(renderSize)
493 		, m_colorFormat				(colorFormat)
494 		, m_colorSubresourceRange	(makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u))
495 		, m_clearColor				(clearColor)
496 		, m_topology				(topology)
497 		, m_descriptorSetLayout		(descriptorSetLayout)
498 	{
499 		m_colorImage		= makeImage		(vk, device, makeImageCreateInfo(m_colorFormat, m_renderSize, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT));
500 		m_colorImageAlloc	= bindImage		(vk, device, allocator, *m_colorImage, MemoryRequirement::Any);
501 		m_colorAttachment	= makeImageView	(vk, device, *m_colorImage, VK_IMAGE_VIEW_TYPE_2D, m_colorFormat, m_colorSubresourceRange);
502 
503 		m_vertexModule		= createShaderModule	(vk, device, binaryCollection.get(vertexName), 0u);
504 		m_fragmentModule	= createShaderModule	(vk, device, binaryCollection.get(fragmentName), 0u);
505 
506 		const VkPipelineShaderStageCreateInfo pShaderStages[] =
507 		{
508 			{
509 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,		// VkStructureType						sType;
510 				DE_NULL,													// const void*							pNext;
511 				(VkPipelineShaderStageCreateFlags)0,						// VkPipelineShaderStageCreateFlags		flags;
512 				VK_SHADER_STAGE_VERTEX_BIT,									// VkShaderStageFlagBits				stage;
513 				*m_vertexModule,											// VkShaderModule						module;
514 				"main",														// const char*							pName;
515 				specMap[VK_SHADER_STAGE_VERTEX_BIT],						// const VkSpecializationInfo*			pSpecializationInfo;
516 			},
517 			{
518 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,		// VkStructureType						sType;
519 				DE_NULL,													// const void*							pNext;
520 				(VkPipelineShaderStageCreateFlags)0,						// VkPipelineShaderStageCreateFlags		flags;
521 				VK_SHADER_STAGE_FRAGMENT_BIT,								// VkShaderStageFlagBits				stage;
522 				*m_fragmentModule,											// VkShaderModule						module;
523 				"main",														// const char*							pName;
524 				specMap[VK_SHADER_STAGE_FRAGMENT_BIT],						// const VkSpecializationInfo*			pSpecializationInfo;
525 			}
526 		};
527 
528 		m_renderPass		= makeRenderPass		(vk, device, m_colorFormat);
529 		m_framebuffer		= makeFramebuffer		(vk, device, *m_renderPass, 1u, &m_colorAttachment.get(),
530 													 static_cast<deUint32>(m_renderSize.x()), static_cast<deUint32>(m_renderSize.y()));
531 		m_pipelineLayout	= makePipelineLayout	(vk, device, m_descriptorSetLayout);
532 		m_pipeline			= makeGraphicsPipeline	(vk, device, *m_pipelineLayout, *m_renderPass, m_renderSize, m_topology, DE_LENGTH_OF_ARRAY(pShaderStages), pShaderStages);
533 		m_cmdPool			= makeCommandPool		(vk, device, queueFamilyIndex);
534 		m_cmdBuffer			= allocateCommandBuffer	(vk, device, *m_cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
535 	}
536 
draw(const DeviceInterface & vk,const VkDevice device,const VkQueue queue,const Delegate & drawDelegate,const bool useDeviceGroups,const deUint32 deviceID) const537 	void draw (const DeviceInterface&	vk,
538 			   const VkDevice			device,
539 			   const VkQueue			queue,
540 			   const Delegate&			drawDelegate,
541 			   const bool				useDeviceGroups,
542 			   const deUint32			deviceID) const
543 	{
544 		beginCommandBuffer(vk, *m_cmdBuffer);
545 
546 		beginRenderPass(vk, *m_cmdBuffer, *m_renderPass, *m_framebuffer, makeRect2D(0, 0, m_renderSize.x(), m_renderSize.y()), m_clearColor);
547 
548 		vk.cmdBindPipeline(*m_cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
549 		drawDelegate.rendererDraw(*m_pipelineLayout, *m_cmdBuffer);
550 
551 		endRenderPass(vk, *m_cmdBuffer);
552 
553 		copyImageToBuffer(vk, *m_cmdBuffer, *m_colorImage, m_colorBuffer, m_renderSize);
554 
555 		endCommandBuffer(vk, *m_cmdBuffer);
556 		submitCommandsAndWait(vk, device, queue, *m_cmdBuffer, 0U, DE_NULL, DE_NULL, 0U, DE_NULL, useDeviceGroups, deviceID);
557 	}
558 
559 private:
560 	const VkBuffer					m_colorBuffer;
561 	const IVec2						m_renderSize;
562 	const VkFormat					m_colorFormat;
563 	const VkImageSubresourceRange	m_colorSubresourceRange;
564 	const Vec4						m_clearColor;
565 	const VkPrimitiveTopology		m_topology;
566 	const VkDescriptorSetLayout		m_descriptorSetLayout;
567 
568 	Move<VkImage>					m_colorImage;
569 	MovePtr<Allocation>				m_colorImageAlloc;
570 	Move<VkImageView>				m_colorAttachment;
571 	Move<VkShaderModule>			m_vertexModule;
572 	Move<VkShaderModule>			m_fragmentModule;
573 	Move<VkRenderPass>				m_renderPass;
574 	Move<VkFramebuffer>				m_framebuffer;
575 	Move<VkPipelineLayout>			m_pipelineLayout;
576 	Move<VkPipeline>				m_pipeline;
577 	Move<VkCommandPool>				m_cmdPool;
578 	Move<VkCommandBuffer>			m_cmdBuffer;
579 
580 	// "deleted"
581 				Renderer	(const Renderer&);
582 	Renderer&	operator=	(const Renderer&);
583 };
584 
bindSparseBuffer(const DeviceInterface & vk,const VkDevice device,const VkQueue sparseQueue,const VkBuffer buffer,const SparseAllocation & sparseAllocation,const bool useDeviceGroups,deUint32 resourceDevId,deUint32 memoryDeviceId)585 void bindSparseBuffer (const DeviceInterface& vk, const VkDevice device, const VkQueue sparseQueue, const VkBuffer buffer, const SparseAllocation& sparseAllocation,
586 						const bool useDeviceGroups, deUint32 resourceDevId, deUint32 memoryDeviceId)
587 {
588 	const VkSparseBufferMemoryBindInfo sparseBufferMemoryBindInfo =
589 	{
590 		buffer,														// VkBuffer                     buffer;
591 		static_cast<deUint32>(sparseAllocation.memoryBinds.size()),	// uint32_t                     bindCount;
592 		&sparseAllocation.memoryBinds[0],							// const VkSparseMemoryBind*    pBinds;
593 	};
594 
595 	const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo =
596 	{
597 		VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO_KHR,		//VkStructureType							sType;
598 		DE_NULL,													//const void*								pNext;
599 		resourceDevId,												//deUint32									resourceDeviceIndex;
600 		memoryDeviceId,												//deUint32									memoryDeviceIndex;
601 	};
602 
603 	const VkBindSparseInfo bindInfo =
604 	{
605 		VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,							// VkStructureType                             sType;
606 		useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL,		// const void*                                 pNext;
607 		0u,															// uint32_t                                    waitSemaphoreCount;
608 		DE_NULL,													// const VkSemaphore*                          pWaitSemaphores;
609 		1u,															// uint32_t                                    bufferBindCount;
610 		&sparseBufferMemoryBindInfo,								// const VkSparseBufferMemoryBindInfo*         pBufferBinds;
611 		0u,															// uint32_t                                    imageOpaqueBindCount;
612 		DE_NULL,													// const VkSparseImageOpaqueMemoryBindInfo*    pImageOpaqueBinds;
613 		0u,															// uint32_t                                    imageBindCount;
614 		DE_NULL,													// const VkSparseImageMemoryBindInfo*          pImageBinds;
615 		0u,															// uint32_t                                    signalSemaphoreCount;
616 		DE_NULL,													// const VkSemaphore*                          pSignalSemaphores;
617 	};
618 
619 	const Unique<VkFence> fence(createFence(vk, device));
620 
621 	VK_CHECK(vk.queueBindSparse(sparseQueue, 1u, &bindInfo, *fence));
622 	VK_CHECK(vk.waitForFences(device, 1u, &fence.get(), VK_TRUE, ~0ull));
623 }
624 
625 class SparseBufferTestInstance : public SparseResourcesBaseInstance, Renderer::Delegate
626 {
627 public:
SparseBufferTestInstance(Context & context,const TestFlags flags)628 	SparseBufferTestInstance (Context& context, const TestFlags flags)
629 		: SparseResourcesBaseInstance	(context, (flags & TEST_FLAG_ENABLE_DEVICE_GROUPS) != 0)
630 		, m_aliased						((flags & TEST_FLAG_ALIASED)   != 0)
631 		, m_residency					((flags & TEST_FLAG_RESIDENCY) != 0)
632 		, m_nonResidentStrict			((flags & TEST_FLAG_NON_RESIDENT_STRICT) != 0)
633 		, m_renderSize					(RENDER_SIZE, RENDER_SIZE)
634 		, m_colorFormat					(VK_FORMAT_R8G8B8A8_UNORM)
635 		, m_colorBufferSize				(m_renderSize.x() * m_renderSize.y() * tcu::getPixelSize(mapVkFormat(m_colorFormat)))
636 	{
637 		{
638 			QueueRequirementsVec requirements;
639 			requirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
640 			requirements.push_back(QueueRequirements(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 1u));
641 
642 			createDeviceSupportingQueues(requirements);
643 		}
644 		const VkPhysicalDeviceFeatures	features	= getPhysicalDeviceFeatures(m_context.getInstanceInterface(), getPhysicalDevice());
645 
646 		if (!features.sparseBinding)
647 			TCU_THROW(NotSupportedError, "Missing feature: sparseBinding");
648 
649 		if (m_residency && !features.sparseResidencyBuffer)
650 			TCU_THROW(NotSupportedError, "Missing feature: sparseResidencyBuffer");
651 
652 		if (m_aliased && !features.sparseResidencyAliased)
653 			TCU_THROW(NotSupportedError, "Missing feature: sparseResidencyAliased");
654 
655 		if (m_nonResidentStrict && !m_context.getDeviceProperties().sparseProperties.residencyNonResidentStrict)
656 			TCU_THROW(NotSupportedError, "Missing sparse property: residencyNonResidentStrict");
657 
658 		const DeviceInterface& vk		= getDeviceInterface();
659 		m_sparseQueue					= getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0u);
660 		m_universalQueue				= getQueue(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 0u);
661 
662 		m_sharedQueueFamilyIndices[0]	= m_sparseQueue.queueFamilyIndex;
663 		m_sharedQueueFamilyIndices[1]	= m_universalQueue.queueFamilyIndex;
664 
665 		m_colorBuffer					= makeBuffer(vk, getDevice(), makeBufferCreateInfo(m_colorBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT));
666 		m_colorBufferAlloc				= bindBuffer(vk, getDevice(), getAllocator(), *m_colorBuffer, MemoryRequirement::HostVisible);
667 
668 		deMemset(m_colorBufferAlloc->getHostPtr(), 0, static_cast<std::size_t>(m_colorBufferSize));
669 		flushAlloc(vk, getDevice(), *m_colorBufferAlloc);
670 	}
671 
672 protected:
getSparseBufferCreateInfo(const VkBufferUsageFlags usage) const673 	VkBufferCreateInfo getSparseBufferCreateInfo (const VkBufferUsageFlags usage) const
674 	{
675 		VkBufferCreateFlags	flags = VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
676 		if (m_residency)
677 			flags |= VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
678 		if (m_aliased)
679 			flags |= VK_BUFFER_CREATE_SPARSE_ALIASED_BIT;
680 
681 		VkBufferCreateInfo referenceBufferCreateInfo =
682 		{
683 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,				// VkStructureType        sType;
684 			DE_NULL,											// const void*            pNext;
685 			flags,												// VkBufferCreateFlags    flags;
686 			0u,	// override later								// VkDeviceSize           size;
687 			VK_BUFFER_USAGE_TRANSFER_DST_BIT | usage,			// VkBufferUsageFlags     usage;
688 			VK_SHARING_MODE_EXCLUSIVE,							// VkSharingMode          sharingMode;
689 			0u,													// uint32_t               queueFamilyIndexCount;
690 			DE_NULL,											// const uint32_t*        pQueueFamilyIndices;
691 		};
692 
693 		if (m_sparseQueue.queueFamilyIndex != m_universalQueue.queueFamilyIndex)
694 		{
695 			referenceBufferCreateInfo.sharingMode			= VK_SHARING_MODE_CONCURRENT;
696 			referenceBufferCreateInfo.queueFamilyIndexCount	= DE_LENGTH_OF_ARRAY(m_sharedQueueFamilyIndices);
697 			referenceBufferCreateInfo.pQueueFamilyIndices	= m_sharedQueueFamilyIndices;
698 		}
699 
700 		return referenceBufferCreateInfo;
701 	}
702 
draw(const VkPrimitiveTopology topology,const VkDescriptorSetLayout descriptorSetLayout=DE_NULL,Renderer::SpecializationMap specMap=Renderer::SpecializationMap (),bool useDeviceGroups=false,deUint32 deviceID=0)703 	void draw (const VkPrimitiveTopology	topology,
704 			   const VkDescriptorSetLayout	descriptorSetLayout	= DE_NULL,
705 			   Renderer::SpecializationMap	specMap				= Renderer::SpecializationMap(),
706 			   bool							useDeviceGroups		= false,
707 			   deUint32						deviceID			= 0)
708 	{
709 		const UniquePtr<Renderer> renderer(new Renderer(
710 			getDeviceInterface(), getDevice(), getAllocator(), m_universalQueue.queueFamilyIndex, descriptorSetLayout,
711 			m_context.getBinaryCollection(), "vert", "frag", *m_colorBuffer, m_renderSize, m_colorFormat, Vec4(1.0f, 0.0f, 0.0f, 1.0f), topology, specMap));
712 
713 		renderer->draw(getDeviceInterface(), getDevice(), m_universalQueue.queueHandle, *this, useDeviceGroups, deviceID);
714 	}
715 
isResultImageCorrect(void) const716 	bool isResultImageCorrect (void) const
717 	{
718 		invalidateAlloc(getDeviceInterface(), getDevice(), *m_colorBufferAlloc);
719 
720 		const tcu::ConstPixelBufferAccess resultImage (mapVkFormat(m_colorFormat), m_renderSize.x(), m_renderSize.y(), 1u, m_colorBufferAlloc->getHostPtr());
721 
722 		m_context.getTestContext().getLog()
723 			<< tcu::LogImageSet("Result", "Result") << tcu::LogImage("color0", "", resultImage) << tcu::TestLog::EndImageSet;
724 
725 		return !imageHasErrorPixels(resultImage);
726 	}
727 
728 	const bool							m_aliased;
729 	const bool							m_residency;
730 	const bool							m_nonResidentStrict;
731 
732 	Queue								m_sparseQueue;
733 	Queue								m_universalQueue;
734 
735 private:
736 	const IVec2							m_renderSize;
737 	const VkFormat						m_colorFormat;
738 	const VkDeviceSize					m_colorBufferSize;
739 
740 	Move<VkBuffer>						m_colorBuffer;
741 	MovePtr<Allocation>					m_colorBufferAlloc;
742 
743 	deUint32							m_sharedQueueFamilyIndices[2];
744 };
745 
initProgramsDrawWithUBO(vk::SourceCollections & programCollection,const TestFlags flags)746 void initProgramsDrawWithUBO (vk::SourceCollections& programCollection, const TestFlags flags)
747 {
748 	// Vertex shader
749 	{
750 		std::ostringstream src;
751 		src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
752 			<< "\n"
753 			<< "layout(location = 0) in vec4 in_position;\n"
754 			<< "\n"
755 			<< "out gl_PerVertex {\n"
756 			<< "    vec4 gl_Position;\n"
757 			<< "};\n"
758 			<< "\n"
759 			<< "void main(void)\n"
760 			<< "{\n"
761 			<< "    gl_Position = in_position;\n"
762 			<< "}\n";
763 
764 		programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
765 	}
766 
767 	// Fragment shader
768 	{
769 		const bool			aliased				= (flags & TEST_FLAG_ALIASED) != 0;
770 		const bool			residency			= (flags & TEST_FLAG_RESIDENCY) != 0;
771 		const bool			nonResidentStrict	= (flags & TEST_FLAG_NON_RESIDENT_STRICT) != 0;
772 		const std::string	valueExpr			= (aliased ? "ivec4(3*(ndx % nonAliasedSize) ^ 127, 0, 0, 0)" : "ivec4(3*ndx ^ 127, 0, 0, 0)");
773 
774 		std::ostringstream src;
775 		src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
776 			<< "\n"
777 			<< "layout(location = 0) out vec4 o_color;\n"
778 			<< "\n"
779 			<< "layout(constant_id = 1) const int dataSize  = 1;\n"
780 			<< "layout(constant_id = 2) const int chunkSize = 1;\n"
781 			<< "\n"
782 			<< "layout(set = 0, binding = 0, std140) uniform SparseBuffer {\n"
783 			<< "    ivec4 data[dataSize];\n"
784 			<< "} ubo;\n"
785 			<< "\n"
786 			<< "void main(void)\n"
787 			<< "{\n"
788 			<< "    const int fragNdx        = int(gl_FragCoord.x) + " << RENDER_SIZE << " * int(gl_FragCoord.y);\n"
789 			<< "    const int pageSize       = " << RENDER_SIZE << " * " << RENDER_SIZE << ";\n"
790 			<< "    const int numChunks      = dataSize / chunkSize;\n";
791 
792 		if (aliased)
793 			src << "    const int nonAliasedSize = (numChunks > 1 ? dataSize - chunkSize : dataSize);\n";
794 
795 		src << "    bool      ok             = true;\n"
796 			<< "\n"
797 			<< "    for (int ndx = fragNdx; ndx < dataSize; ndx += pageSize)\n"
798 			<< "    {\n";
799 
800 		if (residency && nonResidentStrict)
801 		{
802 			src << "        if (ndx >= chunkSize && ndx < 2*chunkSize)\n"
803 				<< "            ok = ok && (ubo.data[ndx] == ivec4(0));\n"
804 				<< "        else\n"
805 				<< "            ok = ok && (ubo.data[ndx] == " + valueExpr + ");\n";
806 		}
807 		else if (residency)
808 		{
809 			src << "        if (ndx >= chunkSize && ndx < 2*chunkSize)\n"
810 				<< "            continue;\n"
811 				<< "        ok = ok && (ubo.data[ndx] == " << valueExpr << ");\n";
812 		}
813 		else
814 			src << "        ok = ok && (ubo.data[ndx] == " << valueExpr << ");\n";
815 
816 		src << "    }\n"
817 			<< "\n"
818 			<< "    if (ok)\n"
819 			<< "        o_color = vec4(0.0, 1.0, 0.0, 1.0);\n"
820 			<< "    else\n"
821 			<< "        o_color = vec4(1.0, 0.0, 0.0, 1.0);\n"
822 			<< "}\n";
823 
824 		programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
825 	}
826 }
827 
828 //! Sparse buffer backing a UBO
829 class UBOTestInstance : public SparseBufferTestInstance
830 {
831 public:
UBOTestInstance(Context & context,const TestFlags flags)832 	UBOTestInstance (Context& context, const TestFlags flags)
833 		: SparseBufferTestInstance	(context, flags)
834 	{
835 	}
836 
rendererDraw(const VkPipelineLayout pipelineLayout,const VkCommandBuffer cmdBuffer) const837 	void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
838 	{
839 		const DeviceInterface&	vk				= getDeviceInterface();
840 		const VkDeviceSize		vertexOffset	= 0ull;
841 
842 		vk.cmdBindVertexBuffers	(cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
843 		vk.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
844 		vk.cmdDraw				(cmdBuffer, 4u, 1u, 0u, 0u);
845 	}
846 
iterate(void)847 	tcu::TestStatus iterate (void)
848 	{
849 		const InstanceInterface&	instance			= m_context.getInstanceInterface();
850 		const DeviceInterface&		vk					= getDeviceInterface();
851 		MovePtr<SparseAllocation>	sparseAllocation;
852 		Move<VkBuffer>				sparseBuffer;
853 		Move<VkBuffer>				sparseBufferAliased;
854 		bool						setupDescriptors	= true;
855 
856 		// Go through all physical devices
857 		for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
858 		{
859 			const deUint32	firstDeviceID	= physDevID;
860 			const deUint32	secondDeviceID	= (firstDeviceID + 1) % m_numPhysicalDevices;
861 
862 			// Set up the sparse buffer
863 			{
864 				VkBufferCreateInfo	referenceBufferCreateInfo	= getSparseBufferCreateInfo(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
865 				const VkDeviceSize	minChunkSize				= 512u;	// make sure the smallest allocation is at least this big
866 				deUint32			numMaxChunks				= 0u;
867 
868 				// Check how many chunks we can allocate given the alignment and size requirements of UBOs
869 				{
870 					const UniquePtr<SparseAllocation> minAllocation(SparseAllocationBuilder()
871 						.addMemoryBind()
872 						.build(instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize));
873 
874 					numMaxChunks = deMaxu32(static_cast<deUint32>(m_context.getDeviceProperties().limits.maxUniformBufferRange / minAllocation->resourceSize), 1u);
875 				}
876 
877 				if (numMaxChunks < 4)
878 				{
879 					sparseAllocation = SparseAllocationBuilder()
880 						.addMemoryBind()
881 						.build(instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize);
882 				}
883 				else
884 				{
885 					// Try to use a non-trivial memory allocation scheme to make it different from a non-sparse binding
886 					SparseAllocationBuilder builder;
887 					builder.addMemoryBind();
888 
889 					if (m_residency)
890 						builder.addResourceHole();
891 
892 					builder
893 						.addMemoryAllocation()
894 						.addMemoryHole()
895 						.addMemoryBind();
896 
897 					if (m_aliased)
898 						builder.addAliasedMemoryBind(0u, 0u);
899 
900 					sparseAllocation = builder.build(instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize);
901 					DE_ASSERT(sparseAllocation->resourceSize <= m_context.getDeviceProperties().limits.maxUniformBufferRange);
902 				}
903 
904 				if (firstDeviceID != secondDeviceID)
905 				{
906 					VkPeerMemoryFeatureFlags	peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
907 					vk.getDeviceGroupPeerMemoryFeatures(getDevice(), sparseAllocation->heapIndex, firstDeviceID, secondDeviceID, &peerMemoryFeatureFlags);
908 
909 					if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_DST_BIT)    == 0) ||
910 						((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT) == 0))
911 					{
912 						TCU_THROW(NotSupportedError, "Peer memory does not support COPY_DST and GENERIC_SRC");
913 					}
914 				}
915 
916 				// Create the buffer
917 				referenceBufferCreateInfo.size	= sparseAllocation->resourceSize;
918 				sparseBuffer					= makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
919 				bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *sparseBuffer, *sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);
920 
921 				if (m_aliased)
922 				{
923 					sparseBufferAliased = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
924 					bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *sparseBufferAliased, *sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);
925 				}
926 			}
927 
928 			// Set uniform data
929 			{
930 				const bool					hasAliasedChunk		= (m_aliased && sparseAllocation->memoryBinds.size() > 1u);
931 				const VkDeviceSize			chunkSize			= sparseAllocation->resourceSize / sparseAllocation->numResourceChunks;
932 				const VkDeviceSize			stagingBufferSize	= sparseAllocation->resourceSize - (hasAliasedChunk ? chunkSize : 0);
933 				const deUint32				numBufferEntries	= static_cast<deUint32>(stagingBufferSize / sizeof(IVec4));
934 
935 				const Unique<VkBuffer>		stagingBuffer		(makeBuffer(vk, getDevice(), makeBufferCreateInfo(stagingBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT)));
936 				const UniquePtr<Allocation>	stagingBufferAlloc	(bindBuffer(vk, getDevice(), getAllocator(), *stagingBuffer, MemoryRequirement::HostVisible));
937 
938 				{
939 					// If aliased chunk is used, the staging buffer is smaller than the sparse buffer and we don't overwrite the last chunk
940 					IVec4* const pData = static_cast<IVec4*>(stagingBufferAlloc->getHostPtr());
941 					for (deUint32 i = 0; i < numBufferEntries; ++i)
942 						pData[i] = IVec4(3*i ^ 127, 0, 0, 0);
943 
944 					flushAlloc(vk, getDevice(), *stagingBufferAlloc);
945 
946 					const VkBufferCopy copyRegion =
947 					{
948 						0ull,						// VkDeviceSize    srcOffset;
949 						0ull,						// VkDeviceSize    dstOffset;
950 						stagingBufferSize,			// VkDeviceSize    size;
951 					};
952 
953 					const Unique<VkCommandPool>		cmdPool		(makeCommandPool(vk, getDevice(), m_universalQueue.queueFamilyIndex));
954 					const Unique<VkCommandBuffer>	cmdBuffer	(allocateCommandBuffer(vk, getDevice(), *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
955 
956 					beginCommandBuffer	(vk, *cmdBuffer);
957 					vk.cmdCopyBuffer	(*cmdBuffer, *stagingBuffer, *sparseBuffer, 1u, &copyRegion);
958 					endCommandBuffer	(vk, *cmdBuffer);
959 
960 					submitCommandsAndWait(vk, getDevice(), m_universalQueue.queueHandle, *cmdBuffer, 0u, DE_NULL, DE_NULL, 0, DE_NULL, usingDeviceGroups(), firstDeviceID);
961 					// Once the fence is signaled, the write is also available to the aliasing buffer.
962 				}
963 			}
964 
965 			// Make sure that we don't try to access a larger range than is allowed. This only applies to a single chunk case.
966 			const deUint32 maxBufferRange = deMinu32(static_cast<deUint32>(sparseAllocation->resourceSize), m_context.getDeviceProperties().limits.maxUniformBufferRange);
967 
968 			// Descriptor sets
969 			{
970 				// Setup only once
971 				if (setupDescriptors)
972 				{
973 					m_descriptorSetLayout = DescriptorSetLayoutBuilder()
974 						.addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT)
975 						.build(vk, getDevice());
976 
977 					m_descriptorPool = DescriptorPoolBuilder()
978 						.addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
979 						.build(vk, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
980 
981 					m_descriptorSet = makeDescriptorSet(vk, getDevice(), *m_descriptorPool, *m_descriptorSetLayout);
982 					setupDescriptors = false;
983 				}
984 
985 				const VkBuffer					buffer				= (m_aliased ? *sparseBufferAliased : *sparseBuffer);
986 				const VkDescriptorBufferInfo	sparseBufferInfo	= makeDescriptorBufferInfo(buffer, 0ull, maxBufferRange);
987 
988 				DescriptorSetUpdateBuilder()
989 					.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &sparseBufferInfo)
990 					.update(vk, getDevice());
991 			}
992 
993 			// Vertex data
994 			{
995 				const Vec4 vertexData[] =
996 				{
997 					Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
998 					Vec4(-1.0f,  1.0f, 0.0f, 1.0f),
999 					Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
1000 					Vec4( 1.0f,  1.0f, 0.0f, 1.0f),
1001 				};
1002 
1003 				const VkDeviceSize	vertexBufferSize	= sizeof(vertexData);
1004 
1005 				m_vertexBuffer		= makeBuffer(vk, getDevice(), makeBufferCreateInfo(vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT));
1006 				m_vertexBufferAlloc	= bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);
1007 
1008 				deMemcpy(m_vertexBufferAlloc->getHostPtr(), &vertexData[0], vertexBufferSize);
1009 				flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
1010 			}
1011 
1012 			// Draw
1013 			{
1014 				std::vector<deInt32> specializationData;
1015 				{
1016 					const deUint32	numBufferEntries	= maxBufferRange / static_cast<deUint32>(sizeof(IVec4));
1017 					const deUint32	numEntriesPerChunk	= numBufferEntries / sparseAllocation->numResourceChunks;
1018 
1019 					specializationData.push_back(numBufferEntries);
1020 					specializationData.push_back(numEntriesPerChunk);
1021 				}
1022 
1023 				const VkSpecializationMapEntry	specMapEntries[] =
1024 				{
1025 					{
1026 						1u,					// uint32_t    constantID;
1027 						0u,					// uint32_t    offset;
1028 						sizeof(deInt32),	// size_t      size;
1029 					},
1030 					{
1031 						2u,					// uint32_t    constantID;
1032 						sizeof(deInt32),	// uint32_t    offset;
1033 						sizeof(deInt32),	// size_t      size;
1034 					},
1035 				};
1036 
1037 				const VkSpecializationInfo specInfo =
1038 				{
1039 					DE_LENGTH_OF_ARRAY(specMapEntries),		// uint32_t                           mapEntryCount;
1040 					specMapEntries,							// const VkSpecializationMapEntry*    pMapEntries;
1041 					sizeInBytes(specializationData),		// size_t                             dataSize;
1042 					getDataOrNullptr(specializationData),	// const void*                        pData;
1043 				};
1044 
1045 				Renderer::SpecializationMap	specMap;
1046 				specMap[VK_SHADER_STAGE_FRAGMENT_BIT] = &specInfo;
1047 
1048 				draw(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, *m_descriptorSetLayout, specMap, usingDeviceGroups(), firstDeviceID);
1049 			}
1050 
1051 			if(!isResultImageCorrect())
1052 				return tcu::TestStatus::fail("Some buffer values were incorrect");
1053 		}
1054 		return tcu::TestStatus::pass("Pass");
1055 	}
1056 
1057 private:
1058 	Move<VkBuffer>					m_vertexBuffer;
1059 	MovePtr<Allocation>				m_vertexBufferAlloc;
1060 
1061 	Move<VkDescriptorSetLayout>		m_descriptorSetLayout;
1062 	Move<VkDescriptorPool>			m_descriptorPool;
1063 	Move<VkDescriptorSet>			m_descriptorSet;
1064 };
1065 
initProgramsDrawGrid(vk::SourceCollections & programCollection,const TestFlags flags)1066 void initProgramsDrawGrid (vk::SourceCollections& programCollection, const TestFlags flags)
1067 {
1068 	DE_UNREF(flags);
1069 
1070 	// Vertex shader
1071 	{
1072 		std::ostringstream src;
1073 		src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1074 			<< "\n"
1075 			<< "layout(location = 0) in  vec4 in_position;\n"
1076 			<< "layout(location = 0) out int  out_ndx;\n"
1077 			<< "\n"
1078 			<< "out gl_PerVertex {\n"
1079 			<< "    vec4 gl_Position;\n"
1080 			<< "};\n"
1081 			<< "\n"
1082 			<< "void main(void)\n"
1083 			<< "{\n"
1084 			<< "    gl_Position = in_position;\n"
1085 			<< "    out_ndx     = gl_VertexIndex;\n"
1086 			<< "}\n";
1087 
1088 		programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
1089 	}
1090 
1091 	// Fragment shader
1092 	{
1093 		std::ostringstream src;
1094 		src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1095 			<< "\n"
1096 			<< "layout(location = 0) flat in  int  in_ndx;\n"
1097 			<< "layout(location = 0)      out vec4 o_color;\n"
1098 			<< "\n"
1099 			<< "void main(void)\n"
1100 			<< "{\n"
1101 			<< "    if (in_ndx % 2 == 0)\n"
1102 			<< "        o_color = vec4(vec3(1.0), 1.0);\n"
1103 			<< "    else\n"
1104 			<< "        o_color = vec4(vec3(0.75), 1.0);\n"
1105 			<< "}\n";
1106 
1107 		programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
1108 	}
1109 }
1110 
1111 //! Generate vertex positions for a grid of tiles composed of two triangles each (6 vertices)
generateGrid(void * pRawData,const float step,const float ox,const float oy,const deUint32 numX,const deUint32 numY,const float z=0.0f)1112 void generateGrid (void* pRawData, const float step, const float ox, const float oy, const deUint32 numX, const deUint32 numY, const float z = 0.0f)
1113 {
1114 	typedef Vec4 (*TilePtr)[6];
1115 
1116 	TilePtr const pData = static_cast<TilePtr>(pRawData);
1117 	{
1118 		for (deUint32 iy = 0; iy < numY; ++iy)
1119 		for (deUint32 ix = 0; ix < numX; ++ix)
1120 		{
1121 			const deUint32	ndx	= ix + numX * iy;
1122 			const float		x	= ox + step * static_cast<float>(ix);
1123 			const float		y	= oy + step * static_cast<float>(iy);
1124 
1125 			pData[ndx][0] = Vec4(x + step,	y,			z, 1.0f);
1126 			pData[ndx][1] = Vec4(x,			y,			z, 1.0f);
1127 			pData[ndx][2] = Vec4(x,			y + step,	z, 1.0f);
1128 
1129 			pData[ndx][3] = Vec4(x,			y + step,	z, 1.0f);
1130 			pData[ndx][4] = Vec4(x + step,	y + step,	z, 1.0f);
1131 			pData[ndx][5] = Vec4(x + step,	y,			z, 1.0f);
1132 		}
1133 	}
1134 }
1135 
1136 //! Base test for a sparse buffer backing a vertex/index buffer
1137 class DrawGridTestInstance : public SparseBufferTestInstance
1138 {
1139 public:
DrawGridTestInstance(Context & context,const TestFlags flags,const VkBufferUsageFlags usage,const VkDeviceSize minChunkSize)1140 	DrawGridTestInstance (Context& context, const TestFlags flags, const VkBufferUsageFlags usage, const VkDeviceSize minChunkSize)
1141 		: SparseBufferTestInstance	(context, flags)
1142 		, m_bufferUsage				(usage)
1143 		, m_minChunkSize			(minChunkSize)
1144 	{
1145 	}
1146 
createResources(deUint32 memoryDeviceIndex)1147 	void createResources (deUint32 memoryDeviceIndex)
1148 	{
1149 		const InstanceInterface&	instance					= m_context.getInstanceInterface();
1150 		const DeviceInterface&		vk							= getDeviceInterface();
1151 		VkBufferCreateInfo			referenceBufferCreateInfo	= getSparseBufferCreateInfo(m_bufferUsage);
1152 
1153 		{
1154 			// Allocate two chunks, each covering half of the viewport
1155 			SparseAllocationBuilder builder;
1156 			builder.addMemoryBind();
1157 
1158 			if (m_residency)
1159 				builder.addResourceHole();
1160 
1161 			builder
1162 				.addMemoryAllocation()
1163 				.addMemoryHole()
1164 				.addMemoryBind();
1165 
1166 			if (m_aliased)
1167 				builder.addAliasedMemoryBind(0u, 0u);
1168 
1169 			m_sparseAllocation	= builder.build(instance, getPhysicalDevice(memoryDeviceIndex), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, m_minChunkSize);
1170 		}
1171 
1172 		// Create the buffer
1173 		referenceBufferCreateInfo.size	= m_sparseAllocation->resourceSize;
1174 		m_sparseBuffer					= makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
1175 
1176 		m_perDrawBufferOffset	= m_sparseAllocation->resourceSize / m_sparseAllocation->numResourceChunks;
1177 		m_stagingBufferSize		= 2 * m_perDrawBufferOffset;
1178 		m_stagingBuffer			= makeBuffer(vk, getDevice(), makeBufferCreateInfo(m_stagingBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT));
1179 		m_stagingBufferAlloc	= bindBuffer(vk, getDevice(), getAllocator(), *m_stagingBuffer, MemoryRequirement::HostVisible);
1180 	}
1181 
iterate(void)1182 	tcu::TestStatus iterate (void)
1183 	{
1184 		const DeviceInterface&	vk	= getDeviceInterface();
1185 
1186 		for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
1187 		{
1188 			const deUint32	firstDeviceID	= physDevID;
1189 			const deUint32	secondDeviceID	= (firstDeviceID + 1) % m_numPhysicalDevices;
1190 
1191 			createResources(secondDeviceID);
1192 
1193 			if (firstDeviceID != secondDeviceID)
1194 			{
1195 				VkPeerMemoryFeatureFlags	peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
1196 				vk.getDeviceGroupPeerMemoryFeatures(getDevice(), m_sparseAllocation->heapIndex, firstDeviceID, secondDeviceID, &peerMemoryFeatureFlags);
1197 
1198 				if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_DST_BIT)    == 0) ||
1199 					((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT) == 0))
1200 				{
1201 					TCU_THROW(NotSupportedError, "Peer memory does not support COPY_DST and GENERIC_SRC");
1202 				}
1203 			}
1204 
1205 			// Bind the memory
1206 			bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *m_sparseBuffer, *m_sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);
1207 
1208 			initializeBuffers();
1209 
1210 			// Upload to the sparse buffer
1211 			{
1212 				flushAlloc(vk, getDevice(), *m_stagingBufferAlloc);
1213 
1214 				VkDeviceSize	firstChunkOffset	= 0ull;
1215 				VkDeviceSize	secondChunkOffset	= m_perDrawBufferOffset;
1216 
1217 				if (m_residency)
1218 					secondChunkOffset += m_perDrawBufferOffset;
1219 
1220 				if (m_aliased)
1221 					firstChunkOffset = secondChunkOffset + m_perDrawBufferOffset;
1222 
1223 				const VkBufferCopy copyRegions[] =
1224 				{
1225 					{
1226 						0ull,						// VkDeviceSize    srcOffset;
1227 						firstChunkOffset,			// VkDeviceSize    dstOffset;
1228 						m_perDrawBufferOffset,		// VkDeviceSize    size;
1229 					},
1230 					{
1231 						m_perDrawBufferOffset,		// VkDeviceSize    srcOffset;
1232 						secondChunkOffset,			// VkDeviceSize    dstOffset;
1233 						m_perDrawBufferOffset,		// VkDeviceSize    size;
1234 					},
1235 				};
1236 
1237 				const Unique<VkCommandPool>		cmdPool		(makeCommandPool(vk, getDevice(), m_universalQueue.queueFamilyIndex));
1238 				const Unique<VkCommandBuffer>	cmdBuffer	(allocateCommandBuffer(vk, getDevice(), *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1239 
1240 				beginCommandBuffer	(vk, *cmdBuffer);
1241 				vk.cmdCopyBuffer	(*cmdBuffer, *m_stagingBuffer, *m_sparseBuffer, DE_LENGTH_OF_ARRAY(copyRegions), copyRegions);
1242 				endCommandBuffer	(vk, *cmdBuffer);
1243 
1244 				submitCommandsAndWait(vk, getDevice(), m_universalQueue.queueHandle, *cmdBuffer, 0u, DE_NULL, DE_NULL, 0, DE_NULL, usingDeviceGroups(), firstDeviceID);
1245 			}
1246 
1247 
1248 			Renderer::SpecializationMap	specMap;
1249 			draw(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, DE_NULL, specMap, usingDeviceGroups(), firstDeviceID);
1250 
1251 			if(!isResultImageCorrect())
1252 				return tcu::TestStatus::fail("Some buffer values were incorrect");
1253 		}
1254 		return tcu::TestStatus::pass("Pass");
1255 	}
1256 
1257 protected:
1258 	virtual void				initializeBuffers		(void) = 0;
1259 
1260 	const VkBufferUsageFlags	m_bufferUsage;
1261 	const VkDeviceSize			m_minChunkSize;
1262 
1263 	VkDeviceSize				m_perDrawBufferOffset;
1264 
1265 	VkDeviceSize				m_stagingBufferSize;
1266 	Move<VkBuffer>				m_stagingBuffer;
1267 	MovePtr<Allocation>			m_stagingBufferAlloc;
1268 
1269 	MovePtr<SparseAllocation>	m_sparseAllocation;
1270 	Move<VkBuffer>				m_sparseBuffer;
1271 };
1272 
1273 //! Sparse buffer backing a vertex input buffer
1274 class VertexBufferTestInstance : public DrawGridTestInstance
1275 {
1276 public:
VertexBufferTestInstance(Context & context,const TestFlags flags)1277 	VertexBufferTestInstance (Context& context, const TestFlags flags)
1278 		: DrawGridTestInstance	(context,
1279 								 flags,
1280 								 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
1281 								 GRID_SIZE * GRID_SIZE * 6 * sizeof(Vec4))
1282 	{
1283 	}
1284 
rendererDraw(const VkPipelineLayout pipelineLayout,const VkCommandBuffer cmdBuffer) const1285 	void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
1286 	{
1287 		DE_UNREF(pipelineLayout);
1288 
1289 		m_context.getTestContext().getLog()
1290 			<< tcu::TestLog::Message << "Drawing a grid of triangles backed by a sparse vertex buffer. There should be no red pixels visible." << tcu::TestLog::EndMessage;
1291 
1292 		const DeviceInterface&	vk				= getDeviceInterface();
1293 		const deUint32			vertexCount		= 6 * (GRID_SIZE * GRID_SIZE) / 2;
1294 		VkDeviceSize			vertexOffset	= 0ull;
1295 
1296 		vk.cmdBindVertexBuffers	(cmdBuffer, 0u, 1u, &m_sparseBuffer.get(), &vertexOffset);
1297 		vk.cmdDraw				(cmdBuffer, vertexCount, 1u, 0u, 0u);
1298 
1299 		vertexOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);
1300 
1301 		vk.cmdBindVertexBuffers	(cmdBuffer, 0u, 1u, &m_sparseBuffer.get(), &vertexOffset);
1302 		vk.cmdDraw				(cmdBuffer, vertexCount, 1u, 0u, 0u);
1303 	}
1304 
initializeBuffers(void)1305 	void initializeBuffers (void)
1306 	{
1307 		deUint8*	pData	= static_cast<deUint8*>(m_stagingBufferAlloc->getHostPtr());
1308 		const float	step	= 2.0f / static_cast<float>(GRID_SIZE);
1309 
1310 		// Prepare data for two draw calls
1311 		generateGrid(pData,							step, -1.0f, -1.0f, GRID_SIZE, GRID_SIZE/2);
1312 		generateGrid(pData + m_perDrawBufferOffset,	step, -1.0f,  0.0f, GRID_SIZE, GRID_SIZE/2);
1313 	}
1314 };
1315 
1316 //! Sparse buffer backing an index buffer
1317 class IndexBufferTestInstance : public DrawGridTestInstance
1318 {
1319 public:
IndexBufferTestInstance(Context & context,const TestFlags flags)1320 	IndexBufferTestInstance (Context& context, const TestFlags flags)
1321 		: DrawGridTestInstance	(context,
1322 								 flags,
1323 								 VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
1324 								 GRID_SIZE * GRID_SIZE * 6 * sizeof(deUint32))
1325 		, m_halfVertexCount		(6 * (GRID_SIZE * GRID_SIZE) / 2)
1326 	{
1327 	}
1328 
rendererDraw(const VkPipelineLayout pipelineLayout,const VkCommandBuffer cmdBuffer) const1329 	void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
1330 	{
1331 		DE_UNREF(pipelineLayout);
1332 
1333 		m_context.getTestContext().getLog()
1334 			<< tcu::TestLog::Message << "Drawing a grid of triangles from a sparse index buffer. There should be no red pixels visible." << tcu::TestLog::EndMessage;
1335 
1336 		const DeviceInterface&	vk				= getDeviceInterface();
1337 		const VkDeviceSize		vertexOffset	= 0ull;
1338 		VkDeviceSize			indexOffset		= 0ull;
1339 
1340 		vk.cmdBindVertexBuffers	(cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
1341 
1342 		vk.cmdBindIndexBuffer	(cmdBuffer, *m_sparseBuffer, indexOffset, VK_INDEX_TYPE_UINT32);
1343 		vk.cmdDrawIndexed		(cmdBuffer, m_halfVertexCount, 1u, 0u, 0, 0u);
1344 
1345 		indexOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);
1346 
1347 		vk.cmdBindIndexBuffer	(cmdBuffer, *m_sparseBuffer, indexOffset, VK_INDEX_TYPE_UINT32);
1348 		vk.cmdDrawIndexed		(cmdBuffer, m_halfVertexCount, 1u, 0u, 0, 0u);
1349 	}
1350 
initializeBuffers(void)1351 	void initializeBuffers (void)
1352 	{
1353 		// Vertex buffer
1354 		const DeviceInterface&	vk					= getDeviceInterface();
1355 		const VkDeviceSize		vertexBufferSize	= 2 * m_halfVertexCount * sizeof(Vec4);
1356 								m_vertexBuffer		= makeBuffer(vk, getDevice(), makeBufferCreateInfo(vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT));
1357 								m_vertexBufferAlloc	= bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);
1358 
1359 		{
1360 			const float	step = 2.0f / static_cast<float>(GRID_SIZE);
1361 
1362 			generateGrid(m_vertexBufferAlloc->getHostPtr(), step, -1.0f, -1.0f, GRID_SIZE, GRID_SIZE);
1363 
1364 			flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
1365 		}
1366 
1367 		// Sparse index buffer
1368 		for (deUint32 chunkNdx = 0u; chunkNdx < 2; ++chunkNdx)
1369 		{
1370 			deUint8* const	pData		= static_cast<deUint8*>(m_stagingBufferAlloc->getHostPtr()) + chunkNdx * m_perDrawBufferOffset;
1371 			deUint32* const	pIndexData	= reinterpret_cast<deUint32*>(pData);
1372 			const deUint32	ndxBase		= chunkNdx * m_halfVertexCount;
1373 
1374 			for (deUint32 i = 0u; i < m_halfVertexCount; ++i)
1375 				pIndexData[i] = ndxBase + i;
1376 		}
1377 	}
1378 
1379 private:
1380 	const deUint32			m_halfVertexCount;
1381 	Move<VkBuffer>			m_vertexBuffer;
1382 	MovePtr<Allocation>		m_vertexBufferAlloc;
1383 };
1384 
1385 //! Draw from a sparse indirect buffer
1386 class IndirectBufferTestInstance : public DrawGridTestInstance
1387 {
1388 public:
IndirectBufferTestInstance(Context & context,const TestFlags flags)1389 	IndirectBufferTestInstance (Context& context, const TestFlags flags)
1390 		: DrawGridTestInstance	(context,
1391 								 flags,
1392 								 VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT,
1393 								 sizeof(VkDrawIndirectCommand))
1394 	{
1395 	}
1396 
rendererDraw(const VkPipelineLayout pipelineLayout,const VkCommandBuffer cmdBuffer) const1397 	void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
1398 	{
1399 		DE_UNREF(pipelineLayout);
1400 
1401 		m_context.getTestContext().getLog()
1402 			<< tcu::TestLog::Message << "Drawing two triangles covering the whole viewport. There should be no red pixels visible." << tcu::TestLog::EndMessage;
1403 
1404 		const DeviceInterface&	vk				= getDeviceInterface();
1405 		const VkDeviceSize		vertexOffset	= 0ull;
1406 		VkDeviceSize			indirectOffset	= 0ull;
1407 
1408 		vk.cmdBindVertexBuffers	(cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
1409 		vk.cmdDrawIndirect		(cmdBuffer, *m_sparseBuffer, indirectOffset, 1u, 0u);
1410 
1411 		indirectOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);
1412 
1413 		vk.cmdDrawIndirect		(cmdBuffer, *m_sparseBuffer, indirectOffset, 1u, 0u);
1414 	}
1415 
initializeBuffers(void)1416 	void initializeBuffers (void)
1417 	{
1418 		// Vertex buffer
1419 		const DeviceInterface&	vk					= getDeviceInterface();
1420 		const VkDeviceSize		vertexBufferSize	= 2 * 3 * sizeof(Vec4);
1421 								m_vertexBuffer		= makeBuffer(vk, getDevice(), makeBufferCreateInfo(vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT));
1422 								m_vertexBufferAlloc	= bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);
1423 
1424 		{
1425 			generateGrid(m_vertexBufferAlloc->getHostPtr(), 2.0f, -1.0f, -1.0f, 1, 1);
1426 			flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
1427 		}
1428 
1429 		// Indirect buffer
1430 		for (deUint32 chunkNdx = 0u; chunkNdx < 2; ++chunkNdx)
1431 		{
1432 			deUint8* const					pData		= static_cast<deUint8*>(m_stagingBufferAlloc->getHostPtr()) + chunkNdx * m_perDrawBufferOffset;
1433 			VkDrawIndirectCommand* const	pCmdData	= reinterpret_cast<VkDrawIndirectCommand*>(pData);
1434 
1435 			pCmdData->firstVertex	= 3u * chunkNdx;
1436 			pCmdData->firstInstance	= 0u;
1437 			pCmdData->vertexCount	= 3u;
1438 			pCmdData->instanceCount	= 1u;
1439 		}
1440 	}
1441 
1442 private:
1443 	Move<VkBuffer>			m_vertexBuffer;
1444 	MovePtr<Allocation>		m_vertexBufferAlloc;
1445 };
1446 
1447 //! Similar to the class in vktTestCaseUtil.hpp, but uses Arg0 directly rather than through a InstanceFunction1
1448 template<typename Arg0>
1449 class FunctionProgramsSimple1
1450 {
1451 public:
1452 	typedef void	(*Function)				(vk::SourceCollections& dst, Arg0 arg0);
FunctionProgramsSimple1(Function func)1453 					FunctionProgramsSimple1	(Function func) : m_func(func)							{}
init(vk::SourceCollections & dst,const Arg0 & arg0) const1454 	void			init					(vk::SourceCollections& dst, const Arg0& arg0) const	{ m_func(dst, arg0); }
1455 
1456 private:
1457 	const Function	m_func;
1458 };
1459 
1460 //! Convenience function to create a TestCase based on a freestanding initPrograms and a TestInstance implementation
1461 template<typename TestInstanceT, typename Arg0>
createTestInstanceWithPrograms(tcu::TestContext & testCtx,const std::string & name,const std::string & desc,typename FunctionProgramsSimple1<Arg0>::Function initPrograms,Arg0 arg0)1462 TestCase* createTestInstanceWithPrograms (tcu::TestContext&									testCtx,
1463 										  const std::string&								name,
1464 										  const std::string&								desc,
1465 										  typename FunctionProgramsSimple1<Arg0>::Function	initPrograms,
1466 										  Arg0												arg0)
1467 {
1468 	return new InstanceFactory1<TestInstanceT, Arg0, FunctionProgramsSimple1<Arg0> >(
1469 		testCtx, tcu::NODETYPE_SELF_VALIDATE, name, desc, FunctionProgramsSimple1<Arg0>(initPrograms), arg0);
1470 }
1471 
populateTestGroup(tcu::TestCaseGroup * parentGroup)1472 void populateTestGroup (tcu::TestCaseGroup* parentGroup)
1473 {
1474 	const struct
1475 	{
1476 		std::string		name;
1477 		TestFlags		flags;
1478 	} groups[] =
1479 	{
1480 		{ "sparse_binding",										0u,													},
1481 		{ "sparse_binding_aliased",								TEST_FLAG_ALIASED,									},
1482 		{ "sparse_residency",									TEST_FLAG_RESIDENCY,								},
1483 		{ "sparse_residency_aliased",							TEST_FLAG_RESIDENCY | TEST_FLAG_ALIASED,			},
1484 		{ "sparse_residency_non_resident_strict",				TEST_FLAG_RESIDENCY | TEST_FLAG_NON_RESIDENT_STRICT,},
1485 	};
1486 
1487 	const int numGroupsIncludingNonResidentStrict	= DE_LENGTH_OF_ARRAY(groups);
1488 	const int numGroupsDefaultList					= numGroupsIncludingNonResidentStrict - 1;
1489 	std::string devGroupPrefix						= "device_group_";
1490 
1491 	// Transfer
1492 	{
1493 		MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "transfer", ""));
1494 		{
1495 			MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_binding", ""));
1496 			addBufferSparseBindingTests(subGroup.get(), false);
1497 			group->addChild(subGroup.release());
1498 
1499 			MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_binding", ""));
1500 			addBufferSparseBindingTests(subGroupDeviceGroups.get(), true);
1501 			group->addChild(subGroupDeviceGroups.release());
1502 		}
1503 		parentGroup->addChild(group.release());
1504 	}
1505 
1506 	// SSBO
1507 	{
1508 		MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "ssbo", ""));
1509 		{
1510 			MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_binding_aliased", ""));
1511 			addBufferSparseMemoryAliasingTests(subGroup.get(), false);
1512 			group->addChild(subGroup.release());
1513 
1514 			MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_binding_aliased", ""));
1515 			addBufferSparseMemoryAliasingTests(subGroupDeviceGroups.get(), true);
1516 			group->addChild(subGroupDeviceGroups.release());
1517 		}
1518 		{
1519 			MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_residency", ""));
1520 			addBufferSparseResidencyTests(subGroup.get(), false);
1521 			group->addChild(subGroup.release());
1522 
1523 			MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_residency", ""));
1524 			addBufferSparseResidencyTests(subGroupDeviceGroups.get(), true);
1525 			group->addChild(subGroupDeviceGroups.release());
1526 		}
1527 		parentGroup->addChild(group.release());
1528 	}
1529 
1530 	// UBO
1531 	{
1532 		MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "ubo", ""));
1533 
1534 		for (int groupNdx = 0u; groupNdx < numGroupsIncludingNonResidentStrict; ++groupNdx)
1535 		{
1536 			group->addChild(createTestInstanceWithPrograms<UBOTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawWithUBO, groups[groupNdx].flags));
1537 		}
1538 		for (int groupNdx = 0u; groupNdx < numGroupsIncludingNonResidentStrict; ++groupNdx)
1539 		{
1540 			group->addChild(createTestInstanceWithPrograms<UBOTestInstance>(group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), "", initProgramsDrawWithUBO, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
1541 		}
1542 		parentGroup->addChild(group.release());
1543 	}
1544 
1545 	// Vertex buffer
1546 	{
1547 		MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "vertex_buffer", ""));
1548 
1549 		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1550 		{
1551 			group->addChild(createTestInstanceWithPrograms<VertexBufferTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags));
1552 		}
1553 		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1554 		{
1555 			group->addChild(createTestInstanceWithPrograms<VertexBufferTestInstance>(group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
1556 		}
1557 
1558 		parentGroup->addChild(group.release());
1559 	}
1560 
1561 	// Index buffer
1562 	{
1563 		MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "index_buffer", ""));
1564 
1565 		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1566 		{
1567 			group->addChild(createTestInstanceWithPrograms<IndexBufferTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags));
1568 		}
1569 		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1570 		{
1571 			group->addChild(createTestInstanceWithPrograms<IndexBufferTestInstance>(group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
1572 		}
1573 
1574 		parentGroup->addChild(group.release());
1575 	}
1576 
1577 	// Indirect buffer
1578 	{
1579 		MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "indirect_buffer", ""));
1580 
1581 		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1582 		{
1583 			group->addChild(createTestInstanceWithPrograms<IndirectBufferTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags));
1584 		}
1585 		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1586 		{
1587 			group->addChild(createTestInstanceWithPrograms<IndirectBufferTestInstance>(group->getTestContext(), (devGroupPrefix +  groups[groupNdx].name).c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
1588 		}
1589 
1590 		parentGroup->addChild(group.release());
1591 	}
1592 }
1593 
1594 } // anonymous ns
1595 
createSparseBufferTests(tcu::TestContext & testCtx)1596 tcu::TestCaseGroup* createSparseBufferTests (tcu::TestContext& testCtx)
1597 {
1598 	return createTestGroup(testCtx, "buffer", "Sparse buffer usage tests", populateTestGroup);
1599 }
1600 
1601 } // sparse
1602 } // vkt
1603