1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file  vktSparseResourcesBufferMemoryAliasing.cpp
21  * \brief Sparse buffer memory aliasing tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktSparseResourcesBufferMemoryAliasing.hpp"
25 #include "vktSparseResourcesTestsUtil.hpp"
26 #include "vktSparseResourcesBase.hpp"
27 #include "vktTestCaseUtil.hpp"
28 
29 #include "vkDefs.hpp"
30 #include "vkRef.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPlatform.hpp"
33 #include "vkPrograms.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vkMemUtil.hpp"
36 #include "vkBarrierUtil.hpp"
37 #include "vkQueryUtil.hpp"
38 #include "vkBuilderUtil.hpp"
39 #include "vkTypeUtil.hpp"
40 #include "vkCmdUtil.hpp"
41 #include "vkObjUtil.hpp"
42 
43 #include "deStringUtil.hpp"
44 #include "deUniquePtr.hpp"
45 
46 #include <string>
47 #include <vector>
48 
49 using namespace vk;
50 
51 namespace vkt
52 {
53 namespace sparse
54 {
55 namespace
56 {
57 
//! Constants shared by the generated compute shader and the host-side reference data.
enum ShaderParameters
{
	//! Size in bytes of one uint element; buffer sizes are divided by this to get element counts.
	SIZE_OF_UINT_IN_SHADER = 4u,

	//! The shader stores (index % MODULO_DIVISOR) into each element.
	MODULO_DIVISOR = 1024u
};
63 
computeWorkGroupSize(const deUint32 numInvocations)64 tcu::UVec3 computeWorkGroupSize (const deUint32 numInvocations)
65 {
66 	const deUint32		maxComputeWorkGroupInvocations	= 128u;
67 	const tcu::UVec3	maxComputeWorkGroupSize			= tcu::UVec3(128u, 128u, 64u);
68 	deUint32			numInvocationsLeft				= numInvocations;
69 
70 	const deUint32 xWorkGroupSize = std::min(std::min(numInvocationsLeft, maxComputeWorkGroupSize.x()), maxComputeWorkGroupInvocations);
71 	numInvocationsLeft = numInvocationsLeft / xWorkGroupSize + ((numInvocationsLeft % xWorkGroupSize) ? 1u : 0u);
72 
73 	const deUint32 yWorkGroupSize = std::min(std::min(numInvocationsLeft, maxComputeWorkGroupSize.y()), maxComputeWorkGroupInvocations / xWorkGroupSize);
74 	numInvocationsLeft = numInvocationsLeft / yWorkGroupSize + ((numInvocationsLeft % yWorkGroupSize) ? 1u : 0u);
75 
76 	const deUint32 zWorkGroupSize = std::min(std::min(numInvocationsLeft, maxComputeWorkGroupSize.z()), maxComputeWorkGroupInvocations / (xWorkGroupSize*yWorkGroupSize));
77 	numInvocationsLeft = numInvocationsLeft / zWorkGroupSize + ((numInvocationsLeft % zWorkGroupSize) ? 1u : 0u);
78 
79 	return tcu::UVec3(xWorkGroupSize, yWorkGroupSize, zWorkGroupSize);
80 }
81 
82 class BufferSparseMemoryAliasingCase : public TestCase
83 {
84 public:
85 					BufferSparseMemoryAliasingCase	(tcu::TestContext&		testCtx,
86 													 const std::string&		name,
87 													 const std::string&		description,
88 													 const deUint32			bufferSize,
89 													 const glu::GLSLVersion	glslVersion,
90 													 const bool				useDeviceGroups);
91 
92 	void			initPrograms					(SourceCollections&		sourceCollections) const;
93 	TestInstance*	createInstance					(Context&				context) const;
94 	virtual void	checkSupport					(Context&				context) const;
95 
96 private:
97 	const	deUint32			m_bufferSizeInBytes;
98 	const	glu::GLSLVersion	m_glslVersion;
99 	const	bool				m_useDeviceGroups;
100 };
101 
BufferSparseMemoryAliasingCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const deUint32 bufferSize,const glu::GLSLVersion glslVersion,const bool useDeviceGroups)102 BufferSparseMemoryAliasingCase::BufferSparseMemoryAliasingCase (tcu::TestContext&		testCtx,
103 																const std::string&		name,
104 																const std::string&		description,
105 																const deUint32			bufferSize,
106 																const glu::GLSLVersion	glslVersion,
107 																const bool				useDeviceGroups)
108 	: TestCase				(testCtx, name, description)
109 	, m_bufferSizeInBytes	(bufferSize)
110 	, m_glslVersion			(glslVersion)
111 	, m_useDeviceGroups		(useDeviceGroups)
112 {
113 }
114 
checkSupport(Context & context) const115 void BufferSparseMemoryAliasingCase::checkSupport (Context& context) const
116 {
117 	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);
118 	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_ALIASED);
119 }
120 
initPrograms(SourceCollections & sourceCollections) const121 void BufferSparseMemoryAliasingCase::initPrograms (SourceCollections& sourceCollections) const
122 {
123 	// Create compute program
124 	const char* const versionDecl		= glu::getGLSLVersionDeclaration(m_glslVersion);
125 	const deUint32	  numInvocations	= m_bufferSizeInBytes / SIZE_OF_UINT_IN_SHADER;
126 	const tcu::UVec3  workGroupSize		= computeWorkGroupSize(numInvocations);
127 
128 	std::ostringstream src;
129 	src << versionDecl << "\n"
130 		<< "layout (local_size_x = " << workGroupSize.x() << ", local_size_y = " << workGroupSize.y() << ", local_size_z = " << workGroupSize.z() << ") in;\n"
131 		<< "layout(set = 0, binding = 0, std430) writeonly buffer Output\n"
132 		<< "{\n"
133 		<< "	uint result[];\n"
134 		<< "} sb_out;\n"
135 		<< "\n"
136 		<< "void main (void)\n"
137 		<< "{\n"
138 		<< "	uint index = gl_GlobalInvocationID.x + (gl_GlobalInvocationID.y + gl_GlobalInvocationID.z*gl_NumWorkGroups.y*gl_WorkGroupSize.y)*gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
139 		<< "	if ( index < " << m_bufferSizeInBytes / SIZE_OF_UINT_IN_SHADER << "u )\n"
140 		<< "	{\n"
141 		<< "		sb_out.result[index] = index % " << MODULO_DIVISOR << "u;\n"
142 		<< "	}\n"
143 		<< "}\n";
144 
145 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
146 }
147 
148 class BufferSparseMemoryAliasingInstance : public SparseResourcesBaseInstance
149 {
150 public:
151 					BufferSparseMemoryAliasingInstance	(Context&					context,
152 														 const deUint32				bufferSize,
153 														 const bool					useDeviceGroups);
154 
155 	tcu::TestStatus	iterate								(void);
156 
157 private:
158 	const deUint32			m_bufferSizeInBytes;
159 	const deUint32			m_useDeviceGroups;
160 
161 };
162 
BufferSparseMemoryAliasingInstance(Context & context,const deUint32 bufferSize,const bool useDeviceGroups)163 BufferSparseMemoryAliasingInstance::BufferSparseMemoryAliasingInstance (Context&		context,
164 																		const deUint32	bufferSize,
165 																		const bool		useDeviceGroups)
166 	: SparseResourcesBaseInstance	(context, useDeviceGroups)
167 	, m_bufferSizeInBytes			(bufferSize)
168 	, m_useDeviceGroups				(useDeviceGroups)
169 {
170 }
171 
iterate(void)172 tcu::TestStatus BufferSparseMemoryAliasingInstance::iterate (void)
173 {
174 	const InstanceInterface&		instance		= m_context.getInstanceInterface();
175 	{
176 		// Create logical device supporting both sparse and compute operations
177 		QueueRequirementsVec queueRequirements;
178 		queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
179 		queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
180 
181 		createDeviceSupportingQueues(queueRequirements);
182 	}
183 	const vk::VkPhysicalDevice&	physicalDevice	= getPhysicalDevice();
184 	const DeviceInterface&		deviceInterface	= getDeviceInterface();
185 	const Queue&				sparseQueue		= getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
186 	const Queue&				computeQueue	= getQueue(VK_QUEUE_COMPUTE_BIT, 0);
187 
188 	// Go through all physical devices
189 	for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
190 	{
191 		const deUint32	firstDeviceID = physDevID;
192 		const deUint32	secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;
193 
194 		VkBufferCreateInfo bufferCreateInfo =
195 		{
196 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// VkStructureType		sType;
197 			DE_NULL,								// const void*			pNext;
198 			VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
199 			VK_BUFFER_CREATE_SPARSE_ALIASED_BIT,	// VkBufferCreateFlags	flags;
200 			m_bufferSizeInBytes,					// VkDeviceSize			size;
201 			VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
202 			VK_BUFFER_USAGE_TRANSFER_SRC_BIT,		// VkBufferUsageFlags	usage;
203 			VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode		sharingMode;
204 			0u,										// deUint32				queueFamilyIndexCount;
205 			DE_NULL									// const deUint32*		pQueueFamilyIndices;
206 		};
207 
208 		const deUint32 queueFamilyIndices[] = { sparseQueue.queueFamilyIndex, computeQueue.queueFamilyIndex };
209 
210 		if (sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex)
211 		{
212 			bufferCreateInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
213 			bufferCreateInfo.queueFamilyIndexCount = 2u;
214 			bufferCreateInfo.pQueueFamilyIndices = queueFamilyIndices;
215 		}
216 
217 		// Create sparse buffers
218 		const Unique<VkBuffer> sparseBufferWrite(createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));
219 		const Unique<VkBuffer> sparseBufferRead(createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));
220 
221 		// Create sparse buffers memory bind semaphore
222 		const Unique<VkSemaphore> bufferMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));
223 
224 		const VkMemoryRequirements	bufferMemRequirements = getBufferMemoryRequirements(deviceInterface, getDevice(), *sparseBufferWrite);
225 
226 		if (bufferMemRequirements.size > getPhysicalDeviceProperties(instance, physicalDevice).limits.sparseAddressSpaceSize)
227 			TCU_THROW(NotSupportedError, "Required memory size for sparse resources exceeds device limits");
228 
229 		DE_ASSERT((bufferMemRequirements.size % bufferMemRequirements.alignment) == 0);
230 
231 		const deUint32 memoryType = findMatchingMemoryType(instance, getPhysicalDevice(secondDeviceID), bufferMemRequirements, MemoryRequirement::Any);
232 
233 		if (memoryType == NO_MATCH_FOUND)
234 			return tcu::TestStatus::fail("No matching memory type found");
235 
236 		if (firstDeviceID != secondDeviceID)
237 		{
238 			VkPeerMemoryFeatureFlags	peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
239 			const deUint32				heapIndex = getHeapIndexForMemoryType(instance, getPhysicalDevice(secondDeviceID), memoryType);
240 			deviceInterface.getDeviceGroupPeerMemoryFeatures(getDevice(), heapIndex, firstDeviceID, secondDeviceID, &peerMemoryFeatureFlags);
241 
242 			if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT)    == 0) ||
243 				((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT) == 0))
244 			{
245 				TCU_THROW(NotSupportedError, "Peer memory does not support COPY_SRC and GENERIC_DST");
246 			}
247 		}
248 
249 		const VkSparseMemoryBind sparseMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(), bufferMemRequirements.size, memoryType, 0u);
250 
251 		Move<VkDeviceMemory> deviceMemoryPtr(check<VkDeviceMemory>(sparseMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL));
252 
253 		{
254 			const VkSparseBufferMemoryBindInfo sparseBufferMemoryBindInfo[2] =
255 			{
256 				makeSparseBufferMemoryBindInfo
257 				(*sparseBufferWrite,	//VkBuffer					buffer;
258 				1u,						//deUint32					bindCount;
259 				&sparseMemoryBind		//const VkSparseMemoryBind*	Binds;
260 				),
261 
262 				makeSparseBufferMemoryBindInfo
263 				(*sparseBufferRead,		//VkBuffer					buffer;
264 				1u,						//deUint32					bindCount;
265 				&sparseMemoryBind		//const VkSparseMemoryBind*	Binds;
266 				)
267 			};
268 
269 			const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo =
270 			{
271 				VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO_KHR,	//VkStructureType							sType;
272 				DE_NULL,												//const void*								pNext;
273 				firstDeviceID,											//deUint32									resourceDeviceIndex;
274 				secondDeviceID,											//deUint32									memoryDeviceIndex;
275 			};
276 
277 			const VkBindSparseInfo bindSparseInfo =
278 			{
279 				VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,						//VkStructureType							sType;
280 				m_useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL,	//const void*								pNext;
281 				0u,														//deUint32									waitSemaphoreCount;
282 				DE_NULL,												//const VkSemaphore*						pWaitSemaphores;
283 				2u,														//deUint32									bufferBindCount;
284 				sparseBufferMemoryBindInfo,								//const VkSparseBufferMemoryBindInfo*		pBufferBinds;
285 				0u,														//deUint32									imageOpaqueBindCount;
286 				DE_NULL,												//const VkSparseImageOpaqueMemoryBindInfo*	pImageOpaqueBinds;
287 				0u,														//deUint32									imageBindCount;
288 				DE_NULL,												//const VkSparseImageMemoryBindInfo*		pImageBinds;
289 				1u,														//deUint32									signalSemaphoreCount;
290 				&bufferMemoryBindSemaphore.get()						//const VkSemaphore*						pSignalSemaphores;
291 			};
292 
293 			// Submit sparse bind commands for execution
294 			VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
295 		}
296 
297 		// Create output buffer
298 		const VkBufferCreateInfo		outputBufferCreateInfo = makeBufferCreateInfo(m_bufferSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
299 		const Unique<VkBuffer>			outputBuffer(createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
300 		const de::UniquePtr<Allocation>	outputBufferAlloc(bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));
301 
302 		// Create command buffer for compute and data transfer operations
303 		const Unique<VkCommandPool>	  commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
304 		const Unique<VkCommandBuffer> commandBuffer(allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
305 
306 		// Start recording commands
307 		beginCommandBuffer(deviceInterface, *commandBuffer);
308 
309 		// Create descriptor set
310 		const Unique<VkDescriptorSetLayout> descriptorSetLayout(
311 			DescriptorSetLayoutBuilder()
312 			.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
313 			.build(deviceInterface, getDevice()));
314 
315 		// Create compute pipeline
316 		const Unique<VkShaderModule>	shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
317 		const Unique<VkPipelineLayout>	pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
318 		const Unique<VkPipeline>		computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));
319 
320 		deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
321 
322 		// Create descriptor set
323 		const Unique<VkDescriptorPool> descriptorPool(
324 			DescriptorPoolBuilder()
325 			.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
326 			.build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
327 
328 		const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
329 
330 		{
331 			const VkDescriptorBufferInfo sparseBufferInfo = makeDescriptorBufferInfo(*sparseBufferWrite, 0u, m_bufferSizeInBytes);
332 
333 			DescriptorSetUpdateBuilder()
334 				.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &sparseBufferInfo)
335 				.update(deviceInterface, getDevice());
336 		}
337 
338 		deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
339 
340 		{
341 			deUint32		 numInvocationsLeft = m_bufferSizeInBytes / SIZE_OF_UINT_IN_SHADER;
342 			const tcu::UVec3 workGroupSize = computeWorkGroupSize(numInvocationsLeft);
343 			const tcu::UVec3 maxComputeWorkGroupCount = tcu::UVec3(65535u, 65535u, 65535u);
344 
345 			numInvocationsLeft -= workGroupSize.x()*workGroupSize.y()*workGroupSize.z();
346 
347 			const deUint32	xWorkGroupCount = std::min(numInvocationsLeft, maxComputeWorkGroupCount.x());
348 			numInvocationsLeft = numInvocationsLeft / xWorkGroupCount + ((numInvocationsLeft % xWorkGroupCount) ? 1u : 0u);
349 			const deUint32	yWorkGroupCount = std::min(numInvocationsLeft, maxComputeWorkGroupCount.y());
350 			numInvocationsLeft = numInvocationsLeft / yWorkGroupCount + ((numInvocationsLeft % yWorkGroupCount) ? 1u : 0u);
351 			const deUint32	zWorkGroupCount = std::min(numInvocationsLeft, maxComputeWorkGroupCount.z());
352 			numInvocationsLeft = numInvocationsLeft / zWorkGroupCount + ((numInvocationsLeft % zWorkGroupCount) ? 1u : 0u);
353 
354 			if (numInvocationsLeft != 1u)
355 				TCU_THROW(NotSupportedError, "Buffer size is not supported");
356 
357 			deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
358 		}
359 
360 		{
361 			const VkBufferMemoryBarrier sparseBufferWriteBarrier
362 				= makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
363 					VK_ACCESS_TRANSFER_READ_BIT,
364 					*sparseBufferWrite,
365 					0ull,
366 					m_bufferSizeInBytes);
367 
368 			deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u, &sparseBufferWriteBarrier, 0u, DE_NULL);
369 		}
370 
371 		{
372 			const VkBufferCopy bufferCopy = makeBufferCopy(0u, 0u, m_bufferSizeInBytes);
373 
374 			deviceInterface.cmdCopyBuffer(*commandBuffer, *sparseBufferRead, *outputBuffer, 1u, &bufferCopy);
375 		}
376 
377 		{
378 			const VkBufferMemoryBarrier outputBufferHostBarrier
379 				= makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT,
380 					VK_ACCESS_HOST_READ_BIT,
381 					*outputBuffer,
382 					0ull,
383 					m_bufferSizeInBytes);
384 
385 			deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferHostBarrier, 0u, DE_NULL);
386 		}
387 
388 		// End recording commands
389 		endCommandBuffer(deviceInterface, *commandBuffer);
390 
391 		// The stage at which execution is going to wait for finish of sparse binding operations
392 		const VkPipelineStageFlags waitStageBits[] = { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };
393 
394 		// Submit commands for execution and wait for completion
395 		// In case of device groups, submit on the physical device with the resource
396 		submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &bufferMemoryBindSemaphore.get(),
397 			waitStageBits, 0, DE_NULL, m_useDeviceGroups, firstDeviceID);
398 
399 		// Retrieve data from output buffer to host memory
400 		invalidateAlloc(deviceInterface, getDevice(), *outputBufferAlloc);
401 
402 		const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());
403 
404 		// Wait for sparse queue to become idle
405 		deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
406 
407 		// Prepare reference data
408 		std::vector<deUint8> referenceData;
409 		referenceData.resize(m_bufferSizeInBytes);
410 
411 		std::vector<deUint32> referenceDataBlock;
412 		referenceDataBlock.resize(MODULO_DIVISOR);
413 
414 		for (deUint32 valueNdx = 0; valueNdx < MODULO_DIVISOR; ++valueNdx)
415 		{
416 			referenceDataBlock[valueNdx] = valueNdx % MODULO_DIVISOR;
417 		}
418 
419 		const deUint32 fullBlockSizeInBytes = MODULO_DIVISOR * SIZE_OF_UINT_IN_SHADER;
420 		const deUint32 lastBlockSizeInBytes = m_bufferSizeInBytes % fullBlockSizeInBytes;
421 		const deUint32 numberOfBlocks = m_bufferSizeInBytes / fullBlockSizeInBytes + (lastBlockSizeInBytes ? 1u : 0u);
422 
423 		for (deUint32 blockNdx = 0; blockNdx < numberOfBlocks; ++blockNdx)
424 		{
425 			const deUint32 offset = blockNdx * fullBlockSizeInBytes;
426 			deMemcpy(&referenceData[0] + offset, &referenceDataBlock[0], ((offset + fullBlockSizeInBytes) <= m_bufferSizeInBytes) ? fullBlockSizeInBytes : lastBlockSizeInBytes);
427 		}
428 
429 		// Compare reference data with output data
430 		if (deMemCmp(&referenceData[0], outputData, m_bufferSizeInBytes) != 0)
431 			return tcu::TestStatus::fail("Failed");
432 	}
433 	return tcu::TestStatus::pass("Passed");
434 }
435 
createInstance(Context & context) const436 TestInstance* BufferSparseMemoryAliasingCase::createInstance (Context& context) const
437 {
438 	return new BufferSparseMemoryAliasingInstance(context, m_bufferSizeInBytes, m_useDeviceGroups);
439 }
440 
441 } // anonymous ns
442 
addBufferSparseMemoryAliasingTests(tcu::TestCaseGroup * group,const bool useDeviceGroups)443 void addBufferSparseMemoryAliasingTests(tcu::TestCaseGroup* group, const bool useDeviceGroups)
444 {
445 	group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_10", "", 1 << 10, glu::GLSL_VERSION_440, useDeviceGroups));
446 	group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_12", "", 1 << 12, glu::GLSL_VERSION_440, useDeviceGroups));
447 	group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_16", "", 1 << 16, glu::GLSL_VERSION_440, useDeviceGroups));
448 	group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_17", "", 1 << 17, glu::GLSL_VERSION_440, useDeviceGroups));
449 	group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_20", "", 1 << 20, glu::GLSL_VERSION_440, useDeviceGroups));
450 	group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_24", "", 1 << 24, glu::GLSL_VERSION_440, useDeviceGroups));
451 }
452 
453 } // sparse
454 } // vkt
455