1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  * Copyright (c) 2016 The Android Open Source Project
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Indirect Compute Dispatch tests
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktComputeIndirectComputeDispatchTests.hpp"
26 #include "vktComputeTestsUtil.hpp"
27 
28 #include <string>
29 #include <map>
30 #include <vector>
31 
32 #include "vkDefs.hpp"
33 #include "vkRef.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vktTestCase.hpp"
36 #include "vktTestCaseUtil.hpp"
37 #include "vkPlatform.hpp"
38 #include "vkPrograms.hpp"
39 #include "vkMemUtil.hpp"
40 #include "vkBuilderUtil.hpp"
41 #include "vkQueryUtil.hpp"
42 
43 #include "tcuVector.hpp"
44 #include "tcuVectorUtil.hpp"
45 #include "tcuTestLog.hpp"
46 #include "tcuRGBA.hpp"
47 #include "tcuStringTemplate.hpp"
48 
49 #include "deUniquePtr.hpp"
50 #include "deSharedPtr.hpp"
51 #include "deStringUtil.hpp"
52 #include "deArrayUtil.hpp"
53 
54 #include "gluShaderUtil.hpp"
55 
56 namespace vkt
57 {
58 namespace compute
59 {
60 namespace
61 {
62 
63 enum
64 {
65 	RESULT_BLOCK_BASE_SIZE			= 4 * (int)sizeof(deUint32), // uvec3 + uint
66 	RESULT_BLOCK_NUM_PASSED_OFFSET	= 3 * (int)sizeof(deUint32),
67 	INDIRECT_COMMAND_OFFSET			= 3 * (int)sizeof(deUint32),
68 };
69 
/*--------------------------------------------------------------------*//*!
 * \brief Round a result block size up to the storage buffer offset alignment.
 *
 * Reads limits.minStorageBufferOffsetAlignment of \p physicalDevice and
 * returns the smallest multiple of it that is not less than \p baseSize.
 * \p baseSize is returned unchanged when the reported alignment is zero or
 * when it already is a multiple of the alignment.
 *//*--------------------------------------------------------------------*/
vk::VkDeviceSize getResultBlockAlignedSize (const vk::InstanceInterface&	instance_interface,
											const vk::VkPhysicalDevice		physicalDevice,
											const vk::VkDeviceSize			baseSize)
{
	// TODO getPhysicalDeviceProperties() was added to vkQueryUtil in 41-image-load-store-tests. Use it once it's merged.
	vk::VkPhysicalDeviceProperties properties;
	instance_interface.getPhysicalDeviceProperties(physicalDevice, &properties);

	const vk::VkDeviceSize offsetAlignment = properties.limits.minStorageBufferOffsetAlignment;

	// A zero alignment imposes no constraint (and must not be used as a divisor).
	if (offsetAlignment == 0)
		return baseSize;

	const vk::VkDeviceSize excess = baseSize % offsetAlignment;

	// Already aligned: keep the size. Otherwise step up to the next multiple.
	return (excess == 0) ? baseSize : (baseSize - excess + offsetAlignment);
}
84 
85 struct DispatchCommand
86 {
DispatchCommandvkt::compute::__anond7dfc6af0111::DispatchCommand87 				DispatchCommand (const deIntptr		offset,
88 								 const tcu::UVec3&	numWorkGroups)
89 					: m_offset			(offset)
90 					, m_numWorkGroups	(numWorkGroups) {}
91 
92 	deIntptr	m_offset;
93 	tcu::UVec3	m_numWorkGroups;
94 };
95 
96 typedef std::vector<DispatchCommand> DispatchCommandsVec;
97 
98 struct DispatchCaseDesc
99 {
DispatchCaseDescvkt::compute::__anond7dfc6af0111::DispatchCaseDesc100 								DispatchCaseDesc (const char*					name,
101 												  const char*					description,
102 												  const deUintptr				bufferSize,
103 												  const tcu::UVec3				workGroupSize,
104 												  const DispatchCommandsVec&	dispatchCommands)
105 									: m_name				(name)
106 									, m_description			(description)
107 									, m_bufferSize			(bufferSize)
108 									, m_workGroupSize		(workGroupSize)
109 									, m_dispatchCommands	(dispatchCommands) {}
110 
111 	const char*					m_name;
112 	const char*					m_description;
113 	const deUintptr				m_bufferSize;
114 	const tcu::UVec3			m_workGroupSize;
115 	const DispatchCommandsVec	m_dispatchCommands;
116 };
117 
118 class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance
119 {
120 public:
121 									IndirectDispatchInstanceBufferUpload	(Context&					context,
122 																			 const std::string&			name,
123 																			 const deUintptr			bufferSize,
124 																			 const tcu::UVec3&			workGroupSize,
125 																			 const DispatchCommandsVec& dispatchCommands);
126 
~IndirectDispatchInstanceBufferUpload(void)127 	virtual							~IndirectDispatchInstanceBufferUpload	(void) {}
128 
129 	virtual tcu::TestStatus			iterate									(void);
130 
131 protected:
132 	virtual void					fillIndirectBufferData					(const vk::VkCommandBuffer	commandBuffer,
133 																			 const Buffer&				indirectBuffer);
134 
135 	deBool							verifyResultBuffer						(const Buffer&				resultBuffer,
136 																			 const vk::VkDeviceSize		resultBlockSize,
137 																			 const vk::VkDeviceSize		resultBufferSize) const;
138 
139 	Context&						m_context;
140 	const std::string				m_name;
141 
142 	const vk::DeviceInterface&		m_device_interface;
143 	const vk::VkDevice				m_device;
144 
145 	const vk::VkQueue				m_queue;
146 	const deUint32					m_queueFamilyIndex;
147 
148 	const deUintptr					m_bufferSize;
149 	const tcu::UVec3				m_workGroupSize;
150 	const DispatchCommandsVec		m_dispatchCommands;
151 
152 	vk::Allocator&					m_allocator;
153 
154 private:
155 	IndirectDispatchInstanceBufferUpload (const vkt::TestInstance&);
156 	IndirectDispatchInstanceBufferUpload& operator= (const vkt::TestInstance&);
157 };
158 
IndirectDispatchInstanceBufferUpload(Context & context,const std::string & name,const deUintptr bufferSize,const tcu::UVec3 & workGroupSize,const DispatchCommandsVec & dispatchCommands)159 IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload (Context&					context,
160 																			const std::string&			name,
161 																			const deUintptr				bufferSize,
162 																			const tcu::UVec3&			workGroupSize,
163 																			const DispatchCommandsVec&	dispatchCommands)
164 	: vkt::TestInstance		(context)
165 	, m_context				(context)
166 	, m_name				(name)
167 	, m_device_interface	(context.getDeviceInterface())
168 	, m_device				(context.getDevice())
169 	, m_queue				(context.getUniversalQueue())
170 	, m_queueFamilyIndex	(context.getUniversalQueueFamilyIndex())
171 	, m_bufferSize			(bufferSize)
172 	, m_workGroupSize		(workGroupSize)
173 	, m_dispatchCommands	(dispatchCommands)
174 	, m_allocator			(context.getDefaultAllocator())
175 {
176 }
177 
fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer,const Buffer & indirectBuffer)178 void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
179 {
180 	DE_UNREF(commandBuffer);
181 
182 	const vk::Allocation& alloc = indirectBuffer.getAllocation();
183 	deUint8* indirectDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
184 
185 	for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
186 	{
187 		DE_ASSERT(cmdIter->m_offset >= 0);
188 		DE_ASSERT(cmdIter->m_offset % sizeof(deUint32) == 0);
189 		DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (deIntptr)m_bufferSize);
190 
191 		deUint32* const dstPtr = (deUint32*)&indirectDataPtr[cmdIter->m_offset];
192 
193 		dstPtr[0] = cmdIter->m_numWorkGroups[0];
194 		dstPtr[1] = cmdIter->m_numWorkGroups[1];
195 		dstPtr[2] = cmdIter->m_numWorkGroups[2];
196 	}
197 
198 	vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), m_bufferSize);
199 }
200 
iterate(void)201 tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate (void)
202 {
203 	tcu::TestContext& testCtx = m_context.getTestContext();
204 
205 	testCtx.getLog() << tcu::TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << tcu::TestLog::EndMessage;
206 	{
207 		tcu::ScopedLogSection section(testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) + " in total)");
208 
209 		for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
210 		{
211 			testCtx.getLog()
212 				<< tcu::TestLog::Message
213 				<< cmdNdx << ": " << "offset = " << m_dispatchCommands[cmdNdx].m_offset << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups
214 				<< tcu::TestLog::EndMessage;
215 		}
216 	}
217 
218 	// Create result buffer
219 	const vk::VkDeviceSize resultBlockSize = getResultBlockAlignedSize(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE);
220 	const vk::VkDeviceSize resultBufferSize = resultBlockSize * (deUint32)m_dispatchCommands.size();
221 
222 	Buffer resultBuffer(
223 		m_device_interface, m_device, m_allocator,
224 		makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
225 		vk::MemoryRequirement::HostVisible);
226 
227 	{
228 		const vk::Allocation& alloc = resultBuffer.getAllocation();
229 		deUint8* resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
230 
231 		for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
232 		{
233 			deUint8* const	dstPtr = &resultDataPtr[resultBlockSize*cmdNdx];
234 
235 			*(deUint32*)(dstPtr + 0 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[0];
236 			*(deUint32*)(dstPtr + 1 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[1];
237 			*(deUint32*)(dstPtr + 2 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[2];
238 			*(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
239 		}
240 
241 		vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize);
242 	}
243 
244 	// Create verify compute shader
245 	const vk::Unique<vk::VkShaderModule> verifyShader(createShaderModule(
246 		m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"), 0u));
247 
248 	// Create descriptorSetLayout
249 	vk::DescriptorSetLayoutBuilder layoutBuilder;
250 	layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
251 	vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device));
252 
253 	// Create compute pipeline
254 	const vk::Unique<vk::VkPipelineLayout> pipelineLayout(makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout));
255 	const vk::Unique<vk::VkPipeline> computePipeline(makeComputePipeline(m_device_interface, m_device, *pipelineLayout, *verifyShader));
256 
257 	// Create descriptor pool
258 	const vk::Unique<vk::VkDescriptorPool> descriptorPool(
259 		vk::DescriptorPoolBuilder()
260 		.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)m_dispatchCommands.size())
261 		.build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, static_cast<deUint32>(m_dispatchCommands.size())));
262 
263 	const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier(
264 		vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);
265 
266 	// Create command buffer
267 	const vk::Unique<vk::VkCommandPool> cmdPool(makeCommandPool(m_device_interface, m_device, m_queueFamilyIndex));
268 	const vk::Unique<vk::VkCommandBuffer> cmdBuffer(makeCommandBuffer(m_device_interface, m_device, *cmdPool));
269 
270 	// Begin recording commands
271 	beginCommandBuffer(m_device_interface, *cmdBuffer);
272 
273 	// Create indirect buffer
274 	Buffer indirectBuffer(
275 		m_device_interface, m_device, m_allocator,
276 		makeBufferCreateInfo(m_bufferSize, vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
277 		vk::MemoryRequirement::HostVisible);
278 	fillIndirectBufferData(*cmdBuffer, indirectBuffer);
279 
280 	// Bind compute pipeline
281 	m_device_interface.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
282 
283 	// Allocate descriptor sets
284 	typedef de::SharedPtr<vk::Unique<vk::VkDescriptorSet> > SharedVkDescriptorSet;
285 	std::vector<SharedVkDescriptorSet> descriptorSets(m_dispatchCommands.size());
286 
287 	vk::VkDeviceSize curOffset = 0;
288 
289 	// Create descriptor sets
290 	for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
291 	{
292 		descriptorSets[cmdNdx] = SharedVkDescriptorSet(new vk::Unique<vk::VkDescriptorSet>(
293 									makeDescriptorSet(m_device_interface, m_device, *descriptorPool, *descriptorSetLayout)));
294 
295 		const vk::VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize);
296 
297 		vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
298 		descriptorSetBuilder.writeSingle(**descriptorSets[cmdNdx], vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
299 		descriptorSetBuilder.update(m_device_interface, m_device);
300 
301 		// Bind descriptor set
302 		m_device_interface.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &(**descriptorSets[cmdNdx]), 0u, DE_NULL);
303 
304 		// Dispatch indirect compute command
305 		m_device_interface.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset);
306 
307 		curOffset += resultBlockSize;
308 	}
309 
310 	// Insert memory barrier
311 	m_device_interface.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
312 										  0, (const vk::VkMemoryBarrier*)DE_NULL,
313 										  1, &ssboPostBarrier,
314 										  0, (const vk::VkImageMemoryBarrier*)DE_NULL);
315 
316 	// End recording commands
317 	endCommandBuffer(m_device_interface, *cmdBuffer);
318 
319 	// Wait for command buffer execution finish
320 	submitCommandsAndWait(m_device_interface, m_device, m_queue, *cmdBuffer);
321 
322 	// Check if result buffer contains valid values
323 	if (verifyResultBuffer(resultBuffer, resultBlockSize, resultBufferSize))
324 		return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass");
325 	else
326 		return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
327 }
328 
verifyResultBuffer(const Buffer & resultBuffer,const vk::VkDeviceSize resultBlockSize,const vk::VkDeviceSize resultBufferSize) const329 deBool IndirectDispatchInstanceBufferUpload::verifyResultBuffer (const Buffer&			resultBuffer,
330 																 const vk::VkDeviceSize	resultBlockSize,
331 																 const vk::VkDeviceSize	resultBufferSize) const
332 {
333 	deBool allOk = true;
334 	const vk::Allocation& alloc = resultBuffer.getAllocation();
335 	vk::invalidateMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize);
336 
337 	const deUint8* const resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
338 
339 	for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++)
340 	{
341 		const DispatchCommand&	cmd = m_dispatchCommands[cmdNdx];
342 		const deUint8* const	srcPtr = (const deUint8*)resultDataPtr + cmdNdx*resultBlockSize;
343 		const deUint32			numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
344 		const deUint32			numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2];
345 		const deUint32			numGroups = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2];
346 		const deUint32			expectedCount = numInvocationsPerGroup * numGroups;
347 
348 		if (numPassed != expectedCount)
349 		{
350 			tcu::TestContext& testCtx = m_context.getTestContext();
351 
352 			testCtx.getLog()
353 				<< tcu::TestLog::Message
354 				<< "ERROR: got invalid result for invocation " << cmdNdx
355 				<< ": got numPassed = " << numPassed << ", expected " << expectedCount
356 				<< tcu::TestLog::EndMessage;
357 
358 			allOk = false;
359 		}
360 	}
361 
362 	return allOk;
363 }
364 
365 class IndirectDispatchCaseBufferUpload : public vkt::TestCase
366 {
367 public:
368 								IndirectDispatchCaseBufferUpload	(tcu::TestContext&			testCtx,
369 																	 const DispatchCaseDesc&	caseDesc,
370 																	 const glu::GLSLVersion		glslVersion);
371 
~IndirectDispatchCaseBufferUpload(void)372 	virtual						~IndirectDispatchCaseBufferUpload	(void) {}
373 
374 	virtual void				initPrograms						(vk::SourceCollections&		programCollection) const;
375 	virtual TestInstance*		createInstance						(Context&					context) const;
376 
377 protected:
378 	const deUintptr				m_bufferSize;
379 	const tcu::UVec3			m_workGroupSize;
380 	const DispatchCommandsVec	m_dispatchCommands;
381 	const glu::GLSLVersion		m_glslVersion;
382 
383 private:
384 	IndirectDispatchCaseBufferUpload (const vkt::TestCase&);
385 	IndirectDispatchCaseBufferUpload& operator= (const vkt::TestCase&);
386 };
387 
IndirectDispatchCaseBufferUpload(tcu::TestContext & testCtx,const DispatchCaseDesc & caseDesc,const glu::GLSLVersion glslVersion)388 IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload (tcu::TestContext&		testCtx,
389 																	const DispatchCaseDesc& caseDesc,
390 																	const glu::GLSLVersion	glslVersion)
391 	: vkt::TestCase			(testCtx, caseDesc.m_name, caseDesc.m_description)
392 	, m_bufferSize			(caseDesc.m_bufferSize)
393 	, m_workGroupSize		(caseDesc.m_workGroupSize)
394 	, m_dispatchCommands	(caseDesc.m_dispatchCommands)
395 	, m_glslVersion			(glslVersion)
396 {
397 }
398 
initPrograms(vk::SourceCollections & programCollection) const399 void IndirectDispatchCaseBufferUpload::initPrograms (vk::SourceCollections& programCollection) const
400 {
401 	const char* const	versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
402 
403 	std::ostringstream	verifyBuffer;
404 
405 	verifyBuffer
406 		<< versionDecl << "\n"
407 		<< "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
408 		<< "layout(set = 0, binding = 0, std430) buffer Result\n"
409 		<< "{\n"
410 		<< "    uvec3           expectedGroupCount;\n"
411 		<< "    coherent uint   numPassed;\n"
412 		<< "} result;\n"
413 		<< "void main (void)\n"
414 		<< "{\n"
415 		<< "    if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
416 		<< "        atomicAdd(result.numPassed, 1u);\n"
417 		<< "}\n";
418 
419 	std::map<std::string, std::string> args;
420 
421 	args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x());
422 	args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y());
423 	args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z());
424 
425 	std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args);
426 
427 	programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify") << glu::ComputeSource(verifyProgramString);
428 }
429 
createInstance(Context & context) const430 TestInstance* IndirectDispatchCaseBufferUpload::createInstance (Context& context) const
431 {
432 	return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
433 }
434 
435 class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload
436 {
437 public:
IndirectDispatchInstanceBufferGenerate(Context & context,const std::string & name,const deUintptr bufferSize,const tcu::UVec3 & workGroupSize,const DispatchCommandsVec & dispatchCommands)438 									IndirectDispatchInstanceBufferGenerate	(Context&					context,
439 																			 const std::string&			name,
440 																			 const deUintptr			bufferSize,
441 																			 const tcu::UVec3&			workGroupSize,
442 																			 const DispatchCommandsVec&	dispatchCommands)
443 										: IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands) {}
444 
~IndirectDispatchInstanceBufferGenerate(void)445 	virtual							~IndirectDispatchInstanceBufferGenerate	(void) {}
446 
447 protected:
448 	virtual void					fillIndirectBufferData					(const vk::VkCommandBuffer	commandBuffer,
449 																			 const Buffer&				indirectBuffer);
450 
451 	vk::Move<vk::VkDescriptorPool>	m_descriptorPool;
452 	vk::Move<vk::VkDescriptorSet>	m_descriptorSet;
453 	vk::Move<vk::VkPipelineLayout>	m_pipelineLayout;
454 	vk::Move<vk::VkPipeline>		m_computePipeline;
455 
456 private:
457 	IndirectDispatchInstanceBufferGenerate (const vkt::TestInstance&);
458 	IndirectDispatchInstanceBufferGenerate& operator= (const vkt::TestInstance&);
459 };
460 
fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer,const Buffer & indirectBuffer)461 void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
462 {
463 	// Create compute shader that generates data for indirect buffer
464 	const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader(createShaderModule(
465 		m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u));
466 
467 	// Create descriptorSetLayout
468 	vk::DescriptorSetLayoutBuilder layoutBuilder;
469 	layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
470 	vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device));
471 
472 	// Create compute pipeline
473 	m_pipelineLayout = makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout);
474 	m_computePipeline = makeComputePipeline(m_device_interface, m_device, *m_pipelineLayout, *genIndirectBufferDataShader);
475 
476 	// Create descriptor pool
477 	m_descriptorPool = vk::DescriptorPoolBuilder()
478 		.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
479 		.build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
480 
481 	// Create descriptor set
482 	m_descriptorSet = makeDescriptorSet(m_device_interface, m_device, *m_descriptorPool, *descriptorSetLayout);
483 
484 	const vk::VkDescriptorBufferInfo indirectDescriptorInfo = makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize);
485 
486 	vk::DescriptorSetUpdateBuilder	descriptorSetBuilder;
487 	descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo);
488 	descriptorSetBuilder.update(m_device_interface, m_device);
489 
490 	const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
491 		vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize);
492 
493 	// Bind compute pipeline
494 	m_device_interface.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline);
495 
496 	// Bind descriptor set
497 	m_device_interface.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
498 
499 	// Dispatch compute command
500 	m_device_interface.cmdDispatch(commandBuffer, 1u, 1u, 1u);
501 
502 	// Insert memory barrier
503 	m_device_interface.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, (vk::VkDependencyFlags)0,
504 										  0, (const vk::VkMemoryBarrier*)DE_NULL,
505 										  1, &bufferBarrier,
506 										  0, (const vk::VkImageMemoryBarrier*)DE_NULL);
507 }
508 
509 class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload
510 {
511 public:
IndirectDispatchCaseBufferGenerate(tcu::TestContext & testCtx,const DispatchCaseDesc & caseDesc,const glu::GLSLVersion glslVersion)512 							IndirectDispatchCaseBufferGenerate	(tcu::TestContext&			testCtx,
513 																 const DispatchCaseDesc&	caseDesc,
514 																 const glu::GLSLVersion		glslVersion)
515 								: IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion) {}
516 
~IndirectDispatchCaseBufferGenerate(void)517 	virtual					~IndirectDispatchCaseBufferGenerate	(void) {}
518 
519 	virtual void			initPrograms						(vk::SourceCollections&		programCollection) const;
520 	virtual TestInstance*	createInstance						(Context&					context) const;
521 
522 private:
523 	IndirectDispatchCaseBufferGenerate (const vkt::TestCase&);
524 	IndirectDispatchCaseBufferGenerate& operator= (const vkt::TestCase&);
525 };
526 
initPrograms(vk::SourceCollections & programCollection) const527 void IndirectDispatchCaseBufferGenerate::initPrograms (vk::SourceCollections& programCollection) const
528 {
529 	IndirectDispatchCaseBufferUpload::initPrograms(programCollection);
530 
531 	const char* const	versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
532 
533 	std::ostringstream computeBuffer;
534 
535 	// Header
536 	computeBuffer
537 		<< versionDecl << "\n"
538 		<< "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
539 		<< "layout(set = 0, binding = 0, std430) buffer Out\n"
540 		<< "{\n"
541 		<< "	highp uint data[];\n"
542 		<< "};\n"
543 		<< "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
544 		<< "{\n"
545 		<< "	data[offset+0u] = numWorkGroups.x;\n"
546 		<< "	data[offset+1u] = numWorkGroups.y;\n"
547 		<< "	data[offset+2u] = numWorkGroups.z;\n"
548 		<< "}\n"
549 		<< "void main (void)\n"
550 		<< "{\n";
551 
552 	// Dispatch commands
553 	for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
554 	{
555 		const deUint32 offs = (deUint32)(cmdIter->m_offset / sizeof(deUint32));
556 		DE_ASSERT((size_t)offs * sizeof(deUint32) == (size_t)cmdIter->m_offset);
557 
558 		computeBuffer
559 			<< "\twriteCmd(" << offs << "u, uvec3("
560 			<< cmdIter->m_numWorkGroups.x() << "u, "
561 			<< cmdIter->m_numWorkGroups.y() << "u, "
562 			<< cmdIter->m_numWorkGroups.z() << "u));\n";
563 	}
564 
565 	// Ending
566 	computeBuffer << "}\n";
567 
568 	std::string computeString = computeBuffer.str();
569 
570 	programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString);
571 }
572 
createInstance(Context & context) const573 TestInstance* IndirectDispatchCaseBufferGenerate::createInstance (Context& context) const
574 {
575 	return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
576 }
577 
commandsVec(const DispatchCommand & cmd)578 DispatchCommandsVec commandsVec (const DispatchCommand& cmd)
579 {
580 	DispatchCommandsVec vec;
581 	vec.push_back(cmd);
582 	return vec;
583 }
584 
commandsVec(const DispatchCommand & cmd0,const DispatchCommand & cmd1,const DispatchCommand & cmd2,const DispatchCommand & cmd3,const DispatchCommand & cmd4)585 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
586 								 const DispatchCommand& cmd1,
587 								 const DispatchCommand& cmd2,
588 								 const DispatchCommand& cmd3,
589 								 const DispatchCommand& cmd4)
590 {
591 	DispatchCommandsVec vec;
592 	vec.push_back(cmd0);
593 	vec.push_back(cmd1);
594 	vec.push_back(cmd2);
595 	vec.push_back(cmd3);
596 	vec.push_back(cmd4);
597 	return vec;
598 }
599 
commandsVec(const DispatchCommand & cmd0,const DispatchCommand & cmd1,const DispatchCommand & cmd2,const DispatchCommand & cmd3,const DispatchCommand & cmd4,const DispatchCommand & cmd5,const DispatchCommand & cmd6)600 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
601 								 const DispatchCommand& cmd1,
602 								 const DispatchCommand& cmd2,
603 								 const DispatchCommand& cmd3,
604 								 const DispatchCommand& cmd4,
605 								 const DispatchCommand& cmd5,
606 								 const DispatchCommand& cmd6)
607 {
608 	DispatchCommandsVec vec;
609 	vec.push_back(cmd0);
610 	vec.push_back(cmd1);
611 	vec.push_back(cmd2);
612 	vec.push_back(cmd3);
613 	vec.push_back(cmd4);
614 	vec.push_back(cmd5);
615 	vec.push_back(cmd6);
616 	return vec;
617 }
618 
619 } // anonymous ns
620 
createIndirectComputeDispatchTests(tcu::TestContext & testCtx)621 tcu::TestCaseGroup* createIndirectComputeDispatchTests (tcu::TestContext& testCtx)
622 {
623 	static const DispatchCaseDesc s_dispatchCases[] =
624 	{
625 		DispatchCaseDesc("single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
626 			commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)))
627         ),
628 		DispatchCaseDesc("multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
629 			commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5)))
630 		),
631 		DispatchCaseDesc("multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1),
632 			commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3)))
633 		),
634 		DispatchCaseDesc("small_offset", "Small offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
635 			commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1)))
636 		),
637 		DispatchCaseDesc("large_offset", "Large offset", (2 << 20), tcu::UVec3(1, 1, 1),
638 			commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1)))
639 		),
640 		DispatchCaseDesc("large_offset_multiple_invocations", "Large offset, multiple invocations", (2 << 20), tcu::UVec3(2, 3, 1),
641 			commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3)))
642 		),
643 		DispatchCaseDesc("empty_command", "Empty command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
644 			commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0)))
645 		),
646 		DispatchCaseDesc("multi_dispatch", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
647 			commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
648 						DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)),
649 						DispatchCommand(104, tcu::UVec3(1, 3, 1)),
650 						DispatchCommand(40, tcu::UVec3(1, 1, 7)),
651 						DispatchCommand(52, tcu::UVec3(1, 1, 4)))
652 		),
653 		DispatchCaseDesc("multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
654 			commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
655 						DispatchCommand(0, tcu::UVec3(1, 1, 1)),
656 						DispatchCommand(0, tcu::UVec3(1, 1, 1)),
657 						DispatchCommand(104, tcu::UVec3(1, 3, 1)),
658 						DispatchCommand(104, tcu::UVec3(1, 3, 1)),
659 						DispatchCommand(52, tcu::UVec3(1, 1, 4)),
660 						DispatchCommand(52, tcu::UVec3(1, 1, 4)))
661 		),
662 	};
663 
664 	de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests(new tcu::TestCaseGroup(testCtx, "indirect_dispatch", "Indirect dispatch tests"));
665 
666 	tcu::TestCaseGroup* const	groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer", "");
667 	indirectComputeDispatchTests->addChild(groupBufferUpload);
668 
669 	for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
670 	{
671 		groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
672 	}
673 
674 	tcu::TestCaseGroup* const	groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute", "");
675 	indirectComputeDispatchTests->addChild(groupBufferGenerate);
676 
677 	for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
678 	{
679 		groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
680 	}
681 
682 	return indirectComputeDispatchTests.release();
683 }
684 
685 } // compute
686 } // vkt
687