1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2016 The Khronos Group Inc.
6 * Copyright (c) 2016 The Android Open Source Project
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Indirect Compute Dispatch tests
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktComputeIndirectComputeDispatchTests.hpp"
26 #include "vktComputeTestsUtil.hpp"
27
28 #include <string>
29 #include <map>
30 #include <vector>
31
32 #include "vkDefs.hpp"
33 #include "vkRef.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vktTestCase.hpp"
36 #include "vktTestCaseUtil.hpp"
37 #include "vkPlatform.hpp"
38 #include "vkPrograms.hpp"
39 #include "vkMemUtil.hpp"
40 #include "vkBuilderUtil.hpp"
41 #include "vkQueryUtil.hpp"
42
43 #include "tcuVector.hpp"
44 #include "tcuVectorUtil.hpp"
45 #include "tcuTestLog.hpp"
46 #include "tcuRGBA.hpp"
47 #include "tcuStringTemplate.hpp"
48
49 #include "deUniquePtr.hpp"
50 #include "deSharedPtr.hpp"
51 #include "deStringUtil.hpp"
52 #include "deArrayUtil.hpp"
53
54 #include "gluShaderUtil.hpp"
55
56 namespace vkt
57 {
58 namespace compute
59 {
60 namespace
61 {
62
63 enum
64 {
65 RESULT_BLOCK_BASE_SIZE = 4 * (int)sizeof(deUint32), // uvec3 + uint
66 RESULT_BLOCK_NUM_PASSED_OFFSET = 3 * (int)sizeof(deUint32),
67 INDIRECT_COMMAND_OFFSET = 3 * (int)sizeof(deUint32),
68 };
69
// Rounds baseSize up to the next multiple of the physical device's
// minStorageBufferOffsetAlignment limit. baseSize is returned unchanged when
// it is already a multiple, or when the reported alignment is zero.
vk::VkDeviceSize getResultBlockAlignedSize (const vk::InstanceInterface&   instance_interface,
                                            const vk::VkPhysicalDevice     physicalDevice,
                                            const vk::VkDeviceSize         baseSize)
{
    // TODO getPhysicalDeviceProperties() was added to vkQueryUtil in 41-image-load-store-tests. Use it once it's merged.
    vk::VkPhysicalDeviceProperties properties;
    instance_interface.getPhysicalDeviceProperties(physicalDevice, &properties);

    const vk::VkDeviceSize offsetAlignment = properties.limits.minStorageBufferOffsetAlignment;

    // Guard first: a zero alignment must never reach the modulo below.
    if (offsetAlignment == 0)
        return baseSize;

    const vk::VkDeviceSize remainder = baseSize % offsetAlignment;

    return (remainder == 0) ? baseSize : (baseSize - remainder + offsetAlignment);
}
84
85 struct DispatchCommand
86 {
DispatchCommandvkt::compute::__anond7dfc6af0111::DispatchCommand87 DispatchCommand (const deIntptr offset,
88 const tcu::UVec3& numWorkGroups)
89 : m_offset (offset)
90 , m_numWorkGroups (numWorkGroups) {}
91
92 deIntptr m_offset;
93 tcu::UVec3 m_numWorkGroups;
94 };
95
96 typedef std::vector<DispatchCommand> DispatchCommandsVec;
97
98 struct DispatchCaseDesc
99 {
DispatchCaseDescvkt::compute::__anond7dfc6af0111::DispatchCaseDesc100 DispatchCaseDesc (const char* name,
101 const char* description,
102 const deUintptr bufferSize,
103 const tcu::UVec3 workGroupSize,
104 const DispatchCommandsVec& dispatchCommands)
105 : m_name (name)
106 , m_description (description)
107 , m_bufferSize (bufferSize)
108 , m_workGroupSize (workGroupSize)
109 , m_dispatchCommands (dispatchCommands) {}
110
111 const char* m_name;
112 const char* m_description;
113 const deUintptr m_bufferSize;
114 const tcu::UVec3 m_workGroupSize;
115 const DispatchCommandsVec m_dispatchCommands;
116 };
117
118 class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance
119 {
120 public:
121 IndirectDispatchInstanceBufferUpload (Context& context,
122 const std::string& name,
123 const deUintptr bufferSize,
124 const tcu::UVec3& workGroupSize,
125 const DispatchCommandsVec& dispatchCommands);
126
~IndirectDispatchInstanceBufferUpload(void)127 virtual ~IndirectDispatchInstanceBufferUpload (void) {}
128
129 virtual tcu::TestStatus iterate (void);
130
131 protected:
132 virtual void fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer,
133 const Buffer& indirectBuffer);
134
135 deBool verifyResultBuffer (const Buffer& resultBuffer,
136 const vk::VkDeviceSize resultBlockSize,
137 const vk::VkDeviceSize resultBufferSize) const;
138
139 Context& m_context;
140 const std::string m_name;
141
142 const vk::DeviceInterface& m_device_interface;
143 const vk::VkDevice m_device;
144
145 const vk::VkQueue m_queue;
146 const deUint32 m_queueFamilyIndex;
147
148 const deUintptr m_bufferSize;
149 const tcu::UVec3 m_workGroupSize;
150 const DispatchCommandsVec m_dispatchCommands;
151
152 vk::Allocator& m_allocator;
153
154 private:
155 IndirectDispatchInstanceBufferUpload (const vkt::TestInstance&);
156 IndirectDispatchInstanceBufferUpload& operator= (const vkt::TestInstance&);
157 };
158
IndirectDispatchInstanceBufferUpload(Context & context,const std::string & name,const deUintptr bufferSize,const tcu::UVec3 & workGroupSize,const DispatchCommandsVec & dispatchCommands)159 IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload (Context& context,
160 const std::string& name,
161 const deUintptr bufferSize,
162 const tcu::UVec3& workGroupSize,
163 const DispatchCommandsVec& dispatchCommands)
164 : vkt::TestInstance (context)
165 , m_context (context)
166 , m_name (name)
167 , m_device_interface (context.getDeviceInterface())
168 , m_device (context.getDevice())
169 , m_queue (context.getUniversalQueue())
170 , m_queueFamilyIndex (context.getUniversalQueueFamilyIndex())
171 , m_bufferSize (bufferSize)
172 , m_workGroupSize (workGroupSize)
173 , m_dispatchCommands (dispatchCommands)
174 , m_allocator (context.getDefaultAllocator())
175 {
176 }
177
fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer,const Buffer & indirectBuffer)178 void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
179 {
180 DE_UNREF(commandBuffer);
181
182 const vk::Allocation& alloc = indirectBuffer.getAllocation();
183 deUint8* indirectDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
184
185 for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
186 {
187 DE_ASSERT(cmdIter->m_offset >= 0);
188 DE_ASSERT(cmdIter->m_offset % sizeof(deUint32) == 0);
189 DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (deIntptr)m_bufferSize);
190
191 deUint32* const dstPtr = (deUint32*)&indirectDataPtr[cmdIter->m_offset];
192
193 dstPtr[0] = cmdIter->m_numWorkGroups[0];
194 dstPtr[1] = cmdIter->m_numWorkGroups[1];
195 dstPtr[2] = cmdIter->m_numWorkGroups[2];
196 }
197
198 vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), m_bufferSize);
199 }
200
iterate(void)201 tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate (void)
202 {
203 tcu::TestContext& testCtx = m_context.getTestContext();
204
205 testCtx.getLog() << tcu::TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << tcu::TestLog::EndMessage;
206 {
207 tcu::ScopedLogSection section(testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) + " in total)");
208
209 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
210 {
211 testCtx.getLog()
212 << tcu::TestLog::Message
213 << cmdNdx << ": " << "offset = " << m_dispatchCommands[cmdNdx].m_offset << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups
214 << tcu::TestLog::EndMessage;
215 }
216 }
217
218 // Create result buffer
219 const vk::VkDeviceSize resultBlockSize = getResultBlockAlignedSize(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE);
220 const vk::VkDeviceSize resultBufferSize = resultBlockSize * (deUint32)m_dispatchCommands.size();
221
222 Buffer resultBuffer(
223 m_device_interface, m_device, m_allocator,
224 makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
225 vk::MemoryRequirement::HostVisible);
226
227 {
228 const vk::Allocation& alloc = resultBuffer.getAllocation();
229 deUint8* resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
230
231 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
232 {
233 deUint8* const dstPtr = &resultDataPtr[resultBlockSize*cmdNdx];
234
235 *(deUint32*)(dstPtr + 0 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[0];
236 *(deUint32*)(dstPtr + 1 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[1];
237 *(deUint32*)(dstPtr + 2 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[2];
238 *(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
239 }
240
241 vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize);
242 }
243
244 // Create verify compute shader
245 const vk::Unique<vk::VkShaderModule> verifyShader(createShaderModule(
246 m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"), 0u));
247
248 // Create descriptorSetLayout
249 vk::DescriptorSetLayoutBuilder layoutBuilder;
250 layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
251 vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device));
252
253 // Create compute pipeline
254 const vk::Unique<vk::VkPipelineLayout> pipelineLayout(makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout));
255 const vk::Unique<vk::VkPipeline> computePipeline(makeComputePipeline(m_device_interface, m_device, *pipelineLayout, *verifyShader));
256
257 // Create descriptor pool
258 const vk::Unique<vk::VkDescriptorPool> descriptorPool(
259 vk::DescriptorPoolBuilder()
260 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)m_dispatchCommands.size())
261 .build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, static_cast<deUint32>(m_dispatchCommands.size())));
262
263 const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier(
264 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);
265
266 // Create command buffer
267 const vk::Unique<vk::VkCommandPool> cmdPool(makeCommandPool(m_device_interface, m_device, m_queueFamilyIndex));
268 const vk::Unique<vk::VkCommandBuffer> cmdBuffer(makeCommandBuffer(m_device_interface, m_device, *cmdPool));
269
270 // Begin recording commands
271 beginCommandBuffer(m_device_interface, *cmdBuffer);
272
273 // Create indirect buffer
274 Buffer indirectBuffer(
275 m_device_interface, m_device, m_allocator,
276 makeBufferCreateInfo(m_bufferSize, vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
277 vk::MemoryRequirement::HostVisible);
278 fillIndirectBufferData(*cmdBuffer, indirectBuffer);
279
280 // Bind compute pipeline
281 m_device_interface.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
282
283 // Allocate descriptor sets
284 typedef de::SharedPtr<vk::Unique<vk::VkDescriptorSet> > SharedVkDescriptorSet;
285 std::vector<SharedVkDescriptorSet> descriptorSets(m_dispatchCommands.size());
286
287 vk::VkDeviceSize curOffset = 0;
288
289 // Create descriptor sets
290 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
291 {
292 descriptorSets[cmdNdx] = SharedVkDescriptorSet(new vk::Unique<vk::VkDescriptorSet>(
293 makeDescriptorSet(m_device_interface, m_device, *descriptorPool, *descriptorSetLayout)));
294
295 const vk::VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize);
296
297 vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
298 descriptorSetBuilder.writeSingle(**descriptorSets[cmdNdx], vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
299 descriptorSetBuilder.update(m_device_interface, m_device);
300
301 // Bind descriptor set
302 m_device_interface.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &(**descriptorSets[cmdNdx]), 0u, DE_NULL);
303
304 // Dispatch indirect compute command
305 m_device_interface.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset);
306
307 curOffset += resultBlockSize;
308 }
309
310 // Insert memory barrier
311 m_device_interface.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
312 0, (const vk::VkMemoryBarrier*)DE_NULL,
313 1, &ssboPostBarrier,
314 0, (const vk::VkImageMemoryBarrier*)DE_NULL);
315
316 // End recording commands
317 endCommandBuffer(m_device_interface, *cmdBuffer);
318
319 // Wait for command buffer execution finish
320 submitCommandsAndWait(m_device_interface, m_device, m_queue, *cmdBuffer);
321
322 // Check if result buffer contains valid values
323 if (verifyResultBuffer(resultBuffer, resultBlockSize, resultBufferSize))
324 return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass");
325 else
326 return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
327 }
328
verifyResultBuffer(const Buffer & resultBuffer,const vk::VkDeviceSize resultBlockSize,const vk::VkDeviceSize resultBufferSize) const329 deBool IndirectDispatchInstanceBufferUpload::verifyResultBuffer (const Buffer& resultBuffer,
330 const vk::VkDeviceSize resultBlockSize,
331 const vk::VkDeviceSize resultBufferSize) const
332 {
333 deBool allOk = true;
334 const vk::Allocation& alloc = resultBuffer.getAllocation();
335 vk::invalidateMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize);
336
337 const deUint8* const resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
338
339 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++)
340 {
341 const DispatchCommand& cmd = m_dispatchCommands[cmdNdx];
342 const deUint8* const srcPtr = (const deUint8*)resultDataPtr + cmdNdx*resultBlockSize;
343 const deUint32 numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
344 const deUint32 numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2];
345 const deUint32 numGroups = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2];
346 const deUint32 expectedCount = numInvocationsPerGroup * numGroups;
347
348 if (numPassed != expectedCount)
349 {
350 tcu::TestContext& testCtx = m_context.getTestContext();
351
352 testCtx.getLog()
353 << tcu::TestLog::Message
354 << "ERROR: got invalid result for invocation " << cmdNdx
355 << ": got numPassed = " << numPassed << ", expected " << expectedCount
356 << tcu::TestLog::EndMessage;
357
358 allOk = false;
359 }
360 }
361
362 return allOk;
363 }
364
365 class IndirectDispatchCaseBufferUpload : public vkt::TestCase
366 {
367 public:
368 IndirectDispatchCaseBufferUpload (tcu::TestContext& testCtx,
369 const DispatchCaseDesc& caseDesc,
370 const glu::GLSLVersion glslVersion);
371
~IndirectDispatchCaseBufferUpload(void)372 virtual ~IndirectDispatchCaseBufferUpload (void) {}
373
374 virtual void initPrograms (vk::SourceCollections& programCollection) const;
375 virtual TestInstance* createInstance (Context& context) const;
376
377 protected:
378 const deUintptr m_bufferSize;
379 const tcu::UVec3 m_workGroupSize;
380 const DispatchCommandsVec m_dispatchCommands;
381 const glu::GLSLVersion m_glslVersion;
382
383 private:
384 IndirectDispatchCaseBufferUpload (const vkt::TestCase&);
385 IndirectDispatchCaseBufferUpload& operator= (const vkt::TestCase&);
386 };
387
IndirectDispatchCaseBufferUpload(tcu::TestContext & testCtx,const DispatchCaseDesc & caseDesc,const glu::GLSLVersion glslVersion)388 IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload (tcu::TestContext& testCtx,
389 const DispatchCaseDesc& caseDesc,
390 const glu::GLSLVersion glslVersion)
391 : vkt::TestCase (testCtx, caseDesc.m_name, caseDesc.m_description)
392 , m_bufferSize (caseDesc.m_bufferSize)
393 , m_workGroupSize (caseDesc.m_workGroupSize)
394 , m_dispatchCommands (caseDesc.m_dispatchCommands)
395 , m_glslVersion (glslVersion)
396 {
397 }
398
initPrograms(vk::SourceCollections & programCollection) const399 void IndirectDispatchCaseBufferUpload::initPrograms (vk::SourceCollections& programCollection) const
400 {
401 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
402
403 std::ostringstream verifyBuffer;
404
405 verifyBuffer
406 << versionDecl << "\n"
407 << "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
408 << "layout(set = 0, binding = 0, std430) buffer Result\n"
409 << "{\n"
410 << " uvec3 expectedGroupCount;\n"
411 << " coherent uint numPassed;\n"
412 << "} result;\n"
413 << "void main (void)\n"
414 << "{\n"
415 << " if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
416 << " atomicAdd(result.numPassed, 1u);\n"
417 << "}\n";
418
419 std::map<std::string, std::string> args;
420
421 args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x());
422 args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y());
423 args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z());
424
425 std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args);
426
427 programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify") << glu::ComputeSource(verifyProgramString);
428 }
429
createInstance(Context & context) const430 TestInstance* IndirectDispatchCaseBufferUpload::createInstance (Context& context) const
431 {
432 return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
433 }
434
435 class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload
436 {
437 public:
IndirectDispatchInstanceBufferGenerate(Context & context,const std::string & name,const deUintptr bufferSize,const tcu::UVec3 & workGroupSize,const DispatchCommandsVec & dispatchCommands)438 IndirectDispatchInstanceBufferGenerate (Context& context,
439 const std::string& name,
440 const deUintptr bufferSize,
441 const tcu::UVec3& workGroupSize,
442 const DispatchCommandsVec& dispatchCommands)
443 : IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands) {}
444
~IndirectDispatchInstanceBufferGenerate(void)445 virtual ~IndirectDispatchInstanceBufferGenerate (void) {}
446
447 protected:
448 virtual void fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer,
449 const Buffer& indirectBuffer);
450
451 vk::Move<vk::VkDescriptorPool> m_descriptorPool;
452 vk::Move<vk::VkDescriptorSet> m_descriptorSet;
453 vk::Move<vk::VkPipelineLayout> m_pipelineLayout;
454 vk::Move<vk::VkPipeline> m_computePipeline;
455
456 private:
457 IndirectDispatchInstanceBufferGenerate (const vkt::TestInstance&);
458 IndirectDispatchInstanceBufferGenerate& operator= (const vkt::TestInstance&);
459 };
460
fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer,const Buffer & indirectBuffer)461 void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
462 {
463 // Create compute shader that generates data for indirect buffer
464 const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader(createShaderModule(
465 m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u));
466
467 // Create descriptorSetLayout
468 vk::DescriptorSetLayoutBuilder layoutBuilder;
469 layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
470 vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device));
471
472 // Create compute pipeline
473 m_pipelineLayout = makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout);
474 m_computePipeline = makeComputePipeline(m_device_interface, m_device, *m_pipelineLayout, *genIndirectBufferDataShader);
475
476 // Create descriptor pool
477 m_descriptorPool = vk::DescriptorPoolBuilder()
478 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
479 .build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
480
481 // Create descriptor set
482 m_descriptorSet = makeDescriptorSet(m_device_interface, m_device, *m_descriptorPool, *descriptorSetLayout);
483
484 const vk::VkDescriptorBufferInfo indirectDescriptorInfo = makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize);
485
486 vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
487 descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo);
488 descriptorSetBuilder.update(m_device_interface, m_device);
489
490 const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
491 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize);
492
493 // Bind compute pipeline
494 m_device_interface.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline);
495
496 // Bind descriptor set
497 m_device_interface.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
498
499 // Dispatch compute command
500 m_device_interface.cmdDispatch(commandBuffer, 1u, 1u, 1u);
501
502 // Insert memory barrier
503 m_device_interface.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, (vk::VkDependencyFlags)0,
504 0, (const vk::VkMemoryBarrier*)DE_NULL,
505 1, &bufferBarrier,
506 0, (const vk::VkImageMemoryBarrier*)DE_NULL);
507 }
508
509 class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload
510 {
511 public:
IndirectDispatchCaseBufferGenerate(tcu::TestContext & testCtx,const DispatchCaseDesc & caseDesc,const glu::GLSLVersion glslVersion)512 IndirectDispatchCaseBufferGenerate (tcu::TestContext& testCtx,
513 const DispatchCaseDesc& caseDesc,
514 const glu::GLSLVersion glslVersion)
515 : IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion) {}
516
~IndirectDispatchCaseBufferGenerate(void)517 virtual ~IndirectDispatchCaseBufferGenerate (void) {}
518
519 virtual void initPrograms (vk::SourceCollections& programCollection) const;
520 virtual TestInstance* createInstance (Context& context) const;
521
522 private:
523 IndirectDispatchCaseBufferGenerate (const vkt::TestCase&);
524 IndirectDispatchCaseBufferGenerate& operator= (const vkt::TestCase&);
525 };
526
initPrograms(vk::SourceCollections & programCollection) const527 void IndirectDispatchCaseBufferGenerate::initPrograms (vk::SourceCollections& programCollection) const
528 {
529 IndirectDispatchCaseBufferUpload::initPrograms(programCollection);
530
531 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
532
533 std::ostringstream computeBuffer;
534
535 // Header
536 computeBuffer
537 << versionDecl << "\n"
538 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
539 << "layout(set = 0, binding = 0, std430) buffer Out\n"
540 << "{\n"
541 << " highp uint data[];\n"
542 << "};\n"
543 << "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
544 << "{\n"
545 << " data[offset+0u] = numWorkGroups.x;\n"
546 << " data[offset+1u] = numWorkGroups.y;\n"
547 << " data[offset+2u] = numWorkGroups.z;\n"
548 << "}\n"
549 << "void main (void)\n"
550 << "{\n";
551
552 // Dispatch commands
553 for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
554 {
555 const deUint32 offs = (deUint32)(cmdIter->m_offset / sizeof(deUint32));
556 DE_ASSERT((size_t)offs * sizeof(deUint32) == (size_t)cmdIter->m_offset);
557
558 computeBuffer
559 << "\twriteCmd(" << offs << "u, uvec3("
560 << cmdIter->m_numWorkGroups.x() << "u, "
561 << cmdIter->m_numWorkGroups.y() << "u, "
562 << cmdIter->m_numWorkGroups.z() << "u));\n";
563 }
564
565 // Ending
566 computeBuffer << "}\n";
567
568 std::string computeString = computeBuffer.str();
569
570 programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString);
571 }
572
createInstance(Context & context) const573 TestInstance* IndirectDispatchCaseBufferGenerate::createInstance (Context& context) const
574 {
575 return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
576 }
577
commandsVec(const DispatchCommand & cmd)578 DispatchCommandsVec commandsVec (const DispatchCommand& cmd)
579 {
580 DispatchCommandsVec vec;
581 vec.push_back(cmd);
582 return vec;
583 }
584
commandsVec(const DispatchCommand & cmd0,const DispatchCommand & cmd1,const DispatchCommand & cmd2,const DispatchCommand & cmd3,const DispatchCommand & cmd4)585 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
586 const DispatchCommand& cmd1,
587 const DispatchCommand& cmd2,
588 const DispatchCommand& cmd3,
589 const DispatchCommand& cmd4)
590 {
591 DispatchCommandsVec vec;
592 vec.push_back(cmd0);
593 vec.push_back(cmd1);
594 vec.push_back(cmd2);
595 vec.push_back(cmd3);
596 vec.push_back(cmd4);
597 return vec;
598 }
599
commandsVec(const DispatchCommand & cmd0,const DispatchCommand & cmd1,const DispatchCommand & cmd2,const DispatchCommand & cmd3,const DispatchCommand & cmd4,const DispatchCommand & cmd5,const DispatchCommand & cmd6)600 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
601 const DispatchCommand& cmd1,
602 const DispatchCommand& cmd2,
603 const DispatchCommand& cmd3,
604 const DispatchCommand& cmd4,
605 const DispatchCommand& cmd5,
606 const DispatchCommand& cmd6)
607 {
608 DispatchCommandsVec vec;
609 vec.push_back(cmd0);
610 vec.push_back(cmd1);
611 vec.push_back(cmd2);
612 vec.push_back(cmd3);
613 vec.push_back(cmd4);
614 vec.push_back(cmd5);
615 vec.push_back(cmd6);
616 return vec;
617 }
618
619 } // anonymous ns
620
createIndirectComputeDispatchTests(tcu::TestContext & testCtx)621 tcu::TestCaseGroup* createIndirectComputeDispatchTests (tcu::TestContext& testCtx)
622 {
623 static const DispatchCaseDesc s_dispatchCases[] =
624 {
625 DispatchCaseDesc("single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
626 commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)))
627 ),
628 DispatchCaseDesc("multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
629 commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5)))
630 ),
631 DispatchCaseDesc("multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1),
632 commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3)))
633 ),
634 DispatchCaseDesc("small_offset", "Small offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
635 commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1)))
636 ),
637 DispatchCaseDesc("large_offset", "Large offset", (2 << 20), tcu::UVec3(1, 1, 1),
638 commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1)))
639 ),
640 DispatchCaseDesc("large_offset_multiple_invocations", "Large offset, multiple invocations", (2 << 20), tcu::UVec3(2, 3, 1),
641 commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3)))
642 ),
643 DispatchCaseDesc("empty_command", "Empty command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
644 commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0)))
645 ),
646 DispatchCaseDesc("multi_dispatch", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
647 commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
648 DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)),
649 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
650 DispatchCommand(40, tcu::UVec3(1, 1, 7)),
651 DispatchCommand(52, tcu::UVec3(1, 1, 4)))
652 ),
653 DispatchCaseDesc("multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
654 commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
655 DispatchCommand(0, tcu::UVec3(1, 1, 1)),
656 DispatchCommand(0, tcu::UVec3(1, 1, 1)),
657 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
658 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
659 DispatchCommand(52, tcu::UVec3(1, 1, 4)),
660 DispatchCommand(52, tcu::UVec3(1, 1, 4)))
661 ),
662 };
663
664 de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests(new tcu::TestCaseGroup(testCtx, "indirect_dispatch", "Indirect dispatch tests"));
665
666 tcu::TestCaseGroup* const groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer", "");
667 indirectComputeDispatchTests->addChild(groupBufferUpload);
668
669 for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
670 {
671 groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
672 }
673
674 tcu::TestCaseGroup* const groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute", "");
675 indirectComputeDispatchTests->addChild(groupBufferGenerate);
676
677 for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
678 {
679 groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
680 }
681
682 return indirectComputeDispatchTests.release();
683 }
684
685 } // compute
686 } // vkt
687