1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2016 The Khronos Group Inc.
6 * Copyright (c) 2016 The Android Open Source Project
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Compute Shader Tests
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktComputeBasicComputeShaderTests.hpp"
26 #include "vktTestCase.hpp"
27 #include "vktTestCaseUtil.hpp"
28 #include "vktComputeTestsUtil.hpp"
29
30 #include "vkDefs.hpp"
31 #include "vkRef.hpp"
32 #include "vkRefUtil.hpp"
33 #include "vkPlatform.hpp"
34 #include "vkPrograms.hpp"
35 #include "vkRefUtil.hpp"
36 #include "vkMemUtil.hpp"
37 #include "vkQueryUtil.hpp"
38 #include "vkBuilderUtil.hpp"
39 #include "vkTypeUtil.hpp"
40
41 #include "deStringUtil.hpp"
42 #include "deUniquePtr.hpp"
43 #include "deRandom.hpp"
44
45 #include <vector>
46
47 using namespace vk;
48
49 namespace vkt
50 {
51 namespace compute
52 {
53 namespace
54 {
55
56 template<typename T, int size>
multiplyComponents(const tcu::Vector<T,size> & v)57 T multiplyComponents (const tcu::Vector<T, size>& v)
58 {
59 T accum = 1;
60 for (int i = 0; i < size; ++i)
61 accum *= v[i];
62 return accum;
63 }
64
// Return the square of a value (a * a).
template<typename T>
inline T squared (const T& a)
{
	const T result = a * a;
	return result;
}
70
make2DImageCreateInfo(const tcu::IVec2 & imageSize,const VkImageUsageFlags usage)71 inline VkImageCreateInfo make2DImageCreateInfo (const tcu::IVec2& imageSize, const VkImageUsageFlags usage)
72 {
73 const VkImageCreateInfo imageParams =
74 {
75 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
76 DE_NULL, // const void* pNext;
77 0u, // VkImageCreateFlags flags;
78 VK_IMAGE_TYPE_2D, // VkImageType imageType;
79 VK_FORMAT_R32_UINT, // VkFormat format;
80 vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), // VkExtent3D extent;
81 1u, // deUint32 mipLevels;
82 1u, // deUint32 arrayLayers;
83 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
84 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
85 usage, // VkImageUsageFlags usage;
86 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
87 0u, // deUint32 queueFamilyIndexCount;
88 DE_NULL, // const deUint32* pQueueFamilyIndices;
89 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
90 };
91 return imageParams;
92 }
93
makeBufferImageCopy(const tcu::IVec2 & imageSize)94 inline VkBufferImageCopy makeBufferImageCopy(const tcu::IVec2& imageSize)
95 {
96 return compute::makeBufferImageCopy(vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), 1u);
97 }
98
// Kind of descriptor-backed buffer a test operates on.
// NOTE(review): not referenced in this chunk — presumably used by test cases
// further down the file; verify before removing.
enum BufferType
{
	BUFFER_TYPE_UNIFORM,
	BUFFER_TYPE_SSBO,
};
104
// Tests communication through a shared (workgroup-local) array: each
// invocation writes to a mirrored slot, synchronizes with a barrier, and
// reads its own slot back into an SSBO that the host validates.
class SharedVarTest : public vkt::TestCase
{
public:
						SharedVarTest	(tcu::TestContext&	testCtx,
										 const std::string&	name,
										 const std::string&	description,
										 const tcu::IVec3&	localSize,
										 const tcu::IVec3&	workSize);

	// Generates the GLSL compute shader ("comp").
	void				initPrograms	(SourceCollections& sourceCollections) const;
	TestInstance*		createInstance	(Context& context) const;

private:
	const tcu::IVec3	m_localSize;	// local_size_x/y/z of the work group
	const tcu::IVec3	m_workSize;		// number of work groups per dimension
};
121
// Per-execution instance of SharedVarTest: records and submits the dispatch,
// then verifies the SSBO contents on the host.
class SharedVarTestInstance : public vkt::TestInstance
{
public:
						SharedVarTestInstance	(Context&			context,
												 const tcu::IVec3&	localSize,
												 const tcu::IVec3&	workSize);

	tcu::TestStatus		iterate					(void);

private:
	const tcu::IVec3	m_localSize;	// work group dimensions
	const tcu::IVec3	m_workSize;		// dispatch dimensions (work group counts)
};
135
SharedVarTest::SharedVarTest (tcu::TestContext&		testCtx,
							  const std::string&	name,
							  const std::string&	description,
							  const tcu::IVec3&		localSize,
							  const tcu::IVec3&		workSize)
	: TestCase		(testCtx, name, description)
	, m_localSize	(localSize)
	, m_workSize	(workSize)
{
	// Parameters are only stored here; shader generation happens in initPrograms().
}
146
void SharedVarTest::initPrograms (SourceCollections& sourceCollections) const
{
	// One output slot per invocation across the whole dispatch.
	const int workGroupSize = multiplyComponents(m_localSize);
	const int workGroupCount = multiplyComponents(m_workSize);
	const int numValues = workGroupSize * workGroupCount;

	// Each invocation stores (globalOffs + localOffs^2) into the shared array
	// at the *mirrored* index (localSize-localOffs-1), synchronizes with
	// memoryBarrierShared()+barrier(), then copies its own shared slot to the
	// SSBO — so a correct result requires the other invocation's write to be
	// visible. The host-side check in iterate() mirrors this formula.
	std::ostringstream src;
	src << "#version 310 es\n"
		<< "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
		<< "layout(binding = 0) writeonly buffer Output {\n"
		<< "    uint values[" << numValues << "];\n"
		<< "} sb_out;\n\n"
		<< "shared uint offsets[" << workGroupSize << "];\n\n"
		<< "void main (void) {\n"
		<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
		<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
		<< "    uint globalOffs = localSize*globalNdx;\n"
		<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
		<< "\n"
		<< "    offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
		<< "    memoryBarrierShared();\n"
		<< "    barrier();\n"
		<< "    sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
		<< "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
174
// Ownership of the returned instance passes to the framework.
TestInstance* SharedVarTest::createInstance (Context& context) const
{
	return new SharedVarTestInstance(context, m_localSize, m_workSize);
}
179
SharedVarTestInstance::SharedVarTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
	: TestInstance	(context)
	, m_localSize	(localSize)
	, m_workSize	(workSize)
{
	// No Vulkan objects are created until iterate().
}
186
// Run the shared-variable shader once over the whole dispatch and verify the
// SSBO: slot (group, local) must contain globalOffset + (localSize-local-1)^2,
// i.e. the value written to the mirrored shared-array index by the shader.
tcu::TestStatus SharedVarTestInstance::iterate (void)
{
	const DeviceInterface&	vk					= m_context.getDeviceInterface();
	const VkDevice			device				= m_context.getDevice();
	const VkQueue			queue				= m_context.getUniversalQueue();
	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
	Allocator&				allocator			= m_context.getDefaultAllocator();

	const int workGroupSize = multiplyComponents(m_localSize);
	const int workGroupCount = multiplyComponents(m_workSize);

	// Create a buffer and host-visible memory for it

	const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
	const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

	// Create descriptor set: single SSBO at binding 0, matching the shader.

	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
		DescriptorSetLayoutBuilder()
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(vk, device));

	const Unique<VkDescriptorPool> descriptorPool(
		DescriptorPoolBuilder()
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

	const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
	DescriptorSetUpdateBuilder()
		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
		.update(vk, device);

	// Perform the computation

	const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
	const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));

	// Make shader writes visible to subsequent host reads of the mapped buffer.
	const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);

	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
	const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));

	// Start recording commands

	beginCommandBuffer(vk, *cmdBuffer);

	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
	vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

	vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());

	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);

	endCommandBuffer(vk, *cmdBuffer);

	// Wait for completion

	submitCommandsAndWait(vk, device, queue, *cmdBuffer);

	// Validate the results

	const Allocation& bufferAllocation = buffer.getAllocation();
	// Required for non-coherent memory before the host reads the mapping.
	invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);

	const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());

	for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
	{
		const int globalOffset = groupNdx * workGroupSize;
		for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
		{
			const deUint32 res = bufferPtr[globalOffset + localOffset];
			// Mirror of the shader: slot localOffset was written by invocation
			// (workGroupSize - localOffset - 1) with globalOffs + localOffs^2.
			const deUint32 ref = globalOffset + squared(workGroupSize - localOffset - 1);

			if (res != ref)
			{
				std::ostringstream msg;
				msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
				return tcu::TestStatus::fail(msg.str());
			}
		}
	}
	return tcu::TestStatus::pass("Compute succeeded");
}
275
// Tests atomicAdd on a shared (workgroup-local) counter: every invocation gets
// a unique return value from the atomic and writes a distinguishable marker
// into an SSBO that the host validates.
class SharedVarAtomicOpTest : public vkt::TestCase
{
public:
						SharedVarAtomicOpTest	(tcu::TestContext&	testCtx,
												 const std::string&	name,
												 const std::string&	description,
												 const tcu::IVec3&	localSize,
												 const tcu::IVec3&	workSize);

	// Generates the GLSL compute shader ("comp").
	void				initPrograms	(SourceCollections& sourceCollections) const;
	TestInstance*		createInstance	(Context& context) const;

private:
	const tcu::IVec3	m_localSize;	// local_size_x/y/z of the work group
	const tcu::IVec3	m_workSize;		// number of work groups per dimension
};
292
// Per-execution instance of SharedVarAtomicOpTest: submits the dispatch and
// verifies the SSBO contents on the host.
class SharedVarAtomicOpTestInstance : public vkt::TestInstance
{
public:
						SharedVarAtomicOpTestInstance	(Context&			context,
														 const tcu::IVec3&	localSize,
														 const tcu::IVec3&	workSize);

	tcu::TestStatus		iterate							(void);

private:
	const tcu::IVec3	m_localSize;	// work group dimensions
	const tcu::IVec3	m_workSize;		// dispatch dimensions (work group counts)
};
306
SharedVarAtomicOpTest::SharedVarAtomicOpTest (tcu::TestContext&		testCtx,
											  const std::string&	name,
											  const std::string&	description,
											  const tcu::IVec3&		localSize,
											  const tcu::IVec3&		workSize)
	: TestCase		(testCtx, name, description)
	, m_localSize	(localSize)
	, m_workSize	(workSize)
{
	// Parameters are only stored here; shader generation happens in initPrograms().
}
317
void SharedVarAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
{
	// One output slot per invocation across the whole dispatch.
	const int workGroupSize = multiplyComponents(m_localSize);
	const int workGroupCount = multiplyComponents(m_workSize);
	const int numValues = workGroupSize * workGroupCount;

	// The group zeroes the shared counter (every invocation writes 0, which is
	// harmless), synchronizes, then each invocation does atomicAdd(count, 1).
	// The returned oldVal is unique per invocation within the group, so slot
	// globalOffs+oldVal receives oldVal+1 — leaving each group's region holding
	// the permutation-independent sequence 1..localSize checked in iterate().
	std::ostringstream src;
	src << "#version 310 es\n"
		<< "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
		<< "layout(binding = 0) writeonly buffer Output {\n"
		<< "    uint values[" << numValues << "];\n"
		<< "} sb_out;\n\n"
		<< "shared uint count;\n\n"
		<< "void main (void) {\n"
		<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
		<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
		<< "    uint globalOffs = localSize*globalNdx;\n"
		<< "\n"
		<< "    count = 0u;\n"
		<< "    memoryBarrierShared();\n"
		<< "    barrier();\n"
		<< "    uint oldVal = atomicAdd(count, 1u);\n"
		<< "    sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
		<< "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
345
// Ownership of the returned instance passes to the framework.
TestInstance* SharedVarAtomicOpTest::createInstance (Context& context) const
{
	return new SharedVarAtomicOpTestInstance(context, m_localSize, m_workSize);
}
350
SharedVarAtomicOpTestInstance::SharedVarAtomicOpTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
	: TestInstance	(context)
	, m_localSize	(localSize)
	, m_workSize	(workSize)
{
	// No Vulkan objects are created until iterate().
}
357
// Run the shared-atomic shader once and verify the SSBO: within each group's
// region the values must be exactly 1..workGroupSize in slot order, since each
// unique atomicAdd return value oldVal stored oldVal+1 at offset oldVal.
tcu::TestStatus SharedVarAtomicOpTestInstance::iterate (void)
{
	const DeviceInterface&	vk					= m_context.getDeviceInterface();
	const VkDevice			device				= m_context.getDevice();
	const VkQueue			queue				= m_context.getUniversalQueue();
	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
	Allocator&				allocator			= m_context.getDefaultAllocator();

	const int workGroupSize = multiplyComponents(m_localSize);
	const int workGroupCount = multiplyComponents(m_workSize);

	// Create a buffer and host-visible memory for it

	const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
	const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

	// Create descriptor set: single SSBO at binding 0, matching the shader.

	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
		DescriptorSetLayoutBuilder()
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(vk, device));

	const Unique<VkDescriptorPool> descriptorPool(
		DescriptorPoolBuilder()
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

	const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
	DescriptorSetUpdateBuilder()
		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
		.update(vk, device);

	// Perform the computation

	const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
	const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));

	// Make shader writes visible to subsequent host reads of the mapped buffer.
	const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);

	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
	const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));

	// Start recording commands

	beginCommandBuffer(vk, *cmdBuffer);

	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
	vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

	vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());

	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1u, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);

	endCommandBuffer(vk, *cmdBuffer);

	// Wait for completion

	submitCommandsAndWait(vk, device, queue, *cmdBuffer);

	// Validate the results

	const Allocation& bufferAllocation = buffer.getAllocation();
	// Required for non-coherent memory before the host reads the mapping.
	invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);

	const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());

	for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
	{
		const int globalOffset = groupNdx * workGroupSize;
		for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
		{
			const deUint32 res = bufferPtr[globalOffset + localOffset];
			// Slot at offset k was written by the invocation whose atomicAdd
			// returned k, storing k+1.
			const deUint32 ref = localOffset + 1;

			if (res != ref)
			{
				std::ostringstream msg;
				msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
				return tcu::TestStatus::fail(msg.str());
			}
		}
	}
	return tcu::TestStatus::pass("Compute succeeded");
}
446
// Tests intra-workgroup synchronization of SSBO accesses: invocations write to
// neighbouring slots of a coherent buffer across memoryBarrierBuffer()+barrier()
// pairs, so correct results require both ordering and visibility.
class SSBOLocalBarrierTest : public vkt::TestCase
{
public:
						SSBOLocalBarrierTest	(tcu::TestContext&	testCtx,
												 const std::string&	name,
												 const std::string&	description,
												 const tcu::IVec3&	localSize,
												 const tcu::IVec3&	workSize);

	// Generates the GLSL compute shader ("comp").
	void				initPrograms	(SourceCollections& sourceCollections) const;
	TestInstance*		createInstance	(Context& context) const;

private:
	const tcu::IVec3	m_localSize;	// local_size_x/y/z of the work group
	const tcu::IVec3	m_workSize;		// number of work groups per dimension
};
463
// Per-execution instance of SSBOLocalBarrierTest: submits the dispatch and
// verifies the SSBO contents on the host.
class SSBOLocalBarrierTestInstance : public vkt::TestInstance
{
public:
						SSBOLocalBarrierTestInstance	(Context&			context,
														 const tcu::IVec3&	localSize,
														 const tcu::IVec3&	workSize);

	tcu::TestStatus		iterate							(void);

private:
	const tcu::IVec3	m_localSize;	// work group dimensions
	const tcu::IVec3	m_workSize;		// dispatch dimensions (work group counts)
};
477
SSBOLocalBarrierTest::SSBOLocalBarrierTest (tcu::TestContext&	testCtx,
											const std::string&	name,
											const std::string&	description,
											const tcu::IVec3&	localSize,
											const tcu::IVec3&	workSize)
	: TestCase		(testCtx, name, description)
	, m_localSize	(localSize)
	, m_workSize	(workSize)
{
	// Parameters are only stored here; shader generation happens in initPrograms().
}
488
void SSBOLocalBarrierTest::initPrograms (SourceCollections& sourceCollections) const
{
	// One output slot per invocation across the whole dispatch.
	const int workGroupSize = multiplyComponents(m_localSize);
	const int workGroupCount = multiplyComponents(m_workSize);
	const int numValues = workGroupSize * workGroupCount;

	// Three phases per group, separated by memoryBarrierBuffer()+barrier():
	//   1. each invocation seeds its own slot with globalOffs;
	//   2. it read-modify-writes (+=) the slot one to the right (mod localSize);
	//   3. it does the same two to the right.
	// Final expected value per slot: globalOffs + (left neighbour) + (second
	// left neighbour) — the reference computed in iterate(). The buffer is
	// declared coherent so the cross-invocation reads see prior writes.
	std::ostringstream src;
	src << "#version 310 es\n"
		<< "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
		<< "layout(binding = 0) coherent buffer Output {\n"
		<< "    uint values[" << numValues << "];\n"
		<< "} sb_out;\n\n"
		<< "void main (void) {\n"
		<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
		<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
		<< "    uint globalOffs = localSize*globalNdx;\n"
		<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
		<< "\n"
		<< "    sb_out.values[globalOffs + localOffs] = globalOffs;\n"
		<< "    memoryBarrierBuffer();\n"
		<< "    barrier();\n"
		<< "    sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n" // += so we read and write
		<< "    memoryBarrierBuffer();\n"
		<< "    barrier();\n"
		<< "    sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
		<< "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
518
// Ownership of the returned instance passes to the framework.
TestInstance* SSBOLocalBarrierTest::createInstance (Context& context) const
{
	return new SSBOLocalBarrierTestInstance(context, m_localSize, m_workSize);
}
523
SSBOLocalBarrierTestInstance::SSBOLocalBarrierTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
	: TestInstance	(context)
	, m_localSize	(localSize)
	, m_workSize	(workSize)
{
	// No Vulkan objects are created until iterate().
}
530
// Run the SSBO-barrier shader once and verify each slot equals
// globalOffset + ((local-1) mod size) + ((local-2) mod size), matching the two
// += passes the shader performs on its right-hand neighbours.
tcu::TestStatus SSBOLocalBarrierTestInstance::iterate (void)
{
	const DeviceInterface&	vk					= m_context.getDeviceInterface();
	const VkDevice			device				= m_context.getDevice();
	const VkQueue			queue				= m_context.getUniversalQueue();
	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
	Allocator&				allocator			= m_context.getDefaultAllocator();

	const int workGroupSize = multiplyComponents(m_localSize);
	const int workGroupCount = multiplyComponents(m_workSize);

	// Create a buffer and host-visible memory for it

	const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
	const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

	// Create descriptor set: single SSBO at binding 0, matching the shader.

	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
		DescriptorSetLayoutBuilder()
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(vk, device));

	const Unique<VkDescriptorPool> descriptorPool(
		DescriptorPoolBuilder()
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

	const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
	DescriptorSetUpdateBuilder()
		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
		.update(vk, device);

	// Perform the computation

	const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
	const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));

	// Make shader writes visible to subsequent host reads of the mapped buffer.
	const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);

	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
	const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));

	// Start recording commands

	beginCommandBuffer(vk, *cmdBuffer);

	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
	vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

	vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());

	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);

	endCommandBuffer(vk, *cmdBuffer);

	// Wait for completion

	submitCommandsAndWait(vk, device, queue, *cmdBuffer);

	// Validate the results

	const Allocation& bufferAllocation = buffer.getAllocation();
	// Required for non-coherent memory before the host reads the mapping.
	invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);

	const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());

	for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
	{
		const int globalOffset = groupNdx * workGroupSize;
		for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
		{
			const deUint32 res = bufferPtr[globalOffset + localOffset];
			// offs0/offs1: indices of the invocations that added into this
			// slot during passes 2 and 3 (left neighbours, wrapped).
			const int offs0 = localOffset - 1 < 0 ? ((localOffset + workGroupSize - 1) % workGroupSize) : ((localOffset - 1) % workGroupSize);
			const int offs1 = localOffset - 2 < 0 ? ((localOffset + workGroupSize - 2) % workGroupSize) : ((localOffset - 2) % workGroupSize);
			const deUint32 ref = static_cast<deUint32>(globalOffset + offs0 + offs1);

			if (res != ref)
			{
				std::ostringstream msg;
				msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
				return tcu::TestStatus::fail(msg.str());
			}
		}
	}
	return tcu::TestStatus::pass("Compute succeeded");
}
621
// Tests imageLoad from an r32ui storage image: a compute shader copies every
// texel of a 2D image into an SSBO, which the host compares against the
// original staging data.
class CopyImageToSSBOTest : public vkt::TestCase
{
public:
						CopyImageToSSBOTest	(tcu::TestContext&	testCtx,
											 const std::string&	name,
											 const std::string&	description,
											 const tcu::IVec2&	localSize,
											 const tcu::IVec2&	imageSize);

	// Generates the GLSL compute shader ("comp").
	void				initPrograms	(SourceCollections& sourceCollections) const;
	TestInstance*		createInstance	(Context& context) const;

private:
	const tcu::IVec2	m_localSize;	// 2D work group size (must divide imageSize)
	const tcu::IVec2	m_imageSize;	// source image dimensions in texels
};
638
// Per-execution instance of CopyImageToSSBOTest: uploads test data, runs the
// copy shader and validates the SSBO contents on the host.
class CopyImageToSSBOTestInstance : public vkt::TestInstance
{
public:
						CopyImageToSSBOTestInstance	(Context&			context,
													 const tcu::IVec2&	localSize,
													 const tcu::IVec2&	imageSize);

	tcu::TestStatus		iterate						(void);

private:
	const tcu::IVec2	m_localSize;	// 2D work group size
	const tcu::IVec2	m_imageSize;	// source image dimensions in texels
};
652
CopyImageToSSBOTest::CopyImageToSSBOTest (tcu::TestContext&		testCtx,
										  const std::string&	name,
										  const std::string&	description,
										  const tcu::IVec2&		localSize,
										  const tcu::IVec2&		imageSize)
	: TestCase		(testCtx, name, description)
	, m_localSize	(localSize)
	, m_imageSize	(imageSize)
{
	// The dispatch size is imageSize / localSize, so the image must be evenly
	// divisible by the work group size for full coverage with no partial groups.
	DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
	DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
}
665
initPrograms(SourceCollections & sourceCollections) const666 void CopyImageToSSBOTest::initPrograms (SourceCollections& sourceCollections) const
667 {
668 std::ostringstream src;
669 src << "#version 310 es\n"
670 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
671 << "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_srcImg;\n"
672 << "layout(binding = 0) writeonly buffer Output {\n"
673 << " uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
674 << "} sb_out;\n\n"
675 << "void main (void) {\n"
676 << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
677 << " uint value = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
678 << " sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
679 << "}\n";
680
681 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
682 }
683
// Ownership of the returned instance passes to the framework.
TestInstance* CopyImageToSSBOTest::createInstance (Context& context) const
{
	return new CopyImageToSSBOTestInstance(context, m_localSize, m_imageSize);
}
688
CopyImageToSSBOTestInstance::CopyImageToSSBOTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
	: TestInstance	(context)
	, m_localSize	(localSize)
	, m_imageSize	(imageSize)
{
	// No Vulkan objects are created until iterate().
}
695
iterate(void)696 tcu::TestStatus CopyImageToSSBOTestInstance::iterate (void)
697 {
698 const DeviceInterface& vk = m_context.getDeviceInterface();
699 const VkDevice device = m_context.getDevice();
700 const VkQueue queue = m_context.getUniversalQueue();
701 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
702 Allocator& allocator = m_context.getDefaultAllocator();
703
704 // Create an image
705
706 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
707 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
708
709 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
710 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
711
712 // Staging buffer (source data for image)
713
714 const deUint32 imageArea = multiplyComponents(m_imageSize);
715 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
716
717 const Buffer stagingBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
718
719 // Populate the staging buffer with test data
720 {
721 de::Random rnd(0xab2c7);
722 const Allocation& stagingBufferAllocation = stagingBuffer.getAllocation();
723 deUint32* bufferPtr = static_cast<deUint32*>(stagingBufferAllocation.getHostPtr());
724 for (deUint32 i = 0; i < imageArea; ++i)
725 *bufferPtr++ = rnd.getUint32();
726
727 flushMappedMemoryRange(vk, device, stagingBufferAllocation.getMemory(), stagingBufferAllocation.getOffset(), bufferSizeBytes);
728 }
729
730 // Create a buffer to store shader output
731
732 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
733
734 // Create descriptor set
735
736 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
737 DescriptorSetLayoutBuilder()
738 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
739 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
740 .build(vk, device));
741
742 const Unique<VkDescriptorPool> descriptorPool(
743 DescriptorPoolBuilder()
744 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
745 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
746 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
747
748 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
749
750 // Set the bindings
751
752 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
753 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
754
755 DescriptorSetUpdateBuilder()
756 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
757 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
758 .update(vk, device);
759
760 // Perform the computation
761 {
762 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
763 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
764 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
765
766 const VkBufferMemoryBarrier stagingBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *stagingBuffer, 0ull, bufferSizeBytes);
767
768 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
769 0u, VK_ACCESS_TRANSFER_WRITE_BIT,
770 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
771 *image, subresourceRange);
772
773 const VkImageMemoryBarrier imagePostCopyBarrier = makeImageMemoryBarrier(
774 VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
775 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
776 *image, subresourceRange);
777
778 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
779
780 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
781 const tcu::IVec2 workSize = m_imageSize / m_localSize;
782
783 // Prepare the command buffer
784
785 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
786 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
787
788 // Start recording commands
789
790 beginCommandBuffer(vk, *cmdBuffer);
791
792 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
793 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
794
795 vk.cmdPipelineBarrier(*cmdBuffer, 0u, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &stagingBufferPostHostWriteBarrier, 1, &imagePreCopyBarrier);
796 vk.cmdCopyBufferToImage(*cmdBuffer, *stagingBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, ©Params);
797 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePostCopyBarrier);
798
799 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
800 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
801
802 endCommandBuffer(vk, *cmdBuffer);
803
804 // Wait for completion
805
806 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
807 }
808
809 // Validate the results
810
811 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
812 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
813
814 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
815 const deUint32* refBufferPtr = static_cast<deUint32*>(stagingBuffer.getAllocation().getHostPtr());
816
817 for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
818 {
819 const deUint32 res = *(bufferPtr + ndx);
820 const deUint32 ref = *(refBufferPtr + ndx);
821
822 if (res != ref)
823 {
824 std::ostringstream msg;
825 msg << "Comparison failed for Output.values[" << ndx << "]";
826 return tcu::TestStatus::fail(msg.str());
827 }
828 }
829 return tcu::TestStatus::pass("Compute succeeded");
830 }
831
// Test case: a compute shader reads uints from an SSBO (binding 0) and stores
// them into a r32ui storage image (binding 1). The actual Vulkan work and
// result verification happen in CopySSBOToImageTestInstance.
class CopySSBOToImageTest : public vkt::TestCase
{
public:
								// localSize: workgroup dimensions; imageSize: image extent.
								// imageSize must be a multiple of localSize per dimension.
								CopySSBOToImageTest	(tcu::TestContext&	testCtx,
													 const std::string&	name,
													 const std::string&	description,
													 const tcu::IVec2&	localSize,
													 const tcu::IVec2&	imageSize);

	// Generates the GLSL compute shader under the name "comp".
	void			initPrograms				(SourceCollections& sourceCollections) const;
	// Creates the per-run instance that executes the test.
	TestInstance*	createInstance				(Context&			context) const;

private:
	const tcu::IVec2	m_localSize;	// compute workgroup size (x, y)
	const tcu::IVec2	m_imageSize;	// destination image extent (x, y)
};
848
// Executes the SSBO-to-image test: creates the buffers, image and descriptors,
// dispatches the compute shader, copies the image back, and compares results.
class CopySSBOToImageTestInstance : public vkt::TestInstance
{
public:
									CopySSBOToImageTestInstance	(Context&			context,
																 const tcu::IVec2&	localSize,
																 const tcu::IVec2&	imageSize);

	// Runs the test once; returns pass/fail status.
	tcu::TestStatus					iterate						(void);

private:
	const tcu::IVec2				m_localSize;	// compute workgroup size (x, y)
	const tcu::IVec2				m_imageSize;	// destination image extent (x, y)
};
862
CopySSBOToImageTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const tcu::IVec2 & localSize,const tcu::IVec2 & imageSize)863 CopySSBOToImageTest::CopySSBOToImageTest (tcu::TestContext& testCtx,
864 const std::string& name,
865 const std::string& description,
866 const tcu::IVec2& localSize,
867 const tcu::IVec2& imageSize)
868 : TestCase (testCtx, name, description)
869 , m_localSize (localSize)
870 , m_imageSize (imageSize)
871 {
872 DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
873 DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
874 }
875
initPrograms(SourceCollections & sourceCollections) const876 void CopySSBOToImageTest::initPrograms (SourceCollections& sourceCollections) const
877 {
878 std::ostringstream src;
879 src << "#version 310 es\n"
880 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
881 << "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_dstImg;\n"
882 << "layout(binding = 0) readonly buffer Input {\n"
883 << " uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
884 << "} sb_in;\n\n"
885 << "void main (void) {\n"
886 << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
887 << " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
888 << " imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
889 << "}\n";
890
891 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
892 }
893
createInstance(Context & context) const894 TestInstance* CopySSBOToImageTest::createInstance (Context& context) const
895 {
896 return new CopySSBOToImageTestInstance(context, m_localSize, m_imageSize);
897 }
898
CopySSBOToImageTestInstance(Context & context,const tcu::IVec2 & localSize,const tcu::IVec2 & imageSize)899 CopySSBOToImageTestInstance::CopySSBOToImageTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
900 : TestInstance (context)
901 , m_localSize (localSize)
902 , m_imageSize (imageSize)
903 {
904 }
905
iterate(void)906 tcu::TestStatus CopySSBOToImageTestInstance::iterate (void)
907 {
908 const DeviceInterface& vk = m_context.getDeviceInterface();
909 const VkDevice device = m_context.getDevice();
910 const VkQueue queue = m_context.getUniversalQueue();
911 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
912 Allocator& allocator = m_context.getDefaultAllocator();
913
914 // Create an image
915
916 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
917 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
918
919 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
920 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
921
922 // Create an input buffer (data to be read in the shader)
923
924 const deUint32 imageArea = multiplyComponents(m_imageSize);
925 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
926
927 const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
928
929 // Populate the buffer with test data
930 {
931 de::Random rnd(0x77238ac2);
932 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
933 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
934 for (deUint32 i = 0; i < imageArea; ++i)
935 *bufferPtr++ = rnd.getUint32();
936
937 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
938 }
939
940 // Create a buffer to store shader output (copied from image data)
941
942 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
943
944 // Create descriptor set
945
946 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
947 DescriptorSetLayoutBuilder()
948 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
949 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
950 .build(vk, device));
951
952 const Unique<VkDescriptorPool> descriptorPool(
953 DescriptorPoolBuilder()
954 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
955 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
956 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
957
958 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
959
960 // Set the bindings
961
962 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
963 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
964
965 DescriptorSetUpdateBuilder()
966 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
967 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
968 .update(vk, device);
969
970 // Perform the computation
971 {
972 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
973 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
974 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
975
976 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
977
978 const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
979 0u, 0u,
980 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
981 *image, subresourceRange);
982
983 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
984 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
985 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
986 *image, subresourceRange);
987
988 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
989
990 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
991 const tcu::IVec2 workSize = m_imageSize / m_localSize;
992
993 // Prepare the command buffer
994
995 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
996 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
997
998 // Start recording commands
999
1000 beginCommandBuffer(vk, *cmdBuffer);
1001
1002 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1003 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1004
1005 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 1, &imageLayoutBarrier);
1006 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
1007
1008 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePreCopyBarrier);
1009 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, ©Params);
1010 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &outputBufferPostCopyBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1011
1012 endCommandBuffer(vk, *cmdBuffer);
1013
1014 // Wait for completion
1015
1016 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1017 }
1018
1019 // Validate the results
1020
1021 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1022 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1023
1024 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1025 const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
1026
1027 for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
1028 {
1029 const deUint32 res = *(bufferPtr + ndx);
1030 const deUint32 ref = *(refBufferPtr + ndx);
1031
1032 if (res != ref)
1033 {
1034 std::ostringstream msg;
1035 msg << "Comparison failed for pixel " << ndx;
1036 return tcu::TestStatus::fail(msg.str());
1037 }
1038 }
1039 return tcu::TestStatus::pass("Compute succeeded");
1040 }
1041
// Test case: a compute shader reads uints from an input buffer and writes
// their bitwise complement (~x) into an output SSBO. The input is either a
// uniform buffer or an SSBO, depending on which named factory is used.
class BufferToBufferInvertTest : public vkt::TestCase
{
public:
	// Generates the GLSL compute shader under the name "comp".
	void								initPrograms				(SourceCollections&	sourceCollections) const;
	TestInstance*						createInstance				(Context&			context) const;

	// Factory: input read from a uniform buffer (UBO -> SSBO invert).
	static BufferToBufferInvertTest*	UBOToSSBOInvertCase			(tcu::TestContext&	testCtx,
																	 const std::string&	name,
																	 const std::string&	description,
																	 const deUint32		numValues,
																	 const tcu::IVec3&	localSize,
																	 const tcu::IVec3&	workSize);

	// Factory: input read from an SSBO (SSBO -> SSBO invert-copy).
	static BufferToBufferInvertTest*	CopyInvertSSBOCase			(tcu::TestContext&	testCtx,
																	 const std::string&	name,
																	 const std::string&	description,
																	 const deUint32		numValues,
																	 const tcu::IVec3&	localSize,
																	 const tcu::IVec3&	workSize);

private:
										// Construction goes through the factories above.
										BufferToBufferInvertTest	(tcu::TestContext&	testCtx,
																	 const std::string&	name,
																	 const std::string&	description,
																	 const deUint32		numValues,
																	 const tcu::IVec3&	localSize,
																	 const tcu::IVec3&	workSize,
																	 const BufferType	bufferType);

	const BufferType					m_bufferType;	// BUFFER_TYPE_UNIFORM or BUFFER_TYPE_SSBO
	const deUint32						m_numValues;	// total number of uints processed
	const tcu::IVec3					m_localSize;	// compute workgroup size
	const tcu::IVec3					m_workSize;		// dispatch size (workgroup counts)
};
1076
// Executes the buffer-invert test: fills the input buffer, dispatches the
// shader, and checks every output value equals the complement of its input.
class BufferToBufferInvertTestInstance : public vkt::TestInstance
{
public:
									BufferToBufferInvertTestInstance	(Context&			context,
																		 const deUint32		numValues,
																		 const tcu::IVec3&	localSize,
																		 const tcu::IVec3&	workSize,
																		 const BufferType	bufferType);

	// Runs the test once; returns pass/fail status.
	tcu::TestStatus					iterate								(void);

private:
	const BufferType				m_bufferType;	// BUFFER_TYPE_UNIFORM or BUFFER_TYPE_SSBO
	const deUint32					m_numValues;	// total number of uints processed
	const tcu::IVec3				m_localSize;	// compute workgroup size
	const tcu::IVec3				m_workSize;		// dispatch size (workgroup counts)
};
1094
BufferToBufferInvertTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const deUint32 numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize,const BufferType bufferType)1095 BufferToBufferInvertTest::BufferToBufferInvertTest (tcu::TestContext& testCtx,
1096 const std::string& name,
1097 const std::string& description,
1098 const deUint32 numValues,
1099 const tcu::IVec3& localSize,
1100 const tcu::IVec3& workSize,
1101 const BufferType bufferType)
1102 : TestCase (testCtx, name, description)
1103 , m_bufferType (bufferType)
1104 , m_numValues (numValues)
1105 , m_localSize (localSize)
1106 , m_workSize (workSize)
1107 {
1108 DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1109 DE_ASSERT(m_bufferType == BUFFER_TYPE_UNIFORM || m_bufferType == BUFFER_TYPE_SSBO);
1110 }
1111
UBOToSSBOInvertCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const deUint32 numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)1112 BufferToBufferInvertTest* BufferToBufferInvertTest::UBOToSSBOInvertCase (tcu::TestContext& testCtx,
1113 const std::string& name,
1114 const std::string& description,
1115 const deUint32 numValues,
1116 const tcu::IVec3& localSize,
1117 const tcu::IVec3& workSize)
1118 {
1119 return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_UNIFORM);
1120 }
1121
CopyInvertSSBOCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const deUint32 numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)1122 BufferToBufferInvertTest* BufferToBufferInvertTest::CopyInvertSSBOCase (tcu::TestContext& testCtx,
1123 const std::string& name,
1124 const std::string& description,
1125 const deUint32 numValues,
1126 const tcu::IVec3& localSize,
1127 const tcu::IVec3& workSize)
1128 {
1129 return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_SSBO);
1130 }
1131
initPrograms(SourceCollections & sourceCollections) const1132 void BufferToBufferInvertTest::initPrograms (SourceCollections& sourceCollections) const
1133 {
1134 std::ostringstream src;
1135 if (m_bufferType == BUFFER_TYPE_UNIFORM)
1136 {
1137 src << "#version 310 es\n"
1138 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1139 << "layout(binding = 0) readonly uniform Input {\n"
1140 << " uint values[" << m_numValues << "];\n"
1141 << "} ub_in;\n"
1142 << "layout(binding = 1, std140) writeonly buffer Output {\n"
1143 << " uint values[" << m_numValues << "];\n"
1144 << "} sb_out;\n"
1145 << "void main (void) {\n"
1146 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1147 << " uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
1148 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1149 << " uint offset = numValuesPerInv*groupNdx;\n"
1150 << "\n"
1151 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1152 << " sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
1153 << "}\n";
1154 }
1155 else if (m_bufferType == BUFFER_TYPE_SSBO)
1156 {
1157 src << "#version 310 es\n"
1158 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1159 << "layout(binding = 0, std140) readonly buffer Input {\n"
1160 << " uint values[" << m_numValues << "];\n"
1161 << "} sb_in;\n"
1162 << "layout (binding = 1, std140) writeonly buffer Output {\n"
1163 << " uint values[" << m_numValues << "];\n"
1164 << "} sb_out;\n"
1165 << "void main (void) {\n"
1166 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1167 << " uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
1168 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1169 << " uint offset = numValuesPerInv*groupNdx;\n"
1170 << "\n"
1171 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1172 << " sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
1173 << "}\n";
1174 }
1175
1176 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1177 }
1178
createInstance(Context & context) const1179 TestInstance* BufferToBufferInvertTest::createInstance (Context& context) const
1180 {
1181 return new BufferToBufferInvertTestInstance(context, m_numValues, m_localSize, m_workSize, m_bufferType);
1182 }
1183
BufferToBufferInvertTestInstance(Context & context,const deUint32 numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize,const BufferType bufferType)1184 BufferToBufferInvertTestInstance::BufferToBufferInvertTestInstance (Context& context,
1185 const deUint32 numValues,
1186 const tcu::IVec3& localSize,
1187 const tcu::IVec3& workSize,
1188 const BufferType bufferType)
1189 : TestInstance (context)
1190 , m_bufferType (bufferType)
1191 , m_numValues (numValues)
1192 , m_localSize (localSize)
1193 , m_workSize (workSize)
1194 {
1195 }
1196
iterate(void)1197 tcu::TestStatus BufferToBufferInvertTestInstance::iterate (void)
1198 {
1199 const DeviceInterface& vk = m_context.getDeviceInterface();
1200 const VkDevice device = m_context.getDevice();
1201 const VkQueue queue = m_context.getUniversalQueue();
1202 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1203 Allocator& allocator = m_context.getDefaultAllocator();
1204
1205 // Customize the test based on buffer type
1206
1207 const VkBufferUsageFlags inputBufferUsageFlags = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1208 const VkDescriptorType inputBufferDescriptorType = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
1209 const deUint32 randomSeed = (m_bufferType == BUFFER_TYPE_UNIFORM ? 0x111223f : 0x124fef);
1210
1211 // Create an input buffer
1212
1213 const VkDeviceSize bufferSizeBytes = sizeof(tcu::UVec4) * m_numValues;
1214 const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, inputBufferUsageFlags), MemoryRequirement::HostVisible);
1215
1216 // Fill the input buffer with data
1217 {
1218 de::Random rnd(randomSeed);
1219 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
1220 tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(inputBufferAllocation.getHostPtr());
1221 for (deUint32 i = 0; i < m_numValues; ++i)
1222 bufferPtr[i].x() = rnd.getUint32();
1223
1224 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
1225 }
1226
1227 // Create an output buffer
1228
1229 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1230
1231 // Create descriptor set
1232
1233 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1234 DescriptorSetLayoutBuilder()
1235 .addSingleBinding(inputBufferDescriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1236 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1237 .build(vk, device));
1238
1239 const Unique<VkDescriptorPool> descriptorPool(
1240 DescriptorPoolBuilder()
1241 .addType(inputBufferDescriptorType)
1242 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1243 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1244
1245 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1246
1247 const VkDescriptorBufferInfo inputBufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
1248 const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
1249 DescriptorSetUpdateBuilder()
1250 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), inputBufferDescriptorType, &inputBufferDescriptorInfo)
1251 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1252 .update(vk, device);
1253
1254 // Perform the computation
1255
1256 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1257 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1258 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1259
1260 const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
1261
1262 const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
1263
1264 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1265 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1266
1267 // Start recording commands
1268
1269 beginCommandBuffer(vk, *cmdBuffer);
1270
1271 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1272 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1273
1274 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1275 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1276 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1277
1278 endCommandBuffer(vk, *cmdBuffer);
1279
1280 // Wait for completion
1281
1282 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1283
1284 // Validate the results
1285
1286 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1287 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1288
1289 const tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(outputBufferAllocation.getHostPtr());
1290 const tcu::UVec4* refBufferPtr = static_cast<tcu::UVec4*>(inputBuffer.getAllocation().getHostPtr());
1291
1292 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1293 {
1294 const deUint32 res = bufferPtr[ndx].x();
1295 const deUint32 ref = ~refBufferPtr[ndx].x();
1296
1297 if (res != ref)
1298 {
1299 std::ostringstream msg;
1300 msg << "Comparison failed for Output.values[" << ndx << "]";
1301 return tcu::TestStatus::fail(msg.str());
1302 }
1303 }
1304 return tcu::TestStatus::pass("Compute succeeded");
1305 }
1306
// Test case: a compute shader inverts (~x) every value of a single SSBO in
// place. The buffer may be declared with an explicit ("sized") array length
// or as a runtime-sized array, controlled by the 'sized' flag.
class InvertSSBOInPlaceTest : public vkt::TestCase
{
public:
						InvertSSBOInPlaceTest	(tcu::TestContext&	testCtx,
												 const std::string&	name,
												 const std::string&	description,
												 const deUint32		numValues,
												 const bool			sized,
												 const tcu::IVec3&	localSize,
												 const tcu::IVec3&	workSize);


	// Generates the GLSL compute shader under the name "comp".
	void				initPrograms			(SourceCollections&	sourceCollections) const;
	TestInstance*		createInstance			(Context&			context) const;

private:
	const deUint32		m_numValues;	// total number of uints processed
	const bool			m_sized;		// true = explicit array size in the shader
	const tcu::IVec3	m_localSize;	// compute workgroup size
	const tcu::IVec3	m_workSize;		// dispatch size (workgroup counts)
};
1328
// Executes the in-place invert test: fills the buffer, dispatches the shader,
// and verifies every value was complemented.
class InvertSSBOInPlaceTestInstance : public vkt::TestInstance
{
public:
						InvertSSBOInPlaceTestInstance	(Context&			context,
														 const deUint32		numValues,
														 const tcu::IVec3&	localSize,
														 const tcu::IVec3&	workSize);

	// Runs the test once; returns pass/fail status.
	tcu::TestStatus		iterate							(void);

private:
	const deUint32		m_numValues;	// total number of uints processed
	const tcu::IVec3	m_localSize;	// compute workgroup size
	const tcu::IVec3	m_workSize;		// dispatch size (workgroup counts)
};
1344
InvertSSBOInPlaceTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const deUint32 numValues,const bool sized,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)1345 InvertSSBOInPlaceTest::InvertSSBOInPlaceTest (tcu::TestContext& testCtx,
1346 const std::string& name,
1347 const std::string& description,
1348 const deUint32 numValues,
1349 const bool sized,
1350 const tcu::IVec3& localSize,
1351 const tcu::IVec3& workSize)
1352 : TestCase (testCtx, name, description)
1353 , m_numValues (numValues)
1354 , m_sized (sized)
1355 , m_localSize (localSize)
1356 , m_workSize (workSize)
1357 {
1358 DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1359 }
1360
initPrograms(SourceCollections & sourceCollections) const1361 void InvertSSBOInPlaceTest::initPrograms (SourceCollections& sourceCollections) const
1362 {
1363 std::ostringstream src;
1364 src << "#version 310 es\n"
1365 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1366 << "layout(binding = 0) buffer InOut {\n"
1367 << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1368 << "} sb_inout;\n"
1369 << "void main (void) {\n"
1370 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1371 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
1372 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1373 << " uint offset = numValuesPerInv*groupNdx;\n"
1374 << "\n"
1375 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1376 << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
1377 << "}\n";
1378
1379 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1380 }
1381
createInstance(Context & context) const1382 TestInstance* InvertSSBOInPlaceTest::createInstance (Context& context) const
1383 {
1384 return new InvertSSBOInPlaceTestInstance(context, m_numValues, m_localSize, m_workSize);
1385 }
1386
InvertSSBOInPlaceTestInstance(Context & context,const deUint32 numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)1387 InvertSSBOInPlaceTestInstance::InvertSSBOInPlaceTestInstance (Context& context,
1388 const deUint32 numValues,
1389 const tcu::IVec3& localSize,
1390 const tcu::IVec3& workSize)
1391 : TestInstance (context)
1392 , m_numValues (numValues)
1393 , m_localSize (localSize)
1394 , m_workSize (workSize)
1395 {
1396 }
1397
// Dispatches a compute shader that bitwise-inverts every element of a single
// host-visible SSBO in place, then verifies each result on the host against
// a saved copy of the original random data.
tcu::TestStatus InvertSSBOInPlaceTestInstance::iterate (void)
{
	const DeviceInterface&	vk					= m_context.getDeviceInterface();
	const VkDevice			device				= m_context.getDevice();
	const VkQueue			queue				= m_context.getUniversalQueue();
	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
	Allocator&				allocator			= m_context.getDefaultAllocator();

	// Create an input/output buffer

	const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
	const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

	// Fill the buffer with data

	typedef std::vector<deUint32> data_vector_t;
	data_vector_t inputData(m_numValues);

	{
		de::Random rnd(0x82ce7f);	// fixed seed => reproducible test data
		const Allocation& bufferAllocation = buffer.getAllocation();
		deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
		// Write random values to the mapped buffer and keep a host-side copy,
		// since the shader overwrites the buffer contents in place.
		for (deUint32 i = 0; i < m_numValues; ++i)
			inputData[i] = *bufferPtr++ = rnd.getUint32();

		// Make the host writes visible to the device.
		flushMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
	}

	// Create descriptor set

	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
		DescriptorSetLayoutBuilder()
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(vk, device));

	const Unique<VkDescriptorPool> descriptorPool(
		DescriptorPoolBuilder()
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

	// Bind the whole buffer at binding 0 (matches the shader's InOut block).
	const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
	DescriptorSetUpdateBuilder()
		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
		.update(vk, device);

	// Perform the computation

	const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
	const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));

	// Host writes must be visible to the shader's reads before the dispatch.
	const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);

	// Shader writes must be visible to the host's reads after the dispatch.
	const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);

	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
	const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));

	// Start recording commands

	beginCommandBuffer(vk, *cmdBuffer);

	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
	vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

	// host -> compute: guard the input data; compute -> host: guard the results.
	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
	vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);

	endCommandBuffer(vk, *cmdBuffer);

	// Wait for completion

	submitCommandsAndWait(vk, device, queue, *cmdBuffer);

	// Validate the results

	const Allocation& bufferAllocation = buffer.getAllocation();
	// Make the device writes visible to the host before reading.
	invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);

	const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());

	for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
	{
		const deUint32 res = bufferPtr[ndx];
		const deUint32 ref = ~inputData[ndx];	// shader is expected to bitwise-invert each value

		if (res != ref)
		{
			std::ostringstream msg;
			msg << "Comparison failed for InOut.values[" << ndx << "]";
			return tcu::TestStatus::fail(msg.str());
		}
	}
	return tcu::TestStatus::pass("Compute succeeded");
}
1496
// Test case: a compute shader writes deterministic patterns into two separate
// SSBOs in the same dispatch; verifies both buffers independently.
class WriteToMultipleSSBOTest : public vkt::TestCase
{
public:
						WriteToMultipleSSBOTest	(tcu::TestContext&	testCtx,
												 const std::string&	name,
												 const std::string&	description,
												 const deUint32		numValues,
												 const bool			sized,
												 const tcu::IVec3&	localSize,
												 const tcu::IVec3&	workSize);

	void				initPrograms			(SourceCollections& sourceCollections) const;
	TestInstance*		createInstance			(Context&			context) const;

private:
	const deUint32		m_numValues;	// number of elements per output buffer
	const bool			m_sized;		// true => declare explicitly-sized arrays in GLSL, false => runtime-sized
	const tcu::IVec3	m_localSize;	// workgroup (local) size
	const tcu::IVec3	m_workSize;		// number of workgroups per dimension
};
1517
// Per-run instance for WriteToMultipleSSBOTest; executes the dispatch and
// validates both output buffers.
class WriteToMultipleSSBOTestInstance : public vkt::TestInstance
{
public:
						WriteToMultipleSSBOTestInstance	(Context&			context,
														 const deUint32		numValues,
														 const tcu::IVec3&	localSize,
														 const tcu::IVec3&	workSize);

	tcu::TestStatus		iterate							(void);

private:
	const deUint32		m_numValues;	// number of elements per output buffer
	const tcu::IVec3	m_localSize;	// workgroup (local) size
	const tcu::IVec3	m_workSize;		// number of workgroups per dimension
};
1533
// Constructor: stores the test parameters and asserts that the total number of
// values divides evenly among all invocations (each invocation writes the same
// number of elements in the shader).
WriteToMultipleSSBOTest::WriteToMultipleSSBOTest (tcu::TestContext&		testCtx,
												  const std::string&	name,
												  const std::string&	description,
												  const deUint32		numValues,
												  const bool			sized,
												  const tcu::IVec3&		localSize,
												  const tcu::IVec3&		workSize)
	: TestCase		(testCtx, name, description)
	, m_numValues	(numValues)
	, m_sized		(sized)
	, m_localSize	(localSize)
	, m_workSize	(workSize)
{
	DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
}
1549
// Generates the compute shader: every invocation writes its slice of Out0 with
// ascending indices and its slice of Out1 with descending values. The arrays
// are declared sized or runtime-sized depending on m_sized; either way
// .length() resolves to m_numValues at run time.
void WriteToMultipleSSBOTest::initPrograms (SourceCollections& sourceCollections) const
{
	std::ostringstream src;
	src << "#version 310 es\n"
		<< "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
		<< "layout(binding = 0) writeonly buffer Out0 {\n"
		<< " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
		<< "} sb_out0;\n"
		<< "layout(binding = 1) writeonly buffer Out1 {\n"
		<< " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
		<< "} sb_out1;\n"
		<< "void main (void) {\n"
		<< " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
		<< "\n"
		<< " {\n"
		<< " uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
		<< " uint offset = numValuesPerInv*groupNdx;\n"
		<< "\n"
		<< " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
		<< " sb_out0.values[offset + ndx] = offset + ndx;\n"
		<< " }\n"
		<< " {\n"
		<< " uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
		<< " uint offset = numValuesPerInv*groupNdx;\n"
		<< "\n"
		<< " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
		<< " sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
		<< " }\n"
		<< "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
1583
createInstance(Context & context) const1584 TestInstance* WriteToMultipleSSBOTest::createInstance (Context& context) const
1585 {
1586 return new WriteToMultipleSSBOTestInstance(context, m_numValues, m_localSize, m_workSize);
1587 }
1588
// Constructor: stores the run parameters; all work happens in iterate().
WriteToMultipleSSBOTestInstance::WriteToMultipleSSBOTestInstance (Context&			context,
																  const deUint32	numValues,
																  const tcu::IVec3&	localSize,
																  const tcu::IVec3&	workSize)
	: TestInstance	(context)
	, m_numValues	(numValues)
	, m_localSize	(localSize)
	, m_workSize	(workSize)
{
}
1599
// Dispatches a compute shader that fills two SSBOs with known patterns
// (Out0: ascending indices; Out1: descending values), then checks both
// buffers on the host.
tcu::TestStatus WriteToMultipleSSBOTestInstance::iterate (void)
{
	const DeviceInterface&	vk					= m_context.getDeviceInterface();
	const VkDevice			device				= m_context.getDevice();
	const VkQueue			queue				= m_context.getUniversalQueue();
	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
	Allocator&				allocator			= m_context.getDefaultAllocator();

	// Create two output buffers

	const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
	const Buffer buffer0(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
	const Buffer buffer1(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

	// Create descriptor set

	// Two storage-buffer bindings (0 and 1), matching Out0/Out1 in the shader.
	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
		DescriptorSetLayoutBuilder()
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(vk, device));

	const Unique<VkDescriptorPool> descriptorPool(
		DescriptorPoolBuilder()
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

	const VkDescriptorBufferInfo buffer0DescriptorInfo = makeDescriptorBufferInfo(*buffer0, 0ull, bufferSizeBytes);
	const VkDescriptorBufferInfo buffer1DescriptorInfo = makeDescriptorBufferInfo(*buffer1, 0ull, bufferSizeBytes);
	DescriptorSetUpdateBuilder()
		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer0DescriptorInfo)
		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer1DescriptorInfo)
		.update(vk, device);

	// Perform the computation

	const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
	const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));

	// Make shader writes to both buffers visible to host reads.
	// (No host-write barrier is needed: the host never writes these buffers.)
	const VkBufferMemoryBarrier shaderWriteBarriers[] =
	{
		makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer0, 0ull, bufferSizeBytes),
		makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer1, 0ull, bufferSizeBytes)
	};

	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
	const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));

	// Start recording commands

	beginCommandBuffer(vk, *cmdBuffer);

	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
	vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

	vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, DE_LENGTH_OF_ARRAY(shaderWriteBarriers), shaderWriteBarriers, 0, (const VkImageMemoryBarrier*)DE_NULL);

	endCommandBuffer(vk, *cmdBuffer);

	// Wait for completion

	submitCommandsAndWait(vk, device, queue, *cmdBuffer);

	// Validate the results
	{
		// Out0 must contain ascending indices: values[i] == i.
		const Allocation& buffer0Allocation = buffer0.getAllocation();
		invalidateMappedMemoryRange(vk, device, buffer0Allocation.getMemory(), buffer0Allocation.getOffset(), bufferSizeBytes);
		const deUint32* buffer0Ptr = static_cast<deUint32*>(buffer0Allocation.getHostPtr());

		for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
		{
			const deUint32 res = buffer0Ptr[ndx];
			const deUint32 ref = ndx;

			if (res != ref)
			{
				std::ostringstream msg;
				msg << "Comparison failed for Out0.values[" << ndx << "] res=" << res << " ref=" << ref;
				return tcu::TestStatus::fail(msg.str());
			}
		}
	}
	{
		// Out1 must contain descending values: values[i] == numValues - i.
		const Allocation& buffer1Allocation = buffer1.getAllocation();
		invalidateMappedMemoryRange(vk, device, buffer1Allocation.getMemory(), buffer1Allocation.getOffset(), bufferSizeBytes);
		const deUint32* buffer1Ptr = static_cast<deUint32*>(buffer1Allocation.getHostPtr());

		for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
		{
			const deUint32 res = buffer1Ptr[ndx];
			const deUint32 ref = m_numValues - ndx;

			if (res != ref)
			{
				std::ostringstream msg;
				msg << "Comparison failed for Out1.values[" << ndx << "] res=" << res << " ref=" << ref;
				return tcu::TestStatus::fail(msg.str());
			}
		}
	}
	return tcu::TestStatus::pass("Compute succeeded");
}
1706
// Test case: two back-to-back compute dispatches communicate through an
// intermediate SSBO, guarded by a compute->compute buffer barrier; the second
// dispatch atomically sums the values the first one wrote.
class SSBOBarrierTest : public vkt::TestCase
{
public:
						SSBOBarrierTest		(tcu::TestContext&	testCtx,
											 const std::string&	name,
											 const std::string&	description,
											 const tcu::IVec3&	workSize);

	void				initPrograms		(SourceCollections& sourceCollections) const;
	TestInstance*		createInstance		(Context&			context) const;

private:
	const tcu::IVec3	m_workSize;	// number of workgroups per dimension (local size is 1)
};
1721
// Per-run instance for SSBOBarrierTest; records and submits both dispatches
// and validates the atomic sum.
class SSBOBarrierTestInstance : public vkt::TestInstance
{
public:
						SSBOBarrierTestInstance	(Context&			context,
												 const tcu::IVec3&	workSize);

	tcu::TestStatus		iterate					(void);

private:
	const tcu::IVec3	m_workSize;	// number of workgroups per dimension
};
1733
// Constructor: stores the dispatch size; shaders are generated in initPrograms().
SSBOBarrierTest::SSBOBarrierTest (tcu::TestContext&		testCtx,
								  const std::string&	name,
								  const std::string&	description,
								  const tcu::IVec3&		workSize)
	: TestCase		(testCtx, name, description)
	, m_workSize	(workSize)
{
}
1742
// Generates the two shaders: "comp0" writes u_baseVal + workgroup index into
// the work buffer (binding 1); "comp1" reads those values back and atomically
// accumulates them into the single-uint output buffer (binding 0). Both use a
// local size of 1, so one invocation per workgroup.
void SSBOBarrierTest::initPrograms (SourceCollections& sourceCollections) const
{
	sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
		"#version 310 es\n"
		"layout (local_size_x = 1) in;\n"
		"layout(binding = 2) readonly uniform Constants {\n"
		" uint u_baseVal;\n"
		"};\n"
		"layout(binding = 1) writeonly buffer Output {\n"
		" uint values[];\n"
		"};\n"
		"void main (void) {\n"
		" uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
		" values[offset] = u_baseVal + offset;\n"
		"}\n");

	sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
		"#version 310 es\n"
		"layout (local_size_x = 1) in;\n"
		"layout(binding = 1) readonly buffer Input {\n"
		" uint values[];\n"
		"};\n"
		"layout(binding = 0) coherent buffer Output {\n"
		" uint sum;\n"
		"};\n"
		"void main (void) {\n"
		" uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
		" uint value = values[offset];\n"
		" atomicAdd(sum, value);\n"
		"}\n");
}
1774
createInstance(Context & context) const1775 TestInstance* SSBOBarrierTest::createInstance (Context& context) const
1776 {
1777 return new SSBOBarrierTestInstance(context, m_workSize);
1778 }
1779
// Constructor: stores the dispatch size; all work happens in iterate().
SSBOBarrierTestInstance::SSBOBarrierTestInstance (Context& context, const tcu::IVec3& workSize)
	: TestInstance	(context)
	, m_workSize	(workSize)
{
}
1785
iterate(void)1786 tcu::TestStatus SSBOBarrierTestInstance::iterate (void)
1787 {
1788 const DeviceInterface& vk = m_context.getDeviceInterface();
1789 const VkDevice device = m_context.getDevice();
1790 const VkQueue queue = m_context.getUniversalQueue();
1791 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1792 Allocator& allocator = m_context.getDefaultAllocator();
1793
1794 // Create a work buffer used by both shaders
1795
1796 const int workGroupCount = multiplyComponents(m_workSize);
1797 const VkDeviceSize workBufferSizeBytes = sizeof(deUint32) * workGroupCount;
1798 const Buffer workBuffer(vk, device, allocator, makeBufferCreateInfo(workBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::Any);
1799
1800 // Create an output buffer
1801
1802 const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
1803 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1804
1805 // Initialize atomic counter value to zero
1806 {
1807 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1808 deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1809 *outputBufferPtr = 0;
1810 flushMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1811 }
1812
1813 // Create a uniform buffer (to pass uniform constants)
1814
1815 const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
1816 const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
1817
1818 // Set the constants in the uniform buffer
1819
1820 const deUint32 baseValue = 127;
1821 {
1822 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
1823 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
1824 uniformBufferPtr[0] = baseValue;
1825
1826 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
1827 }
1828
1829 // Create descriptor set
1830
1831 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1832 DescriptorSetLayoutBuilder()
1833 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1834 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1835 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1836 .build(vk, device));
1837
1838 const Unique<VkDescriptorPool> descriptorPool(
1839 DescriptorPoolBuilder()
1840 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1841 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
1842 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1843
1844 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1845
1846 const VkDescriptorBufferInfo workBufferDescriptorInfo = makeDescriptorBufferInfo(*workBuffer, 0ull, workBufferSizeBytes);
1847 const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
1848 const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
1849 DescriptorSetUpdateBuilder()
1850 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1851 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &workBufferDescriptorInfo)
1852 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
1853 .update(vk, device);
1854
1855 // Perform the computation
1856
1857 const Unique<VkShaderModule> shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
1858 const Unique<VkShaderModule> shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
1859
1860 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1861 const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
1862 const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
1863
1864 const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
1865
1866 const VkBufferMemoryBarrier betweenShadersBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *workBuffer, 0ull, workBufferSizeBytes);
1867
1868 const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
1869
1870 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1871 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1872
1873 // Start recording commands
1874
1875 beginCommandBuffer(vk, *cmdBuffer);
1876
1877 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
1878 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1879
1880 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1881
1882 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1883 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &betweenShadersBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1884
1885 // Switch to the second shader program
1886 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
1887
1888 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1889 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1890
1891 endCommandBuffer(vk, *cmdBuffer);
1892
1893 // Wait for completion
1894
1895 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1896
1897 // Validate the results
1898
1899 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1900 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1901
1902 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1903 const deUint32 res = *bufferPtr;
1904 deUint32 ref = 0;
1905
1906 for (int ndx = 0; ndx < workGroupCount; ++ndx)
1907 ref += baseValue + ndx;
1908
1909 if (res != ref)
1910 {
1911 std::ostringstream msg;
1912 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
1913 return tcu::TestStatus::fail(msg.str());
1914 }
1915 return tcu::TestStatus::pass("Compute succeeded");
1916 }
1917
// Test case: each workgroup atomically accumulates m_localSize input values
// into one pixel of an r32ui storage image via imageAtomicAdd; the image is
// then copied to a buffer and verified on the host.
class ImageAtomicOpTest : public vkt::TestCase
{
public:
						ImageAtomicOpTest		(tcu::TestContext&	testCtx,
												 const std::string&	name,
												 const std::string&	description,
												 const deUint32		localSize,
												 const tcu::IVec2&	imageSize);

	void				initPrograms			(SourceCollections& sourceCollections) const;
	TestInstance*		createInstance			(Context&			context) const;

private:
	const deUint32		m_localSize;	// invocations per workgroup (= values summed per pixel)
	const tcu::IVec2	m_imageSize;	// image extent; one workgroup per pixel
};
1934
// Per-run instance for ImageAtomicOpTest; performs the dispatch, the
// image-to-buffer copy and the host-side verification.
class ImageAtomicOpTestInstance : public vkt::TestInstance
{
public:
						ImageAtomicOpTestInstance	(Context&			context,
													 const deUint32		localSize,
													 const tcu::IVec2&	imageSize);

	tcu::TestStatus		iterate						(void);

private:
	const deUint32		m_localSize;	// invocations per workgroup
	const tcu::IVec2	m_imageSize;	// image extent; one workgroup per pixel
};
1948
// Constructor: stores the workgroup size and image extent used by the shader.
ImageAtomicOpTest::ImageAtomicOpTest (tcu::TestContext&		testCtx,
									  const std::string&	name,
									  const std::string&	description,
									  const deUint32		localSize,
									  const tcu::IVec2&		imageSize)
	: TestCase		(testCtx, name, description)
	, m_localSize	(localSize)
	, m_imageSize	(imageSize)
{
}
1959
// Generates the compute shader: one workgroup per pixel. The first invocation
// of each workgroup zeroes its pixel, then after a barrier every invocation
// adds its input value to that pixel with imageAtomicAdd, so each pixel ends
// up holding the sum of m_localSize input values.
void ImageAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
{
	std::ostringstream src;
	src << "#version 310 es\n"
		<< "#extension GL_OES_shader_image_atomic : require\n"
		<< "layout (local_size_x = " << m_localSize << ") in;\n"
		<< "layout(binding = 1, r32ui) coherent uniform highp uimage2D u_dstImg;\n"
		<< "layout(binding = 0) readonly buffer Input {\n"
		<< " uint values[" << (multiplyComponents(m_imageSize) * m_localSize) << "];\n"
		<< "} sb_in;\n\n"
		<< "void main (void) {\n"
		<< " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
		<< " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
		<< "\n"
		<< " if (gl_LocalInvocationIndex == 0u)\n"
		<< " imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
		<< " memoryBarrierImage();\n"
		<< " barrier();\n"
		<< " imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
		<< "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
1983
createInstance(Context & context) const1984 TestInstance* ImageAtomicOpTest::createInstance (Context& context) const
1985 {
1986 return new ImageAtomicOpTestInstance(context, m_localSize, m_imageSize);
1987 }
1988
// Constructor: stores the run parameters; all work happens in iterate().
ImageAtomicOpTestInstance::ImageAtomicOpTestInstance (Context& context, const deUint32 localSize, const tcu::IVec2& imageSize)
	: TestInstance	(context)
	, m_localSize	(localSize)
	, m_imageSize	(imageSize)
{
}
1995
iterate(void)1996 tcu::TestStatus ImageAtomicOpTestInstance::iterate (void)
1997 {
1998 const DeviceInterface& vk = m_context.getDeviceInterface();
1999 const VkDevice device = m_context.getDevice();
2000 const VkQueue queue = m_context.getUniversalQueue();
2001 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2002 Allocator& allocator = m_context.getDefaultAllocator();
2003
2004 // Create an image
2005
2006 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
2007 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2008
2009 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2010 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2011
2012 // Input buffer
2013
2014 const deUint32 numInputValues = multiplyComponents(m_imageSize) * m_localSize;
2015 const VkDeviceSize inputBufferSizeBytes = sizeof(deUint32) * numInputValues;
2016
2017 const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(inputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
2018
2019 // Populate the input buffer with test data
2020 {
2021 de::Random rnd(0x77238ac2);
2022 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
2023 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
2024 for (deUint32 i = 0; i < numInputValues; ++i)
2025 *bufferPtr++ = rnd.getUint32();
2026
2027 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), inputBufferSizeBytes);
2028 }
2029
2030 // Create a buffer to store shader output (copied from image data)
2031
2032 const deUint32 imageArea = multiplyComponents(m_imageSize);
2033 const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32) * imageArea;
2034 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
2035
2036 // Create descriptor set
2037
2038 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2039 DescriptorSetLayoutBuilder()
2040 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2041 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2042 .build(vk, device));
2043
2044 const Unique<VkDescriptorPool> descriptorPool(
2045 DescriptorPoolBuilder()
2046 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2047 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2048 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2049
2050 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2051
2052 // Set the bindings
2053
2054 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2055 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, inputBufferSizeBytes);
2056
2057 DescriptorSetUpdateBuilder()
2058 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
2059 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2060 .update(vk, device);
2061
2062 // Perform the computation
2063 {
2064 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
2065 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2066 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2067
2068 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, inputBufferSizeBytes);
2069
2070 const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
2071 (VkAccessFlags)0, VK_ACCESS_SHADER_WRITE_BIT,
2072 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
2073 *image, subresourceRange);
2074
2075 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
2076 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
2077 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2078 *image, subresourceRange);
2079
2080 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2081
2082 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
2083
2084 // Prepare the command buffer
2085
2086 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2087 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2088
2089 // Start recording commands
2090
2091 beginCommandBuffer(vk, *cmdBuffer);
2092
2093 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2094 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2095
2096 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 1, &imageLayoutBarrier);
2097 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2098
2099 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePreCopyBarrier);
2100 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, ©Params);
2101 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &outputBufferPostCopyBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2102
2103 endCommandBuffer(vk, *cmdBuffer);
2104
2105 // Wait for completion
2106
2107 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2108 }
2109
2110 // Validate the results
2111
2112 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2113 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2114
2115 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2116 const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
2117
2118 for (deUint32 pixelNdx = 0; pixelNdx < imageArea; ++pixelNdx)
2119 {
2120 const deUint32 res = bufferPtr[pixelNdx];
2121 deUint32 ref = 0;
2122
2123 for (deUint32 offs = 0; offs < m_localSize; ++offs)
2124 ref += refBufferPtr[pixelNdx * m_localSize + offs];
2125
2126 if (res != ref)
2127 {
2128 std::ostringstream msg;
2129 msg << "Comparison failed for pixel " << pixelNdx;
2130 return tcu::TestStatus::fail(msg.str());
2131 }
2132 }
2133 return tcu::TestStatus::pass("Compute succeeded");
2134 }
2135
//! Test case: verifies that an image memory barrier between two compute
//! dispatches correctly synchronizes storage-image writes (first shader)
//! with storage-image reads (second shader).
class ImageBarrierTest : public vkt::TestCase
{
public:
						ImageBarrierTest	(tcu::TestContext&	testCtx,
											const std::string&	name,
											const std::string&	description,
											const tcu::IVec2&	imageSize);

	void				initPrograms		(SourceCollections& sourceCollections) const;
	TestInstance*		createInstance		(Context&			context) const;

private:
	// Image dimensions; also determines the dispatch size (one workgroup per pixel).
	const tcu::IVec2	m_imageSize;
};
2150
//! Instance that records and submits the two-dispatch command buffer and
//! validates the accumulated atomic sum on the host.
class ImageBarrierTestInstance : public vkt::TestInstance
{
public:
						ImageBarrierTestInstance	(Context&			context,
													const tcu::IVec2&	imageSize);

	tcu::TestStatus		iterate						(void);

private:
	// Image dimensions; also determines the dispatch size (one workgroup per pixel).
	const tcu::IVec2	m_imageSize;
};
2162
ImageBarrierTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const tcu::IVec2 & imageSize)2163 ImageBarrierTest::ImageBarrierTest (tcu::TestContext& testCtx,
2164 const std::string& name,
2165 const std::string& description,
2166 const tcu::IVec2& imageSize)
2167 : TestCase (testCtx, name, description)
2168 , m_imageSize (imageSize)
2169 {
2170 }
2171
initPrograms(SourceCollections & sourceCollections) const2172 void ImageBarrierTest::initPrograms (SourceCollections& sourceCollections) const
2173 {
2174 sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
2175 "#version 310 es\n"
2176 "layout (local_size_x = 1) in;\n"
2177 "layout(binding = 2) readonly uniform Constants {\n"
2178 " uint u_baseVal;\n"
2179 "};\n"
2180 "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_img;\n"
2181 "void main (void) {\n"
2182 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
2183 " imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset + u_baseVal, 0, 0, 0));\n"
2184 "}\n");
2185
2186 sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
2187 "#version 310 es\n"
2188 "layout (local_size_x = 1) in;\n"
2189 "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_img;\n"
2190 "layout(binding = 0) coherent buffer Output {\n"
2191 " uint sum;\n"
2192 "};\n"
2193 "void main (void) {\n"
2194 " uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
2195 " atomicAdd(sum, value);\n"
2196 "}\n");
2197 }
2198
createInstance(Context & context) const2199 TestInstance* ImageBarrierTest::createInstance (Context& context) const
2200 {
2201 return new ImageBarrierTestInstance(context, m_imageSize);
2202 }
2203
ImageBarrierTestInstance(Context & context,const tcu::IVec2 & imageSize)2204 ImageBarrierTestInstance::ImageBarrierTestInstance (Context& context, const tcu::IVec2& imageSize)
2205 : TestInstance (context)
2206 , m_imageSize (imageSize)
2207 {
2208 }
2209
iterate(void)2210 tcu::TestStatus ImageBarrierTestInstance::iterate (void)
2211 {
2212 const DeviceInterface& vk = m_context.getDeviceInterface();
2213 const VkDevice device = m_context.getDevice();
2214 const VkQueue queue = m_context.getUniversalQueue();
2215 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2216 Allocator& allocator = m_context.getDefaultAllocator();
2217
2218 // Create an image used by both shaders
2219
2220 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_STORAGE_BIT);
2221 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2222
2223 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2224 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2225
2226 // Create an output buffer
2227
2228 const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
2229 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
2230
2231 // Initialize atomic counter value to zero
2232 {
2233 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2234 deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2235 *outputBufferPtr = 0;
2236 flushMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2237 }
2238
2239 // Create a uniform buffer (to pass uniform constants)
2240
2241 const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
2242 const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
2243
2244 // Set the constants in the uniform buffer
2245
2246 const deUint32 baseValue = 127;
2247 {
2248 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
2249 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
2250 uniformBufferPtr[0] = baseValue;
2251
2252 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
2253 }
2254
2255 // Create descriptor set
2256
2257 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2258 DescriptorSetLayoutBuilder()
2259 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2260 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2261 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2262 .build(vk, device));
2263
2264 const Unique<VkDescriptorPool> descriptorPool(
2265 DescriptorPoolBuilder()
2266 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2267 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2268 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
2269 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2270
2271 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2272
2273 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2274 const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
2275 const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
2276 DescriptorSetUpdateBuilder()
2277 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
2278 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2279 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
2280 .update(vk, device);
2281
2282 // Perform the computation
2283
2284 const Unique<VkShaderModule> shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
2285 const Unique<VkShaderModule> shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
2286
2287 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2288 const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
2289 const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
2290
2291 const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
2292
2293 const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
2294 0u, 0u,
2295 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
2296 *image, subresourceRange);
2297
2298 const VkImageMemoryBarrier imageBarrierBetweenShaders = makeImageMemoryBarrier(
2299 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
2300 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2301 *image, subresourceRange);
2302
2303 const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2304
2305 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2306 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2307
2308 // Start recording commands
2309
2310 beginCommandBuffer(vk, *cmdBuffer);
2311
2312 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
2313 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2314
2315 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 1, &imageLayoutBarrier);
2316
2317 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2318 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imageBarrierBetweenShaders);
2319
2320 // Switch to the second shader program
2321 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
2322
2323 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2324 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2325
2326 endCommandBuffer(vk, *cmdBuffer);
2327
2328 // Wait for completion
2329
2330 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2331
2332 // Validate the results
2333
2334 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2335 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2336
2337 const int numValues = multiplyComponents(m_imageSize);
2338 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2339 const deUint32 res = *bufferPtr;
2340 deUint32 ref = 0;
2341
2342 for (int ndx = 0; ndx < numValues; ++ndx)
2343 ref += baseValue + ndx;
2344
2345 if (res != ref)
2346 {
2347 std::ostringstream msg;
2348 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
2349 return tcu::TestStatus::fail(msg.str());
2350 }
2351 return tcu::TestStatus::pass("Compute succeeded");
2352 }
2353
2354 namespace EmptyShaderTest
2355 {
2356
createProgram(SourceCollections & dst)2357 void createProgram (SourceCollections& dst)
2358 {
2359 dst.glslSources.add("comp") << glu::ComputeSource(
2360 "#version 310 es\n"
2361 "layout (local_size_x = 1) in;\n"
2362 "void main (void) {}\n"
2363 );
2364 }
2365
createTest(Context & context)2366 tcu::TestStatus createTest (Context& context)
2367 {
2368 const DeviceInterface& vk = context.getDeviceInterface();
2369 const VkDevice device = context.getDevice();
2370 const VkQueue queue = context.getUniversalQueue();
2371 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2372
2373 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0u));
2374
2375 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device));
2376 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2377
2378 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2379 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2380
2381 // Start recording commands
2382
2383 beginCommandBuffer(vk, *cmdBuffer);
2384
2385 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2386
2387 const tcu::IVec3 workGroups(1, 1, 1);
2388 vk.cmdDispatch(*cmdBuffer, workGroups.x(), workGroups.y(), workGroups.z());
2389
2390 endCommandBuffer(vk, *cmdBuffer);
2391
2392 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2393
2394 return tcu::TestStatus::pass("Compute succeeded");
2395 }
2396
2397 } // EmptyShaderTest ns
2398 } // anonymous
2399
createBasicComputeShaderTests(tcu::TestContext & testCtx)2400 tcu::TestCaseGroup* createBasicComputeShaderTests (tcu::TestContext& testCtx)
2401 {
2402 de::MovePtr<tcu::TestCaseGroup> basicComputeTests(new tcu::TestCaseGroup(testCtx, "basic", "Basic compute tests"));
2403
2404 addFunctionCaseWithPrograms(basicComputeTests.get(), "empty_shader", "Shader that does nothing", EmptyShaderTest::createProgram, EmptyShaderTest::createTest);
2405
2406 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_single_invocation", "Copy from UBO to SSBO, inverting bits", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2407 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_single_group", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(2,1,4), tcu::IVec3(1,1,1)));
2408 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_multiple_invocations", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1)));
2409 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_multiple_groups", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2410
2411 basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_single_invocation", "Copy between SSBOs, inverting bits", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2412 basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_multiple_invocations", "Copy between SSBOs, inverting bits", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1)));
2413 basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_multiple_groups", "Copy between SSBOs, inverting bits", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2414
2415 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_rw_single_invocation", "Read and write same SSBO", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2416 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_rw_multiple_groups", "Read and write same SSBO", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2417 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_unsized_arr_single_invocation", "Read and write same SSBO", 256, false, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2418 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_unsized_arr_multiple_groups", "Read and write same SSBO", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2419
2420 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_arr_single_invocation", "Write to multiple SSBOs", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2421 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_arr_multiple_groups", "Write to multiple SSBOs", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2422 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_unsized_arr_single_invocation", "Write to multiple SSBOs", 256, false, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2423 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_unsized_arr_multiple_groups", "Write to multiple SSBOs", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2424
2425 basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_single_invocation", "SSBO local barrier usage", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2426 basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_single_group", "SSBO local barrier usage", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1)));
2427 basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_multiple_groups", "SSBO local barrier usage", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3)));
2428
2429 basicComputeTests->addChild(new SSBOBarrierTest(testCtx, "ssbo_cmd_barrier_single", "SSBO memory barrier usage", tcu::IVec3(1,1,1)));
2430 basicComputeTests->addChild(new SSBOBarrierTest(testCtx, "ssbo_cmd_barrier_multiple", "SSBO memory barrier usage", tcu::IVec3(11,5,7)));
2431
2432 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_single_invocation", "Basic shared variable usage", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2433 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_single_group", "Basic shared variable usage", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1)));
2434 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_multiple_invocations", "Basic shared variable usage", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4)));
2435 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_multiple_groups", "Basic shared variable usage", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3)));
2436
2437 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_single_invocation", "Atomic operation with shared var", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2438 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_single_group", "Atomic operation with shared var", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1)));
2439 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_multiple_invocations", "Atomic operation with shared var", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4)));
2440 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_multiple_groups", "Atomic operation with shared var", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3)));
2441
2442 basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx, "copy_image_to_ssbo_small", "Image to SSBO copy", tcu::IVec2(1,1), tcu::IVec2(64,64)));
2443 basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx, "copy_image_to_ssbo_large", "Image to SSBO copy", tcu::IVec2(2,4), tcu::IVec2(512,512)));
2444
2445 basicComputeTests->addChild(new CopySSBOToImageTest(testCtx, "copy_ssbo_to_image_small", "SSBO to image copy", tcu::IVec2(1, 1), tcu::IVec2(64, 64)));
2446 basicComputeTests->addChild(new CopySSBOToImageTest(testCtx, "copy_ssbo_to_image_large", "SSBO to image copy", tcu::IVec2(2, 4), tcu::IVec2(512, 512)));
2447
2448 basicComputeTests->addChild(new ImageAtomicOpTest(testCtx, "image_atomic_op_local_size_1", "Atomic operation with image", 1, tcu::IVec2(64,64)));
2449 basicComputeTests->addChild(new ImageAtomicOpTest(testCtx, "image_atomic_op_local_size_8", "Atomic operation with image", 8, tcu::IVec2(64,64)));
2450
2451 basicComputeTests->addChild(new ImageBarrierTest(testCtx, "image_barrier_single", "Image barrier", tcu::IVec2(1,1)));
2452 basicComputeTests->addChild(new ImageBarrierTest(testCtx, "image_barrier_multiple", "Image barrier", tcu::IVec2(64,64)));
2453
2454 return basicComputeTests.release();
2455 }
2456
2457 } // compute
2458 } // vkt
2459