1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  * Copyright (c) 2016 The Android Open Source Project
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Compute Shader Tests
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktComputeBasicComputeShaderTests.hpp"
26 #include "vktTestCase.hpp"
27 #include "vktTestCaseUtil.hpp"
28 #include "vktComputeTestsUtil.hpp"
29 
30 #include "vkDefs.hpp"
31 #include "vkRef.hpp"
32 #include "vkRefUtil.hpp"
33 #include "vkPlatform.hpp"
34 #include "vkPrograms.hpp"
35 #include "vkRefUtil.hpp"
36 #include "vkMemUtil.hpp"
37 #include "vkQueryUtil.hpp"
38 #include "vkBuilderUtil.hpp"
39 #include "vkTypeUtil.hpp"
40 
41 #include "deStringUtil.hpp"
42 #include "deUniquePtr.hpp"
43 #include "deRandom.hpp"
44 
45 #include <vector>
46 
47 using namespace vk;
48 
49 namespace vkt
50 {
51 namespace compute
52 {
53 namespace
54 {
55 
56 template<typename T, int size>
multiplyComponents(const tcu::Vector<T,size> & v)57 T multiplyComponents (const tcu::Vector<T, size>& v)
58 {
59 	T accum = 1;
60 	for (int i = 0; i < size; ++i)
61 		accum *= v[i];
62 	return accum;
63 }
64 
65 template<typename T>
squared(const T & a)66 inline T squared (const T& a)
67 {
68 	return a * a;
69 }
70 
make2DImageCreateInfo(const tcu::IVec2 & imageSize,const VkImageUsageFlags usage)71 inline VkImageCreateInfo make2DImageCreateInfo (const tcu::IVec2& imageSize, const VkImageUsageFlags usage)
72 {
73 	const VkImageCreateInfo imageParams =
74 	{
75 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,				// VkStructureType			sType;
76 		DE_NULL,											// const void*				pNext;
77 		0u,													// VkImageCreateFlags		flags;
78 		VK_IMAGE_TYPE_2D,									// VkImageType				imageType;
79 		VK_FORMAT_R32_UINT,									// VkFormat					format;
80 		vk::makeExtent3D(imageSize.x(), imageSize.y(), 1),	// VkExtent3D				extent;
81 		1u,													// deUint32					mipLevels;
82 		1u,													// deUint32					arrayLayers;
83 		VK_SAMPLE_COUNT_1_BIT,								// VkSampleCountFlagBits	samples;
84 		VK_IMAGE_TILING_OPTIMAL,							// VkImageTiling			tiling;
85 		usage,												// VkImageUsageFlags		usage;
86 		VK_SHARING_MODE_EXCLUSIVE,							// VkSharingMode			sharingMode;
87 		0u,													// deUint32					queueFamilyIndexCount;
88 		DE_NULL,											// const deUint32*			pQueueFamilyIndices;
89 		VK_IMAGE_LAYOUT_UNDEFINED,							// VkImageLayout			initialLayout;
90 	};
91 	return imageParams;
92 }
93 
makeBufferImageCopy(const tcu::IVec2 & imageSize)94 inline VkBufferImageCopy makeBufferImageCopy(const tcu::IVec2& imageSize)
95 {
96 	return compute::makeBufferImageCopy(vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), 1u);
97 }
98 
99 enum BufferType
100 {
101 	BUFFER_TYPE_UNIFORM,
102 	BUFFER_TYPE_SSBO,
103 };
104 
105 class SharedVarTest : public vkt::TestCase
106 {
107 public:
108 						SharedVarTest	(tcu::TestContext&		testCtx,
109 										 const std::string&		name,
110 										 const std::string&		description,
111 										 const tcu::IVec3&		localSize,
112 										 const tcu::IVec3&		workSize);
113 
114 	void				initPrograms	(SourceCollections&		sourceCollections) const;
115 	TestInstance*		createInstance	(Context&				context) const;
116 
117 private:
118 	const tcu::IVec3	m_localSize;
119 	const tcu::IVec3	m_workSize;
120 };
121 
122 class SharedVarTestInstance : public vkt::TestInstance
123 {
124 public:
125 									SharedVarTestInstance	(Context&			context,
126 															 const tcu::IVec3&	localSize,
127 															 const tcu::IVec3&	workSize);
128 
129 	tcu::TestStatus					iterate					(void);
130 
131 private:
132 	const tcu::IVec3				m_localSize;
133 	const tcu::IVec3				m_workSize;
134 };
135 
SharedVarTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)136 SharedVarTest::SharedVarTest (tcu::TestContext&		testCtx,
137 							  const std::string&	name,
138 							  const std::string&	description,
139 							  const tcu::IVec3&		localSize,
140 							  const tcu::IVec3&		workSize)
141 	: TestCase		(testCtx, name, description)
142 	, m_localSize	(localSize)
143 	, m_workSize	(workSize)
144 {
145 }
146 
initPrograms(SourceCollections & sourceCollections) const147 void SharedVarTest::initPrograms (SourceCollections& sourceCollections) const
148 {
149 	const int workGroupSize = multiplyComponents(m_localSize);
150 	const int workGroupCount = multiplyComponents(m_workSize);
151 	const int numValues = workGroupSize * workGroupCount;
152 
153 	std::ostringstream src;
154 	src << "#version 310 es\n"
155 		<< "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
156 		<< "layout(binding = 0) writeonly buffer Output {\n"
157 		<< "    uint values[" << numValues << "];\n"
158 		<< "} sb_out;\n\n"
159 		<< "shared uint offsets[" << workGroupSize << "];\n\n"
160 		<< "void main (void) {\n"
161 		<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
162 		<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
163 		<< "    uint globalOffs = localSize*globalNdx;\n"
164 		<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
165 		<< "\n"
166 		<< "    offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
167 		<< "    memoryBarrierShared();\n"
168 		<< "    barrier();\n"
169 		<< "    sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
170 		<< "}\n";
171 
172 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
173 }
174 
createInstance(Context & context) const175 TestInstance* SharedVarTest::createInstance (Context& context) const
176 {
177 	return new SharedVarTestInstance(context, m_localSize, m_workSize);
178 }
179 
SharedVarTestInstance(Context & context,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)180 SharedVarTestInstance::SharedVarTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
181 	: TestInstance	(context)
182 	, m_localSize	(localSize)
183 	, m_workSize	(workSize)
184 {
185 }
186 
iterate(void)187 tcu::TestStatus SharedVarTestInstance::iterate (void)
188 {
189 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
190 	const VkDevice			device				= m_context.getDevice();
191 	const VkQueue			queue				= m_context.getUniversalQueue();
192 	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
193 	Allocator&				allocator			= m_context.getDefaultAllocator();
194 
195 	const int workGroupSize = multiplyComponents(m_localSize);
196 	const int workGroupCount = multiplyComponents(m_workSize);
197 
198 	// Create a buffer and host-visible memory for it
199 
200 	const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
201 	const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
202 
203 	// Create descriptor set
204 
205 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
206 		DescriptorSetLayoutBuilder()
207 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
208 		.build(vk, device));
209 
210 	const Unique<VkDescriptorPool> descriptorPool(
211 		DescriptorPoolBuilder()
212 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
213 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
214 
215 	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
216 
217 	const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
218 	DescriptorSetUpdateBuilder()
219 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
220 		.update(vk, device);
221 
222 	// Perform the computation
223 
224 	const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
225 	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
226 	const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
227 
228 	const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
229 
230 	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
231 	const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
232 
233 	// Start recording commands
234 
235 	beginCommandBuffer(vk, *cmdBuffer);
236 
237 	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
238 	vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
239 
240 	vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
241 
242 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
243 
244 	endCommandBuffer(vk, *cmdBuffer);
245 
246 	// Wait for completion
247 
248 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
249 
250 	// Validate the results
251 
252 	const Allocation& bufferAllocation = buffer.getAllocation();
253 	invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
254 
255 	const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
256 
257 	for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
258 	{
259 		const int globalOffset = groupNdx * workGroupSize;
260 		for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
261 		{
262 			const deUint32 res = bufferPtr[globalOffset + localOffset];
263 			const deUint32 ref = globalOffset + squared(workGroupSize - localOffset - 1);
264 
265 			if (res != ref)
266 			{
267 				std::ostringstream msg;
268 				msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
269 				return tcu::TestStatus::fail(msg.str());
270 			}
271 		}
272 	}
273 	return tcu::TestStatus::pass("Compute succeeded");
274 }
275 
276 class SharedVarAtomicOpTest : public vkt::TestCase
277 {
278 public:
279 						SharedVarAtomicOpTest	(tcu::TestContext&	testCtx,
280 												 const std::string&	name,
281 												 const std::string&	description,
282 												 const tcu::IVec3&	localSize,
283 												 const tcu::IVec3&	workSize);
284 
285 	void				initPrograms			(SourceCollections& sourceCollections) const;
286 	TestInstance*		createInstance			(Context&			context) const;
287 
288 private:
289 	const tcu::IVec3	m_localSize;
290 	const tcu::IVec3	m_workSize;
291 };
292 
293 class SharedVarAtomicOpTestInstance : public vkt::TestInstance
294 {
295 public:
296 									SharedVarAtomicOpTestInstance	(Context&			context,
297 																	 const tcu::IVec3&	localSize,
298 																	 const tcu::IVec3&	workSize);
299 
300 	tcu::TestStatus					iterate							(void);
301 
302 private:
303 	const tcu::IVec3				m_localSize;
304 	const tcu::IVec3				m_workSize;
305 };
306 
SharedVarAtomicOpTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)307 SharedVarAtomicOpTest::SharedVarAtomicOpTest (tcu::TestContext&		testCtx,
308 											  const std::string&	name,
309 											  const std::string&	description,
310 											  const tcu::IVec3&		localSize,
311 											  const tcu::IVec3&		workSize)
312 	: TestCase		(testCtx, name, description)
313 	, m_localSize	(localSize)
314 	, m_workSize	(workSize)
315 {
316 }
317 
initPrograms(SourceCollections & sourceCollections) const318 void SharedVarAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
319 {
320 	const int workGroupSize = multiplyComponents(m_localSize);
321 	const int workGroupCount = multiplyComponents(m_workSize);
322 	const int numValues = workGroupSize * workGroupCount;
323 
324 	std::ostringstream src;
325 	src << "#version 310 es\n"
326 		<< "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
327 		<< "layout(binding = 0) writeonly buffer Output {\n"
328 		<< "    uint values[" << numValues << "];\n"
329 		<< "} sb_out;\n\n"
330 		<< "shared uint count;\n\n"
331 		<< "void main (void) {\n"
332 		<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
333 		<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
334 		<< "    uint globalOffs = localSize*globalNdx;\n"
335 		<< "\n"
336 		<< "    count = 0u;\n"
337 		<< "    memoryBarrierShared();\n"
338 		<< "    barrier();\n"
339 		<< "    uint oldVal = atomicAdd(count, 1u);\n"
340 		<< "    sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
341 		<< "}\n";
342 
343 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
344 }
345 
createInstance(Context & context) const346 TestInstance* SharedVarAtomicOpTest::createInstance (Context& context) const
347 {
348 	return new SharedVarAtomicOpTestInstance(context, m_localSize, m_workSize);
349 }
350 
SharedVarAtomicOpTestInstance(Context & context,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)351 SharedVarAtomicOpTestInstance::SharedVarAtomicOpTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
352 	: TestInstance	(context)
353 	, m_localSize	(localSize)
354 	, m_workSize	(workSize)
355 {
356 }
357 
iterate(void)358 tcu::TestStatus SharedVarAtomicOpTestInstance::iterate (void)
359 {
360 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
361 	const VkDevice			device				= m_context.getDevice();
362 	const VkQueue			queue				= m_context.getUniversalQueue();
363 	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
364 	Allocator&				allocator			= m_context.getDefaultAllocator();
365 
366 	const int workGroupSize = multiplyComponents(m_localSize);
367 	const int workGroupCount = multiplyComponents(m_workSize);
368 
369 	// Create a buffer and host-visible memory for it
370 
371 	const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
372 	const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
373 
374 	// Create descriptor set
375 
376 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
377 		DescriptorSetLayoutBuilder()
378 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
379 		.build(vk, device));
380 
381 	const Unique<VkDescriptorPool> descriptorPool(
382 		DescriptorPoolBuilder()
383 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
384 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
385 
386 	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
387 
388 	const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
389 	DescriptorSetUpdateBuilder()
390 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
391 		.update(vk, device);
392 
393 	// Perform the computation
394 
395 	const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
396 	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
397 	const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
398 
399 	const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
400 
401 	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
402 	const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
403 
404 	// Start recording commands
405 
406 	beginCommandBuffer(vk, *cmdBuffer);
407 
408 	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
409 	vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
410 
411 	vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
412 
413 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1u, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
414 
415 	endCommandBuffer(vk, *cmdBuffer);
416 
417 	// Wait for completion
418 
419 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
420 
421 	// Validate the results
422 
423 	const Allocation& bufferAllocation = buffer.getAllocation();
424 	invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
425 
426 	const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
427 
428 	for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
429 	{
430 		const int globalOffset = groupNdx * workGroupSize;
431 		for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
432 		{
433 			const deUint32 res = bufferPtr[globalOffset + localOffset];
434 			const deUint32 ref = localOffset + 1;
435 
436 			if (res != ref)
437 			{
438 				std::ostringstream msg;
439 				msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
440 				return tcu::TestStatus::fail(msg.str());
441 			}
442 		}
443 	}
444 	return tcu::TestStatus::pass("Compute succeeded");
445 }
446 
447 class SSBOLocalBarrierTest : public vkt::TestCase
448 {
449 public:
450 						SSBOLocalBarrierTest	(tcu::TestContext&	testCtx,
451 												 const std::string& name,
452 												 const std::string&	description,
453 												 const tcu::IVec3&	localSize,
454 												 const tcu::IVec3&	workSize);
455 
456 	void				initPrograms			(SourceCollections& sourceCollections) const;
457 	TestInstance*		createInstance			(Context&			context) const;
458 
459 private:
460 	const tcu::IVec3	m_localSize;
461 	const tcu::IVec3	m_workSize;
462 };
463 
464 class SSBOLocalBarrierTestInstance : public vkt::TestInstance
465 {
466 public:
467 									SSBOLocalBarrierTestInstance	(Context&			context,
468 																	 const tcu::IVec3&	localSize,
469 																	 const tcu::IVec3&	workSize);
470 
471 	tcu::TestStatus					iterate							(void);
472 
473 private:
474 	const tcu::IVec3				m_localSize;
475 	const tcu::IVec3				m_workSize;
476 };
477 
SSBOLocalBarrierTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)478 SSBOLocalBarrierTest::SSBOLocalBarrierTest (tcu::TestContext&	testCtx,
479 											const std::string&	name,
480 											const std::string&	description,
481 											const tcu::IVec3&	localSize,
482 											const tcu::IVec3&	workSize)
483 	: TestCase		(testCtx, name, description)
484 	, m_localSize	(localSize)
485 	, m_workSize	(workSize)
486 {
487 }
488 
initPrograms(SourceCollections & sourceCollections) const489 void SSBOLocalBarrierTest::initPrograms (SourceCollections& sourceCollections) const
490 {
491 	const int workGroupSize = multiplyComponents(m_localSize);
492 	const int workGroupCount = multiplyComponents(m_workSize);
493 	const int numValues = workGroupSize * workGroupCount;
494 
495 	std::ostringstream src;
496 	src << "#version 310 es\n"
497 		<< "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
498 		<< "layout(binding = 0) coherent buffer Output {\n"
499 		<< "    uint values[" << numValues << "];\n"
500 		<< "} sb_out;\n\n"
501 		<< "void main (void) {\n"
502 		<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
503 		<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
504 		<< "    uint globalOffs = localSize*globalNdx;\n"
505 		<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
506 		<< "\n"
507 		<< "    sb_out.values[globalOffs + localOffs] = globalOffs;\n"
508 		<< "    memoryBarrierBuffer();\n"
509 		<< "    barrier();\n"
510 		<< "    sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n"		// += so we read and write
511 		<< "    memoryBarrierBuffer();\n"
512 		<< "    barrier();\n"
513 		<< "    sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
514 		<< "}\n";
515 
516 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
517 }
518 
createInstance(Context & context) const519 TestInstance* SSBOLocalBarrierTest::createInstance (Context& context) const
520 {
521 	return new SSBOLocalBarrierTestInstance(context, m_localSize, m_workSize);
522 }
523 
SSBOLocalBarrierTestInstance(Context & context,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)524 SSBOLocalBarrierTestInstance::SSBOLocalBarrierTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
525 	: TestInstance	(context)
526 	, m_localSize	(localSize)
527 	, m_workSize	(workSize)
528 {
529 }
530 
iterate(void)531 tcu::TestStatus SSBOLocalBarrierTestInstance::iterate (void)
532 {
533 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
534 	const VkDevice			device				= m_context.getDevice();
535 	const VkQueue			queue				= m_context.getUniversalQueue();
536 	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
537 	Allocator&				allocator			= m_context.getDefaultAllocator();
538 
539 	const int workGroupSize = multiplyComponents(m_localSize);
540 	const int workGroupCount = multiplyComponents(m_workSize);
541 
542 	// Create a buffer and host-visible memory for it
543 
544 	const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
545 	const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
546 
547 	// Create descriptor set
548 
549 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
550 		DescriptorSetLayoutBuilder()
551 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
552 		.build(vk, device));
553 
554 	const Unique<VkDescriptorPool> descriptorPool(
555 		DescriptorPoolBuilder()
556 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
557 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
558 
559 	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
560 
561 	const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
562 	DescriptorSetUpdateBuilder()
563 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
564 		.update(vk, device);
565 
566 	// Perform the computation
567 
568 	const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
569 	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
570 	const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
571 
572 	const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
573 
574 	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
575 	const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
576 
577 	// Start recording commands
578 
579 	beginCommandBuffer(vk, *cmdBuffer);
580 
581 	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
582 	vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
583 
584 	vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
585 
586 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
587 
588 	endCommandBuffer(vk, *cmdBuffer);
589 
590 	// Wait for completion
591 
592 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
593 
594 	// Validate the results
595 
596 	const Allocation& bufferAllocation = buffer.getAllocation();
597 	invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
598 
599 	const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
600 
601 	for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
602 	{
603 		const int globalOffset = groupNdx * workGroupSize;
604 		for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
605 		{
606 			const deUint32	res		= bufferPtr[globalOffset + localOffset];
607 			const int		offs0	= localOffset - 1 < 0 ? ((localOffset + workGroupSize - 1) % workGroupSize) : ((localOffset - 1) % workGroupSize);
608 			const int		offs1	= localOffset - 2 < 0 ? ((localOffset + workGroupSize - 2) % workGroupSize) : ((localOffset - 2) % workGroupSize);
609 			const deUint32	ref		= static_cast<deUint32>(globalOffset + offs0 + offs1);
610 
611 			if (res != ref)
612 			{
613 				std::ostringstream msg;
614 				msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
615 				return tcu::TestStatus::fail(msg.str());
616 			}
617 		}
618 	}
619 	return tcu::TestStatus::pass("Compute succeeded");
620 }
621 
622 class CopyImageToSSBOTest : public vkt::TestCase
623 {
624 public:
625 						CopyImageToSSBOTest		(tcu::TestContext&	testCtx,
626 												 const std::string&	name,
627 												 const std::string&	description,
628 												 const tcu::IVec2&	localSize,
629 												 const tcu::IVec2&	imageSize);
630 
631 	void				initPrograms			(SourceCollections& sourceCollections) const;
632 	TestInstance*		createInstance			(Context&			context) const;
633 
634 private:
635 	const tcu::IVec2	m_localSize;
636 	const tcu::IVec2	m_imageSize;
637 };
638 
639 class CopyImageToSSBOTestInstance : public vkt::TestInstance
640 {
641 public:
642 									CopyImageToSSBOTestInstance		(Context&			context,
643 																	 const tcu::IVec2&	localSize,
644 																	 const tcu::IVec2&	imageSize);
645 
646 	tcu::TestStatus					iterate							(void);
647 
648 private:
649 	const tcu::IVec2				m_localSize;
650 	const tcu::IVec2				m_imageSize;
651 };
652 
CopyImageToSSBOTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const tcu::IVec2 & localSize,const tcu::IVec2 & imageSize)653 CopyImageToSSBOTest::CopyImageToSSBOTest (tcu::TestContext&		testCtx,
654 										  const std::string&	name,
655 										  const std::string&	description,
656 										  const tcu::IVec2&		localSize,
657 										  const tcu::IVec2&		imageSize)
658 	: TestCase		(testCtx, name, description)
659 	, m_localSize	(localSize)
660 	, m_imageSize	(imageSize)
661 {
662 	DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
663 	DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
664 }
665 
initPrograms(SourceCollections & sourceCollections) const666 void CopyImageToSSBOTest::initPrograms (SourceCollections& sourceCollections) const
667 {
668 	std::ostringstream src;
669 	src << "#version 310 es\n"
670 		<< "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
671 		<< "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_srcImg;\n"
672 		<< "layout(binding = 0) writeonly buffer Output {\n"
673 		<< "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
674 		<< "} sb_out;\n\n"
675 		<< "void main (void) {\n"
676 		<< "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
677 		<< "    uint value  = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
678 		<< "    sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
679 		<< "}\n";
680 
681 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
682 }
683 
createInstance(Context & context) const684 TestInstance* CopyImageToSSBOTest::createInstance (Context& context) const
685 {
686 	return new CopyImageToSSBOTestInstance(context, m_localSize, m_imageSize);
687 }
688 
CopyImageToSSBOTestInstance(Context & context,const tcu::IVec2 & localSize,const tcu::IVec2 & imageSize)689 CopyImageToSSBOTestInstance::CopyImageToSSBOTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
690 	: TestInstance	(context)
691 	, m_localSize	(localSize)
692 	, m_imageSize	(imageSize)
693 {
694 }
695 
iterate(void)696 tcu::TestStatus CopyImageToSSBOTestInstance::iterate (void)
697 {
698 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
699 	const VkDevice			device				= m_context.getDevice();
700 	const VkQueue			queue				= m_context.getUniversalQueue();
701 	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
702 	Allocator&				allocator			= m_context.getDefaultAllocator();
703 
704 	// Create an image
705 
706 	const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
707 	const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
708 
709 	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
710 	const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
711 
712 	// Staging buffer (source data for image)
713 
714 	const deUint32 imageArea = multiplyComponents(m_imageSize);
715 	const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
716 
717 	const Buffer stagingBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
718 
719 	// Populate the staging buffer with test data
720 	{
721 		de::Random rnd(0xab2c7);
722 		const Allocation& stagingBufferAllocation = stagingBuffer.getAllocation();
723 		deUint32* bufferPtr = static_cast<deUint32*>(stagingBufferAllocation.getHostPtr());
724 		for (deUint32 i = 0; i < imageArea; ++i)
725 			*bufferPtr++ = rnd.getUint32();
726 
727 		flushMappedMemoryRange(vk, device, stagingBufferAllocation.getMemory(), stagingBufferAllocation.getOffset(), bufferSizeBytes);
728 	}
729 
730 	// Create a buffer to store shader output
731 
732 	const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
733 
734 	// Create descriptor set
735 
736 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
737 		DescriptorSetLayoutBuilder()
738 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
739 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
740 		.build(vk, device));
741 
742 	const Unique<VkDescriptorPool> descriptorPool(
743 		DescriptorPoolBuilder()
744 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
745 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
746 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
747 
748 	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
749 
750 	// Set the bindings
751 
752 	const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
753 	const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
754 
755 	DescriptorSetUpdateBuilder()
756 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
757 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
758 		.update(vk, device);
759 
760 	// Perform the computation
761 	{
762 		const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
763 		const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
764 		const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
765 
766 		const VkBufferMemoryBarrier stagingBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *stagingBuffer, 0ull, bufferSizeBytes);
767 
768 		const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
769 			0u, VK_ACCESS_TRANSFER_WRITE_BIT,
770 			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
771 			*image, subresourceRange);
772 
773 		const VkImageMemoryBarrier imagePostCopyBarrier = makeImageMemoryBarrier(
774 			VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
775 			VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
776 			*image, subresourceRange);
777 
778 		const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
779 
780 		const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
781 		const tcu::IVec2 workSize = m_imageSize / m_localSize;
782 
783 		// Prepare the command buffer
784 
785 		const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
786 		const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
787 
788 		// Start recording commands
789 
790 		beginCommandBuffer(vk, *cmdBuffer);
791 
792 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
793 		vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
794 
795 		vk.cmdPipelineBarrier(*cmdBuffer, 0u, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &stagingBufferPostHostWriteBarrier, 1, &imagePreCopyBarrier);
796 		vk.cmdCopyBufferToImage(*cmdBuffer, *stagingBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, &copyParams);
797 		vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePostCopyBarrier);
798 
799 		vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
800 		vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
801 
802 		endCommandBuffer(vk, *cmdBuffer);
803 
804 		// Wait for completion
805 
806 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
807 	}
808 
809 	// Validate the results
810 
811 	const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
812 	invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
813 
814 	const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
815 	const deUint32* refBufferPtr = static_cast<deUint32*>(stagingBuffer.getAllocation().getHostPtr());
816 
817 	for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
818 	{
819 		const deUint32 res = *(bufferPtr + ndx);
820 		const deUint32 ref = *(refBufferPtr + ndx);
821 
822 		if (res != ref)
823 		{
824 			std::ostringstream msg;
825 			msg << "Comparison failed for Output.values[" << ndx << "]";
826 			return tcu::TestStatus::fail(msg.str());
827 		}
828 	}
829 	return tcu::TestStatus::pass("Compute succeeded");
830 }
831 
832 class CopySSBOToImageTest : public vkt::TestCase
833 {
834 public:
835 						CopySSBOToImageTest	(tcu::TestContext&	testCtx,
836 											 const std::string&	name,
837 											 const std::string&	description,
838 											 const tcu::IVec2&	localSize,
839 											 const tcu::IVec2&	imageSize);
840 
841 	void				initPrograms		(SourceCollections& sourceCollections) const;
842 	TestInstance*		createInstance		(Context&			context) const;
843 
844 private:
845 	const tcu::IVec2	m_localSize;
846 	const tcu::IVec2	m_imageSize;
847 };
848 
849 class CopySSBOToImageTestInstance : public vkt::TestInstance
850 {
851 public:
852 									CopySSBOToImageTestInstance	(Context&			context,
853 																 const tcu::IVec2&	localSize,
854 																 const tcu::IVec2&	imageSize);
855 
856 	tcu::TestStatus					iterate						(void);
857 
858 private:
859 	const tcu::IVec2				m_localSize;
860 	const tcu::IVec2				m_imageSize;
861 };
862 
CopySSBOToImageTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const tcu::IVec2 & localSize,const tcu::IVec2 & imageSize)863 CopySSBOToImageTest::CopySSBOToImageTest (tcu::TestContext&		testCtx,
864 										  const std::string&	name,
865 										  const std::string&	description,
866 										  const tcu::IVec2&		localSize,
867 										  const tcu::IVec2&		imageSize)
868 	: TestCase		(testCtx, name, description)
869 	, m_localSize	(localSize)
870 	, m_imageSize	(imageSize)
871 {
872 	DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
873 	DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
874 }
875 
initPrograms(SourceCollections & sourceCollections) const876 void CopySSBOToImageTest::initPrograms (SourceCollections& sourceCollections) const
877 {
878 	std::ostringstream src;
879 	src << "#version 310 es\n"
880 		<< "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
881 		<< "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_dstImg;\n"
882 		<< "layout(binding = 0) readonly buffer Input {\n"
883 		<< "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
884 		<< "} sb_in;\n\n"
885 		<< "void main (void) {\n"
886 		<< "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
887 		<< "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
888 		<< "    imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
889 		<< "}\n";
890 
891 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
892 }
893 
createInstance(Context & context) const894 TestInstance* CopySSBOToImageTest::createInstance (Context& context) const
895 {
896 	return new CopySSBOToImageTestInstance(context, m_localSize, m_imageSize);
897 }
898 
CopySSBOToImageTestInstance(Context & context,const tcu::IVec2 & localSize,const tcu::IVec2 & imageSize)899 CopySSBOToImageTestInstance::CopySSBOToImageTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
900 	: TestInstance	(context)
901 	, m_localSize	(localSize)
902 	, m_imageSize	(imageSize)
903 {
904 }
905 
iterate(void)906 tcu::TestStatus CopySSBOToImageTestInstance::iterate (void)
907 {
908 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
909 	const VkDevice			device				= m_context.getDevice();
910 	const VkQueue			queue				= m_context.getUniversalQueue();
911 	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
912 	Allocator&				allocator			= m_context.getDefaultAllocator();
913 
914 	// Create an image
915 
916 	const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
917 	const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
918 
919 	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
920 	const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
921 
922 	// Create an input buffer (data to be read in the shader)
923 
924 	const deUint32 imageArea = multiplyComponents(m_imageSize);
925 	const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
926 
927 	const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
928 
929 	// Populate the buffer with test data
930 	{
931 		de::Random rnd(0x77238ac2);
932 		const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
933 		deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
934 		for (deUint32 i = 0; i < imageArea; ++i)
935 			*bufferPtr++ = rnd.getUint32();
936 
937 		flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
938 	}
939 
940 	// Create a buffer to store shader output (copied from image data)
941 
942 	const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
943 
944 	// Create descriptor set
945 
946 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
947 		DescriptorSetLayoutBuilder()
948 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
949 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
950 		.build(vk, device));
951 
952 	const Unique<VkDescriptorPool> descriptorPool(
953 		DescriptorPoolBuilder()
954 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
955 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
956 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
957 
958 	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
959 
960 	// Set the bindings
961 
962 	const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
963 	const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
964 
965 	DescriptorSetUpdateBuilder()
966 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
967 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
968 		.update(vk, device);
969 
970 	// Perform the computation
971 	{
972 		const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
973 		const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
974 		const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
975 
976 		const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
977 
978 		const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
979 			0u, 0u,
980 			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
981 			*image, subresourceRange);
982 
983 		const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
984 			VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
985 			VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
986 			*image, subresourceRange);
987 
988 		const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
989 
990 		const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
991 		const tcu::IVec2 workSize = m_imageSize / m_localSize;
992 
993 		// Prepare the command buffer
994 
995 		const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
996 		const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
997 
998 		// Start recording commands
999 
1000 		beginCommandBuffer(vk, *cmdBuffer);
1001 
1002 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1003 		vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1004 
1005 		vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 1, &imageLayoutBarrier);
1006 		vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
1007 
1008 		vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePreCopyBarrier);
1009 		vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &copyParams);
1010 		vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &outputBufferPostCopyBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1011 
1012 		endCommandBuffer(vk, *cmdBuffer);
1013 
1014 		// Wait for completion
1015 
1016 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1017 	}
1018 
1019 	// Validate the results
1020 
1021 	const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1022 	invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1023 
1024 	const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1025 	const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
1026 
1027 	for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
1028 	{
1029 		const deUint32 res = *(bufferPtr + ndx);
1030 		const deUint32 ref = *(refBufferPtr + ndx);
1031 
1032 		if (res != ref)
1033 		{
1034 			std::ostringstream msg;
1035 			msg << "Comparison failed for pixel " << ndx;
1036 			return tcu::TestStatus::fail(msg.str());
1037 		}
1038 	}
1039 	return tcu::TestStatus::pass("Compute succeeded");
1040 }
1041 
1042 class BufferToBufferInvertTest : public vkt::TestCase
1043 {
1044 public:
1045 	void								initPrograms				(SourceCollections&	sourceCollections) const;
1046 	TestInstance*						createInstance				(Context&			context) const;
1047 
1048 	static BufferToBufferInvertTest*	UBOToSSBOInvertCase			(tcu::TestContext&	testCtx,
1049 																	 const std::string& name,
1050 																	 const std::string& description,
1051 																	 const deUint32		numValues,
1052 																	 const tcu::IVec3&	localSize,
1053 																	 const tcu::IVec3&	workSize);
1054 
1055 	static BufferToBufferInvertTest*	CopyInvertSSBOCase			(tcu::TestContext&	testCtx,
1056 																	 const std::string& name,
1057 																	 const std::string& description,
1058 																	 const deUint32		numValues,
1059 																	 const tcu::IVec3&	localSize,
1060 																	 const tcu::IVec3&	workSize);
1061 
1062 private:
1063 										BufferToBufferInvertTest	(tcu::TestContext&	testCtx,
1064 																	 const std::string& name,
1065 																	 const std::string& description,
1066 																	 const deUint32		numValues,
1067 																	 const tcu::IVec3&	localSize,
1068 																	 const tcu::IVec3&	workSize,
1069 																	 const BufferType	bufferType);
1070 
1071 	const BufferType					m_bufferType;
1072 	const deUint32						m_numValues;
1073 	const tcu::IVec3					m_localSize;
1074 	const tcu::IVec3					m_workSize;
1075 };
1076 
1077 class BufferToBufferInvertTestInstance : public vkt::TestInstance
1078 {
1079 public:
1080 									BufferToBufferInvertTestInstance	(Context&			context,
1081 																		 const deUint32		numValues,
1082 																		 const tcu::IVec3&	localSize,
1083 																		 const tcu::IVec3&	workSize,
1084 																		 const BufferType	bufferType);
1085 
1086 	tcu::TestStatus					iterate								(void);
1087 
1088 private:
1089 	const BufferType				m_bufferType;
1090 	const deUint32					m_numValues;
1091 	const tcu::IVec3				m_localSize;
1092 	const tcu::IVec3				m_workSize;
1093 };
1094 
BufferToBufferInvertTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const deUint32 numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize,const BufferType bufferType)1095 BufferToBufferInvertTest::BufferToBufferInvertTest (tcu::TestContext&	testCtx,
1096 													const std::string&	name,
1097 													const std::string&	description,
1098 													const deUint32		numValues,
1099 													const tcu::IVec3&	localSize,
1100 													const tcu::IVec3&	workSize,
1101 													const BufferType	bufferType)
1102 	: TestCase		(testCtx, name, description)
1103 	, m_bufferType	(bufferType)
1104 	, m_numValues	(numValues)
1105 	, m_localSize	(localSize)
1106 	, m_workSize	(workSize)
1107 {
1108 	DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1109 	DE_ASSERT(m_bufferType == BUFFER_TYPE_UNIFORM || m_bufferType == BUFFER_TYPE_SSBO);
1110 }
1111 
UBOToSSBOInvertCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const deUint32 numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)1112 BufferToBufferInvertTest* BufferToBufferInvertTest::UBOToSSBOInvertCase (tcu::TestContext&	testCtx,
1113 																		 const std::string&	name,
1114 																		 const std::string&	description,
1115 																		 const deUint32		numValues,
1116 																		 const tcu::IVec3&	localSize,
1117 																		 const tcu::IVec3&	workSize)
1118 {
1119 	return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_UNIFORM);
1120 }
1121 
CopyInvertSSBOCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const deUint32 numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)1122 BufferToBufferInvertTest* BufferToBufferInvertTest::CopyInvertSSBOCase (tcu::TestContext&	testCtx,
1123 																		const std::string&	name,
1124 																		const std::string&	description,
1125 																		const deUint32		numValues,
1126 																		const tcu::IVec3&	localSize,
1127 																		const tcu::IVec3&	workSize)
1128 {
1129 	return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_SSBO);
1130 }
1131 
initPrograms(SourceCollections & sourceCollections) const1132 void BufferToBufferInvertTest::initPrograms (SourceCollections& sourceCollections) const
1133 {
1134 	std::ostringstream src;
1135 	if (m_bufferType == BUFFER_TYPE_UNIFORM)
1136 	{
1137 		src << "#version 310 es\n"
1138 			<< "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1139 			<< "layout(binding = 0) readonly uniform Input {\n"
1140 			<< "    uint values[" << m_numValues << "];\n"
1141 			<< "} ub_in;\n"
1142 			<< "layout(binding = 1, std140) writeonly buffer Output {\n"
1143 			<< "    uint values[" << m_numValues << "];\n"
1144 			<< "} sb_out;\n"
1145 			<< "void main (void) {\n"
1146 			<< "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1147 			<< "    uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
1148 			<< "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1149 			<< "    uint offset          = numValuesPerInv*groupNdx;\n"
1150 			<< "\n"
1151 			<< "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1152 			<< "        sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
1153 			<< "}\n";
1154 	}
1155 	else if (m_bufferType == BUFFER_TYPE_SSBO)
1156 	{
1157 		src << "#version 310 es\n"
1158 			<< "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1159 			<< "layout(binding = 0, std140) readonly buffer Input {\n"
1160 			<< "    uint values[" << m_numValues << "];\n"
1161 			<< "} sb_in;\n"
1162 			<< "layout (binding = 1, std140) writeonly buffer Output {\n"
1163 			<< "    uint values[" << m_numValues << "];\n"
1164 			<< "} sb_out;\n"
1165 			<< "void main (void) {\n"
1166 			<< "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1167 			<< "    uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
1168 			<< "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1169 			<< "    uint offset          = numValuesPerInv*groupNdx;\n"
1170 			<< "\n"
1171 			<< "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1172 			<< "        sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
1173 			<< "}\n";
1174 	}
1175 
1176 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1177 }
1178 
createInstance(Context & context) const1179 TestInstance* BufferToBufferInvertTest::createInstance (Context& context) const
1180 {
1181 	return new BufferToBufferInvertTestInstance(context, m_numValues, m_localSize, m_workSize, m_bufferType);
1182 }
1183 
BufferToBufferInvertTestInstance(Context & context,const deUint32 numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize,const BufferType bufferType)1184 BufferToBufferInvertTestInstance::BufferToBufferInvertTestInstance (Context&			context,
1185 																	const deUint32		numValues,
1186 																	const tcu::IVec3&	localSize,
1187 																	const tcu::IVec3&	workSize,
1188 																	const BufferType	bufferType)
1189 	: TestInstance	(context)
1190 	, m_bufferType	(bufferType)
1191 	, m_numValues	(numValues)
1192 	, m_localSize	(localSize)
1193 	, m_workSize	(workSize)
1194 {
1195 }
1196 
iterate(void)1197 tcu::TestStatus BufferToBufferInvertTestInstance::iterate (void)
1198 {
1199 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
1200 	const VkDevice			device				= m_context.getDevice();
1201 	const VkQueue			queue				= m_context.getUniversalQueue();
1202 	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
1203 	Allocator&				allocator			= m_context.getDefaultAllocator();
1204 
1205 	// Customize the test based on buffer type
1206 
1207 	const VkBufferUsageFlags inputBufferUsageFlags		= (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1208 	const VkDescriptorType inputBufferDescriptorType	= (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
1209 	const deUint32 randomSeed							= (m_bufferType == BUFFER_TYPE_UNIFORM ? 0x111223f : 0x124fef);
1210 
1211 	// Create an input buffer
1212 
1213 	const VkDeviceSize bufferSizeBytes = sizeof(tcu::UVec4) * m_numValues;
1214 	const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, inputBufferUsageFlags), MemoryRequirement::HostVisible);
1215 
1216 	// Fill the input buffer with data
1217 	{
1218 		de::Random rnd(randomSeed);
1219 		const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
1220 		tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(inputBufferAllocation.getHostPtr());
1221 		for (deUint32 i = 0; i < m_numValues; ++i)
1222 			bufferPtr[i].x() = rnd.getUint32();
1223 
1224 		flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
1225 	}
1226 
1227 	// Create an output buffer
1228 
1229 	const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1230 
1231 	// Create descriptor set
1232 
1233 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1234 		DescriptorSetLayoutBuilder()
1235 		.addSingleBinding(inputBufferDescriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1236 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1237 		.build(vk, device));
1238 
1239 	const Unique<VkDescriptorPool> descriptorPool(
1240 		DescriptorPoolBuilder()
1241 		.addType(inputBufferDescriptorType)
1242 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1243 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1244 
1245 	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1246 
1247 	const VkDescriptorBufferInfo inputBufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
1248 	const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
1249 	DescriptorSetUpdateBuilder()
1250 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), inputBufferDescriptorType, &inputBufferDescriptorInfo)
1251 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1252 		.update(vk, device);
1253 
1254 	// Perform the computation
1255 
1256 	const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1257 	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1258 	const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1259 
1260 	const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
1261 
1262 	const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
1263 
1264 	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1265 	const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1266 
1267 	// Start recording commands
1268 
1269 	beginCommandBuffer(vk, *cmdBuffer);
1270 
1271 	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1272 	vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1273 
1274 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1275 	vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1276 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1277 
1278 	endCommandBuffer(vk, *cmdBuffer);
1279 
1280 	// Wait for completion
1281 
1282 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1283 
1284 	// Validate the results
1285 
1286 	const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1287 	invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1288 
1289 	const tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(outputBufferAllocation.getHostPtr());
1290 	const tcu::UVec4* refBufferPtr = static_cast<tcu::UVec4*>(inputBuffer.getAllocation().getHostPtr());
1291 
1292 	for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1293 	{
1294 		const deUint32 res = bufferPtr[ndx].x();
1295 		const deUint32 ref = ~refBufferPtr[ndx].x();
1296 
1297 		if (res != ref)
1298 		{
1299 			std::ostringstream msg;
1300 			msg << "Comparison failed for Output.values[" << ndx << "]";
1301 			return tcu::TestStatus::fail(msg.str());
1302 		}
1303 	}
1304 	return tcu::TestStatus::pass("Compute succeeded");
1305 }
1306 
1307 class InvertSSBOInPlaceTest : public vkt::TestCase
1308 {
1309 public:
1310 						InvertSSBOInPlaceTest	(tcu::TestContext&	testCtx,
1311 												 const std::string&	name,
1312 												 const std::string&	description,
1313 												 const deUint32		numValues,
1314 												 const bool			sized,
1315 												 const tcu::IVec3&	localSize,
1316 												 const tcu::IVec3&	workSize);
1317 
1318 
1319 	void				initPrograms			(SourceCollections& sourceCollections) const;
1320 	TestInstance*		createInstance			(Context&			context) const;
1321 
1322 private:
1323 	const deUint32		m_numValues;
1324 	const bool			m_sized;
1325 	const tcu::IVec3	m_localSize;
1326 	const tcu::IVec3	m_workSize;
1327 };
1328 
1329 class InvertSSBOInPlaceTestInstance : public vkt::TestInstance
1330 {
1331 public:
1332 									InvertSSBOInPlaceTestInstance	(Context&			context,
1333 																	 const deUint32		numValues,
1334 																	 const tcu::IVec3&	localSize,
1335 																	 const tcu::IVec3&	workSize);
1336 
1337 	tcu::TestStatus					iterate							(void);
1338 
1339 private:
1340 	const deUint32					m_numValues;
1341 	const tcu::IVec3				m_localSize;
1342 	const tcu::IVec3				m_workSize;
1343 };
1344 
InvertSSBOInPlaceTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const deUint32 numValues,const bool sized,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)1345 InvertSSBOInPlaceTest::InvertSSBOInPlaceTest (tcu::TestContext&		testCtx,
1346 											  const std::string&	name,
1347 											  const std::string&	description,
1348 											  const deUint32		numValues,
1349 											  const bool			sized,
1350 											  const tcu::IVec3&		localSize,
1351 											  const tcu::IVec3&		workSize)
1352 	: TestCase		(testCtx, name, description)
1353 	, m_numValues	(numValues)
1354 	, m_sized		(sized)
1355 	, m_localSize	(localSize)
1356 	, m_workSize	(workSize)
1357 {
1358 	DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1359 }
1360 
initPrograms(SourceCollections & sourceCollections) const1361 void InvertSSBOInPlaceTest::initPrograms (SourceCollections& sourceCollections) const
1362 {
1363 	std::ostringstream src;
1364 	src << "#version 310 es\n"
1365 		<< "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1366 		<< "layout(binding = 0) buffer InOut {\n"
1367 		<< "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1368 		<< "} sb_inout;\n"
1369 		<< "void main (void) {\n"
1370 		<< "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1371 		<< "    uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
1372 		<< "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1373 		<< "    uint offset          = numValuesPerInv*groupNdx;\n"
1374 		<< "\n"
1375 		<< "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1376 		<< "        sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
1377 		<< "}\n";
1378 
1379 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1380 }
1381 
createInstance(Context & context) const1382 TestInstance* InvertSSBOInPlaceTest::createInstance (Context& context) const
1383 {
1384 	return new InvertSSBOInPlaceTestInstance(context, m_numValues, m_localSize, m_workSize);
1385 }
1386 
InvertSSBOInPlaceTestInstance(Context & context,const deUint32 numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)1387 InvertSSBOInPlaceTestInstance::InvertSSBOInPlaceTestInstance (Context&			context,
1388 															  const deUint32	numValues,
1389 															  const tcu::IVec3&	localSize,
1390 															  const tcu::IVec3&	workSize)
1391 	: TestInstance	(context)
1392 	, m_numValues	(numValues)
1393 	, m_localSize	(localSize)
1394 	, m_workSize	(workSize)
1395 {
1396 }
1397 
iterate(void)1398 tcu::TestStatus InvertSSBOInPlaceTestInstance::iterate (void)
1399 {
1400 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
1401 	const VkDevice			device				= m_context.getDevice();
1402 	const VkQueue			queue				= m_context.getUniversalQueue();
1403 	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
1404 	Allocator&				allocator			= m_context.getDefaultAllocator();
1405 
1406 	// Create an input/output buffer
1407 
1408 	const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1409 	const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1410 
1411 	// Fill the buffer with data
1412 
1413 	typedef std::vector<deUint32> data_vector_t;
1414 	data_vector_t inputData(m_numValues);
1415 
1416 	{
1417 		de::Random rnd(0x82ce7f);
1418 		const Allocation& bufferAllocation = buffer.getAllocation();
1419 		deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1420 		for (deUint32 i = 0; i < m_numValues; ++i)
1421 			inputData[i] = *bufferPtr++ = rnd.getUint32();
1422 
1423 		flushMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1424 	}
1425 
1426 	// Create descriptor set
1427 
1428 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1429 		DescriptorSetLayoutBuilder()
1430 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1431 		.build(vk, device));
1432 
1433 	const Unique<VkDescriptorPool> descriptorPool(
1434 		DescriptorPoolBuilder()
1435 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1436 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1437 
1438 	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1439 
1440 	const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
1441 	DescriptorSetUpdateBuilder()
1442 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
1443 		.update(vk, device);
1444 
1445 	// Perform the computation
1446 
1447 	const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1448 	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1449 	const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1450 
1451 	const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1452 
1453 	const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1454 
1455 	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1456 	const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1457 
1458 	// Start recording commands
1459 
1460 	beginCommandBuffer(vk, *cmdBuffer);
1461 
1462 	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1463 	vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1464 
1465 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1466 	vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1467 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1468 
1469 	endCommandBuffer(vk, *cmdBuffer);
1470 
1471 	// Wait for completion
1472 
1473 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1474 
1475 	// Validate the results
1476 
1477 	const Allocation& bufferAllocation = buffer.getAllocation();
1478 	invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1479 
1480 	const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1481 
1482 	for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1483 	{
1484 		const deUint32 res = bufferPtr[ndx];
1485 		const deUint32 ref = ~inputData[ndx];
1486 
1487 		if (res != ref)
1488 		{
1489 			std::ostringstream msg;
1490 			msg << "Comparison failed for InOut.values[" << ndx << "]";
1491 			return tcu::TestStatus::fail(msg.str());
1492 		}
1493 	}
1494 	return tcu::TestStatus::pass("Compute succeeded");
1495 }
1496 
1497 class WriteToMultipleSSBOTest : public vkt::TestCase
1498 {
1499 public:
1500 						WriteToMultipleSSBOTest	(tcu::TestContext&	testCtx,
1501 												 const std::string&	name,
1502 												 const std::string&	description,
1503 												 const deUint32		numValues,
1504 												 const bool			sized,
1505 												 const tcu::IVec3&	localSize,
1506 												 const tcu::IVec3&	workSize);
1507 
1508 	void				initPrograms			(SourceCollections& sourceCollections) const;
1509 	TestInstance*		createInstance			(Context&			context) const;
1510 
1511 private:
1512 	const deUint32		m_numValues;
1513 	const bool			m_sized;
1514 	const tcu::IVec3	m_localSize;
1515 	const tcu::IVec3	m_workSize;
1516 };
1517 
1518 class WriteToMultipleSSBOTestInstance : public vkt::TestInstance
1519 {
1520 public:
1521 									WriteToMultipleSSBOTestInstance	(Context&			context,
1522 																	 const deUint32		numValues,
1523 																	 const tcu::IVec3&	localSize,
1524 																	 const tcu::IVec3&	workSize);
1525 
1526 	tcu::TestStatus					iterate							(void);
1527 
1528 private:
1529 	const deUint32					m_numValues;
1530 	const tcu::IVec3				m_localSize;
1531 	const tcu::IVec3				m_workSize;
1532 };
1533 
WriteToMultipleSSBOTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const deUint32 numValues,const bool sized,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)1534 WriteToMultipleSSBOTest::WriteToMultipleSSBOTest (tcu::TestContext&		testCtx,
1535 												  const std::string&	name,
1536 												  const std::string&	description,
1537 												  const deUint32		numValues,
1538 												  const bool			sized,
1539 												  const tcu::IVec3&		localSize,
1540 												  const tcu::IVec3&		workSize)
1541 	: TestCase		(testCtx, name, description)
1542 	, m_numValues	(numValues)
1543 	, m_sized		(sized)
1544 	, m_localSize	(localSize)
1545 	, m_workSize	(workSize)
1546 {
1547 	DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1548 }
1549 
initPrograms(SourceCollections & sourceCollections) const1550 void WriteToMultipleSSBOTest::initPrograms (SourceCollections& sourceCollections) const
1551 {
1552 	std::ostringstream src;
1553 	src << "#version 310 es\n"
1554 		<< "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1555 		<< "layout(binding = 0) writeonly buffer Out0 {\n"
1556 		<< "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1557 		<< "} sb_out0;\n"
1558 		<< "layout(binding = 1) writeonly buffer Out1 {\n"
1559 		<< "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1560 		<< "} sb_out1;\n"
1561 		<< "void main (void) {\n"
1562 		<< "    uvec3 size      = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1563 		<< "    uint groupNdx   = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1564 		<< "\n"
1565 		<< "    {\n"
1566 		<< "        uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
1567 		<< "        uint offset          = numValuesPerInv*groupNdx;\n"
1568 		<< "\n"
1569 		<< "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1570 		<< "            sb_out0.values[offset + ndx] = offset + ndx;\n"
1571 		<< "    }\n"
1572 		<< "    {\n"
1573 		<< "        uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
1574 		<< "        uint offset          = numValuesPerInv*groupNdx;\n"
1575 		<< "\n"
1576 		<< "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1577 		<< "            sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
1578 		<< "    }\n"
1579 		<< "}\n";
1580 
1581 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1582 }
1583 
createInstance(Context & context) const1584 TestInstance* WriteToMultipleSSBOTest::createInstance (Context& context) const
1585 {
1586 	return new WriteToMultipleSSBOTestInstance(context, m_numValues, m_localSize, m_workSize);
1587 }
1588 
WriteToMultipleSSBOTestInstance(Context & context,const deUint32 numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)1589 WriteToMultipleSSBOTestInstance::WriteToMultipleSSBOTestInstance (Context&			context,
1590 																  const deUint32	numValues,
1591 																  const tcu::IVec3&	localSize,
1592 																  const tcu::IVec3&	workSize)
1593 	: TestInstance	(context)
1594 	, m_numValues	(numValues)
1595 	, m_localSize	(localSize)
1596 	, m_workSize	(workSize)
1597 {
1598 }
1599 
iterate(void)1600 tcu::TestStatus WriteToMultipleSSBOTestInstance::iterate (void)
1601 {
1602 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
1603 	const VkDevice			device				= m_context.getDevice();
1604 	const VkQueue			queue				= m_context.getUniversalQueue();
1605 	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
1606 	Allocator&				allocator			= m_context.getDefaultAllocator();
1607 
1608 	// Create two output buffers
1609 
1610 	const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1611 	const Buffer buffer0(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1612 	const Buffer buffer1(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1613 
1614 	// Create descriptor set
1615 
1616 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1617 		DescriptorSetLayoutBuilder()
1618 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1619 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1620 		.build(vk, device));
1621 
1622 	const Unique<VkDescriptorPool> descriptorPool(
1623 		DescriptorPoolBuilder()
1624 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1625 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1626 
1627 	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1628 
1629 	const VkDescriptorBufferInfo buffer0DescriptorInfo = makeDescriptorBufferInfo(*buffer0, 0ull, bufferSizeBytes);
1630 	const VkDescriptorBufferInfo buffer1DescriptorInfo = makeDescriptorBufferInfo(*buffer1, 0ull, bufferSizeBytes);
1631 	DescriptorSetUpdateBuilder()
1632 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer0DescriptorInfo)
1633 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer1DescriptorInfo)
1634 		.update(vk, device);
1635 
1636 	// Perform the computation
1637 
1638 	const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1639 	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1640 	const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1641 
1642 	const VkBufferMemoryBarrier shaderWriteBarriers[] =
1643 	{
1644 		makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer0, 0ull, bufferSizeBytes),
1645 		makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer1, 0ull, bufferSizeBytes)
1646 	};
1647 
1648 	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1649 	const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1650 
1651 	// Start recording commands
1652 
1653 	beginCommandBuffer(vk, *cmdBuffer);
1654 
1655 	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1656 	vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1657 
1658 	vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1659 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, DE_LENGTH_OF_ARRAY(shaderWriteBarriers), shaderWriteBarriers, 0, (const VkImageMemoryBarrier*)DE_NULL);
1660 
1661 	endCommandBuffer(vk, *cmdBuffer);
1662 
1663 	// Wait for completion
1664 
1665 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1666 
1667 	// Validate the results
1668 	{
1669 		const Allocation& buffer0Allocation = buffer0.getAllocation();
1670 		invalidateMappedMemoryRange(vk, device, buffer0Allocation.getMemory(), buffer0Allocation.getOffset(), bufferSizeBytes);
1671 		const deUint32* buffer0Ptr = static_cast<deUint32*>(buffer0Allocation.getHostPtr());
1672 
1673 		for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1674 		{
1675 			const deUint32 res = buffer0Ptr[ndx];
1676 			const deUint32 ref = ndx;
1677 
1678 			if (res != ref)
1679 			{
1680 				std::ostringstream msg;
1681 				msg << "Comparison failed for Out0.values[" << ndx << "] res=" << res << " ref=" << ref;
1682 				return tcu::TestStatus::fail(msg.str());
1683 			}
1684 		}
1685 	}
1686 	{
1687 		const Allocation& buffer1Allocation = buffer1.getAllocation();
1688 		invalidateMappedMemoryRange(vk, device, buffer1Allocation.getMemory(), buffer1Allocation.getOffset(), bufferSizeBytes);
1689 		const deUint32* buffer1Ptr = static_cast<deUint32*>(buffer1Allocation.getHostPtr());
1690 
1691 		for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1692 		{
1693 			const deUint32 res = buffer1Ptr[ndx];
1694 			const deUint32 ref = m_numValues - ndx;
1695 
1696 			if (res != ref)
1697 			{
1698 				std::ostringstream msg;
1699 				msg << "Comparison failed for Out1.values[" << ndx << "] res=" << res << " ref=" << ref;
1700 				return tcu::TestStatus::fail(msg.str());
1701 			}
1702 		}
1703 	}
1704 	return tcu::TestStatus::pass("Compute succeeded");
1705 }
1706 
1707 class SSBOBarrierTest : public vkt::TestCase
1708 {
1709 public:
1710 						SSBOBarrierTest		(tcu::TestContext&	testCtx,
1711 											 const std::string&	name,
1712 											 const std::string&	description,
1713 											 const tcu::IVec3&	workSize);
1714 
1715 	void				initPrograms		(SourceCollections& sourceCollections) const;
1716 	TestInstance*		createInstance		(Context&			context) const;
1717 
1718 private:
1719 	const tcu::IVec3	m_workSize;
1720 };
1721 
1722 class SSBOBarrierTestInstance : public vkt::TestInstance
1723 {
1724 public:
1725 									SSBOBarrierTestInstance		(Context&			context,
1726 																 const tcu::IVec3&	workSize);
1727 
1728 	tcu::TestStatus					iterate						(void);
1729 
1730 private:
1731 	const tcu::IVec3				m_workSize;
1732 };
1733 
SSBOBarrierTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const tcu::IVec3 & workSize)1734 SSBOBarrierTest::SSBOBarrierTest (tcu::TestContext&		testCtx,
1735 								  const std::string&	name,
1736 								  const std::string&	description,
1737 								  const tcu::IVec3&		workSize)
1738 	: TestCase		(testCtx, name, description)
1739 	, m_workSize	(workSize)
1740 {
1741 }
1742 
initPrograms(SourceCollections & sourceCollections) const1743 void SSBOBarrierTest::initPrograms (SourceCollections& sourceCollections) const
1744 {
1745 	sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
1746 		"#version 310 es\n"
1747 		"layout (local_size_x = 1) in;\n"
1748 		"layout(binding = 2) readonly uniform Constants {\n"
1749 		"    uint u_baseVal;\n"
1750 		"};\n"
1751 		"layout(binding = 1) writeonly buffer Output {\n"
1752 		"    uint values[];\n"
1753 		"};\n"
1754 		"void main (void) {\n"
1755 		"    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1756 		"    values[offset] = u_baseVal + offset;\n"
1757 		"}\n");
1758 
1759 	sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
1760 		"#version 310 es\n"
1761 		"layout (local_size_x = 1) in;\n"
1762 		"layout(binding = 1) readonly buffer Input {\n"
1763 		"    uint values[];\n"
1764 		"};\n"
1765 		"layout(binding = 0) coherent buffer Output {\n"
1766 		"    uint sum;\n"
1767 		"};\n"
1768 		"void main (void) {\n"
1769 		"    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1770 		"    uint value  = values[offset];\n"
1771 		"    atomicAdd(sum, value);\n"
1772 		"}\n");
1773 }
1774 
createInstance(Context & context) const1775 TestInstance* SSBOBarrierTest::createInstance (Context& context) const
1776 {
1777 	return new SSBOBarrierTestInstance(context, m_workSize);
1778 }
1779 
SSBOBarrierTestInstance(Context & context,const tcu::IVec3 & workSize)1780 SSBOBarrierTestInstance::SSBOBarrierTestInstance (Context& context, const tcu::IVec3& workSize)
1781 	: TestInstance	(context)
1782 	, m_workSize	(workSize)
1783 {
1784 }
1785 
iterate(void)1786 tcu::TestStatus SSBOBarrierTestInstance::iterate (void)
1787 {
1788 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
1789 	const VkDevice			device				= m_context.getDevice();
1790 	const VkQueue			queue				= m_context.getUniversalQueue();
1791 	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
1792 	Allocator&				allocator			= m_context.getDefaultAllocator();
1793 
1794 	// Create a work buffer used by both shaders
1795 
1796 	const int workGroupCount = multiplyComponents(m_workSize);
1797 	const VkDeviceSize workBufferSizeBytes = sizeof(deUint32) * workGroupCount;
1798 	const Buffer workBuffer(vk, device, allocator, makeBufferCreateInfo(workBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::Any);
1799 
1800 	// Create an output buffer
1801 
1802 	const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
1803 	const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1804 
1805 	// Initialize atomic counter value to zero
1806 	{
1807 		const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1808 		deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1809 		*outputBufferPtr = 0;
1810 		flushMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1811 	}
1812 
1813 	// Create a uniform buffer (to pass uniform constants)
1814 
1815 	const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
1816 	const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
1817 
1818 	// Set the constants in the uniform buffer
1819 
1820 	const deUint32	baseValue = 127;
1821 	{
1822 		const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
1823 		deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
1824 		uniformBufferPtr[0] = baseValue;
1825 
1826 		flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
1827 	}
1828 
1829 	// Create descriptor set
1830 
1831 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1832 		DescriptorSetLayoutBuilder()
1833 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1834 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1835 		.addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1836 		.build(vk, device));
1837 
1838 	const Unique<VkDescriptorPool> descriptorPool(
1839 		DescriptorPoolBuilder()
1840 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1841 		.addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
1842 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1843 
1844 	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1845 
1846 	const VkDescriptorBufferInfo workBufferDescriptorInfo = makeDescriptorBufferInfo(*workBuffer, 0ull, workBufferSizeBytes);
1847 	const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
1848 	const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
1849 	DescriptorSetUpdateBuilder()
1850 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1851 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &workBufferDescriptorInfo)
1852 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
1853 		.update(vk, device);
1854 
1855 	// Perform the computation
1856 
1857 	const Unique<VkShaderModule> shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
1858 	const Unique<VkShaderModule> shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
1859 
1860 	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1861 	const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
1862 	const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
1863 
1864 	const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
1865 
1866 	const VkBufferMemoryBarrier betweenShadersBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *workBuffer, 0ull, workBufferSizeBytes);
1867 
1868 	const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
1869 
1870 	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1871 	const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1872 
1873 	// Start recording commands
1874 
1875 	beginCommandBuffer(vk, *cmdBuffer);
1876 
1877 	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
1878 	vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1879 
1880 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1881 
1882 	vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1883 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &betweenShadersBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1884 
1885 	// Switch to the second shader program
1886 	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
1887 
1888 	vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1889 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1890 
1891 	endCommandBuffer(vk, *cmdBuffer);
1892 
1893 	// Wait for completion
1894 
1895 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1896 
1897 	// Validate the results
1898 
1899 	const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1900 	invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1901 
1902 	const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1903 	const deUint32	res = *bufferPtr;
1904 	deUint32		ref = 0;
1905 
1906 	for (int ndx = 0; ndx < workGroupCount; ++ndx)
1907 		ref += baseValue + ndx;
1908 
1909 	if (res != ref)
1910 	{
1911 		std::ostringstream msg;
1912 		msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
1913 		return tcu::TestStatus::fail(msg.str());
1914 	}
1915 	return tcu::TestStatus::pass("Compute succeeded");
1916 }
1917 
1918 class ImageAtomicOpTest : public vkt::TestCase
1919 {
1920 public:
1921 						ImageAtomicOpTest		(tcu::TestContext&	testCtx,
1922 												 const std::string& name,
1923 												 const std::string& description,
1924 												 const deUint32		localSize,
1925 												 const tcu::IVec2&	imageSize);
1926 
1927 	void				initPrograms			(SourceCollections& sourceCollections) const;
1928 	TestInstance*		createInstance			(Context&			context) const;
1929 
1930 private:
1931 	const deUint32		m_localSize;
1932 	const tcu::IVec2	m_imageSize;
1933 };
1934 
1935 class ImageAtomicOpTestInstance : public vkt::TestInstance
1936 {
1937 public:
1938 									ImageAtomicOpTestInstance		(Context&			context,
1939 																	 const deUint32		localSize,
1940 																	 const tcu::IVec2&	imageSize);
1941 
1942 	tcu::TestStatus					iterate							(void);
1943 
1944 private:
1945 	const deUint32					m_localSize;
1946 	const tcu::IVec2				m_imageSize;
1947 };
1948 
ImageAtomicOpTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const deUint32 localSize,const tcu::IVec2 & imageSize)1949 ImageAtomicOpTest::ImageAtomicOpTest (tcu::TestContext&		testCtx,
1950 									  const std::string&	name,
1951 									  const std::string&	description,
1952 									  const deUint32		localSize,
1953 									  const tcu::IVec2&		imageSize)
1954 	: TestCase		(testCtx, name, description)
1955 	, m_localSize	(localSize)
1956 	, m_imageSize	(imageSize)
1957 {
1958 }
1959 
initPrograms(SourceCollections & sourceCollections) const1960 void ImageAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
1961 {
1962 	std::ostringstream src;
1963 	src << "#version 310 es\n"
1964 		<< "#extension GL_OES_shader_image_atomic : require\n"
1965 		<< "layout (local_size_x = " << m_localSize << ") in;\n"
1966 		<< "layout(binding = 1, r32ui) coherent uniform highp uimage2D u_dstImg;\n"
1967 		<< "layout(binding = 0) readonly buffer Input {\n"
1968 		<< "    uint values[" << (multiplyComponents(m_imageSize) * m_localSize) << "];\n"
1969 		<< "} sb_in;\n\n"
1970 		<< "void main (void) {\n"
1971 		<< "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1972 		<< "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1973 		<< "\n"
1974 		<< "    if (gl_LocalInvocationIndex == 0u)\n"
1975 		<< "        imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
1976 		<< "    memoryBarrierImage();\n"
1977 		<< "    barrier();\n"
1978 		<< "    imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
1979 		<< "}\n";
1980 
1981 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1982 }
1983 
createInstance(Context & context) const1984 TestInstance* ImageAtomicOpTest::createInstance (Context& context) const
1985 {
1986 	return new ImageAtomicOpTestInstance(context, m_localSize, m_imageSize);
1987 }
1988 
ImageAtomicOpTestInstance(Context & context,const deUint32 localSize,const tcu::IVec2 & imageSize)1989 ImageAtomicOpTestInstance::ImageAtomicOpTestInstance (Context& context, const deUint32 localSize, const tcu::IVec2& imageSize)
1990 	: TestInstance	(context)
1991 	, m_localSize	(localSize)
1992 	, m_imageSize	(imageSize)
1993 {
1994 }
1995 
iterate(void)1996 tcu::TestStatus ImageAtomicOpTestInstance::iterate (void)
1997 {
1998 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
1999 	const VkDevice			device				= m_context.getDevice();
2000 	const VkQueue			queue				= m_context.getUniversalQueue();
2001 	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
2002 	Allocator&				allocator			= m_context.getDefaultAllocator();
2003 
2004 	// Create an image
2005 
2006 	const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
2007 	const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2008 
2009 	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2010 	const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2011 
2012 	// Input buffer
2013 
2014 	const deUint32 numInputValues = multiplyComponents(m_imageSize) * m_localSize;
2015 	const VkDeviceSize inputBufferSizeBytes = sizeof(deUint32) * numInputValues;
2016 
2017 	const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(inputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
2018 
2019 	// Populate the input buffer with test data
2020 	{
2021 		de::Random rnd(0x77238ac2);
2022 		const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
2023 		deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
2024 		for (deUint32 i = 0; i < numInputValues; ++i)
2025 			*bufferPtr++ = rnd.getUint32();
2026 
2027 		flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), inputBufferSizeBytes);
2028 	}
2029 
2030 	// Create a buffer to store shader output (copied from image data)
2031 
2032 	const deUint32 imageArea = multiplyComponents(m_imageSize);
2033 	const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32) * imageArea;
2034 	const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
2035 
2036 	// Create descriptor set
2037 
2038 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2039 		DescriptorSetLayoutBuilder()
2040 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2041 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2042 		.build(vk, device));
2043 
2044 	const Unique<VkDescriptorPool> descriptorPool(
2045 		DescriptorPoolBuilder()
2046 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2047 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2048 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2049 
2050 	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2051 
2052 	// Set the bindings
2053 
2054 	const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2055 	const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, inputBufferSizeBytes);
2056 
2057 	DescriptorSetUpdateBuilder()
2058 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
2059 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2060 		.update(vk, device);
2061 
2062 	// Perform the computation
2063 	{
2064 		const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
2065 		const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2066 		const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2067 
2068 		const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, inputBufferSizeBytes);
2069 
2070 		const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
2071 			(VkAccessFlags)0, VK_ACCESS_SHADER_WRITE_BIT,
2072 			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
2073 			*image, subresourceRange);
2074 
2075 		const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
2076 			VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
2077 			VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2078 			*image, subresourceRange);
2079 
2080 		const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2081 
2082 		const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
2083 
2084 		// Prepare the command buffer
2085 
2086 		const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2087 		const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2088 
2089 		// Start recording commands
2090 
2091 		beginCommandBuffer(vk, *cmdBuffer);
2092 
2093 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2094 		vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2095 
2096 		vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 1, &imageLayoutBarrier);
2097 		vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2098 
2099 		vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePreCopyBarrier);
2100 		vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &copyParams);
2101 		vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &outputBufferPostCopyBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2102 
2103 		endCommandBuffer(vk, *cmdBuffer);
2104 
2105 		// Wait for completion
2106 
2107 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2108 	}
2109 
2110 	// Validate the results
2111 
2112 	const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2113 	invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2114 
2115 	const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2116 	const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
2117 
2118 	for (deUint32 pixelNdx = 0; pixelNdx < imageArea; ++pixelNdx)
2119 	{
2120 		const deUint32	res = bufferPtr[pixelNdx];
2121 		deUint32		ref = 0;
2122 
2123 		for (deUint32 offs = 0; offs < m_localSize; ++offs)
2124 			ref += refBufferPtr[pixelNdx * m_localSize + offs];
2125 
2126 		if (res != ref)
2127 		{
2128 			std::ostringstream msg;
2129 			msg << "Comparison failed for pixel " << pixelNdx;
2130 			return tcu::TestStatus::fail(msg.str());
2131 		}
2132 	}
2133 	return tcu::TestStatus::pass("Compute succeeded");
2134 }
2135 
2136 class ImageBarrierTest : public vkt::TestCase
2137 {
2138 public:
2139 						ImageBarrierTest	(tcu::TestContext&	testCtx,
2140 											const std::string&	name,
2141 											const std::string&	description,
2142 											const tcu::IVec2&	imageSize);
2143 
2144 	void				initPrograms		(SourceCollections& sourceCollections) const;
2145 	TestInstance*		createInstance		(Context&			context) const;
2146 
2147 private:
2148 	const tcu::IVec2	m_imageSize;
2149 };
2150 
2151 class ImageBarrierTestInstance : public vkt::TestInstance
2152 {
2153 public:
2154 									ImageBarrierTestInstance	(Context&			context,
2155 																 const tcu::IVec2&	imageSize);
2156 
2157 	tcu::TestStatus					iterate						(void);
2158 
2159 private:
2160 	const tcu::IVec2				m_imageSize;
2161 };
2162 
ImageBarrierTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const tcu::IVec2 & imageSize)2163 ImageBarrierTest::ImageBarrierTest (tcu::TestContext&	testCtx,
2164 									const std::string&	name,
2165 									const std::string&	description,
2166 									const tcu::IVec2&	imageSize)
2167 	: TestCase		(testCtx, name, description)
2168 	, m_imageSize	(imageSize)
2169 {
2170 }
2171 
initPrograms(SourceCollections & sourceCollections) const2172 void ImageBarrierTest::initPrograms (SourceCollections& sourceCollections) const
2173 {
2174 	sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
2175 		"#version 310 es\n"
2176 		"layout (local_size_x = 1) in;\n"
2177 		"layout(binding = 2) readonly uniform Constants {\n"
2178 		"    uint u_baseVal;\n"
2179 		"};\n"
2180 		"layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_img;\n"
2181 		"void main (void) {\n"
2182 		"    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
2183 		"    imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset + u_baseVal, 0, 0, 0));\n"
2184 		"}\n");
2185 
2186 	sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
2187 		"#version 310 es\n"
2188 		"layout (local_size_x = 1) in;\n"
2189 		"layout(binding = 1, r32ui) readonly uniform highp uimage2D u_img;\n"
2190 		"layout(binding = 0) coherent buffer Output {\n"
2191 		"    uint sum;\n"
2192 		"};\n"
2193 		"void main (void) {\n"
2194 		"    uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
2195 		"    atomicAdd(sum, value);\n"
2196 		"}\n");
2197 }
2198 
createInstance(Context & context) const2199 TestInstance* ImageBarrierTest::createInstance (Context& context) const
2200 {
2201 	return new ImageBarrierTestInstance(context, m_imageSize);
2202 }
2203 
ImageBarrierTestInstance(Context & context,const tcu::IVec2 & imageSize)2204 ImageBarrierTestInstance::ImageBarrierTestInstance (Context& context, const tcu::IVec2& imageSize)
2205 	: TestInstance	(context)
2206 	, m_imageSize	(imageSize)
2207 {
2208 }
2209 
iterate(void)2210 tcu::TestStatus ImageBarrierTestInstance::iterate (void)
2211 {
2212 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
2213 	const VkDevice			device				= m_context.getDevice();
2214 	const VkQueue			queue				= m_context.getUniversalQueue();
2215 	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
2216 	Allocator&				allocator			= m_context.getDefaultAllocator();
2217 
2218 	// Create an image used by both shaders
2219 
2220 	const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_STORAGE_BIT);
2221 	const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2222 
2223 	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2224 	const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2225 
2226 	// Create an output buffer
2227 
2228 	const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
2229 	const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
2230 
2231 	// Initialize atomic counter value to zero
2232 	{
2233 		const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2234 		deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2235 		*outputBufferPtr = 0;
2236 		flushMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2237 	}
2238 
2239 	// Create a uniform buffer (to pass uniform constants)
2240 
2241 	const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
2242 	const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
2243 
2244 	// Set the constants in the uniform buffer
2245 
2246 	const deUint32	baseValue = 127;
2247 	{
2248 		const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
2249 		deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
2250 		uniformBufferPtr[0] = baseValue;
2251 
2252 		flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
2253 	}
2254 
2255 	// Create descriptor set
2256 
2257 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2258 		DescriptorSetLayoutBuilder()
2259 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2260 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2261 		.addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2262 		.build(vk, device));
2263 
2264 	const Unique<VkDescriptorPool> descriptorPool(
2265 		DescriptorPoolBuilder()
2266 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2267 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2268 		.addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
2269 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2270 
2271 	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2272 
2273 	const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2274 	const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
2275 	const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
2276 	DescriptorSetUpdateBuilder()
2277 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
2278 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2279 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
2280 		.update(vk, device);
2281 
2282 	// Perform the computation
2283 
2284 	const Unique<VkShaderModule>	shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
2285 	const Unique<VkShaderModule>	shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
2286 
2287 	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2288 	const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
2289 	const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
2290 
2291 	const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
2292 
2293 	const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
2294 		0u, 0u,
2295 		VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
2296 		*image, subresourceRange);
2297 
2298 	const VkImageMemoryBarrier imageBarrierBetweenShaders = makeImageMemoryBarrier(
2299 		VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
2300 		VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2301 		*image, subresourceRange);
2302 
2303 	const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2304 
2305 	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2306 	const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2307 
2308 	// Start recording commands
2309 
2310 	beginCommandBuffer(vk, *cmdBuffer);
2311 
2312 	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
2313 	vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2314 
2315 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 1, &imageLayoutBarrier);
2316 
2317 	vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2318 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imageBarrierBetweenShaders);
2319 
2320 	// Switch to the second shader program
2321 	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
2322 
2323 	vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2324 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2325 
2326 	endCommandBuffer(vk, *cmdBuffer);
2327 
2328 	// Wait for completion
2329 
2330 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2331 
2332 	// Validate the results
2333 
2334 	const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2335 	invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2336 
2337 	const int		numValues = multiplyComponents(m_imageSize);
2338 	const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2339 	const deUint32	res = *bufferPtr;
2340 	deUint32		ref = 0;
2341 
2342 	for (int ndx = 0; ndx < numValues; ++ndx)
2343 		ref += baseValue + ndx;
2344 
2345 	if (res != ref)
2346 	{
2347 		std::ostringstream msg;
2348 		msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
2349 		return tcu::TestStatus::fail(msg.str());
2350 	}
2351 	return tcu::TestStatus::pass("Compute succeeded");
2352 }
2353 
2354 namespace EmptyShaderTest
2355 {
2356 
createProgram(SourceCollections & dst)2357 void createProgram (SourceCollections& dst)
2358 {
2359 	dst.glslSources.add("comp") << glu::ComputeSource(
2360 		"#version 310 es\n"
2361 		"layout (local_size_x = 1) in;\n"
2362 		"void main (void) {}\n"
2363 	);
2364 }
2365 
createTest(Context & context)2366 tcu::TestStatus createTest (Context& context)
2367 {
2368 	const DeviceInterface&	vk					= context.getDeviceInterface();
2369 	const VkDevice			device				= context.getDevice();
2370 	const VkQueue			queue				= context.getUniversalQueue();
2371 	const deUint32			queueFamilyIndex	= context.getUniversalQueueFamilyIndex();
2372 
2373 	const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0u));
2374 
2375 	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device));
2376 	const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2377 
2378 	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2379 	const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2380 
2381 	// Start recording commands
2382 
2383 	beginCommandBuffer(vk, *cmdBuffer);
2384 
2385 	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2386 
2387 	const tcu::IVec3 workGroups(1, 1, 1);
2388 	vk.cmdDispatch(*cmdBuffer, workGroups.x(), workGroups.y(), workGroups.z());
2389 
2390 	endCommandBuffer(vk, *cmdBuffer);
2391 
2392 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2393 
2394 	return tcu::TestStatus::pass("Compute succeeded");
2395 }
2396 
2397 } // EmptyShaderTest ns
2398 } // anonymous
2399 
createBasicComputeShaderTests(tcu::TestContext & testCtx)2400 tcu::TestCaseGroup* createBasicComputeShaderTests (tcu::TestContext& testCtx)
2401 {
2402 	de::MovePtr<tcu::TestCaseGroup> basicComputeTests(new tcu::TestCaseGroup(testCtx, "basic", "Basic compute tests"));
2403 
2404 	addFunctionCaseWithPrograms(basicComputeTests.get(), "empty_shader", "Shader that does nothing", EmptyShaderTest::createProgram, EmptyShaderTest::createTest);
2405 
2406 	basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,	"ubo_to_ssbo_single_invocation",	"Copy from UBO to SSBO, inverting bits",	256,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
2407 	basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,	"ubo_to_ssbo_single_group",			"Copy from UBO to SSBO, inverting bits",	1024,	tcu::IVec3(2,1,4),	tcu::IVec3(1,1,1)));
2408 	basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,	"ubo_to_ssbo_multiple_invocations",	"Copy from UBO to SSBO, inverting bits",	1024,	tcu::IVec3(1,1,1),	tcu::IVec3(2,4,1)));
2409 	basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,	"ubo_to_ssbo_multiple_groups",		"Copy from UBO to SSBO, inverting bits",	1024,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
2410 
2411 	basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx,	"copy_ssbo_single_invocation",		"Copy between SSBOs, inverting bits",	256,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
2412 	basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx,	"copy_ssbo_multiple_invocations",	"Copy between SSBOs, inverting bits",	1024,	tcu::IVec3(1,1,1),	tcu::IVec3(2,4,1)));
2413 	basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx,	"copy_ssbo_multiple_groups",		"Copy between SSBOs, inverting bits",	1024,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
2414 
2415 	basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,	"ssbo_rw_single_invocation",			"Read and write same SSBO",		256,	true,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
2416 	basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,	"ssbo_rw_multiple_groups",				"Read and write same SSBO",		1024,	true,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
2417 	basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,	"ssbo_unsized_arr_single_invocation",	"Read and write same SSBO",		256,	false,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
2418 	basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,	"ssbo_unsized_arr_multiple_groups",		"Read and write same SSBO",		1024,	false,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
2419 
2420 	basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,	"write_multiple_arr_single_invocation",			"Write to multiple SSBOs",	256,	true,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
2421 	basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,	"write_multiple_arr_multiple_groups",			"Write to multiple SSBOs",	1024,	true,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
2422 	basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,	"write_multiple_unsized_arr_single_invocation",	"Write to multiple SSBOs",	256,	false,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
2423 	basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,	"write_multiple_unsized_arr_multiple_groups",	"Write to multiple SSBOs",	1024,	false,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
2424 
2425 	basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx,	"ssbo_local_barrier_single_invocation",	"SSBO local barrier usage",	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
2426 	basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx,	"ssbo_local_barrier_single_group",		"SSBO local barrier usage",	tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
2427 	basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx,	"ssbo_local_barrier_multiple_groups",	"SSBO local barrier usage",	tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
2428 
2429 	basicComputeTests->addChild(new SSBOBarrierTest(testCtx,	"ssbo_cmd_barrier_single",		"SSBO memory barrier usage",	tcu::IVec3(1,1,1)));
2430 	basicComputeTests->addChild(new SSBOBarrierTest(testCtx,	"ssbo_cmd_barrier_multiple",	"SSBO memory barrier usage",	tcu::IVec3(11,5,7)));
2431 
2432 	basicComputeTests->addChild(new SharedVarTest(testCtx,	"shared_var_single_invocation",		"Basic shared variable usage",	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
2433 	basicComputeTests->addChild(new SharedVarTest(testCtx,	"shared_var_single_group",			"Basic shared variable usage",	tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
2434 	basicComputeTests->addChild(new SharedVarTest(testCtx,	"shared_var_multiple_invocations",	"Basic shared variable usage",	tcu::IVec3(1,1,1),	tcu::IVec3(2,5,4)));
2435 	basicComputeTests->addChild(new SharedVarTest(testCtx,	"shared_var_multiple_groups",		"Basic shared variable usage",	tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
2436 
2437 	basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,	"shared_atomic_op_single_invocation",		"Atomic operation with shared var",		tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
2438 	basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,	"shared_atomic_op_single_group",			"Atomic operation with shared var",		tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
2439 	basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,	"shared_atomic_op_multiple_invocations",	"Atomic operation with shared var",		tcu::IVec3(1,1,1),	tcu::IVec3(2,5,4)));
2440 	basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,	"shared_atomic_op_multiple_groups",			"Atomic operation with shared var",		tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
2441 
2442 	basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx,	"copy_image_to_ssbo_small",	"Image to SSBO copy",	tcu::IVec2(1,1),	tcu::IVec2(64,64)));
2443 	basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx,	"copy_image_to_ssbo_large",	"Image to SSBO copy",	tcu::IVec2(2,4),	tcu::IVec2(512,512)));
2444 
2445 	basicComputeTests->addChild(new CopySSBOToImageTest(testCtx,	"copy_ssbo_to_image_small",	"SSBO to image copy",	tcu::IVec2(1, 1),	tcu::IVec2(64, 64)));
2446 	basicComputeTests->addChild(new CopySSBOToImageTest(testCtx,	"copy_ssbo_to_image_large",	"SSBO to image copy",	tcu::IVec2(2, 4),	tcu::IVec2(512, 512)));
2447 
2448 	basicComputeTests->addChild(new ImageAtomicOpTest(testCtx,	"image_atomic_op_local_size_1",	"Atomic operation with image",	1,	tcu::IVec2(64,64)));
2449 	basicComputeTests->addChild(new ImageAtomicOpTest(testCtx,	"image_atomic_op_local_size_8",	"Atomic operation with image",	8,	tcu::IVec2(64,64)));
2450 
2451 	basicComputeTests->addChild(new ImageBarrierTest(testCtx,	"image_barrier_single",		"Image barrier",	tcu::IVec2(1,1)));
2452 	basicComputeTests->addChild(new ImageBarrierTest(testCtx,	"image_barrier_multiple",	"Image barrier",	tcu::IVec2(64,64)));
2453 
2454 	return basicComputeTests.release();
2455 }
2456 
2457 } // compute
2458 } // vkt
2459