1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file  vktImageAtomicOperationTests.cpp
21  * \brief Image atomic operation tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktImageAtomicOperationTests.hpp"
25 
26 #include "deUniquePtr.hpp"
27 #include "deStringUtil.hpp"
28 
29 #include "vktTestCaseUtil.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkImageUtil.hpp"
32 #include "vktImageTestsUtil.hpp"
33 #include "vkBuilderUtil.hpp"
34 #include "vkRef.hpp"
35 #include "vkRefUtil.hpp"
36 #include "vkTypeUtil.hpp"
37 
38 #include "tcuTextureUtil.hpp"
39 #include "tcuTexture.hpp"
40 #include "tcuVectorType.hpp"
41 
42 namespace vkt
43 {
44 namespace image
45 {
46 namespace
47 {
48 
49 using namespace vk;
50 using namespace std;
51 using de::toString;
52 
53 using tcu::TextureFormat;
54 using tcu::IVec2;
55 using tcu::IVec3;
56 using tcu::UVec3;
57 using tcu::Vec4;
58 using tcu::IVec4;
59 using tcu::UVec4;
60 using tcu::CubeFace;
61 using tcu::Texture1D;
62 using tcu::Texture2D;
63 using tcu::Texture3D;
64 using tcu::Texture2DArray;
65 using tcu::TextureCube;
66 using tcu::PixelBufferAccess;
67 using tcu::ConstPixelBufferAccess;
68 using tcu::Vector;
69 using tcu::TestContext;
70 
//! Number of shader invocations that apply the atomic operation to each pixel of the result image.
enum { NUM_INVOCATIONS_PER_PIXEL = 5u };
75 
//! Image atomic operations exercised by the tests in this file.
enum AtomicOperation
{
	ATOMIC_OPERATION_ADD		= 0,
	ATOMIC_OPERATION_MIN		= 1,
	ATOMIC_OPERATION_MAX		= 2,
	ATOMIC_OPERATION_AND		= 3,
	ATOMIC_OPERATION_OR			= 4,
	ATOMIC_OPERATION_XOR		= 5,
	ATOMIC_OPERATION_EXCHANGE	= 6,

	ATOMIC_OPERATION_LAST		= 7		//!< Count of the operations above; the helpers in this file do not accept it as an operation.
};
88 
getCoordStr(const ImageType imageType,const std::string & x,const std::string & y,const std::string & z)89 static string getCoordStr (const ImageType		imageType,
90 						   const std::string&	x,
91 						   const std::string&	y,
92 						   const std::string&	z)
93 {
94 	switch (imageType)
95 	{
96 		case IMAGE_TYPE_1D:
97 		case IMAGE_TYPE_BUFFER:
98 			return x;
99 		case IMAGE_TYPE_1D_ARRAY:
100 		case IMAGE_TYPE_2D:
101 			return string("ivec2(" + x + "," + y + ")");
102 		case IMAGE_TYPE_2D_ARRAY:
103 		case IMAGE_TYPE_3D:
104 		case IMAGE_TYPE_CUBE:
105 		case IMAGE_TYPE_CUBE_ARRAY:
106 			return string("ivec3(" + x + "," + y + "," + z + ")");
107 		default:
108 			DE_ASSERT(false);
109 			return DE_NULL;
110 	}
111 }
112 
//! Builds the GLSL expression for the argument passed to the atomic function by one invocation.
//! The expressions mirror the host-side formulas in getAtomicFuncArgument().
//!
//! \param op			Atomic operation being tested.
//! \param x			GLSL expression for the invocation's x coordinate.
//! \param y			GLSL expression for the invocation's y coordinate.
//! \param z			GLSL expression for the invocation's z coordinate.
//! \param gridSize		Size of the invocation grid; only x() and y() are used, and only for the exchange operation.
//! \return				Parenthesized GLSL expression; an empty string for an unknown operation.
static string getAtomicFuncArgumentShaderStr (const AtomicOperation	op,
											  const string&			x,
											  const string&			y,
											  const string&			z,
											  const IVec3&			gridSize)
{
	switch (op)
	{
		case ATOMIC_OPERATION_ADD:
		case ATOMIC_OPERATION_MIN:
		case ATOMIC_OPERATION_MAX:
		case ATOMIC_OPERATION_AND:
		case ATOMIC_OPERATION_OR:
		case ATOMIC_OPERATION_XOR:
			// Sum of squared coordinates.
			return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
		case ATOMIC_OPERATION_EXCHANGE:
			// Linearized invocation index.
			return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
		default:
			DE_ASSERT(false);
			// \note Returning DE_NULL here would construct a std::string from a null pointer,
			//		 which is undefined behaviour whenever the assert above does not abort.
			return string();
	}
}
135 
getAtomicOperationCaseName(const AtomicOperation op)136 static string getAtomicOperationCaseName (const AtomicOperation op)
137 {
138 	switch (op)
139 	{
140 		case ATOMIC_OPERATION_ADD:			return string("add");
141 		case ATOMIC_OPERATION_MIN:			return string("min");
142 		case ATOMIC_OPERATION_MAX:			return string("max");
143 		case ATOMIC_OPERATION_AND:			return string("and");
144 		case ATOMIC_OPERATION_OR:			return string("or");
145 		case ATOMIC_OPERATION_XOR:			return string("xor");
146 		case ATOMIC_OPERATION_EXCHANGE:		return string("exchange");
147 		default:
148 			DE_ASSERT(false);
149 			return DE_NULL;
150 	}
151 }
152 
getAtomicOperationShaderFuncName(const AtomicOperation op)153 static string getAtomicOperationShaderFuncName (const AtomicOperation op)
154 {
155 	switch (op)
156 	{
157 		case ATOMIC_OPERATION_ADD:			return string("imageAtomicAdd");
158 		case ATOMIC_OPERATION_MIN:			return string("imageAtomicMin");
159 		case ATOMIC_OPERATION_MAX:			return string("imageAtomicMax");
160 		case ATOMIC_OPERATION_AND:			return string("imageAtomicAnd");
161 		case ATOMIC_OPERATION_OR:			return string("imageAtomicOr");
162 		case ATOMIC_OPERATION_XOR:			return string("imageAtomicXor");
163 		case ATOMIC_OPERATION_EXCHANGE:		return string("imageAtomicExchange");
164 		default:
165 			DE_ASSERT(false);
166 			return DE_NULL;
167 	}
168 }
169 
getOperationInitialValue(const AtomicOperation op)170 static deInt32 getOperationInitialValue (const AtomicOperation op)
171 {
172 	switch (op)
173 	{
174 		// \note 18 is just an arbitrary small nonzero value.
175 		case ATOMIC_OPERATION_ADD:			return 18;
176 		case ATOMIC_OPERATION_MIN:			return (1 << 15) - 1;
177 		case ATOMIC_OPERATION_MAX:			return 18;
178 		case ATOMIC_OPERATION_AND:			return (1 << 15) - 1;
179 		case ATOMIC_OPERATION_OR:			return 18;
180 		case ATOMIC_OPERATION_XOR:			return 18;
181 		case ATOMIC_OPERATION_EXCHANGE:		return 18;
182 		default:
183 			DE_ASSERT(false);
184 			return -1;
185 	}
186 }
187 
getAtomicFuncArgument(const AtomicOperation op,const IVec3 & invocationID,const IVec3 & gridSize)188 static deInt32 getAtomicFuncArgument (const AtomicOperation op, const IVec3& invocationID, const IVec3& gridSize)
189 {
190 	const int x = invocationID.x();
191 	const int y = invocationID.y();
192 	const int z = invocationID.z();
193 
194 	switch (op)
195 	{
196 		// \note Fall-throughs.
197 		case ATOMIC_OPERATION_ADD:
198 		case ATOMIC_OPERATION_MIN:
199 		case ATOMIC_OPERATION_MAX:
200 		case ATOMIC_OPERATION_AND:
201 		case ATOMIC_OPERATION_OR:
202 		case ATOMIC_OPERATION_XOR:
203 			return x*x + y*y + z*z;
204 		case ATOMIC_OPERATION_EXCHANGE:
205 			return (z*gridSize.x() + x)*gridSize.y() + y;
206 		default:
207 			DE_ASSERT(false);
208 			return -1;
209 	}
210 }
211 
212 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried (i.e. is both commutative and associative).
isOrderIndependentAtomicOperation(const AtomicOperation op)213 static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
214 {
215 	return	op == ATOMIC_OPERATION_ADD ||
216 			op == ATOMIC_OPERATION_MIN ||
217 			op == ATOMIC_OPERATION_MAX ||
218 			op == ATOMIC_OPERATION_AND ||
219 			op == ATOMIC_OPERATION_OR ||
220 			op == ATOMIC_OPERATION_XOR;
221 }
222 
223 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
computeBinaryAtomicOperationResult(const AtomicOperation op,const deInt32 a,const deInt32 b)224 static deInt32 computeBinaryAtomicOperationResult (const AtomicOperation op, const deInt32 a, const deInt32 b)
225 {
226 	switch (op)
227 	{
228 		case ATOMIC_OPERATION_ADD:			return a + b;
229 		case ATOMIC_OPERATION_MIN:			return de::min(a, b);
230 		case ATOMIC_OPERATION_MAX:			return de::max(a, b);
231 		case ATOMIC_OPERATION_AND:			return a & b;
232 		case ATOMIC_OPERATION_OR:			return a | b;
233 		case ATOMIC_OPERATION_XOR:			return a ^ b;
234 		case ATOMIC_OPERATION_EXCHANGE:		return b;
235 		default:
236 			DE_ASSERT(false);
237 			return -1;
238 	}
239 }
240 
241 class BinaryAtomicEndResultCase : public vkt::TestCase
242 {
243 public:
244 								BinaryAtomicEndResultCase  (tcu::TestContext&			testCtx,
245 															const string&				name,
246 															const string&				description,
247 															const ImageType				imageType,
248 															const tcu::UVec3&			imageSize,
249 															const tcu::TextureFormat&	format,
250 															const AtomicOperation		operation,
251 															const glu::GLSLVersion		glslVersion);
252 
253 	void						initPrograms			   (SourceCollections&			sourceCollections) const;
254 	TestInstance*				createInstance			   (Context&					context) const;
255 private:
256 
257 	const ImageType				m_imageType;
258 	const tcu::UVec3			m_imageSize;
259 	const tcu::TextureFormat	m_format;
260 	const AtomicOperation		m_operation;
261 	const glu::GLSLVersion		m_glslVersion;
262 };
263 
BinaryAtomicEndResultCase(tcu::TestContext & testCtx,const string & name,const string & description,const ImageType imageType,const tcu::UVec3 & imageSize,const tcu::TextureFormat & format,const AtomicOperation operation,const glu::GLSLVersion glslVersion)264 BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext&			testCtx,
265 													  const string&				name,
266 													  const string&				description,
267 													  const ImageType			imageType,
268 													  const tcu::UVec3&			imageSize,
269 													  const tcu::TextureFormat&	format,
270 													  const AtomicOperation		operation,
271 													  const glu::GLSLVersion	glslVersion)
272 	: TestCase		(testCtx, name, description)
273 	, m_imageType	(imageType)
274 	, m_imageSize	(imageSize)
275 	, m_format		(format)
276 	, m_operation	(operation)
277 	, m_glslVersion	(glslVersion)
278 {
279 }
280 
initPrograms(SourceCollections & sourceCollections) const281 void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
282 {
283 	const string	versionDecl				= glu::getGLSLVersionDeclaration(m_glslVersion);
284 
285 	const bool		uintFormat				= isUintFormat(mapTextureFormat(m_format));
286 	const bool		intFormat				= isIntFormat(mapTextureFormat(m_format));
287 	const UVec3		gridSize				= getShaderGridSize(m_imageType, m_imageSize);
288 	const string	atomicCoord				= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
289 
290 	const string	atomicArgExpr			= (uintFormat ? "uint" : intFormat ? "int" : "float")
291 											+ getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
292 
293 	const string	atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + ", " + atomicArgExpr + ")";
294 	const string	shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
295 	const string	shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);
296 
297 	string source = versionDecl + "\n"
298 					"precision highp " + shaderImageTypeStr + ";\n"
299 					"\n"
300 					"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
301 					"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
302 					"\n"
303 					"void main (void)\n"
304 					"{\n"
305 					"	int gx = int(gl_GlobalInvocationID.x);\n"
306 					"	int gy = int(gl_GlobalInvocationID.y);\n"
307 					"	int gz = int(gl_GlobalInvocationID.z);\n"
308 					"	" + atomicInvocation + ";\n"
309 					"}\n";
310 
311 	sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
312 }
313 
314 class BinaryAtomicIntermValuesCase : public vkt::TestCase
315 {
316 public:
317 								BinaryAtomicIntermValuesCase   (tcu::TestContext&			testCtx,
318 																const string&				name,
319 																const string&				description,
320 																const ImageType				imageType,
321 																const tcu::UVec3&			imageSize,
322 																const tcu::TextureFormat&	format,
323 																const AtomicOperation		operation,
324 																const glu::GLSLVersion		glslVersion);
325 
326 	void						initPrograms				   (SourceCollections&			sourceCollections) const;
327 	TestInstance*				createInstance				   (Context&					context) const;
328 private:
329 
330 	const ImageType				m_imageType;
331 	const tcu::UVec3			m_imageSize;
332 	const tcu::TextureFormat	m_format;
333 	const AtomicOperation		m_operation;
334 	const glu::GLSLVersion		m_glslVersion;
335 };
336 
BinaryAtomicIntermValuesCase(TestContext & testCtx,const string & name,const string & description,const ImageType imageType,const tcu::UVec3 & imageSize,const TextureFormat & format,const AtomicOperation operation,const glu::GLSLVersion glslVersion)337 BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext&			testCtx,
338 															const string&			name,
339 															const string&			description,
340 															const ImageType			imageType,
341 															const tcu::UVec3&		imageSize,
342 															const TextureFormat&	format,
343 															const AtomicOperation	operation,
344 															const glu::GLSLVersion	glslVersion)
345 	: TestCase		(testCtx, name, description)
346 	, m_imageType	(imageType)
347 	, m_imageSize	(imageSize)
348 	, m_format		(format)
349 	, m_operation	(operation)
350 	, m_glslVersion	(glslVersion)
351 {
352 }
353 
initPrograms(SourceCollections & sourceCollections) const354 void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
355 {
356 	const string	versionDecl				= glu::getGLSLVersionDeclaration(m_glslVersion);
357 
358 	const bool		uintFormat				= isUintFormat(mapTextureFormat(m_format));
359 	const bool		intFormat				= isIntFormat(mapTextureFormat(m_format));
360 	const string	colorVecTypeName		= string(uintFormat ? "u" : intFormat ? "i" : "") + "vec4";
361 	const UVec3		gridSize				= getShaderGridSize(m_imageType, m_imageSize);
362 	const string	atomicCoord				= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
363 	const string	invocationCoord			= getCoordStr(m_imageType, "gx", "gy", "gz");
364 	const string	atomicArgExpr			= (uintFormat ? "uint" : intFormat ? "int" : "float")
365 											+ getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
366 
367 	const string	atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + ", " + atomicArgExpr + ")";
368 	const string	shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
369 	const string	shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);
370 
371 	string source = versionDecl + "\n"
372 					"precision highp " + shaderImageTypeStr + ";\n"
373 					"\n"
374 					"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
375 					"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
376 					"layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
377 					"\n"
378 					"void main (void)\n"
379 					"{\n"
380 					"	int gx = int(gl_GlobalInvocationID.x);\n"
381 					"	int gy = int(gl_GlobalInvocationID.y);\n"
382 					"	int gz = int(gl_GlobalInvocationID.z);\n"
383 					"	imageStore(u_intermValuesImage, " + invocationCoord + ", " + colorVecTypeName + "(" + atomicInvocation + "));\n"
384 					"}\n";
385 
386 	sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
387 }
388 
389 class BinaryAtomicInstanceBase : public vkt::TestInstance
390 {
391 public:
392 
393 								BinaryAtomicInstanceBase (Context&						context,
394 														  const string&					name,
395 														  const ImageType				imageType,
396 														  const tcu::UVec3&				imageSize,
397 														  const TextureFormat&			format,
398 														  const AtomicOperation			operation);
399 
400 	tcu::TestStatus				iterate					 (void);
401 
402 	virtual deUint32			getOutputBufferSize		 (void) const = 0;
403 
404 	virtual void				prepareResources		 (void) = 0;
405 	virtual void				prepareDescriptors		 (void) = 0;
406 
407 	virtual void				commandsBeforeCompute	 (const VkCommandBuffer			cmdBuffer) const = 0;
408 	virtual void				commandsAfterCompute	 (const VkCommandBuffer			cmdBuffer) const = 0;
409 
410 	virtual bool				verifyResult			 (Allocation&					outputBufferAllocation) const = 0;
411 
412 protected:
413 	const string				m_name;
414 	const ImageType				m_imageType;
415 	const tcu::UVec3			m_imageSize;
416 	const TextureFormat			m_format;
417 	const AtomicOperation		m_operation;
418 
419 	de::MovePtr<Buffer>			m_outputBuffer;
420 	Move<VkDescriptorPool>		m_descriptorPool;
421 	Move<VkDescriptorSetLayout>	m_descriptorSetLayout;
422 	Move<VkDescriptorSet>		m_descriptorSet;
423 	de::MovePtr<Image>			m_resultImage;
424 	Move<VkImageView>			m_resultImageView;
425 };
426 
BinaryAtomicInstanceBase(Context & context,const string & name,const ImageType imageType,const tcu::UVec3 & imageSize,const TextureFormat & format,const AtomicOperation operation)427 BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context&				context,
428 													const string&			name,
429 													const ImageType			imageType,
430 													const tcu::UVec3&		imageSize,
431 													const TextureFormat&	format,
432 													const AtomicOperation	operation)
433 	: vkt::TestInstance	(context)
434 	, m_name			(name)
435 	, m_imageType		(imageType)
436 	, m_imageSize		(imageSize)
437 	, m_format			(format)
438 	, m_operation		(operation)
439 {
440 }
441 
iterate(void)442 tcu::TestStatus	BinaryAtomicInstanceBase::iterate (void)
443 {
444 	const VkDevice			device				= m_context.getDevice();
445 	const DeviceInterface&	deviceInterface		= m_context.getDeviceInterface();
446 	const VkQueue			queue				= m_context.getUniversalQueue();
447 	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
448 	Allocator&				allocator			= m_context.getDefaultAllocator();
449 	const VkDeviceSize		imageSizeInBytes	= tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
450 	const VkDeviceSize		outBuffSizeInBytes	= getOutputBufferSize();
451 
452 	const VkImageCreateInfo imageParams	=
453 	{
454 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,					// VkStructureType			sType;
455 		DE_NULL,												// const void*				pNext;
456 		(m_imageType == IMAGE_TYPE_CUBE ||
457 		 m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
458 		 (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
459 		 (VkImageCreateFlags)0u),								// VkImageCreateFlags		flags;
460 		mapImageType(m_imageType),								// VkImageType				imageType;
461 		mapTextureFormat(m_format),								// VkFormat					format;
462 		makeExtent3D(getLayerSize(m_imageType, m_imageSize)),	// VkExtent3D				extent;
463 		1u,														// deUint32					mipLevels;
464 		getNumLayers(m_imageType, m_imageSize),					// deUint32					arrayLayers;
465 		VK_SAMPLE_COUNT_1_BIT,									// VkSampleCountFlagBits	samples;
466 		VK_IMAGE_TILING_OPTIMAL,								// VkImageTiling			tiling;
467 		VK_IMAGE_USAGE_STORAGE_BIT |
468 		VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
469 		VK_IMAGE_USAGE_TRANSFER_DST_BIT,						// VkImageUsageFlags		usage;
470 		VK_SHARING_MODE_EXCLUSIVE,								// VkSharingMode			sharingMode;
471 		0u,														// deUint32					queueFamilyIndexCount;
472 		DE_NULL,												// const deUint32*			pQueueFamilyIndices;
473 		VK_IMAGE_LAYOUT_UNDEFINED,								// VkImageLayout			initialLayout;
474 	};
475 
476 	//Create the image that is going to store results of atomic operations
477 	m_resultImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));
478 
479 	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
480 
481 	m_resultImageView = makeImageView(deviceInterface, device, m_resultImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);
482 
483 	//Prepare the buffer with the initial data for the image
484 	const Buffer inputBuffer(deviceInterface, device, allocator, makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
485 
486 	Allocation& inputBufferAllocation = inputBuffer.getAllocation();
487 
488 	//Prepare the initial data for the image
489 	const tcu::IVec4 initialValue(getOperationInitialValue(m_operation));
490 
491 	tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
492 	tcu::PixelBufferAccess inputPixelBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), inputBufferAllocation.getHostPtr());
493 
494 	for (deUint32 z = 0; z < gridSize.z(); z++)
495 	for (deUint32 y = 0; y < gridSize.y(); y++)
496 	for (deUint32 x = 0; x < gridSize.x(); x++)
497 	{
498 		inputPixelBuffer.setPixel(initialValue, x, y, z);
499 	}
500 
501 	flushMappedMemoryRange(deviceInterface, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), imageSizeInBytes);
502 
503 	// Create a buffer to store shader output copied from result image
504 	m_outputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface, device, allocator, makeBufferCreateInfo(outBuffSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));
505 
506 	prepareResources();
507 
508 	prepareDescriptors();
509 
510 	// Create pipeline
511 	const Unique<VkShaderModule>	shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
512 	const Unique<VkPipelineLayout>	pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
513 	const Unique<VkPipeline>		pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));
514 
515 	// Create command buffer
516 	const Unique<VkCommandPool>		cmdPool(makeCommandPool(deviceInterface, device, queueFamilyIndex));
517 	const Unique<VkCommandBuffer>	cmdBuffer(makeCommandBuffer(deviceInterface, device, *cmdPool));
518 
519 	beginCommandBuffer(deviceInterface, *cmdBuffer);
520 
521 	deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
522 	deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
523 
524 	const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier	=
525 		makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT,
526 								VK_ACCESS_TRANSFER_READ_BIT,
527 								*inputBuffer,
528 								0ull,
529 								imageSizeInBytes);
530 
531 	const VkImageMemoryBarrier	resultImagePreCopyBarrier =
532 		makeImageMemoryBarrier(	0u,
533 								VK_ACCESS_TRANSFER_WRITE_BIT,
534 								VK_IMAGE_LAYOUT_UNDEFINED,
535 								VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
536 								m_resultImage->get(),
537 								subresourceRange);
538 
539 	deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 1u, &inputBufferPostHostWriteBarrier, 1u, &resultImagePreCopyBarrier);
540 
541 	const VkBufferImageCopy		bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize));
542 
543 	deviceInterface.cmdCopyBufferToImage(*cmdBuffer, *inputBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, &bufferImageCopyParams);
544 
545 	const VkImageMemoryBarrier	resultImagePostCopyBarrier	=
546 		makeImageMemoryBarrier(	VK_ACCESS_TRANSFER_WRITE_BIT,
547 								VK_ACCESS_SHADER_READ_BIT,
548 								VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
549 								VK_IMAGE_LAYOUT_GENERAL,
550 								m_resultImage->get(),
551 								subresourceRange);
552 
553 	deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &resultImagePostCopyBarrier);
554 
555 	commandsBeforeCompute(*cmdBuffer);
556 
557 	deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
558 
559 	commandsAfterCompute(*cmdBuffer);
560 
561 	const VkBufferMemoryBarrier	outputBufferPreHostReadBarrier
562 		= makeBufferMemoryBarrier(	VK_ACCESS_TRANSFER_WRITE_BIT,
563 									VK_ACCESS_HOST_READ_BIT,
564 									m_outputBuffer->get(),
565 									0ull,
566 									outBuffSizeInBytes);
567 
568 	deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, 0u, DE_NULL, 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
569 
570 	endCommandBuffer(deviceInterface, *cmdBuffer);
571 
572 	submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer);
573 
574 	Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();
575 
576 	invalidateMappedMemoryRange(deviceInterface, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outBuffSizeInBytes);
577 
578 	if (verifyResult(outputBufferAllocation))
579 		return tcu::TestStatus::pass("Comparison succeeded");
580 	else
581 		return tcu::TestStatus::fail("Comparison failed");
582 }
583 
584 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
585 {
586 public:
587 
BinaryAtomicEndResultInstance(Context & context,const string & name,const ImageType imageType,const tcu::UVec3 & imageSize,const TextureFormat & format,const AtomicOperation operation)588 						BinaryAtomicEndResultInstance  (Context&				context,
589 														const string&			name,
590 														const ImageType			imageType,
591 														const tcu::UVec3&		imageSize,
592 														const TextureFormat&	format,
593 														const AtomicOperation	operation)
594 							: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}
595 
596 	virtual deUint32	getOutputBufferSize			   (void) const;
597 
prepareResources(void)598 	virtual void		prepareResources			   (void) {}
599 	virtual void		prepareDescriptors			   (void);
600 
commandsBeforeCompute(const VkCommandBuffer) const601 	virtual void		commandsBeforeCompute		   (const VkCommandBuffer) const {}
602 	virtual void		commandsAfterCompute		   (const VkCommandBuffer	cmdBuffer) const;
603 
604 	virtual bool		verifyResult				   (Allocation&				outputBufferAllocation) const;
605 };
606 
getOutputBufferSize(void) const607 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
608 {
609 	return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
610 }
611 
prepareDescriptors(void)612 void BinaryAtomicEndResultInstance::prepareDescriptors (void)
613 {
614 	const VkDevice			device			= m_context.getDevice();
615 	const DeviceInterface&	deviceInterface = m_context.getDeviceInterface();
616 
617 	m_descriptorSetLayout =
618 		DescriptorSetLayoutBuilder()
619 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
620 		.build(deviceInterface, device);
621 
622 	m_descriptorPool =
623 		DescriptorPoolBuilder()
624 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
625 		.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
626 
627 	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
628 
629 	const VkDescriptorImageInfo	descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
630 
631 	DescriptorSetUpdateBuilder()
632 		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
633 		.update(deviceInterface, device);
634 }
635 
commandsAfterCompute(const VkCommandBuffer cmdBuffer) const636 void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer	cmdBuffer) const
637 {
638 	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
639 	const VkImageSubresourceRange	subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
640 
641 	const VkImageMemoryBarrier	resultImagePostDispatchBarrier =
642 		makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
643 								VK_ACCESS_TRANSFER_READ_BIT,
644 								VK_IMAGE_LAYOUT_GENERAL,
645 								VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
646 								m_resultImage->get(),
647 								subresourceRange);
648 
649 	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &resultImagePostDispatchBarrier);
650 
651 	const VkBufferImageCopy		bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize));
652 
653 	deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
654 }
655 
verifyResult(Allocation & outputBufferAllocation) const656 bool BinaryAtomicEndResultInstance::verifyResult (Allocation& outputBufferAllocation) const
657 {
658 	const UVec3	gridSize			= getShaderGridSize(m_imageType, m_imageSize);
659 	const IVec3 extendedGridSize	= IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
660 
661 	tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());
662 
663 	for (deInt32 z = 0; z < resultBuffer.getDepth();  z++)
664 	for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
665 	for (deInt32 x = 0; x < resultBuffer.getWidth();  x++)
666 	{
667 		deInt32 resultValue = resultBuffer.getPixelInt(x, y, z).x();
668 
669 		if (isOrderIndependentAtomicOperation(m_operation))
670 		{
671 			deInt32 reference = getOperationInitialValue(m_operation);
672 
673 			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
674 			{
675 				const IVec3 gid(x + i*gridSize.x(), y, z);
676 				reference = computeBinaryAtomicOperationResult(m_operation, reference, getAtomicFuncArgument(m_operation, gid, extendedGridSize));
677 			}
678 
679 			if (resultValue != reference)
680 				return false;
681 		}
682 		else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
683 		{
684 			// Check if the end result equals one of the atomic args.
685 			bool matchFound = false;
686 
687 			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
688 			{
689 				const IVec3 gid(x + i*gridSize.x(), y, z);
690 				matchFound = (resultValue == getAtomicFuncArgument(m_operation, gid, extendedGridSize));
691 			}
692 
693 			if (!matchFound)
694 				return false;
695 		}
696 		else
697 			DE_ASSERT(false);
698 	}
699 	return true;
700 }
701 
createInstance(Context & context) const702 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
703 {
704 	return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
705 }
706 
707 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
708 {
709 public:
710 
BinaryAtomicIntermValuesInstance(Context & context,const string & name,const ImageType imageType,const tcu::UVec3 & imageSize,const TextureFormat & format,const AtomicOperation operation)711 						BinaryAtomicIntermValuesInstance   (Context&				context,
712 															const string&			name,
713 															const ImageType			imageType,
714 															const tcu::UVec3&		imageSize,
715 															const TextureFormat&	format,
716 															const AtomicOperation	operation)
717 							: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}
718 
719 	virtual deUint32	getOutputBufferSize				   (void) const;
720 
721 	virtual void		prepareResources				   (void);
722 	virtual void		prepareDescriptors				   (void);
723 
724 	virtual void		commandsBeforeCompute			   (const VkCommandBuffer	cmdBuffer) const;
725 	virtual void		commandsAfterCompute			   (const VkCommandBuffer	cmdBuffer) const;
726 
727 	virtual bool		verifyResult					   (Allocation&				outputBufferAllocation) const;
728 
729 protected:
730 
731 	bool				verifyRecursive					   (const deInt32			index,
732 															const deInt32			valueSoFar,
733 															bool					argsUsed[NUM_INVOCATIONS_PER_PIXEL],
734 															const deInt32			atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
735 															const deInt32			resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
736 	de::MovePtr<Image>	m_intermResultsImage;
737 	Move<VkImageView>	m_intermResultsImageView;
738 };
739 
getOutputBufferSize(void) const740 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
741 {
742 	return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
743 }
744 
prepareResources(void)745 void BinaryAtomicIntermValuesInstance::prepareResources (void)
746 {
747 	const VkDevice			device			= m_context.getDevice();
748 	const DeviceInterface&	deviceInterface = m_context.getDeviceInterface();
749 	Allocator&				allocator		= m_context.getDefaultAllocator();
750 
751 	const UVec3 layerSize			= getLayerSize(m_imageType, m_imageSize);
752 	const bool  isCubeBasedImage	= (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
753 	const UVec3 extendedLayerSize	= isCubeBasedImage	? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
754 														: UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
755 
756 	const VkImageCreateInfo imageParams =
757 	{
758 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,		// VkStructureType			sType;
759 		DE_NULL,									// const void*				pNext;
760 		(m_imageType == IMAGE_TYPE_CUBE ||
761 		 m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
762 		 (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
763 		 (VkImageCreateFlags)0u),					// VkImageCreateFlags		flags;
764 		mapImageType(m_imageType),					// VkImageType				imageType;
765 		mapTextureFormat(m_format),					// VkFormat					format;
766 		makeExtent3D(extendedLayerSize),			// VkExtent3D				extent;
767 		1u,											// deUint32					mipLevels;
768 		getNumLayers(m_imageType, m_imageSize),		// deUint32					arrayLayers;
769 		VK_SAMPLE_COUNT_1_BIT,						// VkSampleCountFlagBits	samples;
770 		VK_IMAGE_TILING_OPTIMAL,					// VkImageTiling			tiling;
771 		VK_IMAGE_USAGE_STORAGE_BIT |
772 		VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
773 		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode			sharingMode;
774 		0u,											// deUint32					queueFamilyIndexCount;
775 		DE_NULL,									// const deUint32*			pQueueFamilyIndices;
776 		VK_IMAGE_LAYOUT_UNDEFINED,					// VkImageLayout			initialLayout;
777 	};
778 
779 	m_intermResultsImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));
780 
781 	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
782 
783 	m_intermResultsImageView = makeImageView(deviceInterface, device, m_intermResultsImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);
784 }
785 
prepareDescriptors(void)786 void BinaryAtomicIntermValuesInstance::prepareDescriptors (void)
787 {
788 	const VkDevice			device			= m_context.getDevice();
789 	const DeviceInterface&	deviceInterface = m_context.getDeviceInterface();
790 
791 	m_descriptorSetLayout =
792 		DescriptorSetLayoutBuilder()
793 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
794 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
795 		.build(deviceInterface, device);
796 
797 	m_descriptorPool =
798 		DescriptorPoolBuilder()
799 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2u)
800 		.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
801 
802 	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
803 
804 	const VkDescriptorImageInfo	descResultImageInfo			= makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
805 	const VkDescriptorImageInfo	descIntermResultsImageInfo	= makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
806 
807 	DescriptorSetUpdateBuilder()
808 		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
809 		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descIntermResultsImageInfo)
810 		.update(deviceInterface, device);
811 }
812 
commandsBeforeCompute(const VkCommandBuffer cmdBuffer) const813 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
814 {
815 	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
816 	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
817 
818 	const VkImageMemoryBarrier	imagePreDispatchBarrier =
819 		makeImageMemoryBarrier(	0u,
820 								VK_ACCESS_SHADER_WRITE_BIT,
821 								VK_IMAGE_LAYOUT_UNDEFINED,
822 								VK_IMAGE_LAYOUT_GENERAL,
823 								m_intermResultsImage->get(),
824 								subresourceRange);
825 
826 	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
827 }
828 
commandsAfterCompute(const VkCommandBuffer cmdBuffer) const829 void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer) const
830 {
831 	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
832 	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
833 
834 	const VkImageMemoryBarrier	imagePostDispatchBarrier =
835 		makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
836 								VK_ACCESS_TRANSFER_READ_BIT,
837 								VK_IMAGE_LAYOUT_GENERAL,
838 								VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
839 								m_intermResultsImage->get(),
840 								subresourceRange);
841 
842 	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);
843 
844 	const UVec3					layerSize				= getLayerSize(m_imageType, m_imageSize);
845 	const UVec3					extendedLayerSize		= UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
846 	const VkBufferImageCopy		bufferImageCopyParams	= makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
847 
848 	deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
849 }
850 
verifyResult(Allocation & outputBufferAllocation) const851 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation&	outputBufferAllocation) const
852 {
853 	const UVec3	gridSize		 = getShaderGridSize(m_imageType, m_imageSize);
854 	const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
855 
856 	tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
857 
858 	for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
859 	for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
860 	for (deUint32 x = 0; x < gridSize.x(); x++)
861 	{
862 		deInt32 resultValues[NUM_INVOCATIONS_PER_PIXEL];
863 		deInt32 atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
864 		bool	argsUsed[NUM_INVOCATIONS_PER_PIXEL];
865 
866 		for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
867 		{
868 			IVec3 gid(x + i*gridSize.x(), y, z);
869 
870 			resultValues[i] = resultBuffer.getPixelInt(gid.x(), gid.y(), gid.z()).x();
871 			atomicArgs[i]	= getAtomicFuncArgument(m_operation, gid, extendedGridSize);
872 			argsUsed[i]		= false;
873 		}
874 
875 		// Verify that the return values form a valid sequence.
876 		if (!verifyRecursive(0, getOperationInitialValue(m_operation), argsUsed, atomicArgs, resultValues))
877 		{
878 			return false;
879 		}
880 	}
881 
882 	return true;
883 }
884 
verifyRecursive(const deInt32 index,const deInt32 valueSoFar,bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],const deInt32 atomicArgs[NUM_INVOCATIONS_PER_PIXEL],const deInt32 resultValues[NUM_INVOCATIONS_PER_PIXEL]) const885 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32	index,
886 														const deInt32	valueSoFar,
887 														bool			argsUsed[NUM_INVOCATIONS_PER_PIXEL],
888 														const deInt32	atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
889 														const deInt32	resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
890 {
891 	if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
892 		return true;
893 
894 	for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
895 	{
896 		if (!argsUsed[i] && resultValues[i] == valueSoFar)
897 		{
898 			argsUsed[i] = true;
899 
900 			if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
901 			{
902 				return true;
903 			}
904 
905 			argsUsed[i] = false;
906 		}
907 	}
908 
909 	return false;
910 }
911 
createInstance(Context & context) const912 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
913 {
914 	return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
915 }
916 
917 } // anonymous ns
918 
createImageAtomicOperationTests(tcu::TestContext & testCtx)919 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
920 {
921 	de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));
922 
923 	struct ImageParams
924 	{
925 		ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
926 			: m_imageType	(imageType)
927 			, m_imageSize	(imageSize)
928 		{
929 		}
930 		const ImageType		m_imageType;
931 		const tcu::UVec3	m_imageSize;
932 	};
933 
934 	static const ImageParams imageParamsArray[] =
935 	{
936 		ImageParams(IMAGE_TYPE_1D,			tcu::UVec3(64u, 1u, 1u)),
937 		ImageParams(IMAGE_TYPE_1D_ARRAY,	tcu::UVec3(64u, 1u, 8u)),
938 		ImageParams(IMAGE_TYPE_2D,			tcu::UVec3(64u, 64u, 1u)),
939 		ImageParams(IMAGE_TYPE_2D_ARRAY,	tcu::UVec3(64u, 64u, 8u)),
940 		ImageParams(IMAGE_TYPE_3D,			tcu::UVec3(64u, 64u, 8u)),
941 		ImageParams(IMAGE_TYPE_CUBE,		tcu::UVec3(64u, 64u, 1u)),
942 		ImageParams(IMAGE_TYPE_CUBE_ARRAY,	tcu::UVec3(64u, 64u, 2u))
943 	};
944 
945 	static const tcu::TextureFormat formats[] =
946 	{
947 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
948 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32)
949 	};
950 
951 	for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
952 	{
953 		const AtomicOperation operation = (AtomicOperation)operationI;
954 
955 		de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));
956 
957 		for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
958 		{
959 			const ImageType	 imageType = imageParamsArray[imageTypeNdx].m_imageType;
960 			const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
961 
962 			for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
963 			{
964 				const TextureFormat&	format		= formats[formatNdx];
965 				const std::string		formatName	= getShaderImageFormatQualifier(format);
966 
967 				//!< Atomic case checks the end result of the operations, and not the intermediate return values
968 				const string caseEndResult = getImageTypeName(imageType) + "_" + formatName + "_end_result";
969 				operationGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));
970 
971 				//!< Atomic case checks the return values of the atomic function and not the end result.
972 				const string caseIntermValues = getImageTypeName(imageType) + "_" + formatName + "_intermediate_values";
973 				operationGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));
974 			}
975 		}
976 
977 		imageAtomicOperationsTests->addChild(operationGroup.release());
978 	}
979 
980 	return imageAtomicOperationsTests.release();
981 }
982 
983 } // image
984 } // vkt
985