1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file  vktImageAtomicOperationTests.cpp
21  * \brief Image atomic operation tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktImageAtomicOperationTests.hpp"
25 
26 #include "deUniquePtr.hpp"
27 #include "deStringUtil.hpp"
28 
29 #include "vktTestCaseUtil.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkImageUtil.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vktImageTestsUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkRef.hpp"
36 #include "vkRefUtil.hpp"
37 #include "vkTypeUtil.hpp"
38 #include "vkCmdUtil.hpp"
39 
40 #include "tcuTextureUtil.hpp"
41 #include "tcuTexture.hpp"
42 #include "tcuVectorType.hpp"
43 
44 namespace vkt
45 {
46 namespace image
47 {
48 namespace
49 {
50 
51 using namespace vk;
52 using namespace std;
53 using de::toString;
54 
55 using tcu::TextureFormat;
56 using tcu::IVec2;
57 using tcu::IVec3;
58 using tcu::UVec3;
59 using tcu::Vec4;
60 using tcu::IVec4;
61 using tcu::UVec4;
62 using tcu::CubeFace;
63 using tcu::Texture1D;
64 using tcu::Texture2D;
65 using tcu::Texture3D;
66 using tcu::Texture2DArray;
67 using tcu::TextureCube;
68 using tcu::PixelBufferAccess;
69 using tcu::ConstPixelBufferAccess;
70 using tcu::Vector;
71 using tcu::TestContext;
72 
// Number of compute invocations that target the same pixel; the dispatch is
// extended along X by this factor, and invocations alias pixels via "gx % width".
enum
{
	NUM_INVOCATIONS_PER_PIXEL = 5u
};
77 
//! Atomic operations exercised by the tests; each maps to one GLSL imageAtomic*
//! function (see getAtomicOperationShaderFuncName()).
enum AtomicOperation
{
	ATOMIC_OPERATION_ADD = 0,
	ATOMIC_OPERATION_MIN,
	ATOMIC_OPERATION_MAX,
	ATOMIC_OPERATION_AND,
	ATOMIC_OPERATION_OR,
	ATOMIC_OPERATION_XOR,
	ATOMIC_OPERATION_EXCHANGE,
	ATOMIC_OPERATION_COMPARE_EXCHANGE,

	ATOMIC_OPERATION_LAST
};
91 
getCoordStr(const ImageType imageType,const std::string & x,const std::string & y,const std::string & z)92 static string getCoordStr (const ImageType		imageType,
93 						   const std::string&	x,
94 						   const std::string&	y,
95 						   const std::string&	z)
96 {
97 	switch (imageType)
98 	{
99 		case IMAGE_TYPE_1D:
100 		case IMAGE_TYPE_BUFFER:
101 			return x;
102 		case IMAGE_TYPE_1D_ARRAY:
103 		case IMAGE_TYPE_2D:
104 			return string("ivec2(" + x + "," + y + ")");
105 		case IMAGE_TYPE_2D_ARRAY:
106 		case IMAGE_TYPE_3D:
107 		case IMAGE_TYPE_CUBE:
108 		case IMAGE_TYPE_CUBE_ARRAY:
109 			return string("ivec3(" + x + "," + y + "," + z + ")");
110 		default:
111 			DE_ASSERT(false);
112 			return DE_NULL;
113 	}
114 }
115 
//! Returns the GLSL expression used as the data argument of the atomic call.
//! Order-independent ops use a squared-distance value; exchange ops use the
//! linearized invocation index so each invocation contributes a unique value.
//! Must be kept in sync with the host-side getAtomicFuncArgument().
static string getAtomicFuncArgumentShaderStr (const AtomicOperation	op,
											  const string&			x,
											  const string&			y,
											  const string&			z,
											  const IVec3&			gridSize)
{
	switch (op)
	{
		case ATOMIC_OPERATION_ADD:
		case ATOMIC_OPERATION_MIN:
		case ATOMIC_OPERATION_MAX:
		case ATOMIC_OPERATION_AND:
		case ATOMIC_OPERATION_OR:
		case ATOMIC_OPERATION_XOR:
			return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
		case ATOMIC_OPERATION_EXCHANGE:
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:
			return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
		default:
			DE_ASSERT(false);
			// Constructing std::string from a null char* (DE_NULL) is undefined
			// behavior; return an empty string on this unreachable path instead.
			return string();
	}
}
139 
getAtomicOperationCaseName(const AtomicOperation op)140 static string getAtomicOperationCaseName (const AtomicOperation op)
141 {
142 	switch (op)
143 	{
144 		case ATOMIC_OPERATION_ADD:				return string("add");
145 		case ATOMIC_OPERATION_MIN:				return string("min");
146 		case ATOMIC_OPERATION_MAX:				return string("max");
147 		case ATOMIC_OPERATION_AND:				return string("and");
148 		case ATOMIC_OPERATION_OR:				return string("or");
149 		case ATOMIC_OPERATION_XOR:				return string("xor");
150 		case ATOMIC_OPERATION_EXCHANGE:			return string("exchange");
151 		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return string("compare_exchange");
152 		default:
153 			DE_ASSERT(false);
154 			return DE_NULL;
155 	}
156 }
157 
getAtomicOperationShaderFuncName(const AtomicOperation op)158 static string getAtomicOperationShaderFuncName (const AtomicOperation op)
159 {
160 	switch (op)
161 	{
162 		case ATOMIC_OPERATION_ADD:				return string("imageAtomicAdd");
163 		case ATOMIC_OPERATION_MIN:				return string("imageAtomicMin");
164 		case ATOMIC_OPERATION_MAX:				return string("imageAtomicMax");
165 		case ATOMIC_OPERATION_AND:				return string("imageAtomicAnd");
166 		case ATOMIC_OPERATION_OR:				return string("imageAtomicOr");
167 		case ATOMIC_OPERATION_XOR:				return string("imageAtomicXor");
168 		case ATOMIC_OPERATION_EXCHANGE:			return string("imageAtomicExchange");
169 		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return string("imageAtomicCompSwap");
170 		default:
171 			DE_ASSERT(false);
172 			return DE_NULL;
173 	}
174 }
175 
getOperationInitialValue(const AtomicOperation op)176 static deInt32 getOperationInitialValue (const AtomicOperation op)
177 {
178 	switch (op)
179 	{
180 		// \note 18 is just an arbitrary small nonzero value.
181 		case ATOMIC_OPERATION_ADD:				return 18;
182 		case ATOMIC_OPERATION_MIN:				return (1 << 15) - 1;
183 		case ATOMIC_OPERATION_MAX:				return 18;
184 		case ATOMIC_OPERATION_AND:				return (1 << 15) - 1;
185 		case ATOMIC_OPERATION_OR:				return 18;
186 		case ATOMIC_OPERATION_XOR:				return 18;
187 		case ATOMIC_OPERATION_EXCHANGE:			return 18;
188 		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return 18;
189 		default:
190 			DE_ASSERT(false);
191 			return -1;
192 	}
193 }
194 
getAtomicFuncArgument(const AtomicOperation op,const IVec3 & invocationID,const IVec3 & gridSize)195 static deInt32 getAtomicFuncArgument (const AtomicOperation op, const IVec3& invocationID, const IVec3& gridSize)
196 {
197 	const int x = invocationID.x();
198 	const int y = invocationID.y();
199 	const int z = invocationID.z();
200 
201 	switch (op)
202 	{
203 		// \note Fall-throughs.
204 		case ATOMIC_OPERATION_ADD:
205 		case ATOMIC_OPERATION_MIN:
206 		case ATOMIC_OPERATION_MAX:
207 		case ATOMIC_OPERATION_AND:
208 		case ATOMIC_OPERATION_OR:
209 		case ATOMIC_OPERATION_XOR:
210 			return x*x + y*y + z*z;
211 		case ATOMIC_OPERATION_EXCHANGE:
212 		case ATOMIC_OPERATION_COMPARE_EXCHANGE:
213 			return (z*gridSize.x() + x)*gridSize.y() + y;
214 		default:
215 			DE_ASSERT(false);
216 			return -1;
217 	}
218 }
219 
220 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried (i.e. is both commutative and associative).
isOrderIndependentAtomicOperation(const AtomicOperation op)221 static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
222 {
223 	return	op == ATOMIC_OPERATION_ADD ||
224 			op == ATOMIC_OPERATION_MIN ||
225 			op == ATOMIC_OPERATION_MAX ||
226 			op == ATOMIC_OPERATION_AND ||
227 			op == ATOMIC_OPERATION_OR ||
228 			op == ATOMIC_OPERATION_XOR;
229 }
230 
231 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
//! Computes the result of an atomic operation where "a" is the data operated on
//! and "b" is the parameter to the atomic function.
static deInt32 computeBinaryAtomicOperationResult (const AtomicOperation op, const deInt32 a, const deInt32 b)
{
	deInt32 result = -1;

	switch (op)
	{
		case ATOMIC_OPERATION_ADD:				result = a + b;				break;
		case ATOMIC_OPERATION_MIN:				result = de::min(a, b);		break;
		case ATOMIC_OPERATION_MAX:				result = de::max(a, b);		break;
		case ATOMIC_OPERATION_AND:				result = a & b;				break;
		case ATOMIC_OPERATION_OR:				result = a | b;				break;
		case ATOMIC_OPERATION_XOR:				result = a ^ b;				break;
		case ATOMIC_OPERATION_EXCHANGE:			result = b;					break;
		// 18 is the compare value passed to imageAtomicCompSwap (the initial pixel value).
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	result = (a == 18 ? b : a);	break;
		default:
			DE_ASSERT(false);
			break;
	}

	return result;
}
249 
//! Test case checking the final value left in the image after all atomic
//! invocations have completed (order-independent verification).
class BinaryAtomicEndResultCase : public vkt::TestCase
{
public:
								BinaryAtomicEndResultCase  (tcu::TestContext&			testCtx,
															const string&				name,
															const string&				description,
															const ImageType				imageType,
															const tcu::UVec3&			imageSize,
															const tcu::TextureFormat&	format,
															const AtomicOperation		operation,
															const glu::GLSLVersion		glslVersion);

	void						initPrograms			   (SourceCollections&			sourceCollections) const;
	TestInstance*				createInstance			   (Context&					context) const;
private:

	const ImageType				m_imageType;	//!< Dimensionality/arrayness of the tested image.
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;
	const AtomicOperation		m_operation;	//!< Atomic operation under test.
	const glu::GLSLVersion		m_glslVersion;
};
272 
//! Stores the test parameters; no resources are created until createInstance().
BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext&			testCtx,
													  const string&				name,
													  const string&				description,
													  const ImageType			imageType,
													  const tcu::UVec3&			imageSize,
													  const tcu::TextureFormat&	format,
													  const AtomicOperation		operation,
													  const glu::GLSLVersion	glslVersion)
	: TestCase		(testCtx, name, description)
	, m_imageType	(imageType)
	, m_imageSize	(imageSize)
	, m_format		(format)
	, m_operation	(operation)
	, m_glslVersion	(glslVersion)
{
}
289 
//! Generates the compute shader. Each invocation performs one atomic operation on
//! its target pixel; NUM_INVOCATIONS_PER_PIXEL invocations alias every pixel via
//! the "gx % gridSize.x" coordinate wrap.
void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
{
	const string	versionDecl				= glu::getGLSLVersionDeclaration(m_glslVersion);

	const bool		uintFormat				= isUintFormat(mapTextureFormat(m_format));
	const bool		intFormat				= isIntFormat(mapTextureFormat(m_format));
	const UVec3		gridSize				= getShaderGridSize(m_imageType, m_imageSize);
	// Wrap the extended X range back onto the image so several invocations hit the same pixel.
	const string	atomicCoord				= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");

	// Cast the argument to the image's component type (uint/int/float).
	const string	atomicArgExpr			= (uintFormat ? "uint" : intFormat ? "int" : "float")
											+ getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));

	// imageAtomicCompSwap takes an extra compare value: 18, the initial pixel value.
	const string	compareExchangeStr		= (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ? ", 18" + string(uintFormat ? "u" : "") : "";
	const string	atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
	const string	shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
	const string	shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);

	string source = versionDecl + "\n"
					"precision highp " + shaderImageTypeStr + ";\n"
					"\n"
					"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
					"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
					"\n"
					"void main (void)\n"
					"{\n"
					"	int gx = int(gl_GlobalInvocationID.x);\n"
					"	int gy = int(gl_GlobalInvocationID.y);\n"
					"	int gz = int(gl_GlobalInvocationID.z);\n"
					"	" + atomicInvocation + ";\n"
					"}\n";

	sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
}
323 
//! Test case checking the intermediate values returned by each atomic invocation
//! (the pre-operation contents each invocation observed).
class BinaryAtomicIntermValuesCase : public vkt::TestCase
{
public:
								BinaryAtomicIntermValuesCase   (tcu::TestContext&			testCtx,
																const string&				name,
																const string&				description,
																const ImageType				imageType,
																const tcu::UVec3&			imageSize,
																const tcu::TextureFormat&	format,
																const AtomicOperation		operation,
																const glu::GLSLVersion		glslVersion);

	void						initPrograms				   (SourceCollections&			sourceCollections) const;
	TestInstance*				createInstance				   (Context&					context) const;
private:

	const ImageType				m_imageType;	//!< Dimensionality/arrayness of the tested image.
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;
	const AtomicOperation		m_operation;	//!< Atomic operation under test.
	const glu::GLSLVersion		m_glslVersion;
};
346 
//! Stores the test parameters; no resources are created until createInstance().
BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext&			testCtx,
															const string&			name,
															const string&			description,
															const ImageType			imageType,
															const tcu::UVec3&		imageSize,
															const TextureFormat&	format,
															const AtomicOperation	operation,
															const glu::GLSLVersion	glslVersion)
	: TestCase		(testCtx, name, description)
	, m_imageType	(imageType)
	, m_imageSize	(imageSize)
	, m_format		(format)
	, m_operation	(operation)
	, m_glslVersion	(glslVersion)
{
}
363 
//! Generates the compute shader. Like the end-result variant, but the value
//! returned by the atomic call (the pixel's previous contents) is additionally
//! written to a second, per-invocation image for later inspection.
void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
{
	const string	versionDecl				= glu::getGLSLVersionDeclaration(m_glslVersion);

	const bool		uintFormat				= isUintFormat(mapTextureFormat(m_format));
	const bool		intFormat				= isIntFormat(mapTextureFormat(m_format));
	const string	colorVecTypeName		= string(uintFormat ? "u" : intFormat ? "i" : "") + "vec4";
	const UVec3		gridSize				= getShaderGridSize(m_imageType, m_imageSize);
	// Wrapped coordinate for the atomic target; unwrapped coordinate for the per-invocation store.
	const string	atomicCoord				= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
	const string	invocationCoord			= getCoordStr(m_imageType, "gx", "gy", "gz");
	const string	atomicArgExpr			= (uintFormat ? "uint" : intFormat ? "int" : "float")
											+ getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));

	// imageAtomicCompSwap takes an extra compare value: 18, the initial pixel value.
	const string	compareExchangeStr		= (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ? ", 18" + string(uintFormat ? "u" : "")  : "";
	const string	atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
	const string	shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
	const string	shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);

	string source = versionDecl + "\n"
					"precision highp " + shaderImageTypeStr + ";\n"
					"\n"
					"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
					"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
					"layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
					"\n"
					"void main (void)\n"
					"{\n"
					"	int gx = int(gl_GlobalInvocationID.x);\n"
					"	int gy = int(gl_GlobalInvocationID.y);\n"
					"	int gz = int(gl_GlobalInvocationID.z);\n"
					"	imageStore(u_intermValuesImage, " + invocationCoord + ", " + colorVecTypeName + "(" + atomicInvocation + "));\n"
					"}\n";

	sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
}
399 
//! Shared implementation for both atomic-test instances: owns the result image,
//! runs the compute dispatch, and delegates resource setup / result verification
//! to the subclasses via the pure-virtual hooks below.
class BinaryAtomicInstanceBase : public vkt::TestInstance
{
public:

								BinaryAtomicInstanceBase (Context&						context,
														  const string&					name,
														  const ImageType				imageType,
														  const tcu::UVec3&				imageSize,
														  const TextureFormat&			format,
														  const AtomicOperation			operation);

	tcu::TestStatus				iterate					 (void);

	//! Size (bytes) of the host-visible buffer the results are copied into.
	virtual deUint32			getOutputBufferSize		 (void) const = 0;

	//! Hooks for subclass-specific resources and descriptor bindings.
	virtual void				prepareResources		 (void) = 0;
	virtual void				prepareDescriptors		 (void) = 0;

	//! Hooks to record commands around the dispatch (e.g. barriers, copies).
	virtual void				commandsBeforeCompute	 (const VkCommandBuffer			cmdBuffer) const = 0;
	virtual void				commandsAfterCompute	 (const VkCommandBuffer			cmdBuffer) const = 0;

	//! Validates the data copied into the output buffer.
	virtual bool				verifyResult			 (Allocation&					outputBufferAllocation) const = 0;
	void						checkRequirements		 (void) const;

protected:
	const string				m_name;			//!< Program name used to look up the compiled shader.
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const TextureFormat			m_format;
	const AtomicOperation		m_operation;

	de::MovePtr<Buffer>			m_outputBuffer;
	Move<VkDescriptorPool>		m_descriptorPool;
	Move<VkDescriptorSetLayout>	m_descriptorSetLayout;
	Move<VkDescriptorSet>		m_descriptorSet;
	de::MovePtr<Image>			m_resultImage;	//!< Image the atomic operations are performed on.
	Move<VkImageView>			m_resultImageView;
};
438 
//! Stores parameters only; Vulkan resources are created lazily in iterate().
BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context&				context,
													const string&			name,
													const ImageType			imageType,
													const tcu::UVec3&		imageSize,
													const TextureFormat&	format,
													const AtomicOperation	operation)
	: vkt::TestInstance	(context)
	, m_name			(name)
	, m_imageType		(imageType)
	, m_imageSize		(imageSize)
	, m_format			(format)
	, m_operation		(operation)
{
}
453 
checkRequirements(void) const454 void BinaryAtomicInstanceBase::checkRequirements (void) const
455 {
456 	if (m_imageType == IMAGE_TYPE_CUBE_ARRAY && !m_context.getDeviceFeatures().imageCubeArray)
457 	{
458 		TCU_THROW(NotSupportedError, "imageCubeArray feature not supported");
459 	}
460 }
461 
//! Executes the test: creates and initializes the result image, dispatches the
//! compute shader performing the atomic operations, copies the output back to a
//! host-visible buffer and validates it via the subclass' verifyResult().
tcu::TestStatus	BinaryAtomicInstanceBase::iterate (void)
{
	const VkDevice			device				= m_context.getDevice();
	const DeviceInterface&	deviceInterface		= m_context.getDeviceInterface();
	const VkQueue			queue				= m_context.getUniversalQueue();
	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
	Allocator&				allocator			= m_context.getDefaultAllocator();
	const VkDeviceSize		imageSizeInBytes	= tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
	const VkDeviceSize		outBuffSizeInBytes	= getOutputBufferSize();

	// Throws NotSupportedError before any resources are created.
	checkRequirements();

	const VkImageCreateInfo imageParams	=
	{
		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,					// VkStructureType			sType;
		DE_NULL,												// const void*				pNext;
		(m_imageType == IMAGE_TYPE_CUBE ||
		 m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
		 (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
		 (VkImageCreateFlags)0u),								// VkImageCreateFlags		flags;
		mapImageType(m_imageType),								// VkImageType				imageType;
		mapTextureFormat(m_format),								// VkFormat					format;
		makeExtent3D(getLayerSize(m_imageType, m_imageSize)),	// VkExtent3D				extent;
		1u,														// deUint32					mipLevels;
		getNumLayers(m_imageType, m_imageSize),					// deUint32					arrayLayers;
		VK_SAMPLE_COUNT_1_BIT,									// VkSampleCountFlagBits	samples;
		VK_IMAGE_TILING_OPTIMAL,								// VkImageTiling			tiling;
		VK_IMAGE_USAGE_STORAGE_BIT |
		VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
		VK_IMAGE_USAGE_TRANSFER_DST_BIT,						// VkImageUsageFlags		usage;
		VK_SHARING_MODE_EXCLUSIVE,								// VkSharingMode			sharingMode;
		0u,														// deUint32					queueFamilyIndexCount;
		DE_NULL,												// const deUint32*			pQueueFamilyIndices;
		VK_IMAGE_LAYOUT_UNDEFINED,								// VkImageLayout			initialLayout;
	};

	//Create the image that is going to store results of atomic operations
	m_resultImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));

	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	m_resultImageView = makeImageView(deviceInterface, device, m_resultImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);

	//Prepare the buffer with the initial data for the image
	const Buffer inputBuffer(deviceInterface, device, allocator, makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);

	Allocation& inputBufferAllocation = inputBuffer.getAllocation();

	//Prepare the initial data for the image
	const tcu::IVec4 initialValue(getOperationInitialValue(m_operation));

	tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
	tcu::PixelBufferAccess inputPixelBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), inputBufferAllocation.getHostPtr());

	// Fill every pixel with the operation-specific initial value.
	for (deUint32 z = 0; z < gridSize.z(); z++)
	for (deUint32 y = 0; y < gridSize.y(); y++)
	for (deUint32 x = 0; x < gridSize.x(); x++)
	{
		inputPixelBuffer.setPixel(initialValue, x, y, z);
	}

	flushAlloc(deviceInterface, device, inputBufferAllocation);

	// Create a buffer to store shader output copied from result image
	m_outputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface, device, allocator, makeBufferCreateInfo(outBuffSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));

	prepareResources();

	prepareDescriptors();

	// Create pipeline
	const Unique<VkShaderModule>	shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
	const Unique<VkPipelineLayout>	pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
	const Unique<VkPipeline>		pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));

	// Create command buffer
	const Unique<VkCommandPool>		cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
	const Unique<VkCommandBuffer>	cmdBuffer(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	beginCommandBuffer(deviceInterface, *cmdBuffer);

	deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
	deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);

	// Upload the initial data into the result image before the dispatch.
	const vector<VkBufferImageCopy>	bufferImageCopy(1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize)));
	copyBufferToImage(deviceInterface, *cmdBuffer, *inputBuffer, imageSizeInBytes, bufferImageCopy, VK_IMAGE_ASPECT_COLOR_BIT, 1, getNumLayers(m_imageType, m_imageSize), m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);

	commandsBeforeCompute(*cmdBuffer);

	// X is extended so NUM_INVOCATIONS_PER_PIXEL invocations target each pixel.
	deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());

	commandsAfterCompute(*cmdBuffer);

	// Make the transfer writes into the output buffer visible to host reads.
	const VkBufferMemoryBarrier	outputBufferPreHostReadBarrier
		= makeBufferMemoryBarrier(	VK_ACCESS_TRANSFER_WRITE_BIT,
									VK_ACCESS_HOST_READ_BIT,
									m_outputBuffer->get(),
									0ull,
									outBuffSizeInBytes);

	deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, 0u, DE_NULL, 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);

	endCommandBuffer(deviceInterface, *cmdBuffer);

	submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer);

	Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();

	invalidateAlloc(deviceInterface, device, outputBufferAllocation);

	if (verifyResult(outputBufferAllocation))
		return tcu::TestStatus::pass("Comparison succeeded");
	else
		return tcu::TestStatus::fail("Comparison failed");
}
577 
//! Instance verifying the final image contents after all atomic invocations.
class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
{
public:

						BinaryAtomicEndResultInstance  (Context&				context,
														const string&			name,
														const ImageType			imageType,
														const tcu::UVec3&		imageSize,
														const TextureFormat&	format,
														const AtomicOperation	operation)
							: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}

	virtual deUint32	getOutputBufferSize			   (void) const;

	// No resources are needed beyond the result image owned by the base class.
	virtual void		prepareResources			   (void) {}
	virtual void		prepareDescriptors			   (void);

	// Nothing to record before the dispatch; the input upload is done by the base class.
	virtual void		commandsBeforeCompute		   (const VkCommandBuffer) const {}
	virtual void		commandsAfterCompute		   (const VkCommandBuffer	cmdBuffer) const;

	virtual bool		verifyResult				   (Allocation&				outputBufferAllocation) const;
};
600 
getOutputBufferSize(void) const601 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
602 {
603 	return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
604 }
605 
prepareDescriptors(void)606 void BinaryAtomicEndResultInstance::prepareDescriptors (void)
607 {
608 	const VkDevice			device			= m_context.getDevice();
609 	const DeviceInterface&	deviceInterface = m_context.getDeviceInterface();
610 
611 	m_descriptorSetLayout =
612 		DescriptorSetLayoutBuilder()
613 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
614 		.build(deviceInterface, device);
615 
616 	m_descriptorPool =
617 		DescriptorPoolBuilder()
618 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
619 		.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
620 
621 	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
622 
623 	const VkDescriptorImageInfo	descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
624 
625 	DescriptorSetUpdateBuilder()
626 		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
627 		.update(deviceInterface, device);
628 }
629 
//! After the dispatch: transitions the result image to a transfer-readable
//! layout and copies its contents into the output buffer.
void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer	cmdBuffer) const
{
	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
	const VkImageSubresourceRange	subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	// Make shader writes visible to transfer reads and move GENERAL -> TRANSFER_SRC_OPTIMAL.
	const VkImageMemoryBarrier	resultImagePostDispatchBarrier =
		makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
								VK_ACCESS_TRANSFER_READ_BIT,
								VK_IMAGE_LAYOUT_GENERAL,
								VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
								m_resultImage->get(),
								subresourceRange);

	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &resultImagePostDispatchBarrier);

	const VkBufferImageCopy		bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize));

	deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
}
649 
verifyResult(Allocation & outputBufferAllocation) const650 bool BinaryAtomicEndResultInstance::verifyResult (Allocation& outputBufferAllocation) const
651 {
652 	const UVec3	gridSize			= getShaderGridSize(m_imageType, m_imageSize);
653 	const IVec3 extendedGridSize	= IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
654 
655 	tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());
656 
657 	for (deInt32 z = 0; z < resultBuffer.getDepth();  z++)
658 	for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
659 	for (deInt32 x = 0; x < resultBuffer.getWidth();  x++)
660 	{
661 		deInt32 resultValue = resultBuffer.getPixelInt(x, y, z).x();
662 
663 		if (isOrderIndependentAtomicOperation(m_operation))
664 		{
665 			deInt32 reference = getOperationInitialValue(m_operation);
666 
667 			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
668 			{
669 				const IVec3 gid(x + i*gridSize.x(), y, z);
670 				reference = computeBinaryAtomicOperationResult(m_operation, reference, getAtomicFuncArgument(m_operation, gid, extendedGridSize));
671 			}
672 
673 			if (resultValue != reference)
674 				return false;
675 		}
676 		else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
677 		{
678 			// Check if the end result equals one of the atomic args.
679 			bool matchFound = false;
680 
681 			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
682 			{
683 				const IVec3 gid(x + i*gridSize.x(), y, z);
684 				matchFound = (resultValue == getAtomicFuncArgument(m_operation, gid, extendedGridSize));
685 			}
686 
687 			if (!matchFound)
688 				return false;
689 		}
690 		else if (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
691 		{
692 			// Check if the end result equals one of the atomic args.
693 			bool matchFound = false;
694 
695 			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
696 			{
697 				const IVec3 gid(x + i*gridSize.x(), y, z);
698 				matchFound = (resultValue == getAtomicFuncArgument(m_operation, gid, extendedGridSize));
699 			}
700 
701 			if (!matchFound)
702 				return false;
703 		}
704 		else
705 			DE_ASSERT(false);
706 	}
707 	return true;
708 }
709 
createInstance(Context & context) const710 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
711 {
712 	return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
713 }
714 
//! Instance verifying the intermediate values each atomic invocation observed,
//! captured into a second (extended) image by the shader.
class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
{
public:

						BinaryAtomicIntermValuesInstance   (Context&				context,
															const string&			name,
															const ImageType			imageType,
															const tcu::UVec3&		imageSize,
															const TextureFormat&	format,
															const AtomicOperation	operation)
							: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}

	virtual deUint32	getOutputBufferSize				   (void) const;

	virtual void		prepareResources				   (void);
	virtual void		prepareDescriptors				   (void);

	virtual void		commandsBeforeCompute			   (const VkCommandBuffer	cmdBuffer) const;
	virtual void		commandsAfterCompute			   (const VkCommandBuffer	cmdBuffer) const;

	virtual bool		verifyResult					   (Allocation&				outputBufferAllocation) const;

protected:

	// Recursively checks whether the captured intermediate values correspond to
	// some valid execution order of the per-pixel atomic invocations.
	bool				verifyRecursive					   (const deInt32			index,
															const deInt32			valueSoFar,
															bool					argsUsed[NUM_INVOCATIONS_PER_PIXEL],
															const deInt32			atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
															const deInt32			resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
	de::MovePtr<Image>	m_intermResultsImage;	//!< Extended image holding one intermediate value per invocation.
	Move<VkImageView>	m_intermResultsImageView;
};
747 
getOutputBufferSize(void) const748 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
749 {
750 	return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
751 }
752 
prepareResources(void)753 void BinaryAtomicIntermValuesInstance::prepareResources (void)
754 {
755 	const VkDevice			device			= m_context.getDevice();
756 	const DeviceInterface&	deviceInterface = m_context.getDeviceInterface();
757 	Allocator&				allocator		= m_context.getDefaultAllocator();
758 
759 	const UVec3 layerSize			= getLayerSize(m_imageType, m_imageSize);
760 	const bool  isCubeBasedImage	= (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
761 	const UVec3 extendedLayerSize	= isCubeBasedImage	? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
762 														: UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
763 
764 	const VkImageCreateInfo imageParams =
765 	{
766 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,		// VkStructureType			sType;
767 		DE_NULL,									// const void*				pNext;
768 		(m_imageType == IMAGE_TYPE_CUBE ||
769 		 m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
770 		 (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
771 		 (VkImageCreateFlags)0u),					// VkImageCreateFlags		flags;
772 		mapImageType(m_imageType),					// VkImageType				imageType;
773 		mapTextureFormat(m_format),					// VkFormat					format;
774 		makeExtent3D(extendedLayerSize),			// VkExtent3D				extent;
775 		1u,											// deUint32					mipLevels;
776 		getNumLayers(m_imageType, m_imageSize),		// deUint32					arrayLayers;
777 		VK_SAMPLE_COUNT_1_BIT,						// VkSampleCountFlagBits	samples;
778 		VK_IMAGE_TILING_OPTIMAL,					// VkImageTiling			tiling;
779 		VK_IMAGE_USAGE_STORAGE_BIT |
780 		VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
781 		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode			sharingMode;
782 		0u,											// deUint32					queueFamilyIndexCount;
783 		DE_NULL,									// const deUint32*			pQueueFamilyIndices;
784 		VK_IMAGE_LAYOUT_UNDEFINED,					// VkImageLayout			initialLayout;
785 	};
786 
787 	m_intermResultsImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));
788 
789 	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
790 
791 	m_intermResultsImageView = makeImageView(deviceInterface, device, m_intermResultsImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);
792 }
793 
prepareDescriptors(void)794 void BinaryAtomicIntermValuesInstance::prepareDescriptors (void)
795 {
796 	const VkDevice			device			= m_context.getDevice();
797 	const DeviceInterface&	deviceInterface = m_context.getDeviceInterface();
798 
799 	m_descriptorSetLayout =
800 		DescriptorSetLayoutBuilder()
801 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
802 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
803 		.build(deviceInterface, device);
804 
805 	m_descriptorPool =
806 		DescriptorPoolBuilder()
807 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2u)
808 		.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
809 
810 	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
811 
812 	const VkDescriptorImageInfo	descResultImageInfo			= makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
813 	const VkDescriptorImageInfo	descIntermResultsImageInfo	= makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
814 
815 	DescriptorSetUpdateBuilder()
816 		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
817 		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descIntermResultsImageInfo)
818 		.update(deviceInterface, device);
819 }
820 
commandsBeforeCompute(const VkCommandBuffer cmdBuffer) const821 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
822 {
823 	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
824 	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
825 
826 	const VkImageMemoryBarrier	imagePreDispatchBarrier =
827 		makeImageMemoryBarrier(	0u,
828 								VK_ACCESS_SHADER_WRITE_BIT,
829 								VK_IMAGE_LAYOUT_UNDEFINED,
830 								VK_IMAGE_LAYOUT_GENERAL,
831 								m_intermResultsImage->get(),
832 								subresourceRange);
833 
834 	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
835 }
836 
commandsAfterCompute(const VkCommandBuffer cmdBuffer) const837 void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer) const
838 {
839 	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
840 	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
841 
842 	const VkImageMemoryBarrier	imagePostDispatchBarrier =
843 		makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
844 								VK_ACCESS_TRANSFER_READ_BIT,
845 								VK_IMAGE_LAYOUT_GENERAL,
846 								VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
847 								m_intermResultsImage->get(),
848 								subresourceRange);
849 
850 	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);
851 
852 	const UVec3					layerSize				= getLayerSize(m_imageType, m_imageSize);
853 	const UVec3					extendedLayerSize		= UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
854 	const VkBufferImageCopy		bufferImageCopyParams	= makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
855 
856 	deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
857 }
858 
verifyResult(Allocation & outputBufferAllocation) const859 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation&	outputBufferAllocation) const
860 {
861 	const UVec3	gridSize		 = getShaderGridSize(m_imageType, m_imageSize);
862 	const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
863 
864 	tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
865 
866 	for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
867 	for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
868 	for (deUint32 x = 0; x < gridSize.x(); x++)
869 	{
870 		deInt32 resultValues[NUM_INVOCATIONS_PER_PIXEL];
871 		deInt32 atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
872 		bool	argsUsed[NUM_INVOCATIONS_PER_PIXEL];
873 
874 		for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
875 		{
876 			IVec3 gid(x + i*gridSize.x(), y, z);
877 
878 			resultValues[i] = resultBuffer.getPixelInt(gid.x(), gid.y(), gid.z()).x();
879 			atomicArgs[i]	= getAtomicFuncArgument(m_operation, gid, extendedGridSize);
880 			argsUsed[i]		= false;
881 		}
882 
883 		// Verify that the return values form a valid sequence.
884 		if (!verifyRecursive(0, getOperationInitialValue(m_operation), argsUsed, atomicArgs, resultValues))
885 		{
886 			return false;
887 		}
888 	}
889 
890 	return true;
891 }
892 
verifyRecursive(const deInt32 index,const deInt32 valueSoFar,bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],const deInt32 atomicArgs[NUM_INVOCATIONS_PER_PIXEL],const deInt32 resultValues[NUM_INVOCATIONS_PER_PIXEL]) const893 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32	index,
894 														const deInt32	valueSoFar,
895 														bool			argsUsed[NUM_INVOCATIONS_PER_PIXEL],
896 														const deInt32	atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
897 														const deInt32	resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
898 {
899 	if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
900 		return true;
901 
902 	for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
903 	{
904 		if (!argsUsed[i] && resultValues[i] == valueSoFar)
905 		{
906 			argsUsed[i] = true;
907 
908 			if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
909 			{
910 				return true;
911 			}
912 
913 			argsUsed[i] = false;
914 		}
915 	}
916 
917 	return false;
918 }
919 
createInstance(Context & context) const920 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
921 {
922 	return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
923 }
924 
925 } // anonymous ns
926 
createImageAtomicOperationTests(tcu::TestContext & testCtx)927 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
928 {
929 	de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));
930 
931 	struct ImageParams
932 	{
933 		ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
934 			: m_imageType	(imageType)
935 			, m_imageSize	(imageSize)
936 		{
937 		}
938 		const ImageType		m_imageType;
939 		const tcu::UVec3	m_imageSize;
940 	};
941 
942 	static const ImageParams imageParamsArray[] =
943 	{
944 		ImageParams(IMAGE_TYPE_1D,			tcu::UVec3(64u, 1u, 1u)),
945 		ImageParams(IMAGE_TYPE_1D_ARRAY,	tcu::UVec3(64u, 1u, 8u)),
946 		ImageParams(IMAGE_TYPE_2D,			tcu::UVec3(64u, 64u, 1u)),
947 		ImageParams(IMAGE_TYPE_2D_ARRAY,	tcu::UVec3(64u, 64u, 8u)),
948 		ImageParams(IMAGE_TYPE_3D,			tcu::UVec3(64u, 64u, 8u)),
949 		ImageParams(IMAGE_TYPE_CUBE,		tcu::UVec3(64u, 64u, 1u)),
950 		ImageParams(IMAGE_TYPE_CUBE_ARRAY,	tcu::UVec3(64u, 64u, 2u))
951 	};
952 
953 	static const tcu::TextureFormat formats[] =
954 	{
955 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
956 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32)
957 	};
958 
959 	for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
960 	{
961 		const AtomicOperation operation = (AtomicOperation)operationI;
962 
963 		de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));
964 
965 		for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
966 		{
967 			const ImageType	 imageType = imageParamsArray[imageTypeNdx].m_imageType;
968 			const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
969 
970 			de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
971 
972 			for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
973 			{
974 				const TextureFormat&	format		= formats[formatNdx];
975 				const std::string		formatName	= getShaderImageFormatQualifier(format);
976 
977 				//!< Atomic case checks the end result of the operations, and not the intermediate return values
978 				const string caseEndResult = formatName + "_end_result";
979 				imageTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));
980 
981 				//!< Atomic case checks the return values of the atomic function and not the end result.
982 				const string caseIntermValues = formatName + "_intermediate_values";
983 				imageTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));
984 			}
985 
986 			operationGroup->addChild(imageTypeGroup.release());
987 		}
988 
989 		imageAtomicOperationsTests->addChild(operationGroup.release());
990 	}
991 
992 	return imageAtomicOperationsTests.release();
993 }
994 
995 } // image
996 } // vkt
997