1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.1 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Shader atomic operation tests.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es31fShaderAtomicOpTests.hpp"
25 #include "gluShaderProgram.hpp"
26 #include "gluShaderUtil.hpp"
27 #include "gluRenderContext.hpp"
28 #include "gluObjectWrapper.hpp"
29 #include "gluProgramInterfaceQuery.hpp"
30 #include "tcuVector.hpp"
31 #include "tcuTestLog.hpp"
32 #include "tcuVectorUtil.hpp"
33 #include "tcuFormatUtil.hpp"
34 #include "deStringUtil.hpp"
35 #include "deRandom.hpp"
36 #include "glwFunctions.hpp"
37 #include "glwEnums.hpp"
38 
39 #include <algorithm>
40 #include <set>
41 
42 namespace deqp
43 {
44 namespace gles31
45 {
46 namespace Functional
47 {
48 
49 using std::string;
50 using std::vector;
51 using tcu::TestLog;
52 using tcu::UVec3;
53 using std::set;
54 using namespace glu;
55 
56 template<typename T, int Size>
product(const tcu::Vector<T,Size> & v)57 static inline T product (const tcu::Vector<T, Size>& v)
58 {
59 	T res = v[0];
60 	for (int ndx = 1; ndx < Size; ndx++)
61 		res *= v[ndx];
62 	return res;
63 }
64 
65 class ShaderAtomicOpCase : public TestCase
66 {
67 public:
68 							ShaderAtomicOpCase	(Context& context, const char* name, const char* funcName, AtomicOperandType operandType, DataType type, Precision precision, const UVec3& workGroupSize);
69 							~ShaderAtomicOpCase	(void);
70 
71 	void					init				(void);
72 	void					deinit				(void);
73 	IterateResult			iterate				(void);
74 
75 protected:
76 	virtual void			getInputs			(int numValues, int stride, void* inputs) const = 0;
77 	virtual bool			verify				(int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const = 0;
78 
79 	const string			m_funcName;
80 	const AtomicOperandType	m_operandType;
81 	const DataType			m_type;
82 	const Precision			m_precision;
83 
84 	const UVec3				m_workGroupSize;
85 	const UVec3				m_numWorkGroups;
86 
87 	deUint32				m_initialValue;
88 
89 private:
90 							ShaderAtomicOpCase	(const ShaderAtomicOpCase& other);
91 	ShaderAtomicOpCase&		operator=			(const ShaderAtomicOpCase& other);
92 
93 	ShaderProgram*			m_program;
94 };
95 
ShaderAtomicOpCase(Context & context,const char * name,const char * funcName,AtomicOperandType operandType,DataType type,Precision precision,const UVec3 & workGroupSize)96 ShaderAtomicOpCase::ShaderAtomicOpCase (Context& context, const char* name, const char* funcName, AtomicOperandType operandType, DataType type, Precision precision, const UVec3& workGroupSize)
97 	: TestCase			(context, name, funcName)
98 	, m_funcName		(funcName)
99 	, m_operandType		(operandType)
100 	, m_type			(type)
101 	, m_precision		(precision)
102 	, m_workGroupSize	(workGroupSize)
103 	, m_numWorkGroups	(4,4,4)
104 	, m_initialValue	(0)
105 	, m_program			(DE_NULL)
106 {
107 }
108 
~ShaderAtomicOpCase(void)109 ShaderAtomicOpCase::~ShaderAtomicOpCase (void)
110 {
111 	ShaderAtomicOpCase::deinit();
112 }
113 
init(void)114 void ShaderAtomicOpCase::init (void)
115 {
116 	const bool			isSSBO		= m_operandType == ATOMIC_OPERAND_BUFFER_VARIABLE;
117 	const char*			precName	= getPrecisionName(m_precision);
118 	const char*			typeName	= getDataTypeName(m_type);
119 
120 	const DataType		outType		= isSSBO ? m_type : glu::TYPE_UINT;
121 	const char*			outTypeName	= getDataTypeName(outType);
122 
123 	const deUint32		numValues	= product(m_workGroupSize)*product(m_numWorkGroups);
124 	std::ostringstream	src;
125 
126 	src << glu::getGLSLVersionDeclaration(getContextTypeGLSLVersion(m_context.getRenderContext().getType())) << "\n"
127 		<< "layout(local_size_x = " << m_workGroupSize.x()
128 		<< ", local_size_y = " << m_workGroupSize.y()
129 		<< ", local_size_z = " << m_workGroupSize.z() << ") in;\n"
130 		<< "layout(binding = 0) buffer InOut\n"
131 		<< "{\n"
132 		<< "	" << precName << " " << typeName << " inputValues[" << numValues << "];\n"
133 		<< "	" << precName << " " << outTypeName << " outputValues[" << numValues << "];\n"
134 		<< "	" << (isSSBO ? "coherent " : "") << precName << " " << outTypeName << " groupValues[" << product(m_numWorkGroups) << "];\n"
135 		<< "} sb_inout;\n";
136 
137 	if (!isSSBO)
138 		src << "shared " << precName << " " << typeName << " s_var;\n";
139 
140 	src << "\n"
141 		<< "void main (void)\n"
142 		<< "{\n"
143 		<< "	uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
144 		<< "	uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
145 		<< "	uint globalOffs = localSize*globalNdx;\n"
146 		<< "	uint offset     = globalOffs + gl_LocalInvocationIndex;\n"
147 		<< "\n";
148 
149 	if (isSSBO)
150 	{
151 		DE_ASSERT(outType == m_type);
152 		src << "	sb_inout.outputValues[offset] = " << m_funcName << "(sb_inout.groupValues[globalNdx], sb_inout.inputValues[offset]);\n";
153 	}
154 	else
155 	{
156 		const string		castBeg	= outType != m_type ? (string(outTypeName) + "(") : string("");
157 		const char* const	castEnd	= outType != m_type ? ")" : "";
158 
159 		src << "	if (gl_LocalInvocationIndex == 0u)\n"
160 			<< "		s_var = " << typeName << "(" << tcu::toHex(m_initialValue) << "u);\n"
161 			<< "	barrier();\n"
162 			<< "	" << precName << " " << typeName << " res = " << m_funcName << "(s_var, sb_inout.inputValues[offset]);\n"
163 			<< "	sb_inout.outputValues[offset] = " << castBeg << "res" << castEnd << ";\n"
164 			<< "	barrier();\n"
165 			<< "	if (gl_LocalInvocationIndex == 0u)\n"
166 			<< "		sb_inout.groupValues[globalNdx] = " << castBeg << "s_var" << castEnd << ";\n";
167 	}
168 
169 	src << "}\n";
170 
171 	DE_ASSERT(!m_program);
172 	m_program = new ShaderProgram(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
173 
174 	m_testCtx.getLog() << *m_program;
175 
176 	if (!m_program->isOk())
177 	{
178 		delete m_program;
179 		m_program = DE_NULL;
180 		throw tcu::TestError("Compile failed");
181 	}
182 }
183 
deinit(void)184 void ShaderAtomicOpCase::deinit (void)
185 {
186 	delete m_program;
187 	m_program = DE_NULL;
188 }
189 
iterate(void)190 ShaderAtomicOpCase::IterateResult ShaderAtomicOpCase::iterate (void)
191 {
192 	const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
193 	const deUint32				program			= m_program->getProgram();
194 	const Buffer				inoutBuffer		(m_context.getRenderContext());
195 	const deUint32				blockNdx		= gl.getProgramResourceIndex(program, GL_SHADER_STORAGE_BLOCK, "InOut");
196 	const InterfaceBlockInfo	blockInfo		= getProgramInterfaceBlockInfo(gl, program, GL_SHADER_STORAGE_BLOCK, blockNdx);
197 	const deUint32				inVarNdx		= gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.inputValues[0]");
198 	const InterfaceVariableInfo	inVarInfo		= getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, inVarNdx);
199 	const deUint32				outVarNdx		= gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.outputValues[0]");
200 	const InterfaceVariableInfo	outVarInfo		= getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, outVarNdx);
201 	const deUint32				groupVarNdx		= gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.groupValues[0]");
202 	const InterfaceVariableInfo	groupVarInfo	= getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, groupVarNdx);
203 	const deUint32				numValues		= product(m_workGroupSize)*product(m_numWorkGroups);
204 
205 	TCU_CHECK(inVarInfo.arraySize == numValues &&
206 			  outVarInfo.arraySize == numValues &&
207 			  groupVarInfo.arraySize == product(m_numWorkGroups));
208 
209 	gl.useProgram(program);
210 
211 	// Setup buffer.
212 	{
213 		vector<deUint8> bufData(blockInfo.dataSize);
214 		std::fill(bufData.begin(), bufData.end(), 0);
215 
216 		getInputs((int)numValues, (int)inVarInfo.arrayStride, &bufData[0] + inVarInfo.offset);
217 
218 		if (m_operandType == ATOMIC_OPERAND_BUFFER_VARIABLE)
219 		{
220 			for (deUint32 valNdx = 0; valNdx < product(m_numWorkGroups); valNdx++)
221 				*(deUint32*)(&bufData[0] + groupVarInfo.offset + groupVarInfo.arrayStride*valNdx) = m_initialValue;
222 		}
223 
224 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inoutBuffer);
225 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, &bufData[0], GL_STATIC_READ);
226 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *inoutBuffer);
227 		GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
228 	}
229 
230 	gl.dispatchCompute(m_numWorkGroups.x(), m_numWorkGroups.y(), m_numWorkGroups.z());
231 
232 	// Read back and compare
233 	{
234 		const void*		resPtr		= gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, blockInfo.dataSize, GL_MAP_READ_BIT);
235 		bool			isOk		= true;
236 
237 		GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
238 		TCU_CHECK(resPtr);
239 
240 		isOk = verify((int)numValues,
241 					  (int)inVarInfo.arrayStride, (const deUint8*)resPtr + inVarInfo.offset,
242 					  (int)outVarInfo.arrayStride, (const deUint8*)resPtr + outVarInfo.offset,
243 					  (int)groupVarInfo.arrayStride, (const deUint8*)resPtr + groupVarInfo.offset);
244 
245 		gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
246 		GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
247 
248 		m_testCtx.setTestResult(isOk ? QP_TEST_RESULT_PASS	: QP_TEST_RESULT_FAIL,
249 								isOk ? "Pass"				: "Comparison failed");
250 	}
251 
252 	return STOP;
253 }
254 
255 class ShaderAtomicAddCase : public ShaderAtomicOpCase
256 {
257 public:
ShaderAtomicAddCase(Context & context,const char * name,AtomicOperandType operandType,DataType type,Precision precision)258 	ShaderAtomicAddCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
259 		: ShaderAtomicOpCase(context, name, "atomicAdd", operandType, type, precision, UVec3(3,2,1))
260 	{
261 		m_initialValue = 1;
262 	}
263 
264 protected:
getInputs(int numValues,int stride,void * inputs) const265 	void getInputs (int numValues, int stride, void* inputs) const
266 	{
267 		de::Random	rnd			(deStringHash(getName()));
268 		const int	maxVal		= m_precision == PRECISION_LOWP ? 2 : 32;
269 		const int	minVal		= 1;
270 
271 		// \todo [2013-09-04 pyry] Negative values!
272 
273 		for (int valNdx = 0; valNdx < numValues; valNdx++)
274 			*(int*)((deUint8*)inputs + stride*valNdx) = rnd.getInt(minVal, maxVal);
275 	}
276 
verify(int numValues,int inputStride,const void * inputs,int outputStride,const void * outputs,int groupStride,const void * groupOutputs) const277 	bool verify (int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const
278 	{
279 		const int	workGroupSize	= (int)product(m_workGroupSize);
280 		const int	numWorkGroups	= numValues/workGroupSize;
281 
282 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
283 		{
284 			const int	groupOffset		= groupNdx*workGroupSize;
285 			const int	groupOutput		= *(const deInt32*)((const deUint8*)groupOutputs + groupNdx*groupStride);
286 			set<int>	outValues;
287 			bool		maxFound		= false;
288 			int			valueSum		= (int)m_initialValue;
289 
290 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
291 			{
292 				const int inputValue = *(const deInt32*)((const deUint8*)inputs + inputStride*(groupOffset+localNdx));
293 				valueSum += inputValue;
294 			}
295 
296 			if (groupOutput != valueSum)
297 			{
298 				m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected sum " << valueSum << ", got " << groupOutput << TestLog::EndMessage;
299 				return false;
300 			}
301 
302 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
303 			{
304 				const int	inputValue		= *(const deInt32*)((const deUint8*)inputs + inputStride*(groupOffset+localNdx));
305 				const int	outputValue		= *(const deInt32*)((const deUint8*)outputs + outputStride*(groupOffset+localNdx));
306 
307 				if (!de::inRange(outputValue, (int)m_initialValue, valueSum-inputValue))
308 				{
309 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
310 														   << ": expected value in range [" << m_initialValue << ", " << (valueSum-inputValue)
311 														   << "], got " << outputValue
312 									   << TestLog::EndMessage;
313 					return false;
314 				}
315 
316 				if (outValues.find(outputValue) != outValues.end())
317 				{
318 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
319 														   << ": found duplicate value " << outputValue
320 									   << TestLog::EndMessage;
321 					return false;
322 				}
323 
324 				outValues.insert(outputValue);
325 				if (outputValue == valueSum-inputValue)
326 					maxFound = true;
327 			}
328 
329 			if (!maxFound)
330 			{
331 				m_testCtx.getLog() << TestLog::Message << "ERROR: could not find maximum expected value from group " << groupNdx << TestLog::EndMessage;
332 				return false;
333 			}
334 
335 			if (outValues.find((int)m_initialValue) == outValues.end())
336 			{
337 				m_testCtx.getLog() << TestLog::Message << "ERROR: could not find initial value from group " << groupNdx << TestLog::EndMessage;
338 				return false;
339 			}
340 		}
341 
342 		return true;
343 	}
344 };
345 
346 
getPrecisionNumIntegerBits(glu::Precision precision)347 static int getPrecisionNumIntegerBits (glu::Precision precision)
348 {
349 	switch (precision)
350 	{
351 		case glu::PRECISION_HIGHP:		return 32;
352 		case glu::PRECISION_MEDIUMP:	return 16;
353 		case glu::PRECISION_LOWP:		return 9;
354 		default:
355 			DE_ASSERT(false);
356 			return 0;
357 	}
358 }
359 
getPrecisionMask(int numPreciseBits)360 static deUint32 getPrecisionMask (int numPreciseBits)
361 {
362 	// \note: bit shift with larger or equal than var length is undefined, use 64 bit ints
363 	return (deUint32)((((deUint64)1u) << numPreciseBits) - 1) ;
364 }
365 
intEqualsAfterUintCast(deInt32 value,deUint32 casted,glu::Precision precision)366 static bool intEqualsAfterUintCast (deInt32 value, deUint32 casted, glu::Precision precision)
367 {
368 	// Bit format of 'casted' = [ uint -> highp uint promotion bits (0) ] [ sign extend bits (s) ] [ value bits ]
369 	//                                                                                             |--min len---|
370 	//                                                                    |---------------signed length---------|
371 	//                          |-------------------------------- highp uint length ----------------------------|
372 
373 	const deUint32	reference		= (deUint32)value;
374 	const int		signBitOn		= value < 0;
375 	const int		numPreciseBits	= getPrecisionNumIntegerBits(precision);
376 	const deUint32	preciseMask		= getPrecisionMask(numPreciseBits);
377 
378 	// Lowest N bits must match, N = minimum precision
379 	if ((reference & preciseMask) != (casted & preciseMask))
380 		return false;
381 
382 	// Other lowest bits must match the sign and the remaining (topmost) if any must be 0
383 	for (int signedIntegerLength = numPreciseBits; signedIntegerLength <= 32; ++signedIntegerLength)
384 	{
385 		const deUint32 signBits = (signBitOn) ? (getPrecisionMask(signedIntegerLength)) : (0u);
386 
387 		if ((signBits & ~preciseMask) == (casted & ~preciseMask))
388 			return true;
389 	}
390 	return false;
391 }
392 
containsAfterUintCast(const std::set<deInt32> & haystack,deUint32 needle,glu::Precision precision)393 static bool containsAfterUintCast (const std::set<deInt32>& haystack, deUint32 needle, glu::Precision precision)
394 {
395 	for (std::set<deInt32>::const_iterator it = haystack.begin(); it != haystack.end(); ++it)
396 		if (intEqualsAfterUintCast(*it, needle, precision))
397 			return true;
398 	return false;
399 }
400 
containsAfterUintCast(const std::set<deUint32> & haystack,deInt32 needle,glu::Precision precision)401 static bool containsAfterUintCast (const std::set<deUint32>& haystack, deInt32 needle, glu::Precision precision)
402 {
403 	for (std::set<deUint32>::const_iterator it = haystack.begin(); it != haystack.end(); ++it)
404 		if (intEqualsAfterUintCast(needle, *it, precision))
405 			return true;
406 	return false;
407 }
408 
409 class ShaderAtomicMinCase : public ShaderAtomicOpCase
410 {
411 public:
ShaderAtomicMinCase(Context & context,const char * name,AtomicOperandType operandType,DataType type,Precision precision)412 	ShaderAtomicMinCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
413 		: ShaderAtomicOpCase(context, name, "atomicMin", operandType, type, precision, UVec3(3,2,1))
414 	{
415 		m_initialValue = m_precision == PRECISION_LOWP ? 100 : 1000;
416 	}
417 
418 protected:
getInputs(int numValues,int stride,void * inputs) const419 	void getInputs (int numValues, int stride, void* inputs) const
420 	{
421 		de::Random	rnd			(deStringHash(getName()));
422 		const bool	isSigned	= m_type == TYPE_INT;
423 		const int	maxVal		= m_precision == PRECISION_LOWP ? 100 : 1000;
424 		const int	minVal		= isSigned ? -maxVal : 0;
425 
426 		for (int valNdx = 0; valNdx < numValues; valNdx++)
427 			*(int*)((deUint8*)inputs + stride*valNdx) = rnd.getInt(minVal, maxVal);
428 	}
429 
verify(int numValues,int inputStride,const void * inputs,int outputStride,const void * outputs,int groupStride,const void * groupOutputs) const430 	bool verify (int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const
431 	{
432 		const int	workGroupSize	= (int)product(m_workGroupSize);
433 		const int	numWorkGroups	= numValues/workGroupSize;
434 		bool		anyError		= false;
435 
436 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
437 		{
438 			const int		groupOffset		= groupNdx*workGroupSize;
439 			const deUint32	groupOutput		= *(const deUint32*)((const deUint8*)groupOutputs + groupNdx*groupStride);
440 			set<deInt32>	inValues;
441 			set<deUint32>	outValues;
442 			int				minValue		= (int)m_initialValue;
443 
444 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
445 			{
446 				const deInt32 inputValue = *(const deInt32*)((const deUint8*)inputs + inputStride*(groupOffset+localNdx));
447 				inValues.insert(inputValue);
448 				minValue = de::min(inputValue, minValue);
449 			}
450 
451 			if (!intEqualsAfterUintCast(minValue, groupOutput, m_precision))
452 			{
453 				m_testCtx.getLog()
454 					<< TestLog::Message
455 					<< "ERROR: at group " << groupNdx
456 					<< ": expected minimum " << minValue << " (" << tcu::Format::Hex<8>((deUint32)minValue) << ")"
457 					<< ", got " << groupOutput << " (" << tcu::Format::Hex<8>(groupOutput) << ")"
458 					<< TestLog::EndMessage;
459 				anyError = true;
460 			}
461 
462 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
463 			{
464 				const deUint32 outputValue = *(const deUint32*)((const deUint8*)outputs + outputStride*(groupOffset+localNdx));
465 
466 				if (!containsAfterUintCast(inValues, outputValue, m_precision) &&
467 					!intEqualsAfterUintCast((deInt32)m_initialValue, outputValue, m_precision))
468 				{
469 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
470 														   << ": found unexpected value " << outputValue
471 														   << " (" << tcu::Format::Hex<8>(outputValue) << ")"
472 									   << TestLog::EndMessage;
473 					anyError = true;
474 				}
475 
476 				outValues.insert(outputValue);
477 			}
478 
479 			if (!containsAfterUintCast(outValues, (int)m_initialValue, m_precision))
480 			{
481 				m_testCtx.getLog() << TestLog::Message << "ERROR: could not find initial value from group " << groupNdx << TestLog::EndMessage;
482 				anyError = true;
483 			}
484 		}
485 
486 		return !anyError;
487 	}
488 };
489 
490 class ShaderAtomicMaxCase : public ShaderAtomicOpCase
491 {
492 public:
ShaderAtomicMaxCase(Context & context,const char * name,AtomicOperandType operandType,DataType type,Precision precision)493 	ShaderAtomicMaxCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
494 		: ShaderAtomicOpCase(context, name, "atomicMax", operandType, type, precision, UVec3(3,2,1))
495 	{
496 		const bool isSigned = m_type == TYPE_INT;
497 		m_initialValue = isSigned ? (m_precision == PRECISION_LOWP ? -100 : -1000) : 0;
498 	}
499 
500 protected:
getInputs(int numValues,int stride,void * inputs) const501 	void getInputs (int numValues, int stride, void* inputs) const
502 	{
503 		de::Random	rnd			(deStringHash(getName()));
504 		const bool	isSigned	= m_type == TYPE_INT;
505 		const int	maxVal		= m_precision == PRECISION_LOWP ? 100 : 1000;
506 		const int	minVal		= isSigned ? -maxVal : 0;
507 
508 		for (int valNdx = 0; valNdx < numValues; valNdx++)
509 			*(int*)((deUint8*)inputs + stride*valNdx) = rnd.getInt(minVal, maxVal);
510 	}
511 
verify(int numValues,int inputStride,const void * inputs,int outputStride,const void * outputs,int groupStride,const void * groupOutputs) const512 	bool verify (int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const
513 	{
514 		const int	workGroupSize	= (int)product(m_workGroupSize);
515 		const int	numWorkGroups	= numValues/workGroupSize;
516 		bool		anyError		= false;
517 
518 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
519 		{
520 			const int		groupOffset		= groupNdx*workGroupSize;
521 			const deUint32	groupOutput		= *(const deUint32*)((const deUint8*)groupOutputs + groupNdx*groupStride);
522 			set<int>		inValues;
523 			set<deUint32>	outValues;
524 			int				maxValue		= (int)m_initialValue;
525 
526 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
527 			{
528 				const deInt32 inputValue = *(const deInt32*)((const deUint8*)inputs + inputStride*(groupOffset+localNdx));
529 				inValues.insert(inputValue);
530 				maxValue = de::max(maxValue, inputValue);
531 			}
532 
533 			if (!intEqualsAfterUintCast(maxValue, groupOutput, m_precision))
534 			{
535 				m_testCtx.getLog()
536 					<< TestLog::Message
537 					<< "ERROR: at group " << groupNdx
538 					<< ": expected maximum " << maxValue << " (" << tcu::Format::Hex<8>((deUint32)maxValue) << ")"
539 					<< ", got " << groupOutput << " (" << tcu::Format::Hex<8>(groupOutput) << ")"
540 					<< TestLog::EndMessage;
541 				anyError = true;
542 			}
543 
544 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
545 			{
546 				const deUint32 outputValue = *(const deUint32*)((const deUint8*)outputs + outputStride*(groupOffset+localNdx));
547 
548 				if (!containsAfterUintCast(inValues, outputValue, m_precision) &&
549 					!intEqualsAfterUintCast((deInt32)m_initialValue, outputValue, m_precision))
550 				{
551 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
552 														   << ": found unexpected value " << outputValue
553 														   << " (" << tcu::Format::Hex<8>(outputValue) << ")"
554 									   << TestLog::EndMessage;
555 					anyError = true;
556 				}
557 
558 				outValues.insert(outputValue);
559 			}
560 
561 			if (!containsAfterUintCast(outValues, (int)m_initialValue, m_precision))
562 			{
563 				m_testCtx.getLog() << TestLog::Message << "ERROR: could not find initial value from group " << groupNdx << TestLog::EndMessage;
564 				anyError = true;
565 			}
566 		}
567 
568 		return !anyError;
569 	}
570 };
571 
572 class ShaderAtomicAndCase : public ShaderAtomicOpCase
573 {
574 public:
ShaderAtomicAndCase(Context & context,const char * name,AtomicOperandType operandType,DataType type,Precision precision)575 	ShaderAtomicAndCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
576 		: ShaderAtomicOpCase(context, name, "atomicAnd", operandType, type, precision, UVec3(3,2,1))
577 	{
578 		const int		numBits		= m_precision == PRECISION_HIGHP ? 32 :
579 									  m_precision == PRECISION_MEDIUMP ? 16 : 8;
580 		const deUint32	valueMask	= numBits == 32 ? ~0u : (1u<<numBits)-1u;
581 		m_initialValue = ~((1u<<(numBits-1u)) | 1u) & valueMask; // All bits except lowest and highest set.
582 	}
583 
584 protected:
getInputs(int numValues,int stride,void * inputs) const585 	void getInputs (int numValues, int stride, void* inputs) const
586 	{
587 		de::Random		rnd				(deStringHash(getName()));
588 		const int		workGroupSize	= (int)product(m_workGroupSize);
589 		const int		numWorkGroups	= numValues/workGroupSize;
590 		const int		numBits			= m_precision == PRECISION_HIGHP ? 32 :
591 										  m_precision == PRECISION_MEDIUMP ? 16 : 8;
592 		const deUint32	valueMask		= numBits == 32 ? ~0u : (1u<<numBits)-1u;
593 
594 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
595 		{
596 			const int		groupOffset		= groupNdx*workGroupSize;
597 			const deUint32	groupMask		= 1<<rnd.getInt(0, numBits-2); // One bit is always set.
598 
599 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
600 				*(deUint32*)((deUint8*)inputs + stride*(groupOffset+localNdx)) = (rnd.getUint32() & valueMask) | groupMask;
601 		}
602 	}
603 
verify(int numValues,int inputStride,const void * inputs,int outputStride,const void * outputs,int groupStride,const void * groupOutputs) const604 	bool verify (int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const
605 	{
606 		const int		workGroupSize	= (int)product(m_workGroupSize);
607 		const int		numWorkGroups	= numValues/workGroupSize;
608 		const int		numBits			= m_precision == PRECISION_HIGHP ? 32 :
609 										  m_precision == PRECISION_MEDIUMP ? 16 : 8;
610 		const deUint32	compareMask		= (m_type == TYPE_UINT || numBits == 32) ? ~0u : (1u<<numBits)-1u;
611 
612 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
613 		{
614 			const int		groupOffset		= groupNdx*workGroupSize;
615 			const deUint32	groupOutput		= *(const deUint32*)((const deUint8*)groupOutputs + groupNdx*groupStride);
616 			deUint32		expectedValue	= m_initialValue;
617 
618 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
619 			{
620 				const deUint32 inputValue = *(const deUint32*)((const deUint8*)inputs + inputStride*(groupOffset+localNdx));
621 				expectedValue &= inputValue;
622 			}
623 
624 			if ((groupOutput & compareMask) != (expectedValue & compareMask))
625 			{
626 				m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected " << tcu::toHex(expectedValue) << ", got " << tcu::toHex(groupOutput) << TestLog::EndMessage;
627 				return false;
628 			}
629 
630 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
631 			{
632 				const deUint32 outputValue = *(const deUint32*)((const deUint8*)outputs + outputStride*(groupOffset+localNdx));
633 
634 				if ((compareMask & (outputValue & ~m_initialValue)) != 0)
635 				{
636 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
637 														   << ": found unexpected value " << tcu::toHex(outputValue)
638 									   << TestLog::EndMessage;
639 					return false;
640 				}
641 			}
642 		}
643 
644 		return true;
645 	}
646 };
647 
648 class ShaderAtomicOrCase : public ShaderAtomicOpCase
649 {
650 public:
ShaderAtomicOrCase(Context & context,const char * name,AtomicOperandType operandType,DataType type,Precision precision)651 	ShaderAtomicOrCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
652 		: ShaderAtomicOpCase(context, name, "atomicOr", operandType, type, precision, UVec3(3,2,1))
653 	{
654 		m_initialValue = 1u; // Lowest bit set.
655 	}
656 
657 protected:
getInputs(int numValues,int stride,void * inputs) const658 	void getInputs (int numValues, int stride, void* inputs) const
659 	{
660 		de::Random		rnd				(deStringHash(getName()));
661 		const int		workGroupSize	= (int)product(m_workGroupSize);
662 		const int		numWorkGroups	= numValues/workGroupSize;
663 		const int		numBits			= m_precision == PRECISION_HIGHP ? 32 :
664 										  m_precision == PRECISION_MEDIUMP ? 16 : 8;
665 
666 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
667 		{
668 			const int groupOffset = groupNdx*workGroupSize;
669 
670 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
671 				*(deUint32*)((deUint8*)inputs + stride*(groupOffset+localNdx)) = 1u<<rnd.getInt(0, numBits-1);
672 		}
673 	}
674 
verify(int numValues,int inputStride,const void * inputs,int outputStride,const void * outputs,int groupStride,const void * groupOutputs) const675 	bool verify (int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const
676 	{
677 		const int		workGroupSize	= (int)product(m_workGroupSize);
678 		const int		numWorkGroups	= numValues/workGroupSize;
679 		const int		numBits			= m_precision == PRECISION_HIGHP ? 32 :
680 										  m_precision == PRECISION_MEDIUMP ? 16 : 8;
681 		const deUint32	compareMask		= (m_type == TYPE_UINT || numBits == 32) ? ~0u : (1u<<numBits)-1u;
682 
683 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
684 		{
685 			const int		groupOffset		= groupNdx*workGroupSize;
686 			const deUint32	groupOutput		= *(const deUint32*)((const deUint8*)groupOutputs + groupNdx*groupStride);
687 			deUint32		expectedValue	= m_initialValue;
688 
689 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
690 			{
691 				const deUint32 inputValue = *(const deUint32*)((const deUint8*)inputs + inputStride*(groupOffset+localNdx));
692 				expectedValue |= inputValue;
693 			}
694 
695 			if ((groupOutput & compareMask) != (expectedValue & compareMask))
696 			{
697 				m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected " << tcu::toHex(expectedValue) << ", got " << tcu::toHex(groupOutput) << TestLog::EndMessage;
698 				return false;
699 			}
700 
701 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
702 			{
703 				const deUint32 outputValue = *(const deUint32*)((const deUint8*)outputs + outputStride*(groupOffset+localNdx));
704 
705 				if ((compareMask & (outputValue & m_initialValue)) == 0)
706 				{
707 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
708 														   << ": found unexpected value " << tcu::toHex(outputValue)
709 									   << TestLog::EndMessage;
710 					return false;
711 				}
712 			}
713 		}
714 
715 		return true;
716 	}
717 };
718 
719 class ShaderAtomicXorCase : public ShaderAtomicOpCase
720 {
721 public:
ShaderAtomicXorCase(Context & context,const char * name,AtomicOperandType operandType,DataType type,Precision precision)722 	ShaderAtomicXorCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
723 		: ShaderAtomicOpCase(context, name, "atomicXor", operandType, type, precision, UVec3(3,2,1))
724 	{
725 		m_initialValue = 0;
726 	}
727 
728 protected:
getInputs(int numValues,int stride,void * inputs) const729 	void getInputs (int numValues, int stride, void* inputs) const
730 	{
731 		de::Random		rnd				(deStringHash(getName()));
732 		const int		workGroupSize	= (int)product(m_workGroupSize);
733 		const int		numWorkGroups	= numValues/workGroupSize;
734 
735 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
736 		{
737 			const int groupOffset = groupNdx*workGroupSize;
738 
739 			// First uses random bit-pattern.
740 			*(deUint32*)((deUint8*)inputs + stride*(groupOffset)) = rnd.getUint32();
741 
742 			// Rest have either all or no bits set.
743 			for (int localNdx = 1; localNdx < workGroupSize; localNdx++)
744 				*(deUint32*)((deUint8*)inputs + stride*(groupOffset+localNdx)) = rnd.getBool() ? ~0u : 0u;
745 		}
746 	}
747 
verify(int numValues,int inputStride,const void * inputs,int outputStride,const void * outputs,int groupStride,const void * groupOutputs) const748 	bool verify (int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const
749 	{
750 		const int		workGroupSize	= (int)product(m_workGroupSize);
751 		const int		numWorkGroups	= numValues/workGroupSize;
752 		const int		numBits			= m_precision == PRECISION_HIGHP ? 32 :
753 										  m_precision == PRECISION_MEDIUMP ? 16 : 8;
754 		const deUint32	compareMask		= numBits == 32 ? ~0u : (1u<<numBits)-1u;
755 
756 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
757 		{
758 			const int		groupOffset		= groupNdx*workGroupSize;
759 			const deUint32	groupOutput		= *(const deUint32*)((const deUint8*)groupOutputs + groupNdx*groupStride);
760 			const deUint32	randomValue		= *(const deInt32*)((const deUint8*)inputs + inputStride*groupOffset);
761 			const deUint32	expected0		= randomValue ^ 0u;
762 			const deUint32	expected1		= randomValue ^ ~0u;
763 			int				numXorZeros		= (m_initialValue == 0) ? 1 : 0;
764 
765 			for (int localNdx = 1; localNdx < workGroupSize; localNdx++)
766 			{
767 				const deUint32 inputValue = *(const deUint32*)((const deUint8*)inputs + inputStride*(groupOffset+localNdx));
768 				if (inputValue == 0)
769 					numXorZeros += 1;
770 			}
771 
772 			const deUint32 expected = (numXorZeros%2 == 0) ? expected0 : expected1;
773 
774 			if ((groupOutput & compareMask) != (expected & compareMask))
775 			{
776 				m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected " << tcu::toHex(expected0)
777 													   << " or " << tcu::toHex(expected1) << " (compare mask " << tcu::toHex(compareMask)
778 													   << "), got " << tcu::toHex(groupOutput) << TestLog::EndMessage;
779 				return false;
780 			}
781 
782 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
783 			{
784 				const deUint32 outputValue = *(const deUint32*)((const deUint8*)outputs + outputStride*(groupOffset+localNdx));
785 
786 				if ((outputValue & compareMask) != 0 &&
787 					(outputValue & compareMask) != compareMask &&
788 					(outputValue & compareMask) != (expected0&compareMask) &&
789 					(outputValue & compareMask) != (expected1&compareMask))
790 				{
791 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
792 														   << ": found unexpected value " << tcu::toHex(outputValue)
793 									   << TestLog::EndMessage;
794 					return false;
795 				}
796 			}
797 		}
798 
799 		return true;
800 	}
801 };
802 
803 class ShaderAtomicExchangeCase : public ShaderAtomicOpCase
804 {
805 public:
ShaderAtomicExchangeCase(Context & context,const char * name,AtomicOperandType operandType,DataType type,Precision precision)806 	ShaderAtomicExchangeCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
807 		: ShaderAtomicOpCase(context, name, "atomicExchange", operandType, type, precision, UVec3(3,2,1))
808 	{
809 		m_initialValue = 0;
810 	}
811 
812 protected:
getInputs(int numValues,int stride,void * inputs) const813 	void getInputs (int numValues, int stride, void* inputs) const
814 	{
815 		const int	workGroupSize	= (int)product(m_workGroupSize);
816 		const int	numWorkGroups	= numValues/workGroupSize;
817 
818 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
819 		{
820 			const int groupOffset = groupNdx*workGroupSize;
821 
822 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
823 				*(int*)((deUint8*)inputs + stride*(groupOffset+localNdx)) = localNdx+1;
824 		}
825 	}
826 
verify(int numValues,int inputStride,const void * inputs,int outputStride,const void * outputs,int groupStride,const void * groupOutputs) const827 	bool verify (int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const
828 	{
829 		const int	workGroupSize	= (int)product(m_workGroupSize);
830 		const int	numWorkGroups	= numValues/workGroupSize;
831 
832 		DE_UNREF(inputStride && inputs);
833 
834 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
835 		{
836 			const int	groupOffset		= groupNdx*workGroupSize;
837 			const int	groupOutput		= *(const deInt32*)((const deUint8*)groupOutputs + groupNdx*groupStride);
838 			set<int>	usedValues;
839 
840 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
841 			{
842 				const int outputValue = *(const deInt32*)((const deUint8*)outputs + outputStride*(groupOffset+localNdx));
843 
844 				if (!de::inRange(outputValue, 0, workGroupSize) || usedValues.find(outputValue) != usedValues.end())
845 				{
846 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
847 														   << ": found unexpected value " << outputValue
848 									   << TestLog::EndMessage;
849 					return false;
850 				}
851 				usedValues.insert(outputValue);
852 			}
853 
854 			if (!de::inRange(groupOutput, 0, workGroupSize) || usedValues.find(groupOutput) != usedValues.end())
855 			{
856 				m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": unexpected final value" << groupOutput << TestLog::EndMessage;
857 				return false;
858 			}
859 		}
860 
861 		return true;
862 	}
863 };
864 
//! Test case for atomicCompSwap(). Unlike the other atomic cases in this file it derives
//! directly from TestCase and owns its shader program.
class ShaderAtomicCompSwapCase : public TestCase
{
public:
									ShaderAtomicCompSwapCase	(Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision);
									~ShaderAtomicCompSwapCase	(void);

	void							init						(void);		// Builds and compiles the compute program.
	void							deinit						(void);		// Deletes the compute program.
	IterateResult					iterate						(void);		// Runs the dispatch and verifies the results.

protected:

private:
	// Copy construction and assignment are declared private; no definition is visible
	// in this part of the file (presumably the case is meant to be non-copyable).
									ShaderAtomicCompSwapCase	(const ShaderAtomicCompSwapCase& other);
	ShaderAtomicCompSwapCase&		operator=					(const ShaderAtomicCompSwapCase& other);

	const AtomicOperandType			m_operandType;		// Buffer variable vs. shared variable as the swap target.
	const DataType					m_type;				// Scalar data type used in the shader.
	const Precision					m_precision;		// Precision qualifier used in the shader.

	const UVec3						m_workGroupSize;	// Local work group size (x, y, z).
	const UVec3						m_numWorkGroups;	// Number of dispatched work groups (x, y, z).

	ShaderProgram*					m_program;			// Owned; created in init(), deleted in deinit().
};
890 
// Stores the case parameters. The local size is fixed at 3x2x1 and the dispatch at
// 4x4x4 work groups; the program is left null until init() creates it.
ShaderAtomicCompSwapCase::ShaderAtomicCompSwapCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
	: TestCase			(context, name, "atomicCompSwap() Test")
	, m_operandType		(operandType)
	, m_type			(type)
	, m_precision		(precision)
	, m_workGroupSize	(3,2,1)
	, m_numWorkGroups	(4,4,4)
	, m_program			(DE_NULL)
{
}
901 
// Releases the program if deinit() has not already done so. The call is explicitly
// qualified, so it always runs this class's deinit().
ShaderAtomicCompSwapCase::~ShaderAtomicCompSwapCase (void)
{
	ShaderAtomicCompSwapCase::deinit();
}
906 
init(void)907 void ShaderAtomicCompSwapCase::init (void)
908 {
909 	const bool			isSSBO		= m_operandType == ATOMIC_OPERAND_BUFFER_VARIABLE;
910 	const char*			precName	= getPrecisionName(m_precision);
911 	const char*			typeName	= getDataTypeName(m_type);
912 	const deUint32		numValues	= product(m_workGroupSize)*product(m_numWorkGroups);
913 	std::ostringstream	src;
914 
915 	src << "#version 310 es\n"
916 		<< "layout(local_size_x = " << m_workGroupSize.x()
917 		<< ", local_size_y = " << m_workGroupSize.y()
918 		<< ", local_size_z = " << m_workGroupSize.z() << ") in;\n"
919 		<< "layout(binding = 0) buffer InOut\n"
920 		<< "{\n"
921 		<< "	" << precName << " " << typeName << " compareValues[" << numValues << "];\n"
922 		<< "	" << precName << " " << typeName << " exchangeValues[" << numValues << "];\n"
923 		<< "	" << precName << " " << typeName << " outputValues[" << numValues << "];\n"
924 		<< "	" << (isSSBO ? "coherent " : "") << precName << " " << typeName << " groupValues[" << product(m_numWorkGroups) << "];\n"
925 		<< "} sb_inout;\n";
926 
927 	if (!isSSBO)
928 		src << "shared " << precName << " " << typeName << " s_var;\n";
929 
930 	src << "\n"
931 		<< "void main (void)\n"
932 		<< "{\n"
933 		<< "	uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
934 		<< "	uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
935 		<< "	uint globalOffs = localSize*globalNdx;\n"
936 		<< "	uint offset     = globalOffs + gl_LocalInvocationIndex;\n"
937 		<< "\n";
938 
939 	if (!isSSBO)
940 	{
941 		src << "	if (gl_LocalInvocationIndex == 0u)\n"
942 			<< "		s_var = " << typeName << "(" << 0 << ");\n"
943 			<< "\n";
944 	}
945 
946 	src << "	" << precName << " " << typeName << " compare = sb_inout.compareValues[offset];\n"
947 		<< "	" << precName << " " << typeName << " exchange = sb_inout.exchangeValues[offset];\n"
948 		<< "	" << precName << " " << typeName << " result;\n"
949 		<< "	bool swapDone = false;\n"
950 		<< "\n"
951 		<< "	for (uint ndx = 0u; ndx < localSize; ndx++)\n"
952 		<< "	{\n"
953 		<< "		barrier();\n"
954 		<< "		if (!swapDone)\n"
955 		<< "		{\n"
956 		<< "			result = atomicCompSwap(" << (isSSBO ? "sb_inout.groupValues[globalNdx]" : "s_var") << ", compare, exchange);\n"
957 		<< "			if (result == compare)\n"
958 		<< "				swapDone = true;\n"
959 		<< "		}\n"
960 		<< "	}\n"
961 		<< "\n"
962 		<< "	sb_inout.outputValues[offset] = result;\n";
963 
964 	if (!isSSBO)
965 	{
966 		src << "	barrier();\n"
967 			<< "	if (gl_LocalInvocationIndex == 0u)\n"
968 			<< "		sb_inout.groupValues[globalNdx] = s_var;\n";
969 	}
970 
971 	src << "}\n";
972 
973 	DE_ASSERT(!m_program);
974 	m_program = new ShaderProgram(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
975 
976 	m_testCtx.getLog() << *m_program;
977 
978 	if (!m_program->isOk())
979 	{
980 		delete m_program;
981 		m_program = DE_NULL;
982 		throw tcu::TestError("Compile failed");
983 	}
984 }
985 
// Deletes the shader program and clears the pointer, so calling this again (for
// example from the destructor) deletes nothing further.
void ShaderAtomicCompSwapCase::deinit (void)
{
	delete m_program;
	m_program = DE_NULL;
}
991 
iterate(void)992 ShaderAtomicOpCase::IterateResult ShaderAtomicCompSwapCase::iterate (void)
993 {
994 	const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
995 	const deUint32				program			= m_program->getProgram();
996 	const Buffer				inoutBuffer		(m_context.getRenderContext());
997 	const deUint32				blockNdx		= gl.getProgramResourceIndex(program, GL_SHADER_STORAGE_BLOCK, "InOut");
998 	const InterfaceBlockInfo	blockInfo		= getProgramInterfaceBlockInfo(gl, program, GL_SHADER_STORAGE_BLOCK, blockNdx);
999 	const deUint32				cmpVarNdx		= gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.compareValues[0]");
1000 	const InterfaceVariableInfo	cmpVarInfo		= getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, cmpVarNdx);
1001 	const deUint32				exhVarNdx		= gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.exchangeValues[0]");
1002 	const InterfaceVariableInfo	exhVarInfo		= getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, exhVarNdx);
1003 	const deUint32				outVarNdx		= gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.outputValues[0]");
1004 	const InterfaceVariableInfo	outVarInfo		= getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, outVarNdx);
1005 	const deUint32				groupVarNdx		= gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.groupValues[0]");
1006 	const InterfaceVariableInfo	groupVarInfo	= getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, groupVarNdx);
1007 	const deUint32				numValues		= product(m_workGroupSize)*product(m_numWorkGroups);
1008 
1009 	TCU_CHECK(cmpVarInfo.arraySize == numValues &&
1010 			  exhVarInfo.arraySize == numValues &&
1011 			  outVarInfo.arraySize == numValues &&
1012 			  groupVarInfo.arraySize == product(m_numWorkGroups));
1013 
1014 	gl.useProgram(program);
1015 
1016 	// \todo [2013-09-05 pyry] Use randomized input values!
1017 
1018 	// Setup buffer.
1019 	{
1020 		const deUint32	workGroupSize	= product(m_workGroupSize);
1021 		vector<deUint8>	bufData			(blockInfo.dataSize);
1022 
1023 		std::fill(bufData.begin(), bufData.end(), 0);
1024 
1025 		for (deUint32 ndx = 0; ndx < numValues; ndx++)
1026 			*(deUint32*)(&bufData[0] + cmpVarInfo.offset + cmpVarInfo.arrayStride*ndx) = ndx%workGroupSize;
1027 
1028 		for (deUint32 ndx = 0; ndx < numValues; ndx++)
1029 			*(deUint32*)(&bufData[0] + exhVarInfo.offset + exhVarInfo.arrayStride*ndx) = (ndx%workGroupSize)+1;
1030 
1031 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inoutBuffer);
1032 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, &bufData[0], GL_STATIC_READ);
1033 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *inoutBuffer);
1034 		GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1035 	}
1036 
1037 	gl.dispatchCompute(m_numWorkGroups.x(), m_numWorkGroups.y(), m_numWorkGroups.z());
1038 
1039 	// Read back and compare
1040 	{
1041 		const void*		resPtr			= gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, blockInfo.dataSize, GL_MAP_READ_BIT);
1042 		const int		numWorkGroups	= (int)product(m_numWorkGroups);
1043 		const int		workGroupSize	= (int)product(m_workGroupSize);
1044 		bool			isOk			= true;
1045 
1046 		GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
1047 		TCU_CHECK(resPtr);
1048 
1049 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
1050 		{
1051 			const int	groupOffset		= groupNdx*workGroupSize;
1052 			const int	groupOutput		= *(const deInt32*)((const deUint8*)resPtr + groupVarInfo.offset + groupNdx*groupVarInfo.arrayStride);
1053 
1054 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
1055 			{
1056 				const int	refValue		= localNdx;
1057 				const int	outputValue		= *(const deInt32*)((const deUint8*)resPtr + outVarInfo.offset + outVarInfo.arrayStride*(groupOffset+localNdx));
1058 
1059 				if (outputValue != refValue)
1060 				{
1061 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
1062 														   << ": expected " << refValue << ", got " << outputValue
1063 									   << TestLog::EndMessage;
1064 					isOk = false;
1065 					break;
1066 				}
1067 			}
1068 
1069 			if (groupOutput != workGroupSize)
1070 			{
1071 				m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected" << workGroupSize << ", got " << groupOutput << TestLog::EndMessage;
1072 				isOk = false;
1073 				break;
1074 			}
1075 		}
1076 
1077 		gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1078 		GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
1079 
1080 		m_testCtx.setTestResult(isOk ? QP_TEST_RESULT_PASS	: QP_TEST_RESULT_FAIL,
1081 								isOk ? "Pass"				: "Comparison failed");
1082 	}
1083 
1084 	return STOP;
1085 }
1086 
// Creates the top-level group; the operand type is stored and handed to every child
// group when init() builds them.
ShaderAtomicOpTests::ShaderAtomicOpTests (Context& context, const char* name, AtomicOperandType operandType)
	: TestCaseGroup	(context, name, "Atomic Operation Tests")
	, m_operandType	(operandType)
{
}
1092 
// Nothing to release here: this class holds only the operand type.
ShaderAtomicOpTests::~ShaderAtomicOpTests (void)
{
}
1096 
1097 template<typename T>
createAtomicOpGroup(Context & context,AtomicOperandType operandType,const char * groupName)1098 static tcu::TestCaseGroup* createAtomicOpGroup (Context& context, AtomicOperandType operandType, const char* groupName)
1099 {
1100 	tcu::TestCaseGroup *const group = new tcu::TestCaseGroup(context.getTestContext(), groupName, (string("Atomic ") + groupName).c_str());
1101 	try
1102 	{
1103 		for (int precNdx = 0; precNdx < PRECISION_LAST; precNdx++)
1104 		{
1105 			for (int typeNdx = 0; typeNdx < 2; typeNdx++)
1106 			{
1107 				const Precision		precision		= Precision(precNdx);
1108 				const DataType		type			= typeNdx > 0 ? TYPE_INT : TYPE_UINT;
1109 				const string		caseName		= string(getPrecisionName(precision)) + "_" + getDataTypeName(type);
1110 
1111 				group->addChild(new T(context, caseName.c_str(), operandType, type, precision));
1112 			}
1113 		}
1114 
1115 		return group;
1116 	}
1117 	catch (...)
1118 	{
1119 		delete group;
1120 		throw;
1121 	}
1122 }
1123 
// Registers one child group per atomic operation. Each group is built by
// createAtomicOpGroup() with this group's operand type and contains one case per
// precision/data type combination.
void ShaderAtomicOpTests::init (void)
{
	addChild(createAtomicOpGroup<ShaderAtomicAddCase>		(m_context, m_operandType, "add"));
	addChild(createAtomicOpGroup<ShaderAtomicMinCase>		(m_context, m_operandType, "min"));
	addChild(createAtomicOpGroup<ShaderAtomicMaxCase>		(m_context, m_operandType, "max"));
	addChild(createAtomicOpGroup<ShaderAtomicAndCase>		(m_context, m_operandType, "and"));
	addChild(createAtomicOpGroup<ShaderAtomicOrCase>		(m_context, m_operandType, "or"));
	addChild(createAtomicOpGroup<ShaderAtomicXorCase>		(m_context, m_operandType, "xor"));
	addChild(createAtomicOpGroup<ShaderAtomicExchangeCase>	(m_context, m_operandType, "exchange"));
	addChild(createAtomicOpGroup<ShaderAtomicCompSwapCase>	(m_context, m_operandType, "compswap"));
}
1135 
1136 } // Functional
1137 } // gles31
1138 } // deqp
1139