1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.1 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Synchronization Tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es31fSynchronizationTests.hpp"
25 #include "tcuTestLog.hpp"
26 #include "tcuStringTemplate.hpp"
27 #include "tcuSurface.hpp"
28 #include "tcuRenderTarget.hpp"
29 #include "gluRenderContext.hpp"
30 #include "gluShaderProgram.hpp"
31 #include "gluObjectWrapper.hpp"
32 #include "gluPixelTransfer.hpp"
33 #include "gluContextInfo.hpp"
34 #include "glwFunctions.hpp"
35 #include "glwEnums.hpp"
36 #include "deStringUtil.hpp"
37 #include "deSharedPtr.hpp"
38 #include "deMemory.h"
39 #include "deRandom.hpp"
40 
41 #include <map>
42 
43 namespace deqp
44 {
45 namespace gles31
46 {
47 namespace Functional
48 {
49 namespace
50 {
51 
validateSortedAtomicRampAdditionValueChain(const std::vector<deUint32> & valueChain,deUint32 sumValue,int & invalidOperationNdx,deUint32 & errorDelta,deUint32 & errorExpected)52 static bool validateSortedAtomicRampAdditionValueChain (const std::vector<deUint32>& valueChain, deUint32 sumValue, int& invalidOperationNdx, deUint32& errorDelta, deUint32& errorExpected)
53 {
54 	std::vector<deUint32> chainDelta(valueChain.size());
55 
56 	for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
57 		chainDelta[callNdx] = ((callNdx + 1 == (int)valueChain.size()) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
58 
59 	// chainDelta contains now the actual additions applied to the value
60 	// check there exists an addition ramp form 1 to ...
61 	std::sort(chainDelta.begin(), chainDelta.end());
62 
63 	for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
64 	{
65 		if ((int)chainDelta[callNdx] != callNdx+1)
66 		{
67 			invalidOperationNdx = callNdx;
68 			errorDelta = chainDelta[callNdx];
69 			errorExpected = callNdx+1;
70 
71 			return false;
72 		}
73 	}
74 
75 	return true;
76 }
77 
readBuffer(const glw::Functions & gl,deUint32 target,int numElements,std::vector<deUint32> & result)78 static void readBuffer (const glw::Functions& gl, deUint32 target, int numElements, std::vector<deUint32>& result)
79 {
80 	const void* ptr = gl.mapBufferRange(target, 0, (int)(sizeof(deUint32) * numElements), GL_MAP_READ_BIT);
81 	GLU_EXPECT_NO_ERROR(gl.getError(), "map");
82 
83 	if (!ptr)
84 		throw tcu::TestError("mapBufferRange returned NULL");
85 
86 	result.resize(numElements);
87 	memcpy(&result[0], ptr, sizeof(deUint32) * numElements);
88 
89 	if (gl.unmapBuffer(target) == GL_FALSE)
90 		throw tcu::TestError("unmapBuffer returned false");
91 }
92 
readBufferUint32(const glw::Functions & gl,deUint32 target)93 static deUint32 readBufferUint32 (const glw::Functions& gl, deUint32 target)
94 {
95 	std::vector<deUint32> vec;
96 
97 	readBuffer(gl, target, 1, vec);
98 
99 	return vec[0];
100 }
101 
102 //! Generate a ramp of values from 1 to numElements, and shuffle it
generateShuffledRamp(int numElements,std::vector<int> & ramp)103 void generateShuffledRamp (int numElements, std::vector<int>& ramp)
104 {
105 	de::Random rng(0xabcd);
106 
107 	// some positive (non-zero) unique values
108 	ramp.resize(numElements);
109 	for (int callNdx = 0; callNdx < numElements; ++callNdx)
110 		ramp[callNdx] = callNdx + 1;
111 
112 	rng.shuffle(ramp.begin(), ramp.end());
113 }
114 
specializeShader(Context & context,const char * code)115 static std::string specializeShader(Context& context, const char* code)
116 {
117 	const glu::GLSLVersion				glslVersion			= glu::getContextTypeGLSLVersion(context.getRenderContext().getType());
118 	std::map<std::string, std::string>	specializationMap;
119 
120 	specializationMap["GLSL_VERSION_DECL"] = glu::getGLSLVersionDeclaration(glslVersion);
121 
122 	if (glu::contextSupports(context.getRenderContext().getType(), glu::ApiType::es(3, 2)))
123 		specializationMap["SHADER_IMAGE_ATOMIC_REQUIRE"] = "";
124 	else
125 		specializationMap["SHADER_IMAGE_ATOMIC_REQUIRE"] = "#extension GL_OES_shader_image_atomic : require";
126 
127 	return tcu::StringTemplate(code).specialize(specializationMap);
128 }
129 
// Base class for inter-invocation communication tests: a compute shader
// accesses shared storage (an SSBO or an r32i image), synchronizes, and each
// invocation writes 1 to a result buffer if everything it observed was as
// expected. Subclasses supply the shader body via genShaderSource().
class InterInvocationTestCase : public TestCase
{
public:
	// Type of the shared storage the invocations communicate through.
	enum StorageType
	{
		STORAGE_BUFFER = 0,
		STORAGE_IMAGE,

		STORAGE_LAST
	};
	// Bitmask of test variations, passed as the ctor's flags argument.
	enum CaseFlags
	{
		FLAG_ATOMIC				= 0x1,	//!< access the storage with atomic operations
		FLAG_ALIASING_STORAGES	= 0x2,	//!< bind the same storage object to two interfaces
		FLAG_IN_GROUP			= 0x4,	//!< synchronize with the whole work group, not just this invocation
	};

						InterInvocationTestCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags = 0);
						~InterInvocationTestCase	(void);

private:
	void				init						(void);
	void				deinit						(void);
	IterateResult		iterate						(void);

	void				runCompute					(void);
	bool				verifyResults				(void);
	virtual std::string	genShaderSource				(void) const = 0;

protected:
	std::string			genBarrierSource			(void) const;

	const StorageType	m_storage;
	const bool			m_useAtomic;
	const bool			m_aliasingStorages;
	const bool			m_syncWithGroup;
	const int			m_workWidth;				//!< total work width
	const int			m_workHeight;				//!<     ...    height
	const int			m_localWidth;				//!< group width
	const int			m_localHeight;				//!< group height
	const int			m_elementsPerInvocation;	//!< elements accessed by a single invocation

private:
	glw::GLuint			m_storageBuf;				// shared storage (STORAGE_BUFFER)
	glw::GLuint			m_storageTex;				// shared storage (STORAGE_IMAGE)
	glw::GLuint			m_resultBuf;				// one int per invocation, 1 = pass
	glu::ShaderProgram*	m_program;
};
178 
// Constructor only records configuration; GL objects are created in init().
InterInvocationTestCase::InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: TestCase					(context, name, desc)
	, m_storage					(storage)
	, m_useAtomic				((flags & FLAG_ATOMIC) != 0)
	, m_aliasingStorages		((flags & FLAG_ALIASING_STORAGES) != 0)
	, m_syncWithGroup			((flags & FLAG_IN_GROUP) != 0)
	, m_workWidth				(256)	// total work size: 256x256 invocations
	, m_workHeight				(256)
	, m_localWidth				(16)	// work group size: 16x8 = 128 invocations
	, m_localHeight				(8)
	, m_elementsPerInvocation	(8)
	, m_storageBuf				(0)
	, m_storageTex				(0)
	, m_resultBuf				(0)
	, m_program					(DE_NULL)
{
	DE_ASSERT(m_storage < STORAGE_LAST);
	DE_ASSERT(m_localWidth*m_localHeight <= 128); // minimum MAX_COMPUTE_WORK_GROUP_INVOCATIONS value
}
198 
// Destructor releases GL objects in case deinit() was not called by the framework.
InterInvocationTestCase::~InterInvocationTestCase (void)
{
	deinit();
}
203 
// Build the compute program and create the zero-filled source storage
// (buffer or image) and the -1-filled result buffer.
// Throws tcu::NotSupportedError if a required extension is missing and
// tcu::TestError if the program fails to build.
void InterInvocationTestCase::init (void)
{
	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
	const bool				supportsES32	= glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2));

	// requirements

	// Image atomics are core only in ES 3.2; older contexts need the OES extension.
	if (m_useAtomic && m_storage == STORAGE_IMAGE && !supportsES32 && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");

	// program

	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genShaderSource()));
	m_testCtx.getLog() << *m_program;
	if (!m_program->isOk())
		throw tcu::TestError("could not build program");

	// source

	if (m_storage == STORAGE_BUFFER)
	{
		// One deUint32 per (invocation, element) pair, initialized to zero.
		const int				bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
		const int				bufferSize		= bufferElements * (int)sizeof(deUint32);
		std::vector<deUint32>	zeroBuffer		(bufferElements, 0);

		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating zero-filled buffer for storage, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

		gl.genBuffers(1, &m_storageBuf);
		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_storageBuf);
		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		// Elements are stacked vertically: image height = workHeight * elementsPerInvocation.
		const int				bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
		const int				bufferSize		= bufferElements * (int)sizeof(deUint32);

		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating image for storage, size " << m_workWidth << "x" << m_workHeight * m_elementsPerInvocation << ", " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

		gl.genTextures(1, &m_storageTex);
		gl.bindTexture(GL_TEXTURE_2D, m_storageTex);
		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, m_workWidth, m_workHeight * m_elementsPerInvocation);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage image");

		// Zero-fill
		m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0." << tcu::TestLog::EndMessage;

		{
			const std::vector<deInt32> zeroBuffer(m_workWidth * m_workHeight * m_elementsPerInvocation, 0);
			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_workWidth, m_workHeight * m_elementsPerInvocation, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
			GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
		}
	}
	else
		DE_ASSERT(DE_FALSE);

	// destination

	{
		// One result slot per invocation; -1 marks "not written" so missed
		// writes are distinguishable from failed (0) checks.
		const int				bufferElements	= m_workWidth * m_workHeight;
		const int				bufferSize		= bufferElements * (int)sizeof(deUint32);
		std::vector<deInt32>	negativeBuffer	(bufferElements, -1);

		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating -1 filled buffer for results, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

		gl.genBuffers(1, &m_resultBuf);
		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &negativeBuffer[0], GL_STATIC_DRAW);
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
	}
}
277 
deinit(void)278 void InterInvocationTestCase::deinit (void)
279 {
280 	if (m_storageBuf)
281 	{
282 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_storageBuf);
283 		m_storageBuf = DE_NULL;
284 	}
285 
286 	if (m_storageTex)
287 	{
288 		m_context.getRenderContext().getFunctions().deleteTextures(1, &m_storageTex);
289 		m_storageTex = DE_NULL;
290 	}
291 
292 	if (m_resultBuf)
293 	{
294 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_resultBuf);
295 		m_resultBuf = DE_NULL;
296 	}
297 
298 	delete m_program;
299 	m_program = DE_NULL;
300 }
301 
iterate(void)302 InterInvocationTestCase::IterateResult InterInvocationTestCase::iterate (void)
303 {
304 	// Dispatch
305 	runCompute();
306 
307 	// Verify buffer contents
308 	if (verifyResults())
309 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
310 	else
311 		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
312 
313 	return STOP;
314 }
315 
runCompute(void)316 void InterInvocationTestCase::runCompute (void)
317 {
318 	const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
319 	const int				groupsX	= m_workWidth / m_localWidth;
320 	const int				groupsY	= m_workHeight / m_localHeight;
321 
322 	DE_ASSERT((m_workWidth % m_localWidth) == 0);
323 	DE_ASSERT((m_workHeight % m_localHeight) == 0);
324 
325 	m_testCtx.getLog()
326 		<< tcu::TestLog::Message
327 		<< "Dispatching compute.\n"
328 		<< "	group size: " << m_localWidth << "x" << m_localHeight << "\n"
329 		<< "	dispatch size: " << groupsX << "x" << groupsY << "\n"
330 		<< "	total work size: " << m_workWidth << "x" << m_workHeight << "\n"
331 		<< tcu::TestLog::EndMessage;
332 
333 	gl.useProgram(m_program->getProgram());
334 
335 	// source
336 	if (m_storage == STORAGE_BUFFER && !m_aliasingStorages)
337 	{
338 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
339 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
340 	}
341 	else if (m_storage == STORAGE_BUFFER && m_aliasingStorages)
342 	{
343 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
344 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageBuf);
345 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
346 
347 		m_testCtx.getLog() << tcu::TestLog::Message << "Binding same buffer object to buffer storages." << tcu::TestLog::EndMessage;
348 	}
349 	else if (m_storage == STORAGE_IMAGE && !m_aliasingStorages)
350 	{
351 		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
352 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
353 	}
354 	else if (m_storage == STORAGE_IMAGE && m_aliasingStorages)
355 	{
356 		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
357 		gl.bindImageTexture(2, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
358 
359 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
360 
361 		m_testCtx.getLog() << tcu::TestLog::Message << "Binding same texture level to image storages." << tcu::TestLog::EndMessage;
362 	}
363 	else
364 		DE_ASSERT(DE_FALSE);
365 
366 	// destination
367 	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_resultBuf);
368 	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
369 
370 	// dispatch
371 	gl.dispatchCompute(groupsX, groupsY, 1);
372 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatchCompute");
373 }
374 
verifyResults(void)375 bool InterInvocationTestCase::verifyResults (void)
376 {
377 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
378 	const int				errorFloodThreshold	= 5;
379 	int						numErrorsLogged		= 0;
380 	const void*				mapped				= DE_NULL;
381 	std::vector<deInt32>	results				(m_workWidth * m_workHeight);
382 	bool					error				= false;
383 
384 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
385 	gl.memoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
386 	mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_workWidth * m_workHeight * sizeof(deInt32), GL_MAP_READ_BIT);
387 	GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
388 
389 	// copy to properly aligned array
390 	deMemcpy(&results[0], mapped, m_workWidth * m_workHeight * sizeof(deUint32));
391 
392 	if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
393 		throw tcu::TestError("memory map store corrupted");
394 
395 	// check the results
396 	for (int ndx = 0; ndx < (int)results.size(); ++ndx)
397 	{
398 		if (results[ndx] != 1)
399 		{
400 			error = true;
401 
402 			if (numErrorsLogged == 0)
403 				m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
404 			if (numErrorsLogged++ < errorFloodThreshold)
405 				m_testCtx.getLog() << tcu::TestLog::Message << "	Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
406 			else
407 			{
408 				// after N errors, no point continuing verification
409 				m_testCtx.getLog() << tcu::TestLog::Message << "	-- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
410 				break;
411 			}
412 		}
413 	}
414 
415 	if (!error)
416 		m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer ok." << tcu::TestLog::EndMessage;
417 	return !error;
418 }
419 
genBarrierSource(void) const420 std::string InterInvocationTestCase::genBarrierSource (void) const
421 {
422 	std::ostringstream buf;
423 
424 	if (m_syncWithGroup)
425 	{
426 		// Wait until all invocations in this work group have their texture/buffer read/write operations complete
427 		// \note We could also use memoryBarrierBuffer() or memoryBarrierImage() in place of groupMemoryBarrier() but
428 		//       we only require intra-workgroup synchronization.
429 		buf << "\n"
430 			<< "	groupMemoryBarrier();\n"
431 			<< "	barrier();\n"
432 			<< "\n";
433 	}
434 	else if (m_storage == STORAGE_BUFFER)
435 	{
436 		DE_ASSERT(!m_syncWithGroup);
437 
438 		// Waiting only for data written by this invocation. Since all buffer reads and writes are
439 		// processed in order (within a single invocation), we don't have to do anything.
440 		buf << "\n";
441 	}
442 	else if (m_storage == STORAGE_IMAGE)
443 	{
444 		DE_ASSERT(!m_syncWithGroup);
445 
446 		// Waiting only for data written by this invocation. But since operations complete in undefined
447 		// order, we have to wait for them to complete.
448 		buf << "\n"
449 			<< "	memoryBarrierImage();\n"
450 			<< "\n";
451 	}
452 	else
453 		DE_ASSERT(DE_FALSE);
454 
455 	return buf.str();
456 }
457 
// Intermediate base that provides the common shader frame (declarations,
// index/coordinate helpers, main() skeleton); subclasses supply only the
// main() body via genShaderMainBlock().
class InvocationBasicCase : public InterInvocationTestCase
{
public:
							InvocationBasicCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	std::string				genShaderSource			(void) const;
	virtual std::string		genShaderMainBlock		(void) const = 0;
};
466 
// Forwarding constructor; all configuration handled by the base class.
InvocationBasicCase::InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InterInvocationTestCase(context, name, desc, storage, flags)
{
}
471 
// Generate the complete compute shader: result buffer declaration, storage
// (buffer or image) declaration with an index/coordinate helper, and a main()
// whose body comes from the subclass via genShaderMainBlock(). Template
// variables are resolved by specializeShader().
std::string InvocationBasicCase::genShaderSource (void) const
{
	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

	// Common frame: version, optional image-atomics extension, work group
	// size, and the per-invocation result buffer at binding 0.
	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
		<< "layout(binding=0, std430) buffer Output\n"
		<< "{\n"
		<< "	highp int values[];\n"
		<< "} sb_result;\n";

	// Storage declaration plus a helper that maps (local invocation, element)
	// to a unique buffer index / image coordinate.
	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) coherent buffer Storage\n"
			<< "{\n"
			<< "	highp int values[];\n"
			<< "} sb_store;\n"
			<< "\n"
			<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
			<< "{\n"
			<< "	highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
			<< "	return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
			<< "}\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image;\n"
			<< "\n"
			<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
			<< "{\n"
			<< "	return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
			<< "}\n";
	else
		DE_ASSERT(DE_FALSE);

	// main(): subclass body computes allOk; the invocation's verdict goes to
	// the result buffer as 1 (pass) or 0 (fail).
	buf << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
		<< "	int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
		<< "	bool allOk      = true;\n"
		<< "\n"
		<< genShaderMainBlock()
		<< "\n"
		<< "	sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return specializeShader(m_context, buf.str().c_str());
}
520 
// Case: each invocation writes to its own storage elements, synchronizes, and
// then reads the values back (from a shifted neighbor when syncing with the
// whole group) and verifies them.
class InvocationWriteReadCase : public InvocationBasicCase
{
public:
					InvocationWriteReadCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	std::string		genShaderMainBlock			(void) const;
};
528 
// Forwarding constructor; all configuration handled by the base classes.
InvocationWriteReadCase::InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InvocationBasicCase(context, name, desc, storage, flags)
{
}
533 
// Generate main() body: write groupNdx to this invocation's elements,
// synchronize, then read the values back and verify them. When syncing with
// the group, each element is read from a shifted neighbor invocation so the
// check crosses invocation boundaries.
std::string InvocationWriteReadCase::genShaderMainBlock (void) const
{
	std::ostringstream buf;

	// write

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	buf << genBarrierSource();

	// read

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		// In group-sync mode read another invocation's slot (per-element shift, wrapped to the group).
		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");

		// Atomic variants use exchange so the value is consumed (reset to 0) as it is verified.
		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 0) == groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 0) == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	return buf.str();
}
578 
// Case: each invocation first reads the (zero-initialized) storage — from a
// shifted neighbor when syncing with the group — then synchronizes and writes
// its own elements.
class InvocationReadWriteCase : public InvocationBasicCase
{
public:
					InvocationReadWriteCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	std::string		genShaderMainBlock			(void) const;
};
586 
// Forwarding constructor; all configuration handled by the base classes.
InvocationReadWriteCase::InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InvocationBasicCase(context, name, desc, storage, flags)
{
}
591 
// Generate main() body: verify the storage still holds its initial zeros
// (atomic variants stamp 123 into the slot as they check), synchronize, and
// then write groupNdx to this invocation's elements.
std::string InvocationReadWriteCase::genShaderMainBlock (void) const
{
	std::ostringstream buf;

	// read

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		// In group-sync mode read another invocation's slot (per-element shift, wrapped to the group).
		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");

		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == 0);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == 0);\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == 0);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == 0);\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	buf << genBarrierSource();

	// write

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	return buf.str();
}
636 
// Case: write a dummy value, synchronize, overwrite with the real value
// (possibly into another invocation's slot), synchronize again, and verify.
class InvocationOverWriteCase : public InvocationBasicCase
{
public:
					InvocationOverWriteCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	std::string		genShaderMainBlock			(void) const;
};
644 
// Forwarding constructor; all configuration handled by the base classes.
InvocationOverWriteCase::InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InvocationBasicCase(context, name, desc, storage, flags)
{
}
649 
// Generate main() body: write a dummy value (456) to this invocation's
// elements, synchronize, overwrite with groupNdx (into a shifted neighbor's
// slot in group-sync mode), synchronize again, and verify that the final
// value — not the dummy — is observed.
std::string InvocationOverWriteCase::genShaderMainBlock (void) const
{
	std::ostringstream buf;

	// write

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	buf << genBarrierSource();

	// write over

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		// write another invocation's value or our own value depending on test type
		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+4) + ", " + de::toString(3*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");

		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tatomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tsb_store.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\timageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\timageStore(u_image, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	buf << genBarrierSource();

	// read

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		// check another invocation's value or our own value depending on test type
		// \note the read shift (ndx+1, 2*ndx) intentionally differs from the overwrite shift (ndx+4, 3*ndx)
		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");

		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	return buf.str();
}
718 
// Case: the same storage object is bound to two shader interfaces
// (FLAG_ALIASING_STORAGES is forced); writes go through one alias and are
// observed through the other. TYPE_OVERWRITE first writes a dummy value and
// synchronizes before the real write.
class InvocationAliasWriteCase : public InterInvocationTestCase
{
public:
	enum TestType
	{
		TYPE_WRITE = 0,		//!< write through one alias only
		TYPE_OVERWRITE,		//!< dummy-write through alias 0, sync, then write

		TYPE_LAST
	};

					InvocationAliasWriteCase	(Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags);
private:
	std::string		genShaderSource				(void) const;

	const TestType	m_type;
};
736 
// Forces FLAG_ALIASING_STORAGES on top of the caller-provided flags so the
// same storage object is always bound to both interfaces.
InvocationAliasWriteCase::InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags)
	: InterInvocationTestCase	(context, name, desc, storage, flags | FLAG_ALIASING_STORAGES)
	, m_type					(type)
{
	DE_ASSERT(type < TYPE_LAST);
}
743 
// Generates the compute shader source for the aliased-write test.
// The shader writes through one alias of the storage (store0 for TYPE_OVERWRITE,
// then store1 in all cases), issues a barrier, and finally reads the value back
// through the first alias (store0), checking that it observes the last write.
// Template variables (${...}) are filled in by specializeShader().
std::string InvocationAliasWriteCase::genShaderSource (void) const
{
	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

	// version, optional image-atomic extension requirement, work group layout and result SSBO
	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
		<< "layout(binding=0, std430) buffer Output\n"
		<< "{\n"
		<< "	highp int values[];\n"
		<< "} sb_result;\n";

	// declare the two aliasing storages (bindings 1 and 2) and the addressing helper
	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) coherent buffer Storage0\n"
			<< "{\n"
			<< "	highp int values[];\n"
			<< "} sb_store0;\n"
			<< "layout(binding=2, std430) coherent buffer Storage1\n"
			<< "{\n"
			<< "	highp int values[];\n"
			<< "} sb_store1;\n"
			<< "\n"
			<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
			<< "{\n"
			<< "	highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
			<< "	return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
			<< "}\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image0;\n"
			<< "layout(r32i, binding=2) coherent uniform highp iimage2D u_image1;\n"
			<< "\n"
			<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
			<< "{\n"
			<< "	return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
			<< "}\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
		<< "	int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
		<< "	bool allOk      = true;\n"
		<< "\n";

	if (m_type == TYPE_OVERWRITE)
	{
		// write

		// TYPE_OVERWRITE: first write a dummy value (456) through alias 0; the later
		// write through alias 1 must overwrite it.
		for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
		{
			if (m_storage == STORAGE_BUFFER && m_useAtomic)
				buf << "\tatomicAdd(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
			else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
				buf << "\tsb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
			else if (m_storage == STORAGE_IMAGE && m_useAtomic)
				buf << "\timageAtomicAdd(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
			else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
				buf << "\timageStore(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
			else
				DE_ASSERT(DE_FALSE);
		}

		// barrier

		buf << genBarrierSource();
	}
	else
		DE_ASSERT(m_type == TYPE_WRITE);

	// write (again)

	// Write groupNdx through alias 1. With m_syncWithGroup the target invocation is
	// shifted within the work group so another invocation's slot is written.
	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+2) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");

		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tatomicExchange(sb_store1.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tsb_store1.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\timageAtomicExchange(u_image1, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\timageStore(u_image1, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	buf << genBarrierSource();

	// read

	// Read back through alias 0; since both aliases name the same store, the value
	// written through alias 1 (groupNdx) must be visible here.
	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tallOk = allOk && (atomicExchange(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 123) == groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tallOk = allOk && (sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\tallOk = allOk && (imageAtomicExchange(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 123) == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\tallOk = allOk && (imageLoad(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << ")).x == groupNdx);\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	// return result

	buf << "\n"
		<< "	sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return specializeShader(m_context, buf.str().c_str());
}
862 
863 namespace op
864 {
865 
// Command payload: run a write program (seeded data pattern) targeting the
// storage identified by targetHandle.
struct WriteData
{
	int targetHandle;	// logical id of the destination storage
	int seed;			// seed of the generated value pattern

	static WriteData Generate(int targetHandle, int seed)
	{
		const WriteData result = { targetHandle, seed };
		return result;
	}
};
881 
// Command payload: run a read/verify program against the storage identified by
// targetHandle, expecting the pattern produced with the given seed.
struct ReadData
{
	int targetHandle;	// logical id of the source storage
	int seed;			// seed of the expected value pattern

	static ReadData Generate(int targetHandle, int seed)
	{
		const ReadData result = { targetHandle, seed };
		return result;
	}
};
897 
// Command tag: insert a memory barrier between adjacent dispatches. Carries no payload.
struct Barrier
{
};
901 
// Command payload: write only every even or odd element/column of the target
// storage, leaving the other half for a second interleaved write pass.
struct WriteDataInterleaved
{
	int		targetHandle;	// logical id of the destination storage
	int		seed;			// seed of the generated value pattern
	bool	evenOdd;		// true = even elements, false = odd elements

	static WriteDataInterleaved Generate(int targetHandle, int seed, bool evenOdd)
	{
		const WriteDataInterleaved result = { targetHandle, seed, evenOdd };
		return result;
	}
};
919 
// Command payload: verify a storage that was filled by two interleaved writes,
// one seeded with seed0 (even part) and one with seed1 (odd part).
struct ReadDataInterleaved
{
	int targetHandle;	// logical id of the source storage
	int seed0;			// seed of the first (even) write pass
	int seed1;			// seed of the second (odd) write pass

	static ReadDataInterleaved Generate(int targetHandle, int seed0, int seed1)
	{
		const ReadDataInterleaved result = { targetHandle, seed0, seed1 };
		return result;
	}
};
937 
// Command payload: verify two storages in a single program invocation, each
// against its own seeded pattern.
struct ReadMultipleData
{
	int targetHandle0;	// logical id of the first source storage
	int seed0;			// expected pattern seed for the first storage
	int targetHandle1;	// logical id of the second source storage
	int seed1;			// expected pattern seed for the second storage

	static ReadMultipleData Generate(int targetHandle0, int seed0, int targetHandle1, int seed1)
	{
		const ReadMultipleData result = { targetHandle0, seed0, targetHandle1, seed1 };
		return result;
	}
};
957 
// Command payload: verify that the target storage still reads back as all zeros
// (i.e. it has not been written to).
struct ReadZeroData
{
	int targetHandle;	// logical id of the source storage

	static ReadZeroData Generate(int targetHandle)
	{
		const ReadZeroData result = { targetHandle };
		return result;
	}
};
971 
972 } // namespace op
973 
974 class InterCallTestCase;
975 
// Builder for a sequence of inter-call operations. Commands are appended with
// operator<< and later consumed by InterCallTestCase (declared friend below),
// which compiles one program / allocates one result storage per command.
class InterCallOperations
{
public:
	InterCallOperations& operator<< (const op::WriteData&);
	InterCallOperations& operator<< (const op::ReadData&);
	InterCallOperations& operator<< (const op::Barrier&);
	InterCallOperations& operator<< (const op::ReadMultipleData&);
	InterCallOperations& operator<< (const op::WriteDataInterleaved&);
	InterCallOperations& operator<< (const op::ReadDataInterleaved&);
	InterCallOperations& operator<< (const op::ReadZeroData&);

private:
	// Tagged union holding one recorded command; `type` selects the active
	// member of `u_cmd`. All op:: payloads are PODs so the union is legal.
	struct Command
	{
		enum CommandType
		{
			TYPE_WRITE = 0,
			TYPE_READ,
			TYPE_BARRIER,
			TYPE_READ_MULTIPLE,
			TYPE_WRITE_INTERLEAVE,
			TYPE_READ_INTERLEAVE,
			TYPE_READ_ZERO,

			TYPE_LAST
		};

		CommandType type;

		union CommandUnion
		{
			op::WriteData				write;
			op::ReadData				read;
			op::Barrier					barrier;
			op::ReadMultipleData		readMulti;
			op::WriteDataInterleaved	writeInterleave;
			op::ReadDataInterleaved		readInterleave;
			op::ReadZeroData			readZero;
		} u_cmd;
	};

	friend class InterCallTestCase;

	std::vector<Command> m_cmds;	// recorded commands in submission order
};
1021 
operator <<(const op::WriteData & cmd)1022 InterCallOperations& InterCallOperations::operator<< (const op::WriteData& cmd)
1023 {
1024 	m_cmds.push_back(Command());
1025 	m_cmds.back().type = Command::TYPE_WRITE;
1026 	m_cmds.back().u_cmd.write = cmd;
1027 
1028 	return *this;
1029 }
1030 
operator <<(const op::ReadData & cmd)1031 InterCallOperations& InterCallOperations::operator<< (const op::ReadData& cmd)
1032 {
1033 	m_cmds.push_back(Command());
1034 	m_cmds.back().type = Command::TYPE_READ;
1035 	m_cmds.back().u_cmd.read = cmd;
1036 
1037 	return *this;
1038 }
1039 
operator <<(const op::Barrier & cmd)1040 InterCallOperations& InterCallOperations::operator<< (const op::Barrier& cmd)
1041 {
1042 	m_cmds.push_back(Command());
1043 	m_cmds.back().type = Command::TYPE_BARRIER;
1044 	m_cmds.back().u_cmd.barrier = cmd;
1045 
1046 	return *this;
1047 }
1048 
operator <<(const op::ReadMultipleData & cmd)1049 InterCallOperations& InterCallOperations::operator<< (const op::ReadMultipleData& cmd)
1050 {
1051 	m_cmds.push_back(Command());
1052 	m_cmds.back().type = Command::TYPE_READ_MULTIPLE;
1053 	m_cmds.back().u_cmd.readMulti = cmd;
1054 
1055 	return *this;
1056 }
1057 
operator <<(const op::WriteDataInterleaved & cmd)1058 InterCallOperations& InterCallOperations::operator<< (const op::WriteDataInterleaved& cmd)
1059 {
1060 	m_cmds.push_back(Command());
1061 	m_cmds.back().type = Command::TYPE_WRITE_INTERLEAVE;
1062 	m_cmds.back().u_cmd.writeInterleave = cmd;
1063 
1064 	return *this;
1065 }
1066 
operator <<(const op::ReadDataInterleaved & cmd)1067 InterCallOperations& InterCallOperations::operator<< (const op::ReadDataInterleaved& cmd)
1068 {
1069 	m_cmds.push_back(Command());
1070 	m_cmds.back().type = Command::TYPE_READ_INTERLEAVE;
1071 	m_cmds.back().u_cmd.readInterleave = cmd;
1072 
1073 	return *this;
1074 }
1075 
operator <<(const op::ReadZeroData & cmd)1076 InterCallOperations& InterCallOperations::operator<< (const op::ReadZeroData& cmd)
1077 {
1078 	m_cmds.push_back(Command());
1079 	m_cmds.back().type = Command::TYPE_READ_ZERO;
1080 	m_cmds.back().u_cmd.readZero = cmd;
1081 
1082 	return *this;
1083 }
1084 
// Test case that executes a recorded InterCallOperations command list: each
// write/read command becomes one compute dispatch, with optional memory
// barriers in between, and every read command writes pass/fail flags into its
// own result SSBO which verifyResults() checks on the CPU.
class InterCallTestCase : public TestCase
{
public:
	enum StorageType
	{
		STORAGE_BUFFER = 0,
		STORAGE_IMAGE,

		STORAGE_LAST
	};
	enum Flags
	{
		FLAG_USE_ATOMIC	= 1,	// access storages with atomic operations
		FLAG_USE_INT	= 2,	// use integer format (r32i) instead of float (r32f)
	};
													InterCallTestCase			(Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops);
													~InterCallTestCase			(void);

private:
	void											init						(void);
	void											deinit						(void);
	IterateResult									iterate						(void);
	bool											verifyResults				(void);

	// One runCommand overload per command type; step index selects the
	// pre-built program / result storage created in init().
	void											runCommand					(const op::WriteData& cmd, int stepNdx, int& programFriendlyName);
	void											runCommand					(const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void											runCommand					(const op::Barrier&);
	void											runCommand					(const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void											runCommand					(const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName);
	void											runCommand					(const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void											runCommand					(const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void											runSingleRead				(int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);

	glw::GLuint										genStorage					(int friendlyName);
	glw::GLuint										genResultStorage			(void);
	glu::ShaderProgram*								genWriteProgram				(int seed);
	glu::ShaderProgram*								genReadProgram				(int seed);
	glu::ShaderProgram*								genReadMultipleProgram		(int seed0, int seed1);
	glu::ShaderProgram*								genWriteInterleavedProgram	(int seed, bool evenOdd);
	glu::ShaderProgram*								genReadInterleavedProgram	(int seed0, int seed1);
	glu::ShaderProgram*								genReadZeroProgram			(void);

	const StorageType								m_storage;
	const int										m_invocationGridSize;	//!< width and height of the two dimensional work dispatch
	const int										m_perInvocationSize;	//!< number of elements accessed in single invocation
	const std::vector<InterCallOperations::Command>	m_cmds;
	const bool										m_useAtomic;
	const bool										m_formatInteger;

	std::vector<glu::ShaderProgram*>				m_operationPrograms;		// one program per command (null for barriers)
	std::vector<glw::GLuint>						m_operationResultStorages;	// one result buffer per read command (0 otherwise)
	std::map<int, glw::GLuint>						m_storageIDs;				// target handle -> GL buffer/texture name
};
1138 
// Constructor. Copies the recorded command list; resources are created in init().
InterCallTestCase::InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops)
	: TestCase					(context, name, desc)
	, m_storage					(storage)
	, m_invocationGridSize		(512)
	, m_perInvocationSize		(2)
	, m_cmds					(ops.m_cmds)
	, m_useAtomic				((flags & FLAG_USE_ATOMIC) != 0)
	, m_formatInteger			((flags & FLAG_USE_INT) != 0)
{
}
1149 
// Destructor. Releases any resources still owned (deinit is idempotent).
InterCallTestCase::~InterCallTestCase (void)
{
	deinit();
}
1154 
// Prepares the test: checks extension requirements, then walks the command list
// once, lazily creating target storages on first use, compiling one shader
// program per dispatch command and allocating a result buffer per read command.
// Throws NotSupportedError / TestError on unmet requirements or build failure.
void InterCallTestCase::init (void)
{
	int			programFriendlyName = 0;
	const bool	supportsES32		= glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2));

	// requirements

	// image atomics are core in ES 3.2, otherwise need the OES extension
	if (m_useAtomic && m_storage == STORAGE_IMAGE && !supportsES32 && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");

	// generate resources and validate command list

	m_operationPrograms.resize(m_cmds.size(), DE_NULL);
	m_operationResultStorages.resize(m_cmds.size(), 0);

	for (int step = 0; step < (int)m_cmds.size(); ++step)
	{
		switch (m_cmds[step].type)
		{
			case InterCallOperations::Command::TYPE_WRITE:
			{
				const op::WriteData& cmd = m_cmds[step].u_cmd.write;

				// new storage handle?
				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);

				// program
				{
					glu::ShaderProgram* program = genWriteProgram(cmd.seed);

					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
					m_testCtx.getLog() << *program;

					if (!program->isOk())
						throw tcu::TestError("could not build program");

					m_operationPrograms[step] = program;
				}
				break;
			}

			case InterCallOperations::Command::TYPE_READ:
			{
				const op::ReadData& cmd = m_cmds[step].u_cmd.read;
				// reads must target a storage written by an earlier command
				DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());

				// program and result storage
				{
					glu::ShaderProgram* program = genReadProgram(cmd.seed);

					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
					m_testCtx.getLog() << *program;

					if (!program->isOk())
						throw tcu::TestError("could not build program");

					m_operationPrograms[step] = program;
					m_operationResultStorages[step] = genResultStorage();
				}
				break;
			}

			case InterCallOperations::Command::TYPE_BARRIER:
			{
				// barriers need no resources
				break;
			}

			case InterCallOperations::Command::TYPE_READ_MULTIPLE:
			{
				const op::ReadMultipleData& cmd = m_cmds[step].u_cmd.readMulti;
				// both sources must exist already
				DE_ASSERT(m_storageIDs.find(cmd.targetHandle0) != m_storageIDs.end());
				DE_ASSERT(m_storageIDs.find(cmd.targetHandle1) != m_storageIDs.end());

				// program
				{
					glu::ShaderProgram* program = genReadMultipleProgram(cmd.seed0, cmd.seed1);

					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
					m_testCtx.getLog() << *program;

					if (!program->isOk())
						throw tcu::TestError("could not build program");

					m_operationPrograms[step] = program;
					m_operationResultStorages[step] = genResultStorage();
				}
				break;
			}

			case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:
			{
				const op::WriteDataInterleaved& cmd = m_cmds[step].u_cmd.writeInterleave;

				// new storage handle?
				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);

				// program
				{
					glu::ShaderProgram* program = genWriteInterleavedProgram(cmd.seed, cmd.evenOdd);

					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
					m_testCtx.getLog() << *program;

					if (!program->isOk())
						throw tcu::TestError("could not build program");

					m_operationPrograms[step] = program;
				}
				break;
			}

			case InterCallOperations::Command::TYPE_READ_INTERLEAVE:
			{
				const op::ReadDataInterleaved& cmd = m_cmds[step].u_cmd.readInterleave;
				DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());

				// program
				{
					glu::ShaderProgram* program = genReadInterleavedProgram(cmd.seed0, cmd.seed1);

					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
					m_testCtx.getLog() << *program;

					if (!program->isOk())
						throw tcu::TestError("could not build program");

					m_operationPrograms[step] = program;
					m_operationResultStorages[step] = genResultStorage();
				}
				break;
			}

			case InterCallOperations::Command::TYPE_READ_ZERO:
			{
				const op::ReadZeroData& cmd = m_cmds[step].u_cmd.readZero;

				// new storage handle?
				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);

				// program
				{
					glu::ShaderProgram* program = genReadZeroProgram();

					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
					m_testCtx.getLog() << *program;

					if (!program->isOk())
						throw tcu::TestError("could not build program");

					m_operationPrograms[step] = program;
					m_operationResultStorages[step] = genResultStorage();
				}
				break;
			}

			default:
				DE_ASSERT(DE_FALSE);
		}
	}
}
1318 
deinit(void)1319 void InterCallTestCase::deinit (void)
1320 {
1321 	// programs
1322 	for (int ndx = 0; ndx < (int)m_operationPrograms.size(); ++ndx)
1323 		delete m_operationPrograms[ndx];
1324 	m_operationPrograms.clear();
1325 
1326 	// result storages
1327 	for (int ndx = 0; ndx < (int)m_operationResultStorages.size(); ++ndx)
1328 	{
1329 		if (m_operationResultStorages[ndx])
1330 			m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_operationResultStorages[ndx]);
1331 	}
1332 	m_operationResultStorages.clear();
1333 
1334 	// storage
1335 	for (std::map<int, glw::GLuint>::const_iterator it = m_storageIDs.begin(); it != m_storageIDs.end(); ++it)
1336 	{
1337 		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1338 
1339 		if (m_storage == STORAGE_BUFFER)
1340 			gl.deleteBuffers(1, &it->second);
1341 		else if (m_storage == STORAGE_IMAGE)
1342 			gl.deleteTextures(1, &it->second);
1343 		else
1344 			DE_ASSERT(DE_FALSE);
1345 	}
1346 	m_storageIDs.clear();
1347 }
1348 
iterate(void)1349 InterCallTestCase::IterateResult InterCallTestCase::iterate (void)
1350 {
1351 	int programFriendlyName			= 0;
1352 	int resultStorageFriendlyName	= 0;
1353 
1354 	m_testCtx.getLog() << tcu::TestLog::Message << "Running operations:" << tcu::TestLog::EndMessage;
1355 
1356 	// run steps
1357 
1358 	for (int step = 0; step < (int)m_cmds.size(); ++step)
1359 	{
1360 		switch (m_cmds[step].type)
1361 		{
1362 			case InterCallOperations::Command::TYPE_WRITE:				runCommand(m_cmds[step].u_cmd.write,			step,	programFriendlyName);								break;
1363 			case InterCallOperations::Command::TYPE_READ:				runCommand(m_cmds[step].u_cmd.read,				step,	programFriendlyName, resultStorageFriendlyName);	break;
1364 			case InterCallOperations::Command::TYPE_BARRIER:			runCommand(m_cmds[step].u_cmd.barrier);																		break;
1365 			case InterCallOperations::Command::TYPE_READ_MULTIPLE:		runCommand(m_cmds[step].u_cmd.readMulti,		step,	programFriendlyName, resultStorageFriendlyName);	break;
1366 			case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:	runCommand(m_cmds[step].u_cmd.writeInterleave,	step,	programFriendlyName);								break;
1367 			case InterCallOperations::Command::TYPE_READ_INTERLEAVE:	runCommand(m_cmds[step].u_cmd.readInterleave,	step,	programFriendlyName, resultStorageFriendlyName);	break;
1368 			case InterCallOperations::Command::TYPE_READ_ZERO:			runCommand(m_cmds[step].u_cmd.readZero,			step,	programFriendlyName, resultStorageFriendlyName);	break;
1369 			default:
1370 				DE_ASSERT(DE_FALSE);
1371 		}
1372 	}
1373 
1374 	// read results from result buffers
1375 	if (verifyResults())
1376 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1377 	else
1378 		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
1379 
1380 	return STOP;
1381 }
1382 
// Maps each result SSBO written by the verifier programs and checks that every
// element equals 1 (the shaders write 1 for pass, 0 for fail). Logs at most
// errorFloodThreshold failing indices per storage. Returns true iff all
// result storages contained only 1s.
bool InterCallTestCase::verifyResults (void)
{
	int		resultBufferFriendlyName	= 0;
	bool	allResultsOk				= true;
	bool	anyResult					= false;

	m_testCtx.getLog() << tcu::TestLog::Message << "Reading verifier program results" << tcu::TestLog::EndMessage;

	for (int step = 0; step < (int)m_cmds.size(); ++step)
	{
		const int	errorFloodThreshold	= 5;
		int			numErrorsLogged		= 0;

		// only read-type steps allocated a result storage (non-zero buffer name)
		if (m_operationResultStorages[step])
		{
			const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
			const void*				mapped	= DE_NULL;
			std::vector<deInt32>	results	(m_invocationGridSize * m_invocationGridSize);
			bool					error	= false;

			anyResult = true;

			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_operationResultStorages[step]);
			mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), GL_MAP_READ_BIT);
			GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");

			// copy to properly aligned array
			deMemcpy(&results[0], mapped, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32));

			// unmapBuffer returning GL_FALSE means the mapped store was corrupted
			if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
				throw tcu::TestError("memory map store corrupted");

			// check the results
			for (int ndx = 0; ndx < (int)results.size(); ++ndx)
			{
				if (results[ndx] != 1)
				{
					error = true;

					if (numErrorsLogged == 0)
						m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
					if (numErrorsLogged++ < errorFloodThreshold)
						m_testCtx.getLog() << tcu::TestLog::Message << "	Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
					else
					{
						// after N errors, no point continuing verification
						m_testCtx.getLog() << tcu::TestLog::Message << "	-- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
						break;
					}
				}
			}

			if (error)
			{
				allResultsOk = false;
			}
			else
				m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " ok." << tcu::TestLog::EndMessage;
		}
	}

	// command lists are expected to contain at least one read step
	DE_ASSERT(anyResult);
	DE_UNREF(anyResult);

	return allResultsOk;
}
1449 
// Dispatches the pre-built write program for this step, binding the target
// storage at binding point 0 (SSBO base or image unit depending on m_storage).
void InterCallTestCase::runCommand (const op::WriteData& cmd, int stepNdx, int& programFriendlyName)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	m_testCtx.getLog()
		<< tcu::TestLog::Message
		<< "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
		<< tcu::TestLog::EndMessage;

	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());

	// set destination
	if (m_storage == STORAGE_BUFFER)
	{
		DE_ASSERT(m_storageIDs[cmd.targetHandle]);

		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		DE_ASSERT(m_storageIDs[cmd.targetHandle]);

		// atomics need read-write access; plain stores only write
		gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
	}
	else
		DE_ASSERT(DE_FALSE);

	// calc
	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
}
1484 
// Read command: delegates to the shared single-source read path.
void InterCallTestCase::runCommand (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
{
	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
}
1489 
// Issues a glMemoryBarrier between dispatches, with the barrier bit matching
// the storage type being tested (SSBO vs image access).
void InterCallTestCase::runCommand (const op::Barrier& cmd)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	DE_UNREF(cmd);

	if (m_storage == STORAGE_BUFFER)
	{
		m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_STORAGE_BARRIER_BIT" << tcu::TestLog::EndMessage;
		gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_IMAGE_ACCESS_BARRIER_BIT" << tcu::TestLog::EndMessage;
		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
	}
	else
		DE_ASSERT(DE_FALSE);
}
1509 
// Dispatches the verifier program that reads two source storages (bindings 1
// and 2) and writes per-invocation pass/fail flags into the result SSBO at
// binding 0.
void InterCallTestCase::runCommand (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	m_testCtx.getLog()
		<< tcu::TestLog::Message
		<< "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffers") : ("images")) << " #" << cmd.targetHandle0 << " and #" << cmd.targetHandle1 << ".\n"
		<< "	Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
		<< tcu::TestLog::EndMessage;

	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());

	// set sources
	if (m_storage == STORAGE_BUFFER)
	{
		DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
		DE_ASSERT(m_storageIDs[cmd.targetHandle1]);

		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[cmd.targetHandle0]);
		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageIDs[cmd.targetHandle1]);
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffers");
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
		DE_ASSERT(m_storageIDs[cmd.targetHandle1]);

		// atomics need read-write access; plain loads only read
		gl.bindImageTexture(1, m_storageIDs[cmd.targetHandle0], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
		gl.bindImageTexture(2, m_storageIDs[cmd.targetHandle1], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source images");
	}
	else
		DE_ASSERT(DE_FALSE);

	// set destination
	DE_ASSERT(m_operationResultStorages[stepNdx]);
	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");

	// calc
	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read multi");
}
1554 
// Dispatches the interleaved write program for this step. Only half the grid
// width is dispatched since each invocation covers every other element/column.
void InterCallTestCase::runCommand (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	m_testCtx.getLog()
		<< tcu::TestLog::Message
		<< "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
		<< "	Writing to every " << ((cmd.evenOdd) ? ("even") : ("odd")) << " " << ((m_storage == STORAGE_BUFFER) ? ("element") : ("column")) << ".\n"
		<< "	Dispatch size: " << m_invocationGridSize / 2 << "x" << m_invocationGridSize << "."
		<< tcu::TestLog::EndMessage;

	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());

	// set destination
	if (m_storage == STORAGE_BUFFER)
	{
		DE_ASSERT(m_storageIDs[cmd.targetHandle]);

		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		DE_ASSERT(m_storageIDs[cmd.targetHandle]);

		// atomics need read-write access; plain stores only write
		gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
	}
	else
		DE_ASSERT(DE_FALSE);

	// calc
	gl.dispatchCompute(m_invocationGridSize / 2, m_invocationGridSize, 1);
	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
}
1590 
// Interleaved-read command: delegates to the shared single-source read path.
void InterCallTestCase::runCommand (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
{
	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
}
1595 
runCommand(const op::ReadZeroData & cmd,int stepNdx,int & programFriendlyName,int & resultStorageFriendlyName)1596 void InterCallTestCase::runCommand (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1597 {
1598 	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1599 }
1600 
runSingleRead(int targetHandle,int stepNdx,int & programFriendlyName,int & resultStorageFriendlyName)1601 void InterCallTestCase::runSingleRead (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1602 {
1603 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1604 
1605 	m_testCtx.getLog()
1606 		<< tcu::TestLog::Message
1607 		<< "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << targetHandle << ".\n"
1608 		<< "	Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1609 		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1610 		<< tcu::TestLog::EndMessage;
1611 
1612 	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1613 
1614 	// set source
1615 	if (m_storage == STORAGE_BUFFER)
1616 	{
1617 		DE_ASSERT(m_storageIDs[targetHandle]);
1618 
1619 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[targetHandle]);
1620 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffer");
1621 	}
1622 	else if (m_storage == STORAGE_IMAGE)
1623 	{
1624 		DE_ASSERT(m_storageIDs[targetHandle]);
1625 
1626 		gl.bindImageTexture(1, m_storageIDs[targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1627 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source image");
1628 	}
1629 	else
1630 		DE_ASSERT(DE_FALSE);
1631 
1632 	// set destination
1633 	DE_ASSERT(m_operationResultStorages[stepNdx]);
1634 	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1635 	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1636 
1637 	// calc
1638 	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1639 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read");
1640 }
1641 
// Creates and zero-fills one storage object of the configured kind: either an
// SSBO holding m_invocationGridSize^2 * m_perInvocationSize 32-bit elements, or
// a 2D r32i/r32f texture of m_invocationGridSize x (gridSize*perInvocationSize)
// texels. friendlyName is used only for log output. Returns the GL object name;
// the caller owns it.
glw::GLuint InterCallTestCase::genStorage (int friendlyName)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	if (m_storage == STORAGE_BUFFER)
	{
		const int		numElements		= m_invocationGridSize * m_invocationGridSize * m_perInvocationSize;
		// deInt32 and GLfloat are both 4 bytes, so bufferSize is the same either way
		const int		bufferSize		= numElements * (int)((m_formatInteger) ? (sizeof(deInt32)) : (sizeof(glw::GLfloat)));
		glw::GLuint		retVal			= 0;

		m_testCtx.getLog() << tcu::TestLog::Message << "Creating buffer #" << friendlyName << ", size " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

		gl.genBuffers(1, &retVal);
		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);

		// upload initial zero contents in the matching element type
		if (m_formatInteger)
		{
			const std::vector<deUint32> zeroBuffer(numElements, 0);
			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
		}
		else
		{
			const std::vector<float> zeroBuffer(numElements, 0.0f);
			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
		}
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");

		return retVal;
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		// per-invocation elements are stacked vertically: one grid-height band per element
		const int	imageWidth	= m_invocationGridSize;
		const int	imageHeight	= m_invocationGridSize * m_perInvocationSize;
		glw::GLuint	retVal		= 0;

		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Creating image #" << friendlyName << ", size " << imageWidth << "x" << imageHeight
			<< ", internalformat = " << ((m_formatInteger) ? ("r32i") : ("r32f"))
			<< ", size = " << (imageWidth*imageHeight*sizeof(deUint32)) << " bytes."
			<< tcu::TestLog::EndMessage;

		gl.genTextures(1, &retVal);
		gl.bindTexture(GL_TEXTURE_2D, retVal);

		// immutable single-level storage; format must match the shader's layout qualifier
		if (m_formatInteger)
			gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, imageWidth, imageHeight);
		else
			gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32F, imageWidth, imageHeight);

		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen image");

		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Filling image with 0"
			<< tcu::TestLog::EndMessage;

		// clear texel contents with a zero upload in the matching client format
		if (m_formatInteger)
		{
			const std::vector<deInt32> zeroBuffer(imageWidth * imageHeight, 0);
			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
		}
		else
		{
			const std::vector<float> zeroBuffer(imageWidth * imageHeight, 0.0f);
			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED, GL_FLOAT, &zeroBuffer[0]);
		}

		GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");

		return retVal;
	}
	else
	{
		DE_ASSERT(DE_FALSE);
		return 0;
	}
}
1722 
genResultStorage(void)1723 glw::GLuint InterCallTestCase::genResultStorage (void)
1724 {
1725 	const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
1726 	glw::GLuint				retVal	= 0;
1727 
1728 	gl.genBuffers(1, &retVal);
1729 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1730 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), DE_NULL, GL_STATIC_DRAW);
1731 	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1732 
1733 	return retVal;
1734 }
1735 
// Builds a compute shader that writes the invocation's linearized group index
// (groupNdx) into m_perInvocationSize scattered locations of the target
// storage; 'seed' offsets the write addresses so different steps touch
// different slots. With m_useAtomic the stores become (image)atomicExchange.
// Returns a newly allocated program; the caller owns it.
glu::ShaderProgram* InterCallTestCase::genWriteProgram (int seed)
{
	// image atomics require an extension declaration; buffer atomics are core
	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	// destination declaration: SSBO at binding 0 or image at unit 0
	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_out;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "\n";

	// Write to buffer/image m_perInvocationSize elements
	if (m_storage == STORAGE_BUFFER)
	{
		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
		{
			if (m_useAtomic)
				buf << "	atomicExchange(";
			else
				buf << "	";

			// each write targets a distinct seeded slot, wrapped to buffer length
			buf << "sb_out.values[(groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]";

			if (m_useAtomic)
				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << " = " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
		}
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
		{
			if (m_useAtomic)
				buf << "	imageAtomicExchange";
			else
				buf << "	imageStore";

			// x is seeded and wrapped to the row; y selects the per-writeNdx band
			buf << "(u_imageOut, ivec2((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";

			if (m_useAtomic)
				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
		}
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
1804 
// Builds the verification counterpart of genWriteProgram: each invocation
// recomputes the same seeded addresses and checks the stored value equals its
// groupNdx, writing a 1/0 flag into the result SSBO (binding 0). With
// m_useAtomic the reads are destructive (atomicExchange with zero). Caller
// owns the returned program.
glu::ShaderProgram* InterCallTestCase::genReadProgram (int seed)
{
	// image atomics require an extension declaration; buffer atomics are core
	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	// source declaration: SSBO at binding 1 or image at unit 1
	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_in;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
		<< "{\n"
		<< "	highp int resultOk[];\n"
		<< "} sb_result;\n"
		<< "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
		<< "	bool allOk = true;\n"
		<< "\n";

	// Verify data

	if (m_storage == STORAGE_BUFFER)
	{
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
		{
			// address computation mirrors genWriteProgram's seeded write pattern
			if (!m_useAtomic)
				buf << "	allOk = allOk && (sb_in.values[(groupNdx + "
					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "] == "
					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << "	allOk = allOk && (atomicExchange(sb_in.values[(groupNdx + "
					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "], zero) == "
					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
		}
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
		{
			// same seeded (x % width, y + band) coordinates as the write program
			if (!m_useAtomic)
				buf	<< "	allOk = allOk && (imageLoad(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)).x == "
					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << "	allOk = allOk && (imageAtomicExchange(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u), zero) == "
					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
		}
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
1875 
// Like genReadProgram, but verifies two storages (written with seed0 and
// seed1) in a single program; sources are bound at bindings/units 1 and 2,
// results at binding 0. Caller owns the returned program.
glu::ShaderProgram* InterCallTestCase::genReadMultipleProgram (int seed0, int seed1)
{
	// image atomics require an extension declaration; buffer atomics are core
	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	// two source declarations, one per verified storage
	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer0\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_in0;\n"
			<< "layout(binding=2, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer1\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_in1;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn0;\n"
			<< "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=2) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn1;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
		<< "{\n"
		<< "	highp int resultOk[];\n"
		<< "} sb_result;\n"
		<< "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
		<< "	bool allOk = true;\n"
		<< "\n";

	// Verify data

	if (m_storage == STORAGE_BUFFER)
	{
		// one pair of checks per readNdx, mirroring the seed0/seed1 write patterns
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in0.values[(groupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in1.values[(groupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn0, ivec2((gl_GlobalInvocationID.x + " << (seed0 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn1, ivec2((gl_GlobalInvocationID.x + " << (seed1 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
1935 
// Builds a compute shader that writes groupNdx into only the even (evenOdd ==
// true) or odd elements/columns of the target storage; dispatched with a
// half-width grid by runCommand(WriteDataInterleaved). Two such programs with
// opposite evenOdd together cover the whole storage. Caller owns the program.
glu::ShaderProgram* InterCallTestCase::genWriteInterleavedProgram (int seed, bool evenOdd)
{
	// image atomics require an extension declaration; buffer atomics are core
	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	// destination declaration: SSBO at binding 0 or image at unit 0
	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_out;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "\n";

	// Write to buffer/image m_perInvocationSize elements
	if (m_storage == STORAGE_BUFFER)
	{
		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
		{
			if (m_useAtomic)
				buf << "	atomicExchange(";
			else
				buf << "	";

			// slot index is seeded, wrapped to the half-storage range, then
			// mapped to even (offset 0) or odd (offset 1) positions via *2
			buf << "sb_out.values[((groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize / 2 * m_perInvocationSize  << ") * 2 + " << ((evenOdd) ? (0) : (1)) << "]";

			if (m_useAtomic)
				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << "= " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
		}
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
		{
			if (m_useAtomic)
				buf << "	imageAtomicExchange";
			else
				buf << "	imageStore";

			// x is seeded, wrapped to half-width, then mapped to even/odd columns
			buf << "(u_imageOut, ivec2(((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + " << ((evenOdd) ? (0) : (1)) << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";

			if (m_useAtomic)
				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
		}
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
2004 
// Builds the verification shader for interleaved writes: dispatched over the
// full grid, even-indexed invocations check the seed0 (even) slots and odd
// invocations the seed1 (odd) slots, using interleavedGroupNdx to map back to
// the half-width writer's group index. Caller owns the returned program.
glu::ShaderProgram*	InterCallTestCase::genReadInterleavedProgram (int seed0, int seed1)
{
	// image atomics require an extension declaration; buffer atomics are core
	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	// source declaration: SSBO at binding 1 or image at unit 1
	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_in;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
		<< "{\n"
		<< "	highp int resultOk[];\n"
		<< "} sb_result;\n"
		<< "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "	int interleavedGroupNdx = int((size.x >> 1U) * size.y * gl_GlobalInvocationID.z + (size.x >> 1U) * gl_GlobalInvocationID.y + (gl_GlobalInvocationID.x >> 1U));\n"
		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
		<< "	bool allOk = true;\n"
		<< "\n";

	// Verify data

	if (m_storage == STORAGE_BUFFER)
	{
		// even invocations verify the "* 2 + 0" slots written with seed0
		buf << "	if (groupNdx % 2 == 0)\n"
			<< "	{\n";
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "		allOk = allOk && ("
				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 0]"
				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
		// odd invocations verify the "* 2 + 1" slots written with seed1
		buf << "	}\n"
			<< "	else\n"
			<< "	{\n";
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "		allOk = allOk && ("
				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 1]"
				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
		buf << "	}\n";
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		// even invocations verify even columns (seed0)
		buf << "	if (groupNdx % 2 == 0)\n"
			<< "	{\n";
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "		allOk = allOk && ("
				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed0 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 0, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
		// odd invocations verify odd columns (seed1)
		buf << "	}\n"
			<< "	else\n"
			<< "	{\n";
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "		allOk = allOk && ("
				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed1 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 1, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
		buf << "	}\n";
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
2084 
// Builds a shader that checks the source storage still contains only zeros
// (i.e. was never written). Atomic mode exchanges a nonzero sentinel so the
// read is also a write; results go to the SSBO at binding 0. Caller owns the
// returned program.
glu::ShaderProgram*	InterCallTestCase::genReadZeroProgram (void)
{
	// image atomics require an extension declaration; buffer atomics are core
	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	// source declaration: SSBO at binding 1 or image at unit 1
	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_in;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
		<< "{\n"
		<< "	highp int resultOk[];\n"
		<< "} sb_result;\n"
		<< "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		// 'anything' is a nonzero value exchanged in during atomic reads
		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " anything = " << ((m_formatInteger) ? ("5") : ("5.0")) << ";\n"
		<< "	bool allOk = true;\n"
		<< "\n";

	// Verify data

	if (m_storage == STORAGE_BUFFER)
	{
		// reads are sequential here (no seeding) since only zero-ness matters
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "	allOk = allOk && ("
				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[groupNdx * " << m_perInvocationSize << " + " << readNdx << "]"
				<< ((m_useAtomic) ? (", anything)") : ("")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "	allOk = allOk && ("
			<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn, ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y + " << (readNdx*m_invocationGridSize) << "u)"
			<< ((m_useAtomic) ? (", anything)") : (").x")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
2141 
// Dispatches the same atomic-add compute program numCalls times against a
// shared work SSBO, each call with its own delta and its own intermediate
// result buffer, then verifies the final sums and the intermediate value
// chains are consistent.
class SSBOConcurrentAtomicCase : public TestCase
{
public:

							SSBOConcurrentAtomicCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
							~SSBOConcurrentAtomicCase	(void);

	void					init						(void);
	void					deinit						(void);
	IterateResult			iterate						(void);

private:
	// Generates the compute shader source (has a u_atomicDelta uniform; see iterate()).
	std::string				genComputeSource			(void) const;

	const int				m_numCalls;						// number of dispatches / intermediate buffers
	const int				m_workSize;						// elements in the work buffer; also dispatch width
	glu::ShaderProgram*		m_program;						// owned; created in init(), freed in deinit()
	deUint32				m_bufferID;						// shared work SSBO, one deUint32 per work item
	std::vector<deUint32>	m_intermediateResultBuffers;	// one result SSBO per call
};
2162 
SSBOConcurrentAtomicCase(Context & context,const char * name,const char * description,int numCalls,int workSize)2163 SSBOConcurrentAtomicCase::SSBOConcurrentAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2164 	: TestCase		(context, name, description)
2165 	, m_numCalls	(numCalls)
2166 	, m_workSize	(workSize)
2167 	, m_program		(DE_NULL)
2168 	, m_bufferID	(DE_NULL)
2169 {
2170 }
2171 
~SSBOConcurrentAtomicCase(void)2172 SSBOConcurrentAtomicCase::~SSBOConcurrentAtomicCase (void)
2173 {
2174 	deinit();
2175 }
2176 
init(void)2177 void SSBOConcurrentAtomicCase::init (void)
2178 {
2179 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
2180 	std::vector<deUint32>	zeroData			(m_workSize, 0);
2181 
2182 	// gen buffers
2183 
2184 	gl.genBuffers(1, &m_bufferID);
2185 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2186 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2187 
2188 	for (int ndx = 0; ndx < m_numCalls; ++ndx)
2189 	{
2190 		deUint32 buffer = 0;
2191 
2192 		gl.genBuffers(1, &buffer);
2193 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2194 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2195 
2196 		m_intermediateResultBuffers.push_back(buffer);
2197 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2198 	}
2199 
2200 	// gen program
2201 
2202 	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2203 	m_testCtx.getLog() << *m_program;
2204 	if (!m_program->isOk())
2205 		throw tcu::TestError("could not build program");
2206 }
2207 
deinit(void)2208 void SSBOConcurrentAtomicCase::deinit (void)
2209 {
2210 	if (m_bufferID)
2211 	{
2212 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
2213 		m_bufferID = 0;
2214 	}
2215 
2216 	for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2217 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2218 	m_intermediateResultBuffers.clear();
2219 
2220 	delete m_program;
2221 	m_program = DE_NULL;
2222 }
2223 
// Dispatches the atomic-add compute program m_numCalls times, each call with a
// unique delta from a shuffled ramp 1..m_numCalls, all calls targeting the same
// work buffer with no barriers in between. Then verifies that:
//  1) every work buffer element equals the ramp sum, and
//  2) the per-call intermediate results (pre-add values) form a consistent
//     addition chain from 0 to the sum (each delta applied exactly once).
TestCase::IterateResult SSBOConcurrentAtomicCase::iterate (void)
{
	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
	// Sum of the delta ramp 1+2+...+m_numCalls; expected final value of every element.
	const deUint32			sumValue		= (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
	std::vector<int>		deltas;

	// generate unique deltas
	generateShuffledRamp(m_numCalls, deltas);

	// invoke program N times, each with a different delta
	{
		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");

		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Running shader " << m_numCalls << " times.\n"
			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
			<< "Setting u_atomicDelta to a unique value for each call.\n"
			<< tcu::TestLog::EndMessage;

		if (deltaLocation == -1)
			throw tcu::TestError("u_atomicDelta location was -1");

		gl.useProgram(m_program->getProgram());
		// Shared work buffer stays bound at binding 2 for all calls.
		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_bufferID);

		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			m_testCtx.getLog()
				<< tcu::TestLog::Message
				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
				<< tcu::TestLog::EndMessage;

			gl.uniform1ui(deltaLocation, deltas[callNdx]);
			// Each call records its pre-add values into its own buffer (binding 1).
			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
			gl.dispatchCompute(m_workSize, 1, 1);
		}

		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
	}

	// Verify result
	{
		std::vector<deUint32> result;

		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;

		// NOTE(review): no explicit glMemoryBarrier before readback here;
		// presumably the shared readBuffer() helper synchronizes -- confirm.
		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, result);

		for (int ndx = 0; ndx < m_workSize; ++ndx)
		{
			if (result[ndx] != sumValue)
			{
				m_testCtx.getLog()
					<< tcu::TestLog::Message
					<< "Work buffer error, at index " << ndx << " expected value " << (sumValue) << ", got " << result[ndx] << "\n"
					<< "Work buffer contains invalid values."
					<< tcu::TestLog::EndMessage;

				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
				return STOP;
			}
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Work buffer contents are valid." << tcu::TestLog::EndMessage;
	}

	// verify steps
	{
		std::vector<std::vector<deUint32> >	intermediateResults	(m_numCalls);
		std::vector<deUint32>				valueChain			(m_numCalls);

		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;

		// collect results

		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, intermediateResults[callNdx]);
		}

		// verify values

		for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
		{
			int			invalidOperationNdx;
			deUint32	errorDelta;
			deUint32	errorExpected;

			// collect result chain for each element
			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];

			// check there exists a path from 0 to sumValue using each addition once
			// decompose cumulative results to addition operations (all additions positive => this works)

			std::sort(valueChain.begin(), valueChain.end());

			// validate chain
			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
			{
				m_testCtx.getLog()
					<< tcu::TestLog::Message
					<< "Intermediate buffer error, at value index " << valueNdx << ", applied operation index " << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
					<< "Intermediate buffer contains invalid values. Values at index " << valueNdx << "\n"
					<< tcu::TestLog::EndMessage;

				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;

				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
				return STOP;
			}
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
	}

	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
	return STOP;
}
2348 
genComputeSource(void) const2349 std::string SSBOConcurrentAtomicCase::genComputeSource (void) const
2350 {
2351 	std::ostringstream buf;
2352 
2353 	buf	<< "${GLSL_VERSION_DECL}\n"
2354 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2355 		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2356 		<< "{\n"
2357 		<< "	highp uint values[" << m_workSize << "];\n"
2358 		<< "} sb_ires;\n"
2359 		<< "\n"
2360 		<< "layout (binding = 2, std430) volatile buffer WorkBuffer\n"
2361 		<< "{\n"
2362 		<< "	highp uint values[" << m_workSize << "];\n"
2363 		<< "} sb_work;\n"
2364 		<< "uniform highp uint u_atomicDelta;\n"
2365 		<< "\n"
2366 		<< "void main ()\n"
2367 		<< "{\n"
2368 		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x;\n"
2369 		<< "	sb_ires.values[invocationIndex] = atomicAdd(sb_work.values[invocationIndex], u_atomicDelta);\n"
2370 		<< "}";
2371 
2372 	return specializeShader(m_context, buf.str().c_str());
2373 }
2374 
// Tests concurrent atomic counter use from two different programs ("even" and
// "odd"), dispatched back-to-back; each recorded counter value must be unique.
class ConcurrentAtomicCounterCase : public TestCase
{
public:

							ConcurrentAtomicCounterCase		(Context& context, const char* name, const char* description, int numCalls, int workSize);
							~ConcurrentAtomicCounterCase	(void);

	void					init							(void);
	void					deinit							(void);
	IterateResult			iterate							(void);

private:
	// evenOdd selects whether the generated shader increments the counter
	// for even (true) or odd (false) data indices.
	std::string				genComputeSource				(bool evenOdd) const;

	const int				m_numCalls;						// number of even+odd dispatch pairs
	const int				m_workSize;						// work group count (x dimension)
	glu::ShaderProgram*		m_evenProgram;
	glu::ShaderProgram*		m_oddProgram;
	deUint32				m_counterBuffer;				// backing storage for the atomic counter
	deUint32				m_intermediateResultBuffer;		// SSBO recording each increment result
};
2396 
ConcurrentAtomicCounterCase(Context & context,const char * name,const char * description,int numCalls,int workSize)2397 ConcurrentAtomicCounterCase::ConcurrentAtomicCounterCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2398 	: TestCase					(context, name, description)
2399 	, m_numCalls				(numCalls)
2400 	, m_workSize				(workSize)
2401 	, m_evenProgram				(DE_NULL)
2402 	, m_oddProgram				(DE_NULL)
2403 	, m_counterBuffer			(DE_NULL)
2404 	, m_intermediateResultBuffer(DE_NULL)
2405 {
2406 }
2407 
// Destructor; releases all GL resources via deinit() (deinit is idempotent).
ConcurrentAtomicCounterCase::~ConcurrentAtomicCounterCase (void)
{
	deinit();
}
2412 
// Creates the counter and intermediate-result buffers and builds the even/odd
// compute programs. Throws tcu::TestError if either program fails to build.
void ConcurrentAtomicCounterCase::init (void)
{
	const glw::Functions&		gl			= m_context.getRenderContext().getFunctions();
	// Zero-fill source data; sized for the larger (intermediate) buffer and
	// reused for the single-uint counter buffer as well.
	const std::vector<deUint32>	zeroData	(m_numCalls * m_workSize, 0);

	// gen buffer

	gl.genBuffers(1, &m_counterBuffer);
	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_counterBuffer);
	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32), &zeroData[0], GL_DYNAMIC_COPY);

	gl.genBuffers(1, &m_intermediateResultBuffer);
	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_numCalls * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);

	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");

	// gen programs

	{
		const tcu::ScopedLogSection section(m_testCtx.getLog(), "EvenProgram", "Even program");

		m_evenProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(true)));
		m_testCtx.getLog() << *m_evenProgram;
		if (!m_evenProgram->isOk())
			throw tcu::TestError("could not build program");
	}
	{
		const tcu::ScopedLogSection section(m_testCtx.getLog(), "OddProgram", "Odd program");

		m_oddProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(false)));
		m_testCtx.getLog() << *m_oddProgram;
		if (!m_oddProgram->isOk())
			throw tcu::TestError("could not build program");
	}
}
2449 
deinit(void)2450 void ConcurrentAtomicCounterCase::deinit (void)
2451 {
2452 	if (m_counterBuffer)
2453 	{
2454 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_counterBuffer);
2455 		m_counterBuffer = 0;
2456 	}
2457 	if (m_intermediateResultBuffer)
2458 	{
2459 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffer);
2460 		m_intermediateResultBuffer = 0;
2461 	}
2462 
2463 	delete m_evenProgram;
2464 	m_evenProgram = DE_NULL;
2465 
2466 	delete m_oddProgram;
2467 	m_oddProgram = DE_NULL;
2468 }
2469 
// Interleaves dispatches of the even and odd programs m_numCalls times; both
// increment the same atomic counter for complementary halves of the data
// indices, with no barriers in between. Verifies the final counter value and
// that the recorded increment results are exactly 0..numCalls*workSize-1,
// i.e. every counter value was handed out exactly once.
TestCase::IterateResult ConcurrentAtomicCounterCase::iterate (void)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	// invoke program N times, each with a different delta
	{
		const int evenCallNdxLocation	= gl.getUniformLocation(m_evenProgram->getProgram(), "u_callNdx");
		const int oddCallNdxLocation	= gl.getUniformLocation(m_oddProgram->getProgram(), "u_callNdx");

		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Running shader pair (even & odd) " << m_numCalls << " times.\n"
			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
			<< tcu::TestLog::EndMessage;

		if (evenCallNdxLocation == -1)
			throw tcu::TestError("u_callNdx location was -1");
		if (oddCallNdxLocation == -1)
			throw tcu::TestError("u_callNdx location was -1");

		// Both bindings stay fixed; only the program and u_callNdx vary per dispatch.
		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffer);
		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_counterBuffer);

		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			gl.useProgram(m_evenProgram->getProgram());
			gl.uniform1ui(evenCallNdxLocation, (deUint32)callNdx);
			gl.dispatchCompute(m_workSize, 1, 1);

			gl.useProgram(m_oddProgram->getProgram());
			gl.uniform1ui(oddCallNdxLocation, (deUint32)callNdx);
			gl.dispatchCompute(m_workSize, 1, 1);
		}

		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
	}

	// Verify result
	{
		deUint32 result;

		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;

		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counterBuffer);
		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);

		if ((int)result != m_numCalls*m_workSize)
		{
			m_testCtx.getLog()
				<< tcu::TestLog::Message
				<< "Counter buffer error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
				<< tcu::TestLog::EndMessage;

			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
			return STOP;
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Counter buffer is valid." << tcu::TestLog::EndMessage;
	}

	// verify steps
	{
		std::vector<deUint32> intermediateResults;

		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;

		// collect results

		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_numCalls * m_workSize, intermediateResults);

		// verify values

		// After sorting, the counter return values must be the exact sequence
		// 0,1,2,... -- any duplicate or gap indicates a lost increment.
		std::sort(intermediateResults.begin(), intermediateResults.end());

		for (int valueNdx = 0; valueNdx < m_workSize * m_numCalls; ++valueNdx)
		{
			if ((int)intermediateResults[valueNdx] != valueNdx)
			{
				m_testCtx.getLog()
					<< tcu::TestLog::Message
					<< "Intermediate buffer error, at value index " << valueNdx << ", expected " << valueNdx << ", got " << intermediateResults[valueNdx] << ".\n"
					<< "Intermediate buffer contains invalid values. Intermediate results:\n"
					<< tcu::TestLog::EndMessage;

				for (int logCallNdx = 0; logCallNdx < m_workSize * m_numCalls; ++logCallNdx)
					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx] << tcu::TestLog::EndMessage;

				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
				return STOP;
			}
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
	}

	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
	return STOP;
}
2569 
genComputeSource(bool evenOdd) const2570 std::string ConcurrentAtomicCounterCase::genComputeSource (bool evenOdd) const
2571 {
2572 	std::ostringstream buf;
2573 
2574 	buf	<< "${GLSL_VERSION_DECL}\n"
2575 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2576 		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2577 		<< "{\n"
2578 		<< "	highp uint values[" << m_workSize * m_numCalls << "];\n"
2579 		<< "} sb_ires;\n"
2580 		<< "\n"
2581 		<< "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
2582 		<< "uniform highp uint u_callNdx;\n"
2583 		<< "\n"
2584 		<< "void main ()\n"
2585 		<< "{\n"
2586 		<< "	highp uint dataNdx = u_callNdx * " << m_workSize << "u + gl_GlobalInvocationID.x;\n"
2587 		<< "	if ((dataNdx % 2u) == " << ((evenOdd) ? (0) : (1)) << "u)\n"
2588 		<< "		sb_ires.values[dataNdx] = atomicCounterIncrement(u_counter);\n"
2589 		<< "}";
2590 
2591 	return specializeShader(m_context, buf.str().c_str());
2592 }
2593 
// Tests concurrent image atomic operations: repeated dispatches add unique
// deltas to an R32UI work image without intervening barriers.
class ConcurrentImageAtomicCase : public TestCase
{
public:

							ConcurrentImageAtomicCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
							~ConcurrentImageAtomicCase	(void);

	void					init						(void);
	void					deinit						(void);
	IterateResult			iterate						(void);

private:
	// Reads the work image contents back to 'result' via an SSBO copy pass.
	void					readWorkImage				(std::vector<deUint32>& result);

	std::string				genComputeSource			(void) const;	// atomic-add program
	std::string				genImageReadSource			(void) const;	// image -> SSBO copy program
	std::string				genImageClearSource			(void) const;	// image zero-fill program

	const int				m_numCalls;					// number of atomic-add dispatches
	const int				m_workSize;					// image is m_workSize x m_workSize texels
	glu::ShaderProgram*		m_program;
	glu::ShaderProgram*		m_imageReadProgram;
	glu::ShaderProgram*		m_imageClearProgram;
	deUint32				m_imageID;					// R32UI work texture
	std::vector<deUint32>	m_intermediateResultBuffers;	// one SSBO per call
};
2620 
ConcurrentImageAtomicCase(Context & context,const char * name,const char * description,int numCalls,int workSize)2621 ConcurrentImageAtomicCase::ConcurrentImageAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2622 	: TestCase				(context, name, description)
2623 	, m_numCalls			(numCalls)
2624 	, m_workSize			(workSize)
2625 	, m_program				(DE_NULL)
2626 	, m_imageReadProgram	(DE_NULL)
2627 	, m_imageClearProgram	(DE_NULL)
2628 	, m_imageID				(DE_NULL)
2629 {
2630 }
2631 
// Destructor; releases all GL resources via deinit() (deinit is idempotent).
ConcurrentImageAtomicCase::~ConcurrentImageAtomicCase (void)
{
	deinit();
}
2636 
init(void)2637 void ConcurrentImageAtomicCase::init (void)
2638 {
2639 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
2640 	std::vector<deUint32>	zeroData			(m_workSize * m_workSize, 0);
2641 	const bool				supportsES32		= glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2));
2642 
2643 	if (!supportsES32 && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
2644 		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic");
2645 
2646 	// gen image
2647 
2648 	gl.genTextures(1, &m_imageID);
2649 	gl.bindTexture(GL_TEXTURE_2D, m_imageID);
2650 	gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize, m_workSize);
2651 	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2652 	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2653 	GLU_EXPECT_NO_ERROR(gl.getError(), "gen tex");
2654 
2655 	// gen buffers
2656 
2657 	for (int ndx = 0; ndx < m_numCalls; ++ndx)
2658 	{
2659 		deUint32 buffer = 0;
2660 
2661 		gl.genBuffers(1, &buffer);
2662 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2663 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2664 
2665 		m_intermediateResultBuffers.push_back(buffer);
2666 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2667 	}
2668 
2669 	// gen programs
2670 
2671 	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2672 	m_testCtx.getLog() << *m_program;
2673 	if (!m_program->isOk())
2674 		throw tcu::TestError("could not build program");
2675 
2676 	m_imageReadProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageReadSource()));
2677 	if (!m_imageReadProgram->isOk())
2678 	{
2679 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageReadProgram", "Image read program");
2680 
2681 		m_testCtx.getLog() << *m_imageReadProgram;
2682 		throw tcu::TestError("could not build program");
2683 	}
2684 
2685 	m_imageClearProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageClearSource()));
2686 	if (!m_imageClearProgram->isOk())
2687 	{
2688 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageClearProgram", "Image read program");
2689 
2690 		m_testCtx.getLog() << *m_imageClearProgram;
2691 		throw tcu::TestError("could not build program");
2692 	}
2693 }
2694 
deinit(void)2695 void ConcurrentImageAtomicCase::deinit (void)
2696 {
2697 	if (m_imageID)
2698 	{
2699 		m_context.getRenderContext().getFunctions().deleteTextures(1, &m_imageID);
2700 		m_imageID = 0;
2701 	}
2702 
2703 	for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2704 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2705 	m_intermediateResultBuffers.clear();
2706 
2707 	delete m_program;
2708 	m_program = DE_NULL;
2709 
2710 	delete m_imageReadProgram;
2711 	m_imageReadProgram = DE_NULL;
2712 
2713 	delete m_imageClearProgram;
2714 	m_imageClearProgram = DE_NULL;
2715 }
2716 
iterate(void)2717 TestCase::IterateResult ConcurrentImageAtomicCase::iterate (void)
2718 {
2719 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2720 	const deUint32			sumValue		= (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2721 	std::vector<int>		deltas;
2722 
2723 	// generate unique deltas
2724 	generateShuffledRamp(m_numCalls, deltas);
2725 
2726 	// clear image
2727 	{
2728 		m_testCtx.getLog() << tcu::TestLog::Message << "Clearing image contents" << tcu::TestLog::EndMessage;
2729 
2730 		gl.useProgram(m_imageClearProgram->getProgram());
2731 		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
2732 		gl.dispatchCompute(m_workSize, m_workSize, 1);
2733 		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2734 
2735 		GLU_EXPECT_NO_ERROR(gl.getError(), "clear");
2736 	}
2737 
2738 	// invoke program N times, each with a different delta
2739 	{
2740 		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2741 
2742 		m_testCtx.getLog()
2743 			<< tcu::TestLog::Message
2744 			<< "Running shader " << m_numCalls << " times.\n"
2745 			<< "Num groups = (" << m_workSize << ", " << m_workSize << ", 1)\n"
2746 			<< "Setting u_atomicDelta to a unique value for each call.\n"
2747 			<< tcu::TestLog::EndMessage;
2748 
2749 		if (deltaLocation == -1)
2750 			throw tcu::TestError("u_atomicDelta location was -1");
2751 
2752 		gl.useProgram(m_program->getProgram());
2753 		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
2754 
2755 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2756 		{
2757 			m_testCtx.getLog()
2758 				<< tcu::TestLog::Message
2759 				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2760 				<< tcu::TestLog::EndMessage;
2761 
2762 			gl.uniform1ui(deltaLocation, deltas[callNdx]);
2763 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2764 			gl.dispatchCompute(m_workSize, m_workSize, 1);
2765 		}
2766 
2767 		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2768 	}
2769 
2770 	// Verify result
2771 	{
2772 		std::vector<deUint32> result;
2773 
2774 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work image, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2775 
2776 		readWorkImage(result);
2777 
2778 		for (int ndx = 0; ndx < m_workSize * m_workSize; ++ndx)
2779 		{
2780 			if (result[ndx] != sumValue)
2781 			{
2782 				m_testCtx.getLog()
2783 					<< tcu::TestLog::Message
2784 					<< "Work image error, at index (" << ndx % m_workSize << ", " << ndx / m_workSize << ") expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2785 					<< "Work image contains invalid values."
2786 					<< tcu::TestLog::EndMessage;
2787 
2788 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Image contents invalid");
2789 				return STOP;
2790 			}
2791 		}
2792 
2793 		m_testCtx.getLog() << tcu::TestLog::Message << "Work image contents are valid." << tcu::TestLog::EndMessage;
2794 	}
2795 
2796 	// verify steps
2797 	{
2798 		std::vector<std::vector<deUint32> >	intermediateResults	(m_numCalls);
2799 		std::vector<deUint32>				valueChain			(m_numCalls);
2800 		std::vector<deUint32>				chainDelta			(m_numCalls);
2801 
2802 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2803 
2804 		// collect results
2805 
2806 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2807 		{
2808 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2809 			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize * m_workSize, intermediateResults[callNdx]);
2810 		}
2811 
2812 		// verify values
2813 
2814 		for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2815 		{
2816 			int			invalidOperationNdx;
2817 			deUint32	errorDelta;
2818 			deUint32	errorExpected;
2819 
2820 			// collect result chain for each element
2821 			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2822 				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2823 
2824 			// check there exists a path from 0 to sumValue using each addition once
2825 			// decompose cumulative results to addition operations (all additions positive => this works)
2826 
2827 			std::sort(valueChain.begin(), valueChain.end());
2828 
2829 			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2830 				chainDelta[callNdx] = ((callNdx + 1 == m_numCalls) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
2831 
2832 			// chainDelta contains now the actual additions applied to the value
2833 			std::sort(chainDelta.begin(), chainDelta.end());
2834 
2835 			// validate chain
2836 			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2837 			{
2838 				m_testCtx.getLog()
2839 					<< tcu::TestLog::Message
2840 					<< "Intermediate buffer error, at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << "), applied operation index "
2841 					<< invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2842 					<< "Intermediate buffer contains invalid values. Values at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << ")\n"
2843 					<< tcu::TestLog::EndMessage;
2844 
2845 				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2846 					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2847 				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2848 
2849 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2850 				return STOP;
2851 			}
2852 		}
2853 
2854 		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2855 	}
2856 
2857 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2858 	return STOP;
2859 }
2860 
// Reads the m_workSize x m_workSize R32UI work image back into 'result'.
// Since image contents cannot be mapped directly, the image is first copied
// into a temporary SSBO by the image-read compute program, and that buffer is
// then mapped and memcpy'd out.
void ConcurrentImageAtomicCase::readWorkImage (std::vector<deUint32>& result)
{
	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
	glu::Buffer				resultBuffer	(m_context.getRenderContext());

	// Read image to an ssbo

	{
		const std::vector<deUint32> zeroData(m_workSize*m_workSize, 0);

		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *resultBuffer);
		gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * m_workSize * m_workSize), &zeroData[0], GL_DYNAMIC_COPY);

		// Make the preceding shader image writes visible to the read program.
		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
		gl.useProgram(m_imageReadProgram->getProgram());

		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *resultBuffer);
		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
		gl.dispatchCompute(m_workSize, m_workSize, 1);

		GLU_EXPECT_NO_ERROR(gl.getError(), "read");
	}

	// Read ssbo
	{
		const void* ptr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, (int)(sizeof(deUint32) * m_workSize * m_workSize), GL_MAP_READ_BIT);
		GLU_EXPECT_NO_ERROR(gl.getError(), "map");

		if (!ptr)
			throw tcu::TestError("mapBufferRange returned NULL");

		result.resize(m_workSize * m_workSize);
		memcpy(&result[0], ptr, sizeof(deUint32) * m_workSize * m_workSize);

		if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) == GL_FALSE)
			throw tcu::TestError("unmapBuffer returned false");
	}
}
2899 
genComputeSource(void) const2900 std::string ConcurrentImageAtomicCase::genComputeSource (void) const
2901 {
2902 	std::ostringstream buf;
2903 
2904 	buf	<< "${GLSL_VERSION_DECL}\n"
2905 		<< "${SHADER_IMAGE_ATOMIC_REQUIRE}\n"
2906 		<< "\n"
2907 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2908 		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2909 		<< "{\n"
2910 		<< "	highp uint values[" << m_workSize * m_workSize << "];\n"
2911 		<< "} sb_ires;\n"
2912 		<< "\n"
2913 		<< "layout (binding = 2, r32ui) volatile uniform highp uimage2D u_workImage;\n"
2914 		<< "uniform highp uint u_atomicDelta;\n"
2915 		<< "\n"
2916 		<< "void main ()\n"
2917 		<< "{\n"
2918 		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2919 		<< "	sb_ires.values[invocationIndex] = imageAtomicAdd(u_workImage, ivec2(gl_GlobalInvocationID.xy), u_atomicDelta);\n"
2920 		<< "}";
2921 
2922 	return specializeShader(m_context, buf.str().c_str());
2923 }
2924 
genImageReadSource(void) const2925 std::string ConcurrentImageAtomicCase::genImageReadSource (void) const
2926 {
2927 	std::ostringstream buf;
2928 
2929 	buf	<< "${GLSL_VERSION_DECL}\n"
2930 		<< "\n"
2931 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2932 		<< "layout (binding = 1, std430) writeonly buffer ImageValues\n"
2933 		<< "{\n"
2934 		<< "	highp uint values[" << m_workSize * m_workSize << "];\n"
2935 		<< "} sb_res;\n"
2936 		<< "\n"
2937 		<< "layout (binding = 2, r32ui) readonly uniform highp uimage2D u_workImage;\n"
2938 		<< "\n"
2939 		<< "void main ()\n"
2940 		<< "{\n"
2941 		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2942 		<< "	sb_res.values[invocationIndex] = imageLoad(u_workImage, ivec2(gl_GlobalInvocationID.xy)).x;\n"
2943 		<< "}";
2944 
2945 	return specializeShader(m_context, buf.str().c_str());
2946 }
2947 
genImageClearSource(void) const2948 std::string ConcurrentImageAtomicCase::genImageClearSource (void) const
2949 {
2950 	std::ostringstream buf;
2951 
2952 	buf	<< "${GLSL_VERSION_DECL}\n"
2953 		<< "\n"
2954 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2955 		<< "layout (binding = 2, r32ui) writeonly uniform highp uimage2D u_workImage;\n"
2956 		<< "\n"
2957 		<< "void main ()\n"
2958 		<< "{\n"
2959 		<< "	imageStore(u_workImage, ivec2(gl_GlobalInvocationID.xy), uvec4(0, 0, 0, 0));\n"
2960 		<< "}";
2961 
2962 	return specializeShader(m_context, buf.str().c_str());
2963 }
2964 
// Tests mixing atomic counter operations and SSBO atomic operations on the
// very same buffer: one program increments an atomic counter while another
// applies (self-cancelling) SSBO atomic XORs to the aliased storage.
class ConcurrentSSBOAtomicCounterMixedCase : public TestCase
{
public:
							ConcurrentSSBOAtomicCounterMixedCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
							~ConcurrentSSBOAtomicCounterMixedCase	(void);

	void					init									(void);
	void					deinit									(void);
	IterateResult			iterate									(void);

private:
	std::string				genSSBOComputeSource					(void) const;	// SSBO atomic XOR program
	std::string				genAtomicCounterComputeSource			(void) const;	// counter increment program

	const int				m_numCalls;		// number of program-pair dispatches
	const int				m_workSize;		// work group count (x dimension)
	deUint32				m_bufferID;		// single buffer backing BOTH the counter and the SSBO
	glu::ShaderProgram*		m_ssboAtomicProgram;
	glu::ShaderProgram*		m_atomicCounterProgram;
};
2985 
ConcurrentSSBOAtomicCounterMixedCase(Context & context,const char * name,const char * description,int numCalls,int workSize)2986 ConcurrentSSBOAtomicCounterMixedCase::ConcurrentSSBOAtomicCounterMixedCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2987 	: TestCase					(context, name, description)
2988 	, m_numCalls				(numCalls)
2989 	, m_workSize				(workSize)
2990 	, m_bufferID				(DE_NULL)
2991 	, m_ssboAtomicProgram		(DE_NULL)
2992 	, m_atomicCounterProgram	(DE_NULL)
2993 {
2994 	// SSBO atomic XORs cancel out
2995 	DE_ASSERT((workSize * numCalls) % (16 * 2) == 0);
2996 }
2997 
// Destructor; releases all GL resources via deinit() (deinit is idempotent).
ConcurrentSSBOAtomicCounterMixedCase::~ConcurrentSSBOAtomicCounterMixedCase (void)
{
	deinit();
}
3002 
// Creates the shared two-uint buffer (aliased by both the atomic counter and
// the SSBO) and builds both compute programs. Throws tcu::TestError on
// program build failure.
void ConcurrentSSBOAtomicCounterMixedCase::init (void)
{
	const glw::Functions&		gl			= m_context.getRenderContext().getFunctions();
	// Two uints: [0] = counter target / XOR target, [1] = dummy word.
	const deUint32				zeroBuf[2]	= { 0, 0 };

	// gen buffer

	gl.genBuffers(1, &m_bufferID);
	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
	gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * 2), zeroBuf, GL_DYNAMIC_COPY);

	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");

	// gen programs

	{
		const tcu::ScopedLogSection section(m_testCtx.getLog(), "SSBOProgram", "SSBO atomic program");

		m_ssboAtomicProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genSSBOComputeSource()));
		m_testCtx.getLog() << *m_ssboAtomicProgram;
		if (!m_ssboAtomicProgram->isOk())
			throw tcu::TestError("could not build program");
	}
	{
		const tcu::ScopedLogSection section(m_testCtx.getLog(), "AtomicCounterProgram", "Atomic counter program");

		m_atomicCounterProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genAtomicCounterComputeSource()));
		m_testCtx.getLog() << *m_atomicCounterProgram;
		if (!m_atomicCounterProgram->isOk())
			throw tcu::TestError("could not build program");
	}
}
3035 
deinit(void)3036 void ConcurrentSSBOAtomicCounterMixedCase::deinit (void)
3037 {
3038 	if (m_bufferID)
3039 	{
3040 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
3041 		m_bufferID = 0;
3042 	}
3043 
3044 	delete m_ssboAtomicProgram;
3045 	m_ssboAtomicProgram = DE_NULL;
3046 
3047 	delete m_atomicCounterProgram;
3048 	m_atomicCounterProgram = DE_NULL;
3049 }
3050 
// Alternates dispatches of the counter-increment and SSBO-XOR programs, both
// bound to the same buffer storage. The XORs are designed to cancel out, so
// after all dispatches the first word must equal numCalls * workSize.
TestCase::IterateResult ConcurrentSSBOAtomicCounterMixedCase::iterate (void)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	m_testCtx.getLog() << tcu::TestLog::Message << "Testing atomic counters and SSBO atomic operations with both backed by the same buffer." << tcu::TestLog::EndMessage;

	// invoke programs N times
	{
		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Running SSBO atomic program and atomic counter program " << m_numCalls << " times. (interleaved)\n"
			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
			<< tcu::TestLog::EndMessage;

		// Same buffer bound through both binding points simultaneously.
		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_bufferID);
		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_bufferID);

		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			gl.useProgram(m_atomicCounterProgram->getProgram());
			gl.dispatchCompute(m_workSize, 1, 1);

			gl.useProgram(m_ssboAtomicProgram->getProgram());
			gl.dispatchCompute(m_workSize, 1, 1);
		}

		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
	}

	// Verify result
	{
		deUint32 result;

		// XORs cancel out, only addition is left
		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;

		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_bufferID);
		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);

		if ((int)result != m_numCalls*m_workSize)
		{
			m_testCtx.getLog()
				<< tcu::TestLog::Message
				<< "Buffer value error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
				<< tcu::TestLog::EndMessage;

			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
			return STOP;
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Buffer is valid." << tcu::TestLog::EndMessage;
	}

	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
	return STOP;
}
3107 
genSSBOComputeSource(void) const3108 std::string ConcurrentSSBOAtomicCounterMixedCase::genSSBOComputeSource (void) const
3109 {
3110 	std::ostringstream buf;
3111 
3112 	buf	<< "${GLSL_VERSION_DECL}\n"
3113 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3114 		<< "layout (binding = 1, std430) volatile buffer WorkBuffer\n"
3115 		<< "{\n"
3116 		<< "	highp uint targetValue;\n"
3117 		<< "	highp uint dummy;\n"
3118 		<< "} sb_work;\n"
3119 		<< "\n"
3120 		<< "void main ()\n"
3121 		<< "{\n"
3122 		<< "	// flip high bits\n"
3123 		<< "	highp uint mask = uint(1) << (24u + (gl_GlobalInvocationID.x % 8u));\n"
3124 		<< "	sb_work.dummy = atomicXor(sb_work.targetValue, mask);\n"
3125 		<< "}";
3126 
3127 	return specializeShader(m_context, buf.str().c_str());
3128 }
3129 
genAtomicCounterComputeSource(void) const3130 std::string ConcurrentSSBOAtomicCounterMixedCase::genAtomicCounterComputeSource (void) const
3131 {
3132 	std::ostringstream buf;
3133 
3134 	buf	<< "${GLSL_VERSION_DECL}\n"
3135 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3136 		<< "\n"
3137 		<< "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
3138 		<< "\n"
3139 		<< "void main ()\n"
3140 		<< "{\n"
3141 		<< "	atomicCounterIncrement(u_counter);\n"
3142 		<< "}";
3143 
3144 	return specializeShader(m_context, buf.str().c_str());
3145 }
3146 
3147 } // anonymous
3148 
// Root test group for all synchronization tests; registered under the
// group name "synchronization". Child cases are added in init().
SynchronizationTests::SynchronizationTests (Context& context)
	: TestCaseGroup(context, "synchronization", "Synchronization tests")
{
}
3153 
SynchronizationTests::~SynchronizationTests (void)
{
	// No resources of its own to release.
}
3157 
init(void)3158 void SynchronizationTests::init (void)
3159 {
3160 	tcu::TestCaseGroup* const inInvocationGroup		= new tcu::TestCaseGroup(m_testCtx, "in_invocation",	"Test intra-invocation synchronization");
3161 	tcu::TestCaseGroup* const interInvocationGroup	= new tcu::TestCaseGroup(m_testCtx, "inter_invocation", "Test inter-invocation synchronization");
3162 	tcu::TestCaseGroup* const interCallGroup		= new tcu::TestCaseGroup(m_testCtx, "inter_call",       "Test inter-call synchronization");
3163 
3164 	addChild(inInvocationGroup);
3165 	addChild(interInvocationGroup);
3166 	addChild(interCallGroup);
3167 
3168 	// .in_invocation & .inter_invocation
3169 	{
3170 		static const struct CaseConfig
3171 		{
3172 			const char*									namePrefix;
3173 			const InterInvocationTestCase::StorageType	storage;
3174 			const int									flags;
3175 		} configs[] =
3176 		{
3177 			{ "image",			InterInvocationTestCase::STORAGE_IMAGE,		0										},
3178 			{ "image_atomic",	InterInvocationTestCase::STORAGE_IMAGE,		InterInvocationTestCase::FLAG_ATOMIC	},
3179 			{ "ssbo",			InterInvocationTestCase::STORAGE_BUFFER,	0										},
3180 			{ "ssbo_atomic",	InterInvocationTestCase::STORAGE_BUFFER,	InterInvocationTestCase::FLAG_ATOMIC	},
3181 		};
3182 
3183 		for (int groupNdx = 0; groupNdx < 2; ++groupNdx)
3184 		{
3185 			tcu::TestCaseGroup* const	targetGroup	= (groupNdx == 0) ? (inInvocationGroup) : (interInvocationGroup);
3186 			const int					extraFlags	= (groupNdx == 0) ? (0) : (InterInvocationTestCase::FLAG_IN_GROUP);
3187 
3188 			for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3189 			{
3190 				const char* const target = (configs[configNdx].storage == InterInvocationTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3191 
3192 				targetGroup->addChild(new InvocationWriteReadCase(m_context,
3193 																  (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3194 																  (std::string("Write to ") + target + " and read it").c_str(),
3195 																  configs[configNdx].storage,
3196 																  configs[configNdx].flags | extraFlags));
3197 
3198 				targetGroup->addChild(new InvocationReadWriteCase(m_context,
3199 																  (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3200 																  (std::string("Read form ") + target + " and then write to it").c_str(),
3201 																  configs[configNdx].storage,
3202 																  configs[configNdx].flags | extraFlags));
3203 
3204 				targetGroup->addChild(new InvocationOverWriteCase(m_context,
3205 																  (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3206 																  (std::string("Write to ") + target + " twice and read it").c_str(),
3207 																  configs[configNdx].storage,
3208 																  configs[configNdx].flags | extraFlags));
3209 
3210 				targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3211 																   (std::string(configs[configNdx].namePrefix) + "_alias_write").c_str(),
3212 																   (std::string("Write to aliasing ") + target + " and read it").c_str(),
3213 																   InvocationAliasWriteCase::TYPE_WRITE,
3214 																   configs[configNdx].storage,
3215 																   configs[configNdx].flags | extraFlags));
3216 
3217 				targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3218 																   (std::string(configs[configNdx].namePrefix) + "_alias_overwrite").c_str(),
3219 																   (std::string("Write to aliasing ") + target + "s and read it").c_str(),
3220 																   InvocationAliasWriteCase::TYPE_OVERWRITE,
3221 																   configs[configNdx].storage,
3222 																   configs[configNdx].flags | extraFlags));
3223 			}
3224 		}
3225 	}
3226 
3227 	// .inter_call
3228 	{
3229 		tcu::TestCaseGroup* const withBarrierGroup		= new tcu::TestCaseGroup(m_testCtx, "with_memory_barrier", "Synchronize with memory barrier");
3230 		tcu::TestCaseGroup* const withoutBarrierGroup	= new tcu::TestCaseGroup(m_testCtx, "without_memory_barrier", "Synchronize without memory barrier");
3231 
3232 		interCallGroup->addChild(withBarrierGroup);
3233 		interCallGroup->addChild(withoutBarrierGroup);
3234 
3235 		// .with_memory_barrier
3236 		{
3237 			static const struct CaseConfig
3238 			{
3239 				const char*								namePrefix;
3240 				const InterCallTestCase::StorageType	storage;
3241 				const int								flags;
3242 			} configs[] =
3243 			{
3244 				{ "image",			InterCallTestCase::STORAGE_IMAGE,	0																		},
3245 				{ "image_atomic",	InterCallTestCase::STORAGE_IMAGE,	InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT	},
3246 				{ "ssbo",			InterCallTestCase::STORAGE_BUFFER,	0																		},
3247 				{ "ssbo_atomic",	InterCallTestCase::STORAGE_BUFFER,	InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT	},
3248 			};
3249 
3250 			const int seed0 = 123;
3251 			const int seed1 = 457;
3252 
3253 			for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3254 			{
3255 				const char* const target = (configs[configNdx].storage == InterCallTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3256 
3257 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3258 																 (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3259 																 (std::string("Write to ") + target + " and read it").c_str(),
3260 																 configs[configNdx].storage,
3261 																 configs[configNdx].flags,
3262 																 InterCallOperations()
3263 																	<< op::WriteData::Generate(1, seed0)
3264 																	<< op::Barrier()
3265 																	<< op::ReadData::Generate(1, seed0)));
3266 
3267 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3268 																 (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3269 																 (std::string("Read from ") + target + " and then write to it").c_str(),
3270 																 configs[configNdx].storage,
3271 																 configs[configNdx].flags,
3272 																 InterCallOperations()
3273 																	<< op::ReadZeroData::Generate(1)
3274 																	<< op::Barrier()
3275 																	<< op::WriteData::Generate(1, seed0)));
3276 
3277 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3278 																 (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3279 																 (std::string("Write to ") + target + " twice and read it").c_str(),
3280 																 configs[configNdx].storage,
3281 																 configs[configNdx].flags,
3282 																 InterCallOperations()
3283 																	<< op::WriteData::Generate(1, seed0)
3284 																	<< op::Barrier()
3285 																	<< op::WriteData::Generate(1, seed1)
3286 																	<< op::Barrier()
3287 																	<< op::ReadData::Generate(1, seed1)));
3288 
3289 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3290 																 (std::string(configs[configNdx].namePrefix) + "_multiple_write_read").c_str(),
3291 																 (std::string("Write to multiple ") + target + "s and read them").c_str(),
3292 																 configs[configNdx].storage,
3293 																 configs[configNdx].flags,
3294 																 InterCallOperations()
3295 																	<< op::WriteData::Generate(1, seed0)
3296 																	<< op::WriteData::Generate(2, seed1)
3297 																	<< op::Barrier()
3298 																	<< op::ReadMultipleData::Generate(1, seed0, 2, seed1)));
3299 
3300 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3301 																 (std::string(configs[configNdx].namePrefix) + "_multiple_interleaved_write_read").c_str(),
3302 																 (std::string("Write to same ") + target + " in multiple calls and read it").c_str(),
3303 																 configs[configNdx].storage,
3304 																 configs[configNdx].flags,
3305 																 InterCallOperations()
3306 																	<< op::WriteDataInterleaved::Generate(1, seed0, true)
3307 																	<< op::WriteDataInterleaved::Generate(1, seed1, false)
3308 																	<< op::Barrier()
3309 																	<< op::ReadDataInterleaved::Generate(1, seed0, seed1)));
3310 
3311 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3312 																 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_ordered").c_str(),
3313 																 (std::string("Two unrelated ") + target + " write-reads").c_str(),
3314 																 configs[configNdx].storage,
3315 																 configs[configNdx].flags,
3316 																 InterCallOperations()
3317 																	<< op::WriteData::Generate(1, seed0)
3318 																	<< op::WriteData::Generate(2, seed1)
3319 																	<< op::Barrier()
3320 																	<< op::ReadData::Generate(1, seed0)
3321 																	<< op::ReadData::Generate(2, seed1)));
3322 
3323 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3324 																 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_non_ordered").c_str(),
3325 																 (std::string("Two unrelated ") + target + " write-reads").c_str(),
3326 																 configs[configNdx].storage,
3327 																 configs[configNdx].flags,
3328 																 InterCallOperations()
3329 																	<< op::WriteData::Generate(1, seed0)
3330 																	<< op::WriteData::Generate(2, seed1)
3331 																	<< op::Barrier()
3332 																	<< op::ReadData::Generate(2, seed1)
3333 																	<< op::ReadData::Generate(1, seed0)));
3334 			}
3335 
3336 			// .without_memory_barrier
3337 			{
3338 				struct InvocationConfig
3339 				{
3340 					const char*	name;
3341 					int			count;
3342 				};
3343 
3344 				static const InvocationConfig ssboInvocations[] =
3345 				{
3346 					{ "1k",		1024	},
3347 					{ "4k",		4096	},
3348 					{ "32k",	32768	},
3349 				};
3350 				static const InvocationConfig imageInvocations[] =
3351 				{
3352 					{ "8x8",		8	},
3353 					{ "32x32",		32	},
3354 					{ "128x128",	128	},
3355 				};
3356 				static const InvocationConfig counterInvocations[] =
3357 				{
3358 					{ "32",		32		},
3359 					{ "128",	128		},
3360 					{ "1k",		1024	},
3361 				};
3362 				static const int callCounts[] = { 2, 5, 100 };
3363 
3364 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(ssboInvocations); ++invocationNdx)
3365 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3366 						withoutBarrierGroup->addChild(new SSBOConcurrentAtomicCase(m_context, (std::string("ssbo_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + ssboInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], ssboInvocations[invocationNdx].count));
3367 
3368 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(imageInvocations); ++invocationNdx)
3369 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3370 						withoutBarrierGroup->addChild(new ConcurrentImageAtomicCase(m_context, (std::string("image_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + imageInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], imageInvocations[invocationNdx].count));
3371 
3372 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3373 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3374 						withoutBarrierGroup->addChild(new ConcurrentAtomicCounterCase(m_context, (std::string("atomic_counter_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3375 
3376 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3377 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3378 						withoutBarrierGroup->addChild(new ConcurrentSSBOAtomicCounterMixedCase(m_context, (std::string("ssbo_atomic_counter_mixed_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3379 			}
3380 		}
3381 	}
3382 }
3383 
3384 } // Functional
3385 } // gles31
3386 } // deqp
3387