1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.1 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Synchronization Tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es31fSynchronizationTests.hpp"
25 #include "tcuTestLog.hpp"
26 #include "tcuSurface.hpp"
27 #include "tcuRenderTarget.hpp"
28 #include "gluRenderContext.hpp"
29 #include "gluShaderProgram.hpp"
30 #include "gluObjectWrapper.hpp"
31 #include "gluPixelTransfer.hpp"
32 #include "gluContextInfo.hpp"
33 #include "glwFunctions.hpp"
34 #include "glwEnums.hpp"
35 #include "deStringUtil.hpp"
36 #include "deSharedPtr.hpp"
37 #include "deMemory.h"
38 #include "deRandom.hpp"
39 
40 #include <map>
41 
42 namespace deqp
43 {
44 namespace gles31
45 {
46 namespace Functional
47 {
48 namespace
49 {
50 
51 
//! Check that the differences between successive chain values form the ramp 1..N.
//!
//! The delta for element i is valueChain[i+1] - valueChain[i]; for the last element
//! sumValue takes the place of the (missing) next value. The deltas are sorted and
//! compared against 1, 2, ..., valueChain.size(). An empty chain is accepted.
//!
//! \note The function name suggests valueChain is already sorted by the caller;
//!       nothing here sorts or checks it.
//!
//! \param valueChain           Chain of observed values.
//! \param sumValue             Value following the last chain element.
//! \param invalidOperationNdx  [out] Index into the sorted deltas of the first mismatch.
//! \param errorDelta           [out] Delta found at that index.
//! \param errorExpected        [out] Delta expected at that index (index + 1).
//! \return true if the sorted deltas are exactly 1..N. The out parameters are written
//!         only when false is returned.
static bool validateSortedAtomicRampAdditionValueChain (const std::vector<deUint32>& valueChain, deUint32 sumValue, int& invalidOperationNdx, deUint32& errorDelta, deUint32& errorExpected)
{
	const size_t			numOps		= valueChain.size();
	std::vector<deUint32>	chainDelta	(numOps);

	for (size_t opNdx = 0; opNdx < numOps; ++opNdx)
	{
		const deUint32 nextValue = (opNdx + 1 == numOps) ? (sumValue) : (valueChain[opNdx + 1]);

		// unsigned subtraction: wraps instead of overflowing if the chain is not ascending
		chainDelta[opNdx] = nextValue - valueChain[opNdx];
	}

	// chainDelta now contains the actual additions applied to the value,
	// check that they form an addition ramp from 1 to N
	std::sort(chainDelta.begin(), chainDelta.end());

	for (size_t opNdx = 0; opNdx < numOps; ++opNdx)
	{
		// compare in the unsigned domain; narrowing the delta to int is not portable for large deltas
		const deUint32 expectedDelta = (deUint32)(opNdx + 1);

		if (chainDelta[opNdx] != expectedDelta)
		{
			invalidOperationNdx	= (int)opNdx;
			errorDelta			= chainDelta[opNdx];
			errorExpected		= expectedDelta;

			return false;
		}
	}

	return true;
}
77 
readBuffer(const glw::Functions & gl,deUint32 target,int numElements,std::vector<deUint32> & result)78 static void readBuffer (const glw::Functions& gl, deUint32 target, int numElements, std::vector<deUint32>& result)
79 {
80 	const void* ptr = gl.mapBufferRange(target, 0, (int)(sizeof(deUint32) * numElements), GL_MAP_READ_BIT);
81 	GLU_EXPECT_NO_ERROR(gl.getError(), "map");
82 
83 	if (!ptr)
84 		throw tcu::TestError("mapBufferRange returned NULL");
85 
86 	result.resize(numElements);
87 	memcpy(&result[0], ptr, sizeof(deUint32) * numElements);
88 
89 	if (gl.unmapBuffer(target) == GL_FALSE)
90 		throw tcu::TestError("unmapBuffer returned false");
91 }
92 
readBufferUint32(const glw::Functions & gl,deUint32 target)93 static deUint32 readBufferUint32 (const glw::Functions& gl, deUint32 target)
94 {
95 	std::vector<deUint32> vec;
96 
97 	readBuffer(gl, target, 1, vec);
98 
99 	return vec[0];
100 }
101 
102 //! Generate a ramp of values from 1 to numElements, and shuffle it
generateShuffledRamp(int numElements,std::vector<int> & ramp)103 void generateShuffledRamp (int numElements, std::vector<int>& ramp)
104 {
105 	de::Random rng(0xabcd);
106 
107 	// some positive (non-zero) unique values
108 	ramp.resize(numElements);
109 	for (int callNdx = 0; callNdx < numElements; ++callNdx)
110 		ramp[callNdx] = callNdx + 1;
111 
112 	rng.shuffle(ramp.begin(), ramp.end());
113 }
114 
115 class InterInvocationTestCase : public TestCase
116 {
117 public:
118 	enum StorageType
119 	{
120 		STORAGE_BUFFER = 0,
121 		STORAGE_IMAGE,
122 
123 		STORAGE_LAST
124 	};
125 	enum CaseFlags
126 	{
127 		FLAG_ATOMIC				= 0x1,
128 		FLAG_ALIASING_STORAGES	= 0x2,
129 		FLAG_IN_GROUP			= 0x4,
130 	};
131 
132 						InterInvocationTestCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags = 0);
133 						~InterInvocationTestCase	(void);
134 
135 private:
136 	void				init						(void);
137 	void				deinit						(void);
138 	IterateResult		iterate						(void);
139 
140 	void				runCompute					(void);
141 	bool				verifyResults				(void);
142 	virtual std::string	genShaderSource				(void) const = 0;
143 
144 protected:
145 	std::string			genBarrierSource			(void) const;
146 
147 	const StorageType	m_storage;
148 	const bool			m_useAtomic;
149 	const bool			m_aliasingStorages;
150 	const bool			m_syncWithGroup;
151 	const int			m_workWidth;				// !< total work width
152 	const int			m_workHeight;				// !<     ...    height
153 	const int			m_localWidth;				// !< group width
154 	const int			m_localHeight;				// !< group height
155 	const int			m_elementsPerInvocation;	// !< elements accessed by a single invocation
156 
157 private:
158 	glw::GLuint			m_storageBuf;
159 	glw::GLuint			m_storageTex;
160 	glw::GLuint			m_resultBuf;
161 	glu::ShaderProgram*	m_program;
162 };
163 
InterInvocationTestCase(Context & context,const char * name,const char * desc,StorageType storage,int flags)164 InterInvocationTestCase::InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
165 	: TestCase					(context, name, desc)
166 	, m_storage					(storage)
167 	, m_useAtomic				((flags & FLAG_ATOMIC) != 0)
168 	, m_aliasingStorages		((flags & FLAG_ALIASING_STORAGES) != 0)
169 	, m_syncWithGroup			((flags & FLAG_IN_GROUP) != 0)
170 	, m_workWidth				(256)
171 	, m_workHeight				(256)
172 	, m_localWidth				(16)
173 	, m_localHeight				(8)
174 	, m_elementsPerInvocation	(8)
175 	, m_storageBuf				(0)
176 	, m_storageTex				(0)
177 	, m_resultBuf				(0)
178 	, m_program					(DE_NULL)
179 {
180 	DE_ASSERT(m_storage < STORAGE_LAST);
181 	DE_ASSERT(m_localWidth*m_localHeight <= 128); // minimum MAX_COMPUTE_WORK_GROUP_INVOCATIONS value
182 }
183 
~InterInvocationTestCase(void)184 InterInvocationTestCase::~InterInvocationTestCase (void)
185 {
186 	deinit();
187 }
188 
init(void)189 void InterInvocationTestCase::init (void)
190 {
191 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
192 
193 	// requirements
194 
195 	if (m_useAtomic && m_storage == STORAGE_IMAGE && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
196 		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
197 
198 	// program
199 
200 	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genShaderSource()));
201 	m_testCtx.getLog() << *m_program;
202 	if (!m_program->isOk())
203 		throw tcu::TestError("could not build program");
204 
205 	// source
206 
207 	if (m_storage == STORAGE_BUFFER)
208 	{
209 		const int				bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
210 		const int				bufferSize		= bufferElements * sizeof(deUint32);
211 		std::vector<deUint32>	zeroBuffer		(bufferElements, 0);
212 
213 		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating zero-filled buffer for storage, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
214 
215 		gl.genBuffers(1, &m_storageBuf);
216 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_storageBuf);
217 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
218 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
219 	}
220 	else if (m_storage == STORAGE_IMAGE)
221 	{
222 		const int				bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
223 		const int				bufferSize		= bufferElements * sizeof(deUint32);
224 
225 		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating image for storage, size " << m_workWidth << "x" << m_workHeight * m_elementsPerInvocation << ", " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
226 
227 		gl.genTextures(1, &m_storageTex);
228 		gl.bindTexture(GL_TEXTURE_2D, m_storageTex);
229 		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, m_workWidth, m_workHeight * m_elementsPerInvocation);
230 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
231 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
232 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage image");
233 
234 		// Zero-fill
235 		m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0." << tcu::TestLog::EndMessage;
236 
237 		{
238 			const std::vector<deInt32> zeroBuffer(m_workWidth * m_workHeight * m_elementsPerInvocation, 0);
239 			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_workWidth, m_workHeight * m_elementsPerInvocation, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
240 			GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
241 		}
242 	}
243 	else
244 		DE_ASSERT(DE_FALSE);
245 
246 	// destination
247 
248 	{
249 		const int				bufferElements	= m_workWidth * m_workHeight;
250 		const int				bufferSize		= bufferElements * sizeof(deUint32);
251 		std::vector<deInt32>	negativeBuffer	(bufferElements, -1);
252 
253 		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating -1 filled buffer for results, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
254 
255 		gl.genBuffers(1, &m_resultBuf);
256 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
257 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &negativeBuffer[0], GL_STATIC_DRAW);
258 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
259 	}
260 }
261 
deinit(void)262 void InterInvocationTestCase::deinit (void)
263 {
264 	if (m_storageBuf)
265 	{
266 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_storageBuf);
267 		m_storageBuf = DE_NULL;
268 	}
269 
270 	if (m_storageTex)
271 	{
272 		m_context.getRenderContext().getFunctions().deleteTextures(1, &m_storageTex);
273 		m_storageTex = DE_NULL;
274 	}
275 
276 	if (m_resultBuf)
277 	{
278 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_resultBuf);
279 		m_resultBuf = DE_NULL;
280 	}
281 
282 	delete m_program;
283 	m_program = DE_NULL;
284 }
285 
iterate(void)286 InterInvocationTestCase::IterateResult InterInvocationTestCase::iterate (void)
287 {
288 	// Dispatch
289 	runCompute();
290 
291 	// Verify buffer contents
292 	if (verifyResults())
293 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
294 	else
295 		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
296 
297 	return STOP;
298 }
299 
runCompute(void)300 void InterInvocationTestCase::runCompute (void)
301 {
302 	const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
303 	const int				groupsX	= m_workWidth / m_localWidth;
304 	const int				groupsY	= m_workHeight / m_localHeight;
305 
306 	DE_ASSERT((m_workWidth % m_localWidth) == 0);
307 	DE_ASSERT((m_workHeight % m_localHeight) == 0);
308 
309 	m_testCtx.getLog()
310 		<< tcu::TestLog::Message
311 		<< "Dispatching compute.\n"
312 		<< "	group size: " << m_localWidth << "x" << m_localHeight << "\n"
313 		<< "	dispatch size: " << groupsX << "x" << groupsY << "\n"
314 		<< "	total work size: " << m_workWidth << "x" << m_workHeight << "\n"
315 		<< tcu::TestLog::EndMessage;
316 
317 	gl.useProgram(m_program->getProgram());
318 
319 	// source
320 	if (m_storage == STORAGE_BUFFER && !m_aliasingStorages)
321 	{
322 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
323 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
324 	}
325 	else if (m_storage == STORAGE_BUFFER && m_aliasingStorages)
326 	{
327 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
328 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageBuf);
329 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
330 
331 		m_testCtx.getLog() << tcu::TestLog::Message << "Binding same buffer object to buffer storages." << tcu::TestLog::EndMessage;
332 	}
333 	else if (m_storage == STORAGE_IMAGE && !m_aliasingStorages)
334 	{
335 		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
336 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
337 	}
338 	else if (m_storage == STORAGE_IMAGE && m_aliasingStorages)
339 	{
340 		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
341 		gl.bindImageTexture(2, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
342 
343 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
344 
345 		m_testCtx.getLog() << tcu::TestLog::Message << "Binding same texture level to image storages." << tcu::TestLog::EndMessage;
346 	}
347 	else
348 		DE_ASSERT(DE_FALSE);
349 
350 	// destination
351 	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_resultBuf);
352 	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
353 
354 	// dispatch
355 	gl.dispatchCompute(groupsX, groupsY, 1);
356 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatchCompute");
357 }
358 
verifyResults(void)359 bool InterInvocationTestCase::verifyResults (void)
360 {
361 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
362 	const int				errorFloodThreshold	= 5;
363 	int						numErrorsLogged		= 0;
364 	const void*				mapped				= DE_NULL;
365 	std::vector<deInt32>	results				(m_workWidth * m_workHeight);
366 	bool					error				= false;
367 
368 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
369 	mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_workWidth * m_workHeight * sizeof(deInt32), GL_MAP_READ_BIT);
370 	GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
371 
372 	// copy to properly aligned array
373 	deMemcpy(&results[0], mapped, m_workWidth * m_workHeight * sizeof(deUint32));
374 
375 	if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
376 		throw tcu::TestError("memory map store corrupted");
377 
378 	// check the results
379 	for (int ndx = 0; ndx < (int)results.size(); ++ndx)
380 	{
381 		if (results[ndx] != 1)
382 		{
383 			error = true;
384 
385 			if (numErrorsLogged == 0)
386 				m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
387 			if (numErrorsLogged++ < errorFloodThreshold)
388 				m_testCtx.getLog() << tcu::TestLog::Message << "	Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
389 			else
390 			{
391 				// after N errors, no point continuing verification
392 				m_testCtx.getLog() << tcu::TestLog::Message << "	-- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
393 				break;
394 			}
395 		}
396 	}
397 
398 	if (!error)
399 		m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer ok." << tcu::TestLog::EndMessage;
400 	return !error;
401 }
402 
genBarrierSource(void) const403 std::string InterInvocationTestCase::genBarrierSource (void) const
404 {
405 	std::ostringstream buf;
406 
407 	if (m_syncWithGroup)
408 	{
409 		// Wait until all invocations in this work group have their texture/buffer read/write operations complete
410 		// \note We could also use memoryBarrierBuffer() or memoryBarrierImage() in place of groupMemoryBarrier() but
411 		//       we only require intra-workgroup synchronization.
412 		buf << "\n"
413 			<< "	groupMemoryBarrier();\n"
414 			<< "	barrier();\n"
415 			<< "\n";
416 	}
417 	else if (m_storage == STORAGE_BUFFER)
418 	{
419 		DE_ASSERT(!m_syncWithGroup);
420 
421 		// Waiting only for data written by this invocation. Since all buffer reads and writes are
422 		// processed in order (within a single invocation), we don't have to do anything.
423 		buf << "\n";
424 	}
425 	else if (m_storage == STORAGE_IMAGE)
426 	{
427 		DE_ASSERT(!m_syncWithGroup);
428 
429 		// Waiting only for data written by this invocation. But since operations complete in undefined
430 		// order, we have to wait for them to complete.
431 		buf << "\n"
432 			<< "	memoryBarrierImage();\n"
433 			<< "\n";
434 	}
435 	else
436 		DE_ASSERT(DE_FALSE);
437 
438 	return buf.str();
439 }
440 
441 class InvocationBasicCase : public InterInvocationTestCase
442 {
443 public:
444 							InvocationBasicCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
445 private:
446 	std::string				genShaderSource			(void) const;
447 	virtual std::string		genShaderMainBlock		(void) const = 0;
448 };
449 
InvocationBasicCase(Context & context,const char * name,const char * desc,StorageType storage,int flags)450 InvocationBasicCase::InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
451 	: InterInvocationTestCase(context, name, desc, storage, flags)
452 {
453 }
454 
genShaderSource(void) const455 std::string InvocationBasicCase::genShaderSource (void) const
456 {
457 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
458 	std::ostringstream	buf;
459 
460 	buf << "#version 310 es\n"
461 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
462 		<< "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
463 		<< "layout(binding=0, std430) buffer Output\n"
464 		<< "{\n"
465 		<< "	highp int values[];\n"
466 		<< "} sb_result;\n";
467 
468 	if (m_storage == STORAGE_BUFFER)
469 		buf << "layout(binding=1, std430) coherent buffer Storage\n"
470 			<< "{\n"
471 			<< "	highp int values[];\n"
472 			<< "} sb_store;\n"
473 			<< "\n"
474 			<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
475 			<< "{\n"
476 			<< "	highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
477 			<< "	return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
478 			<< "}\n";
479 	else if (m_storage == STORAGE_IMAGE)
480 		buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image;\n"
481 			<< "\n"
482 			<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
483 			<< "{\n"
484 			<< "	return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
485 			<< "}\n";
486 	else
487 		DE_ASSERT(DE_FALSE);
488 
489 	buf << "\n"
490 		<< "void main (void)\n"
491 		<< "{\n"
492 		<< "	int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
493 		<< "	int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
494 		<< "	bool allOk      = true;\n"
495 		<< "\n"
496 		<< genShaderMainBlock()
497 		<< "\n"
498 		<< "	sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
499 		<< "}\n";
500 
501 	return buf.str();
502 }
503 
504 class InvocationWriteReadCase : public InvocationBasicCase
505 {
506 public:
507 					InvocationWriteReadCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
508 private:
509 	std::string		genShaderMainBlock			(void) const;
510 };
511 
InvocationWriteReadCase(Context & context,const char * name,const char * desc,StorageType storage,int flags)512 InvocationWriteReadCase::InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
513 	: InvocationBasicCase(context, name, desc, storage, flags)
514 {
515 }
516 
genShaderMainBlock(void) const517 std::string InvocationWriteReadCase::genShaderMainBlock (void) const
518 {
519 	std::ostringstream buf;
520 
521 	// write
522 
523 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
524 	{
525 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
526 			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
527 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
528 			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
529 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
530 			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
531 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
532 			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
533 		else
534 			DE_ASSERT(DE_FALSE);
535 	}
536 
537 	// barrier
538 
539 	buf << genBarrierSource();
540 
541 	// read
542 
543 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
544 	{
545 		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
546 
547 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
548 			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 0) == groupNdx);\n";
549 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
550 			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
551 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
552 			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 0) == groupNdx);\n";
553 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
554 			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
555 		else
556 			DE_ASSERT(DE_FALSE);
557 	}
558 
559 	return buf.str();
560 }
561 
562 class InvocationReadWriteCase : public InvocationBasicCase
563 {
564 public:
565 					InvocationReadWriteCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
566 private:
567 	std::string		genShaderMainBlock			(void) const;
568 };
569 
InvocationReadWriteCase(Context & context,const char * name,const char * desc,StorageType storage,int flags)570 InvocationReadWriteCase::InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
571 	: InvocationBasicCase(context, name, desc, storage, flags)
572 {
573 }
574 
genShaderMainBlock(void) const575 std::string InvocationReadWriteCase::genShaderMainBlock (void) const
576 {
577 	std::ostringstream buf;
578 
579 	// read
580 
581 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
582 	{
583 		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
584 
585 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
586 			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == 0);\n";
587 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
588 			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == 0);\n";
589 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
590 			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == 0);\n";
591 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
592 			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == 0);\n";
593 		else
594 			DE_ASSERT(DE_FALSE);
595 	}
596 
597 	// barrier
598 
599 	buf << genBarrierSource();
600 
601 	// write
602 
603 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
604 	{
605 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
606 			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
607 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
608 			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
609 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
610 			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
611 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
612 			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
613 		else
614 			DE_ASSERT(DE_FALSE);
615 	}
616 
617 	return buf.str();
618 }
619 
620 class InvocationOverWriteCase : public InvocationBasicCase
621 {
622 public:
623 					InvocationOverWriteCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
624 private:
625 	std::string		genShaderMainBlock			(void) const;
626 };
627 
InvocationOverWriteCase(Context & context,const char * name,const char * desc,StorageType storage,int flags)628 InvocationOverWriteCase::InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
629 	: InvocationBasicCase(context, name, desc, storage, flags)
630 {
631 }
632 
genShaderMainBlock(void) const633 std::string InvocationOverWriteCase::genShaderMainBlock (void) const
634 {
635 	std::ostringstream buf;
636 
637 	// write
638 
639 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
640 	{
641 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
642 			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
643 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
644 			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
645 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
646 			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
647 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
648 			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
649 		else
650 			DE_ASSERT(DE_FALSE);
651 	}
652 
653 	// barrier
654 
655 	buf << genBarrierSource();
656 
657 	// write over
658 
659 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
660 	{
661 		// write another invocation's value or our own value depending on test type
662 		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+4) + ", " + de::toString(3*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
663 
664 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
665 			buf << "\tatomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
666 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
667 			buf << "\tsb_store.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
668 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
669 			buf << "\timageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
670 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
671 			buf << "\timageStore(u_image, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
672 		else
673 			DE_ASSERT(DE_FALSE);
674 	}
675 
676 	// barrier
677 
678 	buf << genBarrierSource();
679 
680 	// read
681 
682 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
683 	{
684 		// check another invocation's value or our own value depending on test type
685 		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
686 
687 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
688 			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == groupNdx);\n";
689 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
690 			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
691 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
692 			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == groupNdx);\n";
693 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
694 			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
695 		else
696 			DE_ASSERT(DE_FALSE);
697 	}
698 
699 	return buf.str();
700 }
701 
702 class InvocationAliasWriteCase : public InterInvocationTestCase
703 {
704 public:
705 	enum TestType
706 	{
707 		TYPE_WRITE = 0,
708 		TYPE_OVERWRITE,
709 
710 		TYPE_LAST
711 	};
712 
713 					InvocationAliasWriteCase	(Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags);
714 private:
715 	std::string		genShaderSource				(void) const;
716 
717 	const TestType	m_type;
718 };
719 
InvocationAliasWriteCase(Context & context,const char * name,const char * desc,TestType type,StorageType storage,int flags)720 InvocationAliasWriteCase::InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags)
721 	: InterInvocationTestCase	(context, name, desc, storage, flags | FLAG_ALIASING_STORAGES)
722 	, m_type					(type)
723 {
724 	DE_ASSERT(type < TYPE_LAST);
725 }
726 
genShaderSource(void) const727 std::string InvocationAliasWriteCase::genShaderSource (void) const
728 {
729 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
730 	std::ostringstream	buf;
731 
732 	buf << "#version 310 es\n"
733 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
734 		<< "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
735 		<< "layout(binding=0, std430) buffer Output\n"
736 		<< "{\n"
737 		<< "	highp int values[];\n"
738 		<< "} sb_result;\n";
739 
740 	if (m_storage == STORAGE_BUFFER)
741 		buf << "layout(binding=1, std430) coherent buffer Storage0\n"
742 			<< "{\n"
743 			<< "	highp int values[];\n"
744 			<< "} sb_store0;\n"
745 			<< "layout(binding=2, std430) coherent buffer Storage1\n"
746 			<< "{\n"
747 			<< "	highp int values[];\n"
748 			<< "} sb_store1;\n"
749 			<< "\n"
750 			<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
751 			<< "{\n"
752 			<< "	highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
753 			<< "	return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
754 			<< "}\n";
755 	else if (m_storage == STORAGE_IMAGE)
756 		buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image0;\n"
757 			<< "layout(r32i, binding=2) coherent uniform highp iimage2D u_image1;\n"
758 			<< "\n"
759 			<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
760 			<< "{\n"
761 			<< "	return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
762 			<< "}\n";
763 	else
764 		DE_ASSERT(DE_FALSE);
765 
766 	buf << "\n"
767 		<< "void main (void)\n"
768 		<< "{\n"
769 		<< "	int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
770 		<< "	int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
771 		<< "	bool allOk      = true;\n"
772 		<< "\n";
773 
774 	if (m_type == TYPE_OVERWRITE)
775 	{
776 		// write
777 
778 		for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
779 		{
780 			if (m_storage == STORAGE_BUFFER && m_useAtomic)
781 				buf << "\tatomicAdd(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
782 			else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
783 				buf << "\tsb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
784 			else if (m_storage == STORAGE_IMAGE && m_useAtomic)
785 				buf << "\timageAtomicAdd(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
786 			else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
787 				buf << "\timageStore(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
788 			else
789 				DE_ASSERT(DE_FALSE);
790 		}
791 
792 		// barrier
793 
794 		buf << genBarrierSource();
795 	}
796 	else
797 		DE_ASSERT(m_type == TYPE_WRITE);
798 
799 	// write (again)
800 
801 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
802 	{
803 		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+2) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
804 
805 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
806 			buf << "\tatomicExchange(sb_store1.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
807 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
808 			buf << "\tsb_store1.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
809 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
810 			buf << "\timageAtomicExchange(u_image1, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
811 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
812 			buf << "\timageStore(u_image1, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
813 		else
814 			DE_ASSERT(DE_FALSE);
815 	}
816 
817 	// barrier
818 
819 	buf << genBarrierSource();
820 
821 	// read
822 
823 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
824 	{
825 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
826 			buf << "\tallOk = allOk && (atomicExchange(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 123) == groupNdx);\n";
827 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
828 			buf << "\tallOk = allOk && (sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] == groupNdx);\n";
829 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
830 			buf << "\tallOk = allOk && (imageAtomicExchange(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 123) == groupNdx);\n";
831 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
832 			buf << "\tallOk = allOk && (imageLoad(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << ")).x == groupNdx);\n";
833 		else
834 			DE_ASSERT(DE_FALSE);
835 	}
836 
837 	// return result
838 
839 	buf << "\n"
840 		<< "	sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
841 		<< "}\n";
842 
843 	return buf.str();
844 }
845 
namespace op
{

// Plain-data descriptors for the individual steps of an inter-call test.
//
// Each descriptor is stored by value in the union inside
// InterCallOperations::Command, so these types must remain aggregates without
// user-declared constructors. The static Generate() helpers are used in place
// of constructors.

// Write step: storage #targetHandle is written by a program generated from seed.
struct WriteData
{
	int targetHandle;	// friendly name of the storage to write
	int seed;			// passed to the write program generator

	static WriteData Generate(int targetHandle, int seed)
	{
		const WriteData desc = { targetHandle, seed };
		return desc;
	}
};

// Read step: storage #targetHandle is verified by a program generated from seed.
struct ReadData
{
	int targetHandle;	// friendly name of the storage to verify
	int seed;			// passed to the read program generator

	static ReadData Generate(int targetHandle, int seed)
	{
		const ReadData desc = { targetHandle, seed };
		return desc;
	}
};

// Barrier step: carries no data, the test case issues a memory barrier for it.
struct Barrier
{
};

// Interleaved write step: the dispatch covers half of the grid width and the
// test log describes the write as touching every even (evenOdd == true) or
// odd (evenOdd == false) element/column.
struct WriteDataInterleaved
{
	int		targetHandle;	// friendly name of the storage to write
	int		seed;			// passed to the interleaved write program generator
	bool	evenOdd;		// selects the even (true) or odd (false) half

	static WriteDataInterleaved Generate(int targetHandle, int seed, bool evenOdd)
	{
		const WriteDataInterleaved desc = { targetHandle, seed, evenOdd };
		return desc;
	}
};

// Interleaved read step: verifies one storage with a program generated from two seeds.
struct ReadDataInterleaved
{
	int targetHandle;	// friendly name of the storage to verify
	int seed0;			// first seed passed to the program generator
	int seed1;			// second seed passed to the program generator

	static ReadDataInterleaved Generate(int targetHandle, int seed0, int seed1)
	{
		const ReadDataInterleaved desc = { targetHandle, seed0, seed1 };
		return desc;
	}
};

// Multi-read step: verifies two storages with a single program.
struct ReadMultipleData
{
	int targetHandle0;	// friendly name of the first storage
	int seed0;			// seed associated with the first storage
	int targetHandle1;	// friendly name of the second storage
	int seed1;			// seed associated with the second storage

	static ReadMultipleData Generate(int targetHandle0, int seed0, int targetHandle1, int seed1)
	{
		const ReadMultipleData desc = { targetHandle0, seed0, targetHandle1, seed1 };
		return desc;
	}
};

// Zero-read step: verifies storage #targetHandle with the seedless "read zero" program.
struct ReadZeroData
{
	int targetHandle;	// friendly name of the storage to verify

	static ReadZeroData Generate(int targetHandle)
	{
		const ReadZeroData desc = { targetHandle };
		return desc;
	}
};

} // namespace op
956 
957 class InterCallTestCase;
958 
959 class InterCallOperations
960 {
961 public:
962 	InterCallOperations& operator<< (const op::WriteData&);
963 	InterCallOperations& operator<< (const op::ReadData&);
964 	InterCallOperations& operator<< (const op::Barrier&);
965 	InterCallOperations& operator<< (const op::ReadMultipleData&);
966 	InterCallOperations& operator<< (const op::WriteDataInterleaved&);
967 	InterCallOperations& operator<< (const op::ReadDataInterleaved&);
968 	InterCallOperations& operator<< (const op::ReadZeroData&);
969 
970 private:
971 	struct Command
972 	{
973 		enum CommandType
974 		{
975 			TYPE_WRITE = 0,
976 			TYPE_READ,
977 			TYPE_BARRIER,
978 			TYPE_READ_MULTIPLE,
979 			TYPE_WRITE_INTERLEAVE,
980 			TYPE_READ_INTERLEAVE,
981 			TYPE_READ_ZERO,
982 
983 			TYPE_LAST
984 		};
985 
986 		CommandType type;
987 
988 		union CommandUnion
989 		{
990 			op::WriteData				write;
991 			op::ReadData				read;
992 			op::Barrier					barrier;
993 			op::ReadMultipleData		readMulti;
994 			op::WriteDataInterleaved	writeInterleave;
995 			op::ReadDataInterleaved		readInterleave;
996 			op::ReadZeroData			readZero;
997 		} u_cmd;
998 	};
999 
1000 	friend class InterCallTestCase;
1001 
1002 	std::vector<Command> m_cmds;
1003 };
1004 
operator <<(const op::WriteData & cmd)1005 InterCallOperations& InterCallOperations::operator<< (const op::WriteData& cmd)
1006 {
1007 	m_cmds.push_back(Command());
1008 	m_cmds.back().type = Command::TYPE_WRITE;
1009 	m_cmds.back().u_cmd.write = cmd;
1010 
1011 	return *this;
1012 }
1013 
operator <<(const op::ReadData & cmd)1014 InterCallOperations& InterCallOperations::operator<< (const op::ReadData& cmd)
1015 {
1016 	m_cmds.push_back(Command());
1017 	m_cmds.back().type = Command::TYPE_READ;
1018 	m_cmds.back().u_cmd.read = cmd;
1019 
1020 	return *this;
1021 }
1022 
operator <<(const op::Barrier & cmd)1023 InterCallOperations& InterCallOperations::operator<< (const op::Barrier& cmd)
1024 {
1025 	m_cmds.push_back(Command());
1026 	m_cmds.back().type = Command::TYPE_BARRIER;
1027 	m_cmds.back().u_cmd.barrier = cmd;
1028 
1029 	return *this;
1030 }
1031 
operator <<(const op::ReadMultipleData & cmd)1032 InterCallOperations& InterCallOperations::operator<< (const op::ReadMultipleData& cmd)
1033 {
1034 	m_cmds.push_back(Command());
1035 	m_cmds.back().type = Command::TYPE_READ_MULTIPLE;
1036 	m_cmds.back().u_cmd.readMulti = cmd;
1037 
1038 	return *this;
1039 }
1040 
operator <<(const op::WriteDataInterleaved & cmd)1041 InterCallOperations& InterCallOperations::operator<< (const op::WriteDataInterleaved& cmd)
1042 {
1043 	m_cmds.push_back(Command());
1044 	m_cmds.back().type = Command::TYPE_WRITE_INTERLEAVE;
1045 	m_cmds.back().u_cmd.writeInterleave = cmd;
1046 
1047 	return *this;
1048 }
1049 
operator <<(const op::ReadDataInterleaved & cmd)1050 InterCallOperations& InterCallOperations::operator<< (const op::ReadDataInterleaved& cmd)
1051 {
1052 	m_cmds.push_back(Command());
1053 	m_cmds.back().type = Command::TYPE_READ_INTERLEAVE;
1054 	m_cmds.back().u_cmd.readInterleave = cmd;
1055 
1056 	return *this;
1057 }
1058 
operator <<(const op::ReadZeroData & cmd)1059 InterCallOperations& InterCallOperations::operator<< (const op::ReadZeroData& cmd)
1060 {
1061 	m_cmds.push_back(Command());
1062 	m_cmds.back().type = Command::TYPE_READ_ZERO;
1063 	m_cmds.back().u_cmd.readZero = cmd;
1064 
1065 	return *this;
1066 }
1067 
1068 class InterCallTestCase : public TestCase
1069 {
1070 public:
1071 	enum StorageType
1072 	{
1073 		STORAGE_BUFFER = 0,
1074 		STORAGE_IMAGE,
1075 
1076 		STORAGE_LAST
1077 	};
1078 	enum Flags
1079 	{
1080 		FLAG_USE_ATOMIC	= 1,
1081 		FLAG_USE_INT	= 2,
1082 	};
1083 													InterCallTestCase			(Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops);
1084 													~InterCallTestCase			(void);
1085 
1086 private:
1087 	void											init						(void);
1088 	void											deinit						(void);
1089 	IterateResult									iterate						(void);
1090 	bool											verifyResults				(void);
1091 
1092 	void											runCommand					(const op::WriteData& cmd, int stepNdx, int& programFriendlyName);
1093 	void											runCommand					(const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1094 	void											runCommand					(const op::Barrier&);
1095 	void											runCommand					(const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1096 	void											runCommand					(const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName);
1097 	void											runCommand					(const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1098 	void											runCommand					(const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1099 	void											runSingleRead				(int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1100 
1101 	glw::GLuint										genStorage					(int friendlyName);
1102 	glw::GLuint										genResultStorage			(void);
1103 	glu::ShaderProgram*								genWriteProgram				(int seed);
1104 	glu::ShaderProgram*								genReadProgram				(int seed);
1105 	glu::ShaderProgram*								genReadMultipleProgram		(int seed0, int seed1);
1106 	glu::ShaderProgram*								genWriteInterleavedProgram	(int seed, bool evenOdd);
1107 	glu::ShaderProgram*								genReadInterleavedProgram	(int seed0, int seed1);
1108 	glu::ShaderProgram*								genReadZeroProgram			(void);
1109 
1110 	const StorageType								m_storage;
1111 	const int										m_invocationGridSize;	// !< width and height of the two dimensional work dispatch
1112 	const int										m_perInvocationSize;	// !< number of elements accessed in single invocation
1113 	const std::vector<InterCallOperations::Command>	m_cmds;
1114 	const bool										m_useAtomic;
1115 	const bool										m_formatInteger;
1116 
1117 	std::vector<glu::ShaderProgram*>				m_operationPrograms;
1118 	std::vector<glw::GLuint>						m_operationResultStorages;
1119 	std::map<int, glw::GLuint>						m_storageIDs;
1120 };
1121 
InterCallTestCase(Context & context,const char * name,const char * desc,StorageType storage,int flags,const InterCallOperations & ops)1122 InterCallTestCase::InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops)
1123 	: TestCase					(context, name, desc)
1124 	, m_storage					(storage)
1125 	, m_invocationGridSize		(512)
1126 	, m_perInvocationSize		(2)
1127 	, m_cmds					(ops.m_cmds)
1128 	, m_useAtomic				((flags & FLAG_USE_ATOMIC) != 0)
1129 	, m_formatInteger			((flags & FLAG_USE_INT) != 0)
1130 {
1131 }
1132 
~InterCallTestCase(void)1133 InterCallTestCase::~InterCallTestCase (void)
1134 {
1135 	deinit();
1136 }
1137 
init(void)1138 void InterCallTestCase::init (void)
1139 {
1140 	int programFriendlyName = 0;
1141 
1142 	// requirements
1143 
1144 	if (m_useAtomic && m_storage == STORAGE_IMAGE && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
1145 		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
1146 
1147 	// generate resources and validate command list
1148 
1149 	m_operationPrograms.resize(m_cmds.size(), DE_NULL);
1150 	m_operationResultStorages.resize(m_cmds.size(), 0);
1151 
1152 	for (int step = 0; step < (int)m_cmds.size(); ++step)
1153 	{
1154 		switch (m_cmds[step].type)
1155 		{
1156 			case InterCallOperations::Command::TYPE_WRITE:
1157 			{
1158 				const op::WriteData& cmd = m_cmds[step].u_cmd.write;
1159 
1160 				// new storage handle?
1161 				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1162 					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1163 
1164 				// program
1165 				{
1166 					glu::ShaderProgram* program = genWriteProgram(cmd.seed);
1167 
1168 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1169 					m_testCtx.getLog() << *program;
1170 
1171 					if (!program->isOk())
1172 						throw tcu::TestError("could not build program");
1173 
1174 					m_operationPrograms[step] = program;
1175 				}
1176 				break;
1177 			}
1178 
1179 			case InterCallOperations::Command::TYPE_READ:
1180 			{
1181 				const op::ReadData& cmd = m_cmds[step].u_cmd.read;
1182 				DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1183 
1184 				// program and result storage
1185 				{
1186 					glu::ShaderProgram* program = genReadProgram(cmd.seed);
1187 
1188 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1189 					m_testCtx.getLog() << *program;
1190 
1191 					if (!program->isOk())
1192 						throw tcu::TestError("could not build program");
1193 
1194 					m_operationPrograms[step] = program;
1195 					m_operationResultStorages[step] = genResultStorage();
1196 				}
1197 				break;
1198 			}
1199 
1200 			case InterCallOperations::Command::TYPE_BARRIER:
1201 			{
1202 				break;
1203 			}
1204 
1205 			case InterCallOperations::Command::TYPE_READ_MULTIPLE:
1206 			{
1207 				const op::ReadMultipleData& cmd = m_cmds[step].u_cmd.readMulti;
1208 				DE_ASSERT(m_storageIDs.find(cmd.targetHandle0) != m_storageIDs.end());
1209 				DE_ASSERT(m_storageIDs.find(cmd.targetHandle1) != m_storageIDs.end());
1210 
1211 				// program
1212 				{
1213 					glu::ShaderProgram* program = genReadMultipleProgram(cmd.seed0, cmd.seed1);
1214 
1215 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1216 					m_testCtx.getLog() << *program;
1217 
1218 					if (!program->isOk())
1219 						throw tcu::TestError("could not build program");
1220 
1221 					m_operationPrograms[step] = program;
1222 					m_operationResultStorages[step] = genResultStorage();
1223 				}
1224 				break;
1225 			}
1226 
1227 			case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:
1228 			{
1229 				const op::WriteDataInterleaved& cmd = m_cmds[step].u_cmd.writeInterleave;
1230 
1231 				// new storage handle?
1232 				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1233 					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1234 
1235 				// program
1236 				{
1237 					glu::ShaderProgram* program = genWriteInterleavedProgram(cmd.seed, cmd.evenOdd);
1238 
1239 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1240 					m_testCtx.getLog() << *program;
1241 
1242 					if (!program->isOk())
1243 						throw tcu::TestError("could not build program");
1244 
1245 					m_operationPrograms[step] = program;
1246 				}
1247 				break;
1248 			}
1249 
1250 			case InterCallOperations::Command::TYPE_READ_INTERLEAVE:
1251 			{
1252 				const op::ReadDataInterleaved& cmd = m_cmds[step].u_cmd.readInterleave;
1253 				DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1254 
1255 				// program
1256 				{
1257 					glu::ShaderProgram* program = genReadInterleavedProgram(cmd.seed0, cmd.seed1);
1258 
1259 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1260 					m_testCtx.getLog() << *program;
1261 
1262 					if (!program->isOk())
1263 						throw tcu::TestError("could not build program");
1264 
1265 					m_operationPrograms[step] = program;
1266 					m_operationResultStorages[step] = genResultStorage();
1267 				}
1268 				break;
1269 			}
1270 
1271 			case InterCallOperations::Command::TYPE_READ_ZERO:
1272 			{
1273 				const op::ReadZeroData& cmd = m_cmds[step].u_cmd.readZero;
1274 
1275 				// new storage handle?
1276 				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1277 					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1278 
1279 				// program
1280 				{
1281 					glu::ShaderProgram* program = genReadZeroProgram();
1282 
1283 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1284 					m_testCtx.getLog() << *program;
1285 
1286 					if (!program->isOk())
1287 						throw tcu::TestError("could not build program");
1288 
1289 					m_operationPrograms[step] = program;
1290 					m_operationResultStorages[step] = genResultStorage();
1291 				}
1292 				break;
1293 			}
1294 
1295 			default:
1296 				DE_ASSERT(DE_FALSE);
1297 		}
1298 	}
1299 }
1300 
deinit(void)1301 void InterCallTestCase::deinit (void)
1302 {
1303 	// programs
1304 	for (int ndx = 0; ndx < (int)m_operationPrograms.size(); ++ndx)
1305 		delete m_operationPrograms[ndx];
1306 	m_operationPrograms.clear();
1307 
1308 	// result storages
1309 	for (int ndx = 0; ndx < (int)m_operationResultStorages.size(); ++ndx)
1310 	{
1311 		if (m_operationResultStorages[ndx])
1312 			m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_operationResultStorages[ndx]);
1313 	}
1314 	m_operationResultStorages.clear();
1315 
1316 	// storage
1317 	for (std::map<int, glw::GLuint>::const_iterator it = m_storageIDs.begin(); it != m_storageIDs.end(); ++it)
1318 	{
1319 		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1320 
1321 		if (m_storage == STORAGE_BUFFER)
1322 			gl.deleteBuffers(1, &it->second);
1323 		else if (m_storage == STORAGE_IMAGE)
1324 			gl.deleteTextures(1, &it->second);
1325 		else
1326 			DE_ASSERT(DE_FALSE);
1327 	}
1328 	m_storageIDs.clear();
1329 }
1330 
iterate(void)1331 InterCallTestCase::IterateResult InterCallTestCase::iterate (void)
1332 {
1333 	int programFriendlyName			= 0;
1334 	int resultStorageFriendlyName	= 0;
1335 
1336 	m_testCtx.getLog() << tcu::TestLog::Message << "Running operations:" << tcu::TestLog::EndMessage;
1337 
1338 	// run steps
1339 
1340 	for (int step = 0; step < (int)m_cmds.size(); ++step)
1341 	{
1342 		switch (m_cmds[step].type)
1343 		{
1344 			case InterCallOperations::Command::TYPE_WRITE:				runCommand(m_cmds[step].u_cmd.write,			step,	programFriendlyName);								break;
1345 			case InterCallOperations::Command::TYPE_READ:				runCommand(m_cmds[step].u_cmd.read,				step,	programFriendlyName, resultStorageFriendlyName);	break;
1346 			case InterCallOperations::Command::TYPE_BARRIER:			runCommand(m_cmds[step].u_cmd.barrier);																		break;
1347 			case InterCallOperations::Command::TYPE_READ_MULTIPLE:		runCommand(m_cmds[step].u_cmd.readMulti,		step,	programFriendlyName, resultStorageFriendlyName);	break;
1348 			case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:	runCommand(m_cmds[step].u_cmd.writeInterleave,	step,	programFriendlyName);								break;
1349 			case InterCallOperations::Command::TYPE_READ_INTERLEAVE:	runCommand(m_cmds[step].u_cmd.readInterleave,	step,	programFriendlyName, resultStorageFriendlyName);	break;
1350 			case InterCallOperations::Command::TYPE_READ_ZERO:			runCommand(m_cmds[step].u_cmd.readZero,			step,	programFriendlyName, resultStorageFriendlyName);	break;
1351 			default:
1352 				DE_ASSERT(DE_FALSE);
1353 		}
1354 	}
1355 
1356 	// read results from result buffers
1357 	if (verifyResults())
1358 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1359 	else
1360 		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
1361 
1362 	return STOP;
1363 }
1364 
verifyResults(void)1365 bool InterCallTestCase::verifyResults (void)
1366 {
1367 	int		resultBufferFriendlyName	= 0;
1368 	bool	allResultsOk				= true;
1369 	bool	anyResult					= false;
1370 
1371 	m_testCtx.getLog() << tcu::TestLog::Message << "Reading verifier program results" << tcu::TestLog::EndMessage;
1372 
1373 	for (int step = 0; step < (int)m_cmds.size(); ++step)
1374 	{
1375 		const int	errorFloodThreshold	= 5;
1376 		int			numErrorsLogged		= 0;
1377 
1378 		if (m_operationResultStorages[step])
1379 		{
1380 			const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
1381 			const void*				mapped	= DE_NULL;
1382 			std::vector<deInt32>	results	(m_invocationGridSize * m_invocationGridSize);
1383 			bool					error	= false;
1384 
1385 			anyResult = true;
1386 
1387 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_operationResultStorages[step]);
1388 			mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), GL_MAP_READ_BIT);
1389 			GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
1390 
1391 			// copy to properly aligned array
1392 			deMemcpy(&results[0], mapped, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32));
1393 
1394 			if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
1395 				throw tcu::TestError("memory map store corrupted");
1396 
1397 			// check the results
1398 			for (int ndx = 0; ndx < (int)results.size(); ++ndx)
1399 			{
1400 				if (results[ndx] != 1)
1401 				{
1402 					error = true;
1403 
1404 					if (numErrorsLogged == 0)
1405 						m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
1406 					if (numErrorsLogged++ < errorFloodThreshold)
1407 						m_testCtx.getLog() << tcu::TestLog::Message << "	Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
1408 					else
1409 					{
1410 						// after N errors, no point continuing verification
1411 						m_testCtx.getLog() << tcu::TestLog::Message << "	-- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
1412 						break;
1413 					}
1414 				}
1415 			}
1416 
1417 			if (error)
1418 			{
1419 				allResultsOk = false;
1420 			}
1421 			else
1422 				m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " ok." << tcu::TestLog::EndMessage;
1423 		}
1424 	}
1425 
1426 	DE_ASSERT(anyResult);
1427 	DE_UNREF(anyResult);
1428 
1429 	return allResultsOk;
1430 }
1431 
runCommand(const op::WriteData & cmd,int stepNdx,int & programFriendlyName)1432 void InterCallTestCase::runCommand (const op::WriteData& cmd, int stepNdx, int& programFriendlyName)
1433 {
1434 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1435 
1436 	m_testCtx.getLog()
1437 		<< tcu::TestLog::Message
1438 		<< "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1439 		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1440 		<< tcu::TestLog::EndMessage;
1441 
1442 	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1443 
1444 	// set destination
1445 	if (m_storage == STORAGE_BUFFER)
1446 	{
1447 		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1448 
1449 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1450 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1451 	}
1452 	else if (m_storage == STORAGE_IMAGE)
1453 	{
1454 		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1455 
1456 		gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1457 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1458 	}
1459 	else
1460 		DE_ASSERT(DE_FALSE);
1461 
1462 	// calc
1463 	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1464 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1465 }
1466 
runCommand(const op::ReadData & cmd,int stepNdx,int & programFriendlyName,int & resultStorageFriendlyName)1467 void InterCallTestCase::runCommand (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1468 {
1469 	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1470 }
1471 
runCommand(const op::Barrier & cmd)1472 void InterCallTestCase::runCommand (const op::Barrier& cmd)
1473 {
1474 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1475 
1476 	DE_UNREF(cmd);
1477 
1478 	if (m_storage == STORAGE_BUFFER)
1479 	{
1480 		m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_STORAGE_BARRIER_BIT" << tcu::TestLog::EndMessage;
1481 		gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
1482 	}
1483 	else if (m_storage == STORAGE_IMAGE)
1484 	{
1485 		m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_IMAGE_ACCESS_BARRIER_BIT" << tcu::TestLog::EndMessage;
1486 		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1487 	}
1488 	else
1489 		DE_ASSERT(DE_FALSE);
1490 }
1491 
runCommand(const op::ReadMultipleData & cmd,int stepNdx,int & programFriendlyName,int & resultStorageFriendlyName)1492 void InterCallTestCase::runCommand (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1493 {
1494 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1495 
1496 	m_testCtx.getLog()
1497 		<< tcu::TestLog::Message
1498 		<< "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffers") : ("images")) << " #" << cmd.targetHandle0 << " and #" << cmd.targetHandle1 << ".\n"
1499 		<< "	Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1500 		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1501 		<< tcu::TestLog::EndMessage;
1502 
1503 	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1504 
1505 	// set sources
1506 	if (m_storage == STORAGE_BUFFER)
1507 	{
1508 		DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1509 		DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1510 
1511 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[cmd.targetHandle0]);
1512 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageIDs[cmd.targetHandle1]);
1513 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffers");
1514 	}
1515 	else if (m_storage == STORAGE_IMAGE)
1516 	{
1517 		DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1518 		DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1519 
1520 		gl.bindImageTexture(1, m_storageIDs[cmd.targetHandle0], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1521 		gl.bindImageTexture(2, m_storageIDs[cmd.targetHandle1], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1522 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source images");
1523 	}
1524 	else
1525 		DE_ASSERT(DE_FALSE);
1526 
1527 	// set destination
1528 	DE_ASSERT(m_operationResultStorages[stepNdx]);
1529 	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1530 	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1531 
1532 	// calc
1533 	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1534 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read multi");
1535 }
1536 
runCommand(const op::WriteDataInterleaved & cmd,int stepNdx,int & programFriendlyName)1537 void InterCallTestCase::runCommand (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName)
1538 {
1539 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1540 
1541 	m_testCtx.getLog()
1542 		<< tcu::TestLog::Message
1543 		<< "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1544 		<< "	Writing to every " << ((cmd.evenOdd) ? ("even") : ("odd")) << " " << ((m_storage == STORAGE_BUFFER) ? ("element") : ("column")) << ".\n"
1545 		<< "	Dispatch size: " << m_invocationGridSize / 2 << "x" << m_invocationGridSize << "."
1546 		<< tcu::TestLog::EndMessage;
1547 
1548 	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1549 
1550 	// set destination
1551 	if (m_storage == STORAGE_BUFFER)
1552 	{
1553 		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1554 
1555 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1556 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1557 	}
1558 	else if (m_storage == STORAGE_IMAGE)
1559 	{
1560 		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1561 
1562 		gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1563 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1564 	}
1565 	else
1566 		DE_ASSERT(DE_FALSE);
1567 
1568 	// calc
1569 	gl.dispatchCompute(m_invocationGridSize / 2, m_invocationGridSize, 1);
1570 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1571 }
1572 
runCommand(const op::ReadDataInterleaved & cmd,int stepNdx,int & programFriendlyName,int & resultStorageFriendlyName)1573 void InterCallTestCase::runCommand (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1574 {
1575 	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1576 }
1577 
runCommand(const op::ReadZeroData & cmd,int stepNdx,int & programFriendlyName,int & resultStorageFriendlyName)1578 void InterCallTestCase::runCommand (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1579 {
1580 	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1581 }
1582 
runSingleRead(int targetHandle,int stepNdx,int & programFriendlyName,int & resultStorageFriendlyName)1583 void InterCallTestCase::runSingleRead (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1584 {
1585 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1586 
1587 	m_testCtx.getLog()
1588 		<< tcu::TestLog::Message
1589 		<< "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << targetHandle << ".\n"
1590 		<< "	Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1591 		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1592 		<< tcu::TestLog::EndMessage;
1593 
1594 	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1595 
1596 	// set source
1597 	if (m_storage == STORAGE_BUFFER)
1598 	{
1599 		DE_ASSERT(m_storageIDs[targetHandle]);
1600 
1601 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[targetHandle]);
1602 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffer");
1603 	}
1604 	else if (m_storage == STORAGE_IMAGE)
1605 	{
1606 		DE_ASSERT(m_storageIDs[targetHandle]);
1607 
1608 		gl.bindImageTexture(1, m_storageIDs[targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1609 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source image");
1610 	}
1611 	else
1612 		DE_ASSERT(DE_FALSE);
1613 
1614 	// set destination
1615 	DE_ASSERT(m_operationResultStorages[stepNdx]);
1616 	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1617 	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1618 
1619 	// calc
1620 	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1621 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read");
1622 }
1623 
genStorage(int friendlyName)1624 glw::GLuint InterCallTestCase::genStorage (int friendlyName)
1625 {
1626 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1627 
1628 	if (m_storage == STORAGE_BUFFER)
1629 	{
1630 		const int		numElements		= m_invocationGridSize * m_invocationGridSize * m_perInvocationSize;
1631 		const int		bufferSize		= numElements * ((m_formatInteger) ? (sizeof(deInt32)) : (sizeof(glw::GLfloat)));
1632 		glw::GLuint		retVal			= 0;
1633 
1634 		m_testCtx.getLog() << tcu::TestLog::Message << "Creating buffer #" << friendlyName << ", size " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
1635 
1636 		gl.genBuffers(1, &retVal);
1637 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1638 
1639 		if (m_formatInteger)
1640 		{
1641 			const std::vector<deUint32> zeroBuffer(numElements, 0);
1642 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1643 		}
1644 		else
1645 		{
1646 			const std::vector<float> zeroBuffer(numElements, 0.0f);
1647 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1648 		}
1649 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1650 
1651 		return retVal;
1652 	}
1653 	else if (m_storage == STORAGE_IMAGE)
1654 	{
1655 		const int	imageWidth	= m_invocationGridSize;
1656 		const int	imageHeight	= m_invocationGridSize * m_perInvocationSize;
1657 		glw::GLuint	retVal		= 0;
1658 
1659 		m_testCtx.getLog()
1660 			<< tcu::TestLog::Message
1661 			<< "Creating image #" << friendlyName << ", size " << imageWidth << "x" << imageHeight
1662 			<< ", internalformat = " << ((m_formatInteger) ? ("r32i") : ("r32f"))
1663 			<< ", size = " << (imageWidth*imageHeight*sizeof(deUint32)) << " bytes."
1664 			<< tcu::TestLog::EndMessage;
1665 
1666 		gl.genTextures(1, &retVal);
1667 		gl.bindTexture(GL_TEXTURE_2D, retVal);
1668 
1669 		if (m_formatInteger)
1670 			gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, imageWidth, imageHeight);
1671 		else
1672 			gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32F, imageWidth, imageHeight);
1673 
1674 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1675 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1676 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen image");
1677 
1678 		m_testCtx.getLog()
1679 			<< tcu::TestLog::Message
1680 			<< "Filling image with 0"
1681 			<< tcu::TestLog::EndMessage;
1682 
1683 		if (m_formatInteger)
1684 		{
1685 			const std::vector<deInt32> zeroBuffer(imageWidth * imageHeight, 0);
1686 			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
1687 		}
1688 		else
1689 		{
1690 			const std::vector<float> zeroBuffer(imageWidth * imageHeight, 0.0f);
1691 			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED, GL_FLOAT, &zeroBuffer[0]);
1692 		}
1693 
1694 		GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
1695 
1696 		return retVal;
1697 	}
1698 	else
1699 	{
1700 		DE_ASSERT(DE_FALSE);
1701 		return 0;
1702 	}
1703 }
1704 
genResultStorage(void)1705 glw::GLuint InterCallTestCase::genResultStorage (void)
1706 {
1707 	const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
1708 	glw::GLuint				retVal	= 0;
1709 
1710 	gl.genBuffers(1, &retVal);
1711 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1712 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), DE_NULL, GL_STATIC_DRAW);
1713 	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1714 
1715 	return retVal;
1716 }
1717 
genWriteProgram(int seed)1718 glu::ShaderProgram* InterCallTestCase::genWriteProgram (int seed)
1719 {
1720 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1721 	std::ostringstream	buf;
1722 
1723 	buf << "#version 310 es\n"
1724 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
1725 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1726 
1727 	if (m_storage == STORAGE_BUFFER)
1728 		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1729 			<< "{\n"
1730 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1731 			<< "} sb_out;\n";
1732 	else if (m_storage == STORAGE_IMAGE)
1733 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1734 	else
1735 		DE_ASSERT(DE_FALSE);
1736 
1737 	buf << "\n"
1738 		<< "void main (void)\n"
1739 		<< "{\n"
1740 		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1741 		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1742 		<< "\n";
1743 
1744 	// Write to buffer/image m_perInvocationSize elements
1745 	if (m_storage == STORAGE_BUFFER)
1746 	{
1747 		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1748 		{
1749 			if (m_useAtomic)
1750 				buf << "	atomicExchange(";
1751 			else
1752 				buf << "	";
1753 
1754 			buf << "sb_out.values[(groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]";
1755 
1756 			if (m_useAtomic)
1757 				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1758 			else
1759 				buf << " = " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1760 		}
1761 	}
1762 	else if (m_storage == STORAGE_IMAGE)
1763 	{
1764 		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1765 		{
1766 			if (m_useAtomic)
1767 				buf << "	imageAtomicExchange";
1768 			else
1769 				buf << "	imageStore";
1770 
1771 			buf << "(u_imageOut, ivec2((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1772 
1773 			if (m_useAtomic)
1774 				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1775 			else
1776 				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
1777 		}
1778 	}
1779 	else
1780 		DE_ASSERT(DE_FALSE);
1781 
1782 	buf << "}\n";
1783 
1784 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
1785 }
1786 
genReadProgram(int seed)1787 glu::ShaderProgram* InterCallTestCase::genReadProgram (int seed)
1788 {
1789 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1790 	std::ostringstream	buf;
1791 
1792 	buf << "#version 310 es\n"
1793 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
1794 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1795 
1796 	if (m_storage == STORAGE_BUFFER)
1797 		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1798 			<< "{\n"
1799 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1800 			<< "} sb_in;\n";
1801 	else if (m_storage == STORAGE_IMAGE)
1802 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
1803 	else
1804 		DE_ASSERT(DE_FALSE);
1805 
1806 	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1807 		<< "{\n"
1808 		<< "	highp int resultOk[];\n"
1809 		<< "} sb_result;\n"
1810 		<< "\n"
1811 		<< "void main (void)\n"
1812 		<< "{\n"
1813 		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1814 		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1815 		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1816 		<< "	bool allOk = true;\n"
1817 		<< "\n";
1818 
1819 	// Verify data
1820 
1821 	if (m_storage == STORAGE_BUFFER)
1822 	{
1823 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1824 		{
1825 			if (!m_useAtomic)
1826 				buf << "	allOk = allOk && (sb_in.values[(groupNdx + "
1827 					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "] == "
1828 					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1829 			else
1830 				buf << "	allOk = allOk && (atomicExchange(sb_in.values[(groupNdx + "
1831 					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "], zero) == "
1832 					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1833 		}
1834 	}
1835 	else if (m_storage == STORAGE_IMAGE)
1836 	{
1837 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1838 		{
1839 			if (!m_useAtomic)
1840 				buf	<< "	allOk = allOk && (imageLoad(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1841 					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)).x == "
1842 					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1843 			else
1844 				buf << "	allOk = allOk && (imageAtomicExchange(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1845 					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u), zero) == "
1846 					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1847 		}
1848 	}
1849 	else
1850 		DE_ASSERT(DE_FALSE);
1851 
1852 	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1853 		<< "}\n";
1854 
1855 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
1856 }
1857 
genReadMultipleProgram(int seed0,int seed1)1858 glu::ShaderProgram* InterCallTestCase::genReadMultipleProgram (int seed0, int seed1)
1859 {
1860 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1861 	std::ostringstream	buf;
1862 
1863 	buf << "#version 310 es\n"
1864 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
1865 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1866 
1867 	if (m_storage == STORAGE_BUFFER)
1868 		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer0\n"
1869 			<< "{\n"
1870 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1871 			<< "} sb_in0;\n"
1872 			<< "layout(binding=2, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer1\n"
1873 			<< "{\n"
1874 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1875 			<< "} sb_in1;\n";
1876 	else if (m_storage == STORAGE_IMAGE)
1877 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn0;\n"
1878 			<< "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=2) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn1;\n";
1879 	else
1880 		DE_ASSERT(DE_FALSE);
1881 
1882 	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1883 		<< "{\n"
1884 		<< "	highp int resultOk[];\n"
1885 		<< "} sb_result;\n"
1886 		<< "\n"
1887 		<< "void main (void)\n"
1888 		<< "{\n"
1889 		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1890 		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1891 		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1892 		<< "	bool allOk = true;\n"
1893 		<< "\n";
1894 
1895 	// Verify data
1896 
1897 	if (m_storage == STORAGE_BUFFER)
1898 	{
1899 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1900 			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in0.values[(groupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1901 				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in1.values[(groupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1902 	}
1903 	else if (m_storage == STORAGE_IMAGE)
1904 	{
1905 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1906 			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn0, ivec2((gl_GlobalInvocationID.x + " << (seed0 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1907 				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn1, ivec2((gl_GlobalInvocationID.x + " << (seed1 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1908 	}
1909 	else
1910 		DE_ASSERT(DE_FALSE);
1911 
1912 	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1913 		<< "}\n";
1914 
1915 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
1916 }
1917 
genWriteInterleavedProgram(int seed,bool evenOdd)1918 glu::ShaderProgram* InterCallTestCase::genWriteInterleavedProgram (int seed, bool evenOdd)
1919 {
1920 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1921 	std::ostringstream	buf;
1922 
1923 	buf << "#version 310 es\n"
1924 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
1925 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1926 
1927 	if (m_storage == STORAGE_BUFFER)
1928 		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1929 			<< "{\n"
1930 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1931 			<< "} sb_out;\n";
1932 	else if (m_storage == STORAGE_IMAGE)
1933 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1934 	else
1935 		DE_ASSERT(DE_FALSE);
1936 
1937 	buf << "\n"
1938 		<< "void main (void)\n"
1939 		<< "{\n"
1940 		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1941 		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1942 		<< "\n";
1943 
1944 	// Write to buffer/image m_perInvocationSize elements
1945 	if (m_storage == STORAGE_BUFFER)
1946 	{
1947 		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1948 		{
1949 			if (m_useAtomic)
1950 				buf << "	atomicExchange(";
1951 			else
1952 				buf << "	";
1953 
1954 			buf << "sb_out.values[((groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize / 2 * m_perInvocationSize  << ") * 2 + " << ((evenOdd) ? (0) : (1)) << "]";
1955 
1956 			if (m_useAtomic)
1957 				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1958 			else
1959 				buf << "= " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1960 		}
1961 	}
1962 	else if (m_storage == STORAGE_IMAGE)
1963 	{
1964 		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1965 		{
1966 			if (m_useAtomic)
1967 				buf << "	imageAtomicExchange";
1968 			else
1969 				buf << "	imageStore";
1970 
1971 			buf << "(u_imageOut, ivec2(((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + " << ((evenOdd) ? (0) : (1)) << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1972 
1973 			if (m_useAtomic)
1974 				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1975 			else
1976 				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
1977 		}
1978 	}
1979 	else
1980 		DE_ASSERT(DE_FALSE);
1981 
1982 	buf << "}\n";
1983 
1984 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
1985 }
1986 
genReadInterleavedProgram(int seed0,int seed1)1987 glu::ShaderProgram* InterCallTestCase::genReadInterleavedProgram (int seed0, int seed1)
1988 {
1989 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1990 	std::ostringstream	buf;
1991 
1992 	buf << "#version 310 es\n"
1993 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
1994 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1995 
1996 	if (m_storage == STORAGE_BUFFER)
1997 		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1998 			<< "{\n"
1999 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2000 			<< "} sb_in;\n";
2001 	else if (m_storage == STORAGE_IMAGE)
2002 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2003 	else
2004 		DE_ASSERT(DE_FALSE);
2005 
2006 	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2007 		<< "{\n"
2008 		<< "	highp int resultOk[];\n"
2009 		<< "} sb_result;\n"
2010 		<< "\n"
2011 		<< "void main (void)\n"
2012 		<< "{\n"
2013 		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2014 		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2015 		<< "	int interleavedGroupNdx = int((size.x >> 1U) * size.y * gl_GlobalInvocationID.z + (size.x >> 1U) * gl_GlobalInvocationID.y + (gl_GlobalInvocationID.x >> 1U));\n"
2016 		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
2017 		<< "	bool allOk = true;\n"
2018 		<< "\n";
2019 
2020 	// Verify data
2021 
2022 	if (m_storage == STORAGE_BUFFER)
2023 	{
2024 		buf << "	if (groupNdx % 2 == 0)\n"
2025 			<< "	{\n";
2026 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2027 			buf << "		allOk = allOk && ("
2028 				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 0]"
2029 				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2030 		buf << "	}\n"
2031 			<< "	else\n"
2032 			<< "	{\n";
2033 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2034 			buf << "		allOk = allOk && ("
2035 				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 1]"
2036 				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2037 		buf << "	}\n";
2038 	}
2039 	else if (m_storage == STORAGE_IMAGE)
2040 	{
2041 		buf << "	if (groupNdx % 2 == 0)\n"
2042 			<< "	{\n";
2043 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2044 			buf << "		allOk = allOk && ("
2045 				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2046 				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed0 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 0, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2047 				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2048 		buf << "	}\n"
2049 			<< "	else\n"
2050 			<< "	{\n";
2051 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2052 			buf << "		allOk = allOk && ("
2053 				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2054 				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed1 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 1, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2055 				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2056 		buf << "	}\n";
2057 	}
2058 	else
2059 		DE_ASSERT(DE_FALSE);
2060 
2061 	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2062 		<< "}\n";
2063 
2064 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
2065 }
2066 
genReadZeroProgram(void)2067 glu::ShaderProgram*	InterCallTestCase::genReadZeroProgram (void)
2068 {
2069 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
2070 	std::ostringstream	buf;
2071 
2072 	buf << "#version 310 es\n"
2073 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
2074 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
2075 
2076 	if (m_storage == STORAGE_BUFFER)
2077 		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
2078 			<< "{\n"
2079 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2080 			<< "} sb_in;\n";
2081 	else if (m_storage == STORAGE_IMAGE)
2082 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2083 	else
2084 		DE_ASSERT(DE_FALSE);
2085 
2086 	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2087 		<< "{\n"
2088 		<< "	highp int resultOk[];\n"
2089 		<< "} sb_result;\n"
2090 		<< "\n"
2091 		<< "void main (void)\n"
2092 		<< "{\n"
2093 		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2094 		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2095 		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " anything = " << ((m_formatInteger) ? ("5") : ("5.0")) << ";\n"
2096 		<< "	bool allOk = true;\n"
2097 		<< "\n";
2098 
2099 	// Verify data
2100 
2101 	if (m_storage == STORAGE_BUFFER)
2102 	{
2103 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2104 			buf << "	allOk = allOk && ("
2105 				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[groupNdx * " << m_perInvocationSize << " + " << readNdx << "]"
2106 				<< ((m_useAtomic) ? (", anything)") : ("")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2107 	}
2108 	else if (m_storage == STORAGE_IMAGE)
2109 	{
2110 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2111 			buf << "	allOk = allOk && ("
2112 			<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn, ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y + " << (readNdx*m_invocationGridSize) << "u)"
2113 			<< ((m_useAtomic) ? (", anything)") : (").x")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2114 	}
2115 	else
2116 		DE_ASSERT(DE_FALSE);
2117 
2118 	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2119 		<< "}\n";
2120 
2121 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
2122 }
2123 
// Test case that dispatches the same compute program m_numCalls times against
// one shared work buffer, each time with a different u_atomicDelta value, and
// then checks both the final buffer contents and the per-call intermediate
// results.
class SSBOConcurrentAtomicCase : public TestCase
{
public:

							SSBOConcurrentAtomicCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
							~SSBOConcurrentAtomicCase	(void);

	// Creates the buffers and builds the program.
	void					init						(void);
	// Deletes the buffers and the program.
	void					deinit						(void);
	// Runs all dispatches and verifies the results.
	IterateResult			iterate						(void);

private:
	// Returns the GLSL source of the compute shader.
	std::string				genComputeSource			(void) const;

	// Number of dispatches (and of intermediate result buffers).
	const int				m_numCalls;
	// Number of work groups per dispatch and number of deUint32 elements in each buffer.
	const int				m_workSize;
	// Compute program, owned by this object (created in init(), deleted in deinit()).
	glu::ShaderProgram*		m_program;
	// Shared work buffer, bound to SSBO binding 2 for every dispatch.
	deUint32				m_bufferID;
	// One buffer per call, bound to SSBO binding 1 for that call.
	std::vector<deUint32>	m_intermediateResultBuffers;
};
2144 
SSBOConcurrentAtomicCase(Context & context,const char * name,const char * description,int numCalls,int workSize)2145 SSBOConcurrentAtomicCase::SSBOConcurrentAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2146 	: TestCase		(context, name, description)
2147 	, m_numCalls	(numCalls)
2148 	, m_workSize	(workSize)
2149 	, m_program		(DE_NULL)
2150 	, m_bufferID	(DE_NULL)
2151 {
2152 }
2153 
// Releases whatever init() created by delegating to deinit(). deinit() resets
// the members it frees, so calling it again after an earlier deinit() finds
// nothing left to release.
SSBOConcurrentAtomicCase::~SSBOConcurrentAtomicCase (void)
{
	deinit();
}
2158 
init(void)2159 void SSBOConcurrentAtomicCase::init (void)
2160 {
2161 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
2162 	std::vector<deUint32>	zeroData			(m_workSize, 0);
2163 
2164 	// gen buffers
2165 
2166 	gl.genBuffers(1, &m_bufferID);
2167 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2168 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2169 
2170 	for (int ndx = 0; ndx < m_numCalls; ++ndx)
2171 	{
2172 		deUint32 buffer = 0;
2173 
2174 		gl.genBuffers(1, &buffer);
2175 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2176 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2177 
2178 		m_intermediateResultBuffers.push_back(buffer);
2179 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2180 	}
2181 
2182 	// gen program
2183 
2184 	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2185 	m_testCtx.getLog() << *m_program;
2186 	if (!m_program->isOk())
2187 		throw tcu::TestError("could not build program");
2188 }
2189 
deinit(void)2190 void SSBOConcurrentAtomicCase::deinit (void)
2191 {
2192 	if (m_bufferID)
2193 	{
2194 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
2195 		m_bufferID = 0;
2196 	}
2197 
2198 	for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2199 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2200 	m_intermediateResultBuffers.clear();
2201 
2202 	delete m_program;
2203 	m_program = DE_NULL;
2204 }
2205 
iterate(void)2206 TestCase::IterateResult SSBOConcurrentAtomicCase::iterate (void)
2207 {
2208 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2209 	const deUint32			sumValue		= (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2210 	std::vector<int>		deltas;
2211 
2212 	// generate unique deltas
2213 	generateShuffledRamp(m_numCalls, deltas);
2214 
2215 	// invoke program N times, each with a different delta
2216 	{
2217 		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2218 
2219 		m_testCtx.getLog()
2220 			<< tcu::TestLog::Message
2221 			<< "Running shader " << m_numCalls << " times.\n"
2222 			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
2223 			<< "Setting u_atomicDelta to a unique value for each call.\n"
2224 			<< tcu::TestLog::EndMessage;
2225 
2226 		if (deltaLocation == -1)
2227 			throw tcu::TestError("u_atomicDelta location was -1");
2228 
2229 		gl.useProgram(m_program->getProgram());
2230 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_bufferID);
2231 
2232 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2233 		{
2234 			m_testCtx.getLog()
2235 				<< tcu::TestLog::Message
2236 				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2237 				<< tcu::TestLog::EndMessage;
2238 
2239 			gl.uniform1ui(deltaLocation, deltas[callNdx]);
2240 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2241 			gl.dispatchCompute(m_workSize, 1, 1);
2242 		}
2243 
2244 		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2245 	}
2246 
2247 	// Verify result
2248 	{
2249 		std::vector<deUint32> result;
2250 
2251 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2252 
2253 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2254 		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, result);
2255 
2256 		for (int ndx = 0; ndx < m_workSize; ++ndx)
2257 		{
2258 			if (result[ndx] != sumValue)
2259 			{
2260 				m_testCtx.getLog()
2261 					<< tcu::TestLog::Message
2262 					<< "Work buffer error, at index " << ndx << " expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2263 					<< "Work buffer contains invalid values."
2264 					<< tcu::TestLog::EndMessage;
2265 
2266 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2267 				return STOP;
2268 			}
2269 		}
2270 
2271 		m_testCtx.getLog() << tcu::TestLog::Message << "Work buffer contents are valid." << tcu::TestLog::EndMessage;
2272 	}
2273 
2274 	// verify steps
2275 	{
2276 		std::vector<std::vector<deUint32> >	intermediateResults	(m_numCalls);
2277 		std::vector<deUint32>				valueChain			(m_numCalls);
2278 
2279 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2280 
2281 		// collect results
2282 
2283 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2284 		{
2285 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2286 			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, intermediateResults[callNdx]);
2287 		}
2288 
2289 		// verify values
2290 
2291 		for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2292 		{
2293 			int			invalidOperationNdx;
2294 			deUint32	errorDelta;
2295 			deUint32	errorExpected;
2296 
2297 			// collect result chain for each element
2298 			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2299 				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2300 
2301 			// check there exists a path from 0 to sumValue using each addition once
2302 			// decompose cumulative results to addition operations (all additions positive => this works)
2303 
2304 			std::sort(valueChain.begin(), valueChain.end());
2305 
2306 			// validate chain
2307 			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2308 			{
2309 				m_testCtx.getLog()
2310 					<< tcu::TestLog::Message
2311 					<< "Intermediate buffer error, at value index " << valueNdx << ", applied operation index " << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2312 					<< "Intermediate buffer contains invalid values. Values at index " << valueNdx << "\n"
2313 					<< tcu::TestLog::EndMessage;
2314 
2315 				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2316 					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2317 				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2318 
2319 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2320 				return STOP;
2321 			}
2322 		}
2323 
2324 		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2325 	}
2326 
2327 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2328 	return STOP;
2329 }
2330 
genComputeSource(void) const2331 std::string SSBOConcurrentAtomicCase::genComputeSource (void) const
2332 {
2333 	std::ostringstream buf;
2334 
2335 	buf	<< "#version 310 es\n"
2336 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2337 		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2338 		<< "{\n"
2339 		<< "	highp uint values[" << m_workSize << "];\n"
2340 		<< "} sb_ires;\n"
2341 		<< "\n"
2342 		<< "layout (binding = 2, std430) volatile buffer WorkBuffer\n"
2343 		<< "{\n"
2344 		<< "	highp uint values[" << m_workSize << "];\n"
2345 		<< "} sb_work;\n"
2346 		<< "uniform highp uint u_atomicDelta;\n"
2347 		<< "\n"
2348 		<< "void main ()\n"
2349 		<< "{\n"
2350 		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x;\n"
2351 		<< "	sb_ires.values[invocationIndex] = atomicAdd(sb_work.values[invocationIndex], u_atomicDelta);\n"
2352 		<< "}";
2353 
2354 	return buf.str();
2355 }
2356 
// Test case that dispatches an "even" and an "odd" compute program against one shared atomic
// counter. After all dispatches it checks the final counter value and that the values
// returned by the individual increments form the sequence 0 .. numCalls * workSize - 1.
class ConcurrentAtomicCounterCase : public TestCase
{
public:

							ConcurrentAtomicCounterCase		(Context& context, const char* name, const char* description, int numCalls, int workSize);
							~ConcurrentAtomicCounterCase	(void);

	void					init							(void);
	void					deinit							(void);
	IterateResult			iterate							(void);

private:
	// Generates the compute shader; evenOdd selects whether even (true) or odd (false) data indices increment the counter.
	std::string				genComputeSource				(bool evenOdd) const;

	const int				m_numCalls;					// number of even/odd dispatch pairs
	const int				m_workSize;					// work group count (x) of every dispatch
	glu::ShaderProgram*		m_evenProgram;				// owned, created in init()
	glu::ShaderProgram*		m_oddProgram;				// owned, created in init()
	deUint32				m_counterBuffer;			// GL buffer backing the atomic counter
	deUint32				m_intermediateResultBuffer;	// GL buffer receiving the value returned by each increment
};
2378 
ConcurrentAtomicCounterCase(Context & context,const char * name,const char * description,int numCalls,int workSize)2379 ConcurrentAtomicCounterCase::ConcurrentAtomicCounterCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2380 	: TestCase					(context, name, description)
2381 	, m_numCalls				(numCalls)
2382 	, m_workSize				(workSize)
2383 	, m_evenProgram				(DE_NULL)
2384 	, m_oddProgram				(DE_NULL)
2385 	, m_counterBuffer			(DE_NULL)
2386 	, m_intermediateResultBuffer(DE_NULL)
2387 {
2388 }
2389 
~ConcurrentAtomicCounterCase(void)2390 ConcurrentAtomicCounterCase::~ConcurrentAtomicCounterCase (void)
2391 {
2392 	deinit();
2393 }
2394 
init(void)2395 void ConcurrentAtomicCounterCase::init (void)
2396 {
2397 	const glw::Functions&		gl			= m_context.getRenderContext().getFunctions();
2398 	const std::vector<deUint32>	zeroData	(m_numCalls * m_workSize, 0);
2399 
2400 	// gen buffer
2401 
2402 	gl.genBuffers(1, &m_counterBuffer);
2403 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_counterBuffer);
2404 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32), &zeroData[0], GL_DYNAMIC_COPY);
2405 
2406 	gl.genBuffers(1, &m_intermediateResultBuffer);
2407 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2408 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_numCalls * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2409 
2410 	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2411 
2412 	// gen programs
2413 
2414 	{
2415 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "EvenProgram", "Even program");
2416 
2417 		m_evenProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(true)));
2418 		m_testCtx.getLog() << *m_evenProgram;
2419 		if (!m_evenProgram->isOk())
2420 			throw tcu::TestError("could not build program");
2421 	}
2422 	{
2423 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "OddProgram", "Odd program");
2424 
2425 		m_oddProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(false)));
2426 		m_testCtx.getLog() << *m_oddProgram;
2427 		if (!m_oddProgram->isOk())
2428 			throw tcu::TestError("could not build program");
2429 	}
2430 }
2431 
deinit(void)2432 void ConcurrentAtomicCounterCase::deinit (void)
2433 {
2434 	if (m_counterBuffer)
2435 	{
2436 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_counterBuffer);
2437 		m_counterBuffer = 0;
2438 	}
2439 	if (m_intermediateResultBuffer)
2440 	{
2441 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffer);
2442 		m_intermediateResultBuffer = 0;
2443 	}
2444 
2445 	delete m_evenProgram;
2446 	m_evenProgram = DE_NULL;
2447 
2448 	delete m_oddProgram;
2449 	m_oddProgram = DE_NULL;
2450 }
2451 
iterate(void)2452 TestCase::IterateResult ConcurrentAtomicCounterCase::iterate (void)
2453 {
2454 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2455 
2456 	// invoke program N times, each with a different delta
2457 	{
2458 		const int evenCallNdxLocation	= gl.getUniformLocation(m_evenProgram->getProgram(), "u_callNdx");
2459 		const int oddCallNdxLocation	= gl.getUniformLocation(m_oddProgram->getProgram(), "u_callNdx");
2460 
2461 		m_testCtx.getLog()
2462 			<< tcu::TestLog::Message
2463 			<< "Running shader pair (even & odd) " << m_numCalls << " times.\n"
2464 			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
2465 			<< tcu::TestLog::EndMessage;
2466 
2467 		if (evenCallNdxLocation == -1)
2468 			throw tcu::TestError("u_callNdx location was -1");
2469 		if (oddCallNdxLocation == -1)
2470 			throw tcu::TestError("u_callNdx location was -1");
2471 
2472 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffer);
2473 		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_counterBuffer);
2474 
2475 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2476 		{
2477 			gl.useProgram(m_evenProgram->getProgram());
2478 			gl.uniform1ui(evenCallNdxLocation, (deUint32)callNdx);
2479 			gl.dispatchCompute(m_workSize, 1, 1);
2480 
2481 			gl.useProgram(m_oddProgram->getProgram());
2482 			gl.uniform1ui(oddCallNdxLocation, (deUint32)callNdx);
2483 			gl.dispatchCompute(m_workSize, 1, 1);
2484 		}
2485 
2486 		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2487 	}
2488 
2489 	// Verify result
2490 	{
2491 		deUint32 result;
2492 
2493 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
2494 
2495 		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counterBuffer);
2496 		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
2497 
2498 		if ((int)result != m_numCalls*m_workSize)
2499 		{
2500 			m_testCtx.getLog()
2501 				<< tcu::TestLog::Message
2502 				<< "Counter buffer error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
2503 				<< tcu::TestLog::EndMessage;
2504 
2505 			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2506 			return STOP;
2507 		}
2508 
2509 		m_testCtx.getLog() << tcu::TestLog::Message << "Counter buffer is valid." << tcu::TestLog::EndMessage;
2510 	}
2511 
2512 	// verify steps
2513 	{
2514 		std::vector<deUint32> intermediateResults;
2515 
2516 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2517 
2518 		// collect results
2519 
2520 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2521 		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_numCalls * m_workSize, intermediateResults);
2522 
2523 		// verify values
2524 
2525 		std::sort(intermediateResults.begin(), intermediateResults.end());
2526 
2527 		for (int valueNdx = 0; valueNdx < m_workSize * m_numCalls; ++valueNdx)
2528 		{
2529 			if ((int)intermediateResults[valueNdx] != valueNdx)
2530 			{
2531 				m_testCtx.getLog()
2532 					<< tcu::TestLog::Message
2533 					<< "Intermediate buffer error, at value index " << valueNdx << ", expected " << valueNdx << ", got " << intermediateResults[valueNdx] << ".\n"
2534 					<< "Intermediate buffer contains invalid values. Intermediate results:\n"
2535 					<< tcu::TestLog::EndMessage;
2536 
2537 				for (int logCallNdx = 0; logCallNdx < m_workSize * m_numCalls; ++logCallNdx)
2538 					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx] << tcu::TestLog::EndMessage;
2539 
2540 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2541 				return STOP;
2542 			}
2543 		}
2544 
2545 		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2546 	}
2547 
2548 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2549 	return STOP;
2550 }
2551 
genComputeSource(bool evenOdd) const2552 std::string ConcurrentAtomicCounterCase::genComputeSource (bool evenOdd) const
2553 {
2554 	std::ostringstream buf;
2555 
2556 	buf	<< "#version 310 es\n"
2557 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2558 		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2559 		<< "{\n"
2560 		<< "	highp uint values[" << m_workSize * m_numCalls << "];\n"
2561 		<< "} sb_ires;\n"
2562 		<< "\n"
2563 		<< "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
2564 		<< "uniform highp uint u_callNdx;\n"
2565 		<< "\n"
2566 		<< "void main ()\n"
2567 		<< "{\n"
2568 		<< "	highp uint dataNdx = u_callNdx * " << m_workSize << "u + gl_GlobalInvocationID.x;\n"
2569 		<< "	if ((dataNdx % 2u) == " << ((evenOdd) ? (0) : (1)) << "u)\n"
2570 		<< "		sb_ires.values[dataNdx] = atomicCounterIncrement(u_counter);\n"
2571 		<< "}";
2572 
2573 	return buf.str();
2574 }
2575 
// Test case that applies imageAtomicAdd() with a different delta in each of numCalls dispatches
// to every texel of a workSize x workSize r32ui image, then verifies both the final image
// contents and the values returned by the individual atomic operations.
class ConcurrentImageAtomicCase : public TestCase
{
public:

							ConcurrentImageAtomicCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
							~ConcurrentImageAtomicCase	(void);

	void					init						(void);
	void					deinit						(void);
	IterateResult			iterate						(void);

private:
	// Copies the work image contents into result using the image read program.
	void					readWorkImage				(std::vector<deUint32>& result);

	std::string				genComputeSource			(void) const;	// shader performing the atomic adds
	std::string				genImageReadSource			(void) const;	// shader copying the image into an SSBO
	std::string				genImageClearSource			(void) const;	// shader writing zero to every texel

	const int				m_numCalls;						// number of atomic add dispatches
	const int				m_workSize;						// image width and height, also the dispatch size in x and y
	glu::ShaderProgram*		m_program;						// owned, created in init()
	glu::ShaderProgram*		m_imageReadProgram;				// owned, created in init()
	glu::ShaderProgram*		m_imageClearProgram;			// owned, created in init()
	deUint32				m_imageID;						// GL texture name of the work image
	std::vector<deUint32>	m_intermediateResultBuffers;	// one GL buffer per call, receives the imageAtomicAdd() return values
};
2602 
ConcurrentImageAtomicCase(Context & context,const char * name,const char * description,int numCalls,int workSize)2603 ConcurrentImageAtomicCase::ConcurrentImageAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2604 	: TestCase				(context, name, description)
2605 	, m_numCalls			(numCalls)
2606 	, m_workSize			(workSize)
2607 	, m_program				(DE_NULL)
2608 	, m_imageReadProgram	(DE_NULL)
2609 	, m_imageClearProgram	(DE_NULL)
2610 	, m_imageID				(DE_NULL)
2611 {
2612 }
2613 
~ConcurrentImageAtomicCase(void)2614 ConcurrentImageAtomicCase::~ConcurrentImageAtomicCase (void)
2615 {
2616 	deinit();
2617 }
2618 
init(void)2619 void ConcurrentImageAtomicCase::init (void)
2620 {
2621 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
2622 	std::vector<deUint32>	zeroData			(m_workSize * m_workSize, 0);
2623 
2624 	if (!m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
2625 		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic");
2626 
2627 	// gen image
2628 
2629 	gl.genTextures(1, &m_imageID);
2630 	gl.bindTexture(GL_TEXTURE_2D, m_imageID);
2631 	gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize, m_workSize);
2632 	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2633 	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2634 	GLU_EXPECT_NO_ERROR(gl.getError(), "gen tex");
2635 
2636 	// gen buffers
2637 
2638 	for (int ndx = 0; ndx < m_numCalls; ++ndx)
2639 	{
2640 		deUint32 buffer = 0;
2641 
2642 		gl.genBuffers(1, &buffer);
2643 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2644 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2645 
2646 		m_intermediateResultBuffers.push_back(buffer);
2647 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2648 	}
2649 
2650 	// gen programs
2651 
2652 	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2653 	m_testCtx.getLog() << *m_program;
2654 	if (!m_program->isOk())
2655 		throw tcu::TestError("could not build program");
2656 
2657 	m_imageReadProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageReadSource()));
2658 	if (!m_imageReadProgram->isOk())
2659 	{
2660 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageReadProgram", "Image read program");
2661 
2662 		m_testCtx.getLog() << *m_imageReadProgram;
2663 		throw tcu::TestError("could not build program");
2664 	}
2665 
2666 	m_imageClearProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageClearSource()));
2667 	if (!m_imageClearProgram->isOk())
2668 	{
2669 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageClearProgram", "Image read program");
2670 
2671 		m_testCtx.getLog() << *m_imageClearProgram;
2672 		throw tcu::TestError("could not build program");
2673 	}
2674 }
2675 
deinit(void)2676 void ConcurrentImageAtomicCase::deinit (void)
2677 {
2678 	if (m_imageID)
2679 	{
2680 		m_context.getRenderContext().getFunctions().deleteTextures(1, &m_imageID);
2681 		m_imageID = 0;
2682 	}
2683 
2684 	for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2685 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2686 	m_intermediateResultBuffers.clear();
2687 
2688 	delete m_program;
2689 	m_program = DE_NULL;
2690 
2691 	delete m_imageReadProgram;
2692 	m_imageReadProgram = DE_NULL;
2693 
2694 	delete m_imageClearProgram;
2695 	m_imageClearProgram = DE_NULL;
2696 }
2697 
iterate(void)2698 TestCase::IterateResult ConcurrentImageAtomicCase::iterate (void)
2699 {
2700 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2701 	const deUint32			sumValue		= (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2702 	std::vector<int>		deltas;
2703 
2704 	// generate unique deltas
2705 	generateShuffledRamp(m_numCalls, deltas);
2706 
2707 	// clear image
2708 	{
2709 		m_testCtx.getLog() << tcu::TestLog::Message << "Clearing image contents" << tcu::TestLog::EndMessage;
2710 
2711 		gl.useProgram(m_imageClearProgram->getProgram());
2712 		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
2713 		gl.dispatchCompute(m_workSize, m_workSize, 1);
2714 		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2715 
2716 		GLU_EXPECT_NO_ERROR(gl.getError(), "clear");
2717 	}
2718 
2719 	// invoke program N times, each with a different delta
2720 	{
2721 		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2722 
2723 		m_testCtx.getLog()
2724 			<< tcu::TestLog::Message
2725 			<< "Running shader " << m_numCalls << " times.\n"
2726 			<< "Num groups = (" << m_workSize << ", " << m_workSize << ", 1)\n"
2727 			<< "Setting u_atomicDelta to a unique value for each call.\n"
2728 			<< tcu::TestLog::EndMessage;
2729 
2730 		if (deltaLocation == -1)
2731 			throw tcu::TestError("u_atomicDelta location was -1");
2732 
2733 		gl.useProgram(m_program->getProgram());
2734 		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
2735 
2736 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2737 		{
2738 			m_testCtx.getLog()
2739 				<< tcu::TestLog::Message
2740 				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2741 				<< tcu::TestLog::EndMessage;
2742 
2743 			gl.uniform1ui(deltaLocation, deltas[callNdx]);
2744 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2745 			gl.dispatchCompute(m_workSize, m_workSize, 1);
2746 		}
2747 
2748 		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2749 	}
2750 
2751 	// Verify result
2752 	{
2753 		std::vector<deUint32> result;
2754 
2755 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work image, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2756 
2757 		readWorkImage(result);
2758 
2759 		for (int ndx = 0; ndx < m_workSize * m_workSize; ++ndx)
2760 		{
2761 			if (result[ndx] != sumValue)
2762 			{
2763 				m_testCtx.getLog()
2764 					<< tcu::TestLog::Message
2765 					<< "Work image error, at index (" << ndx % m_workSize << ", " << ndx / m_workSize << ") expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2766 					<< "Work image contains invalid values."
2767 					<< tcu::TestLog::EndMessage;
2768 
2769 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Image contents invalid");
2770 				return STOP;
2771 			}
2772 		}
2773 
2774 		m_testCtx.getLog() << tcu::TestLog::Message << "Work image contents are valid." << tcu::TestLog::EndMessage;
2775 	}
2776 
2777 	// verify steps
2778 	{
2779 		std::vector<std::vector<deUint32> >	intermediateResults	(m_numCalls);
2780 		std::vector<deUint32>				valueChain			(m_numCalls);
2781 		std::vector<deUint32>				chainDelta			(m_numCalls);
2782 
2783 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2784 
2785 		// collect results
2786 
2787 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2788 		{
2789 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2790 			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize * m_workSize, intermediateResults[callNdx]);
2791 		}
2792 
2793 		// verify values
2794 
2795 		for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2796 		{
2797 			int			invalidOperationNdx;
2798 			deUint32	errorDelta;
2799 			deUint32	errorExpected;
2800 
2801 			// collect result chain for each element
2802 			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2803 				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2804 
2805 			// check there exists a path from 0 to sumValue using each addition once
2806 			// decompose cumulative results to addition operations (all additions positive => this works)
2807 
2808 			std::sort(valueChain.begin(), valueChain.end());
2809 
2810 			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2811 				chainDelta[callNdx] = ((callNdx + 1 == m_numCalls) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
2812 
2813 			// chainDelta contains now the actual additions applied to the value
2814 			std::sort(chainDelta.begin(), chainDelta.end());
2815 
2816 			// validate chain
2817 			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2818 			{
2819 				m_testCtx.getLog()
2820 					<< tcu::TestLog::Message
2821 					<< "Intermediate buffer error, at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << "), applied operation index "
2822 					<< invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2823 					<< "Intermediate buffer contains invalid values. Values at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << ")\n"
2824 					<< tcu::TestLog::EndMessage;
2825 
2826 				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2827 					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2828 				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2829 
2830 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2831 				return STOP;
2832 			}
2833 		}
2834 
2835 		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2836 	}
2837 
2838 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2839 	return STOP;
2840 }
2841 
readWorkImage(std::vector<deUint32> & result)2842 void ConcurrentImageAtomicCase::readWorkImage (std::vector<deUint32>& result)
2843 {
2844 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2845 	glu::Buffer				resultBuffer	(m_context.getRenderContext());
2846 
2847 	// Read image to an ssbo
2848 
2849 	{
2850 		const std::vector<deUint32> zeroData(m_workSize*m_workSize, 0);
2851 
2852 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *resultBuffer);
2853 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * m_workSize * m_workSize), &zeroData[0], GL_DYNAMIC_COPY);
2854 
2855 		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2856 		gl.useProgram(m_imageReadProgram->getProgram());
2857 
2858 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *resultBuffer);
2859 		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
2860 		gl.dispatchCompute(m_workSize, m_workSize, 1);
2861 
2862 		GLU_EXPECT_NO_ERROR(gl.getError(), "read");
2863 	}
2864 
2865 	// Read ssbo
2866 	{
2867 		const void* ptr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, (int)(sizeof(deUint32) * m_workSize * m_workSize), GL_MAP_READ_BIT);
2868 		GLU_EXPECT_NO_ERROR(gl.getError(), "map");
2869 
2870 		if (!ptr)
2871 			throw tcu::TestError("mapBufferRange returned NULL");
2872 
2873 		result.resize(m_workSize * m_workSize);
2874 		memcpy(&result[0], ptr, sizeof(deUint32) * m_workSize * m_workSize);
2875 
2876 		if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) == GL_FALSE)
2877 			throw tcu::TestError("unmapBuffer returned false");
2878 	}
2879 }
2880 
genComputeSource(void) const2881 std::string ConcurrentImageAtomicCase::genComputeSource (void) const
2882 {
2883 	std::ostringstream buf;
2884 
2885 	buf	<< "#version 310 es\n"
2886 		<< "#extension GL_OES_shader_image_atomic : require\n"
2887 		<< "\n"
2888 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2889 		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2890 		<< "{\n"
2891 		<< "	highp uint values[" << m_workSize * m_workSize << "];\n"
2892 		<< "} sb_ires;\n"
2893 		<< "\n"
2894 		<< "layout (binding = 2, r32ui) volatile uniform highp uimage2D u_workImage;\n"
2895 		<< "uniform highp uint u_atomicDelta;\n"
2896 		<< "\n"
2897 		<< "void main ()\n"
2898 		<< "{\n"
2899 		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2900 		<< "	sb_ires.values[invocationIndex] = imageAtomicAdd(u_workImage, ivec2(gl_GlobalInvocationID.xy), u_atomicDelta);\n"
2901 		<< "}";
2902 
2903 	return buf.str();
2904 }
2905 
genImageReadSource(void) const2906 std::string ConcurrentImageAtomicCase::genImageReadSource (void) const
2907 {
2908 	std::ostringstream buf;
2909 
2910 	buf	<< "#version 310 es\n"
2911 		<< "\n"
2912 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2913 		<< "layout (binding = 1, std430) writeonly buffer ImageValues\n"
2914 		<< "{\n"
2915 		<< "	highp uint values[" << m_workSize * m_workSize << "];\n"
2916 		<< "} sb_res;\n"
2917 		<< "\n"
2918 		<< "layout (binding = 2, r32ui) readonly uniform highp uimage2D u_workImage;\n"
2919 		<< "\n"
2920 		<< "void main ()\n"
2921 		<< "{\n"
2922 		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2923 		<< "	sb_res.values[invocationIndex] = imageLoad(u_workImage, ivec2(gl_GlobalInvocationID.xy)).x;\n"
2924 		<< "}";
2925 
2926 	return buf.str();
2927 }
2928 
genImageClearSource(void) const2929 std::string ConcurrentImageAtomicCase::genImageClearSource (void) const
2930 {
2931 	std::ostringstream buf;
2932 
2933 	buf	<< "#version 310 es\n"
2934 		<< "\n"
2935 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2936 		<< "layout (binding = 2, r32ui) writeonly uniform highp uimage2D u_workImage;\n"
2937 		<< "\n"
2938 		<< "void main ()\n"
2939 		<< "{\n"
2940 		<< "	imageStore(u_workImage, ivec2(gl_GlobalInvocationID.xy), uvec4(0, 0, 0, 0));\n"
2941 		<< "}";
2942 
2943 	return buf.str();
2944 }
2945 
// Test case that interleaves dispatches of an atomic counter program and an SSBO atomic (XOR)
// program, both operating on the same buffer, and verifies the final buffer value.
class ConcurrentSSBOAtomicCounterMixedCase : public TestCase
{
public:
							ConcurrentSSBOAtomicCounterMixedCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
							~ConcurrentSSBOAtomicCounterMixedCase	(void);

	void					init									(void);
	void					deinit									(void);
	IterateResult			iterate									(void);

private:
	std::string				genSSBOComputeSource					(void) const;	// shader flipping high bits with atomicXor()
	std::string				genAtomicCounterComputeSource			(void) const;	// shader incrementing the atomic counter

	const int				m_numCalls;				// number of dispatches of each program
	const int				m_workSize;				// work group count (x) of every dispatch
	deUint32				m_bufferID;				// GL buffer shared by the SSBO and atomic counter bindings
	glu::ShaderProgram*		m_ssboAtomicProgram;	// owned, created in init()
	glu::ShaderProgram*		m_atomicCounterProgram;	// owned, created in init()
};
2966 
ConcurrentSSBOAtomicCounterMixedCase(Context & context,const char * name,const char * description,int numCalls,int workSize)2967 ConcurrentSSBOAtomicCounterMixedCase::ConcurrentSSBOAtomicCounterMixedCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2968 	: TestCase					(context, name, description)
2969 	, m_numCalls				(numCalls)
2970 	, m_workSize				(workSize)
2971 	, m_bufferID				(DE_NULL)
2972 	, m_ssboAtomicProgram		(DE_NULL)
2973 	, m_atomicCounterProgram	(DE_NULL)
2974 {
2975 	// SSBO atomic XORs cancel out
2976 	DE_ASSERT((workSize * numCalls) % (16 * 2) == 0);
2977 }
2978 
~ConcurrentSSBOAtomicCounterMixedCase(void)2979 ConcurrentSSBOAtomicCounterMixedCase::~ConcurrentSSBOAtomicCounterMixedCase (void)
2980 {
2981 	deinit();
2982 }
2983 
init(void)2984 void ConcurrentSSBOAtomicCounterMixedCase::init (void)
2985 {
2986 	const glw::Functions&		gl			= m_context.getRenderContext().getFunctions();
2987 	const deUint32				zeroBuf[2]	= { 0, 0 };
2988 
2989 	// gen buffer
2990 
2991 	gl.genBuffers(1, &m_bufferID);
2992 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2993 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * 2), zeroBuf, GL_DYNAMIC_COPY);
2994 
2995 	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2996 
2997 	// gen programs
2998 
2999 	{
3000 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "SSBOProgram", "SSBO atomic program");
3001 
3002 		m_ssboAtomicProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genSSBOComputeSource()));
3003 		m_testCtx.getLog() << *m_ssboAtomicProgram;
3004 		if (!m_ssboAtomicProgram->isOk())
3005 			throw tcu::TestError("could not build program");
3006 	}
3007 	{
3008 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "AtomicCounterProgram", "Atomic counter program");
3009 
3010 		m_atomicCounterProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genAtomicCounterComputeSource()));
3011 		m_testCtx.getLog() << *m_atomicCounterProgram;
3012 		if (!m_atomicCounterProgram->isOk())
3013 			throw tcu::TestError("could not build program");
3014 	}
3015 }
3016 
deinit(void)3017 void ConcurrentSSBOAtomicCounterMixedCase::deinit (void)
3018 {
3019 	if (m_bufferID)
3020 	{
3021 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
3022 		m_bufferID = 0;
3023 	}
3024 
3025 	delete m_ssboAtomicProgram;
3026 	m_ssboAtomicProgram = DE_NULL;
3027 
3028 	delete m_atomicCounterProgram;
3029 	m_atomicCounterProgram = DE_NULL;
3030 }
3031 
iterate(void)3032 TestCase::IterateResult ConcurrentSSBOAtomicCounterMixedCase::iterate (void)
3033 {
3034 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3035 
3036 	m_testCtx.getLog() << tcu::TestLog::Message << "Testing atomic counters and SSBO atomic operations with both backed by the same buffer." << tcu::TestLog::EndMessage;
3037 
3038 	// invoke programs N times
3039 	{
3040 		m_testCtx.getLog()
3041 			<< tcu::TestLog::Message
3042 			<< "Running SSBO atomic program and atomic counter program " << m_numCalls << " times. (interleaved)\n"
3043 			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
3044 			<< tcu::TestLog::EndMessage;
3045 
3046 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_bufferID);
3047 		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_bufferID);
3048 
3049 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
3050 		{
3051 			gl.useProgram(m_atomicCounterProgram->getProgram());
3052 			gl.dispatchCompute(m_workSize, 1, 1);
3053 
3054 			gl.useProgram(m_ssboAtomicProgram->getProgram());
3055 			gl.dispatchCompute(m_workSize, 1, 1);
3056 		}
3057 
3058 		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
3059 	}
3060 
3061 	// Verify result
3062 	{
3063 		deUint32 result;
3064 
3065 		// XORs cancel out, only addition is left
3066 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
3067 
3068 		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_bufferID);
3069 		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
3070 
3071 		if ((int)result != m_numCalls*m_workSize)
3072 		{
3073 			m_testCtx.getLog()
3074 				<< tcu::TestLog::Message
3075 				<< "Buffer value error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
3076 				<< tcu::TestLog::EndMessage;
3077 
3078 			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
3079 			return STOP;
3080 		}
3081 
3082 		m_testCtx.getLog() << tcu::TestLog::Message << "Buffer is valid." << tcu::TestLog::EndMessage;
3083 	}
3084 
3085 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
3086 	return STOP;
3087 }
3088 
genSSBOComputeSource(void) const3089 std::string ConcurrentSSBOAtomicCounterMixedCase::genSSBOComputeSource (void) const
3090 {
3091 	std::ostringstream buf;
3092 
3093 	buf	<< "#version 310 es\n"
3094 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3095 		<< "layout (binding = 1, std430) volatile buffer WorkBuffer\n"
3096 		<< "{\n"
3097 		<< "	highp uint targetValue;\n"
3098 		<< "	highp uint dummy;\n"
3099 		<< "} sb_work;\n"
3100 		<< "\n"
3101 		<< "void main ()\n"
3102 		<< "{\n"
3103 		<< "	// flip high bits\n"
3104 		<< "	highp uint mask = uint(1) << (16u + (gl_GlobalInvocationID.x % 16u));\n"
3105 		<< "	sb_work.dummy = atomicXor(sb_work.targetValue, mask);\n"
3106 		<< "}";
3107 
3108 	return buf.str();
3109 }
3110 
genAtomicCounterComputeSource(void) const3111 std::string ConcurrentSSBOAtomicCounterMixedCase::genAtomicCounterComputeSource (void) const
3112 {
3113 	std::ostringstream buf;
3114 
3115 	buf	<< "#version 310 es\n"
3116 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3117 		<< "\n"
3118 		<< "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
3119 		<< "\n"
3120 		<< "void main ()\n"
3121 		<< "{\n"
3122 		<< "	atomicCounterIncrement(u_counter);\n"
3123 		<< "}";
3124 
3125 	return buf.str();
3126 }
3127 
3128 } // anonymous
3129 
// Creates the "synchronization" test group. No children are added here;
// the group hierarchy is built in init().
SynchronizationTests::SynchronizationTests (Context& context)
	: TestCaseGroup(context, "synchronization", "Synchronization tests")
{
}
3134 
// Nothing to release explicitly in this class; the body is intentionally empty.
SynchronizationTests::~SynchronizationTests (void)
{
}
3138 
init(void)3139 void SynchronizationTests::init (void)
3140 {
3141 	tcu::TestCaseGroup* const inInvocationGroup		= new tcu::TestCaseGroup(m_testCtx, "in_invocation",	"Test intra-invocation synchronization");
3142 	tcu::TestCaseGroup* const interInvocationGroup	= new tcu::TestCaseGroup(m_testCtx, "inter_invocation", "Test inter-invocation synchronization");
3143 	tcu::TestCaseGroup* const interCallGroup		= new tcu::TestCaseGroup(m_testCtx, "inter_call",       "Test inter-call synchronization");
3144 
3145 	addChild(inInvocationGroup);
3146 	addChild(interInvocationGroup);
3147 	addChild(interCallGroup);
3148 
3149 	// .in_invocation & .inter_invocation
3150 	{
3151 		static const struct CaseConfig
3152 		{
3153 			const char*									namePrefix;
3154 			const InterInvocationTestCase::StorageType	storage;
3155 			const int									flags;
3156 		} configs[] =
3157 		{
3158 			{ "image",			InterInvocationTestCase::STORAGE_IMAGE,		0										},
3159 			{ "image_atomic",	InterInvocationTestCase::STORAGE_IMAGE,		InterInvocationTestCase::FLAG_ATOMIC	},
3160 			{ "ssbo",			InterInvocationTestCase::STORAGE_BUFFER,	0										},
3161 			{ "ssbo_atomic",	InterInvocationTestCase::STORAGE_BUFFER,	InterInvocationTestCase::FLAG_ATOMIC	},
3162 		};
3163 
3164 		for (int groupNdx = 0; groupNdx < 2; ++groupNdx)
3165 		{
3166 			tcu::TestCaseGroup* const	targetGroup	= (groupNdx == 0) ? (inInvocationGroup) : (interInvocationGroup);
3167 			const int					extraFlags	= (groupNdx == 0) ? (0) : (InterInvocationTestCase::FLAG_IN_GROUP);
3168 
3169 			for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3170 			{
3171 				const char* const target = (configs[configNdx].storage == InterInvocationTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3172 
3173 				targetGroup->addChild(new InvocationWriteReadCase(m_context,
3174 																  (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3175 																  (std::string("Write to ") + target + " and read it").c_str(),
3176 																  configs[configNdx].storage,
3177 																  configs[configNdx].flags | extraFlags));
3178 
3179 				targetGroup->addChild(new InvocationReadWriteCase(m_context,
3180 																  (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3181 																  (std::string("Read form ") + target + " and then write to it").c_str(),
3182 																  configs[configNdx].storage,
3183 																  configs[configNdx].flags | extraFlags));
3184 
3185 				targetGroup->addChild(new InvocationOverWriteCase(m_context,
3186 																  (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3187 																  (std::string("Write to ") + target + " twice and read it").c_str(),
3188 																  configs[configNdx].storage,
3189 																  configs[configNdx].flags | extraFlags));
3190 
3191 				targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3192 																   (std::string(configs[configNdx].namePrefix) + "_alias_write").c_str(),
3193 																   (std::string("Write to aliasing ") + target + " and read it").c_str(),
3194 																   InvocationAliasWriteCase::TYPE_WRITE,
3195 																   configs[configNdx].storage,
3196 																   configs[configNdx].flags | extraFlags));
3197 
3198 				targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3199 																   (std::string(configs[configNdx].namePrefix) + "_alias_overwrite").c_str(),
3200 																   (std::string("Write to aliasing ") + target + "s and read it").c_str(),
3201 																   InvocationAliasWriteCase::TYPE_OVERWRITE,
3202 																   configs[configNdx].storage,
3203 																   configs[configNdx].flags | extraFlags));
3204 			}
3205 		}
3206 	}
3207 
3208 	// .inter_call
3209 	{
3210 		tcu::TestCaseGroup* const withBarrierGroup		= new tcu::TestCaseGroup(m_testCtx, "with_memory_barrier", "Synchronize with memory barrier");
3211 		tcu::TestCaseGroup* const withoutBarrierGroup	= new tcu::TestCaseGroup(m_testCtx, "without_memory_barrier", "Synchronize without memory barrier");
3212 
3213 		interCallGroup->addChild(withBarrierGroup);
3214 		interCallGroup->addChild(withoutBarrierGroup);
3215 
3216 		// .with_memory_barrier
3217 		{
3218 			static const struct CaseConfig
3219 			{
3220 				const char*								namePrefix;
3221 				const InterCallTestCase::StorageType	storage;
3222 				const int								flags;
3223 			} configs[] =
3224 			{
3225 				{ "image",			InterCallTestCase::STORAGE_IMAGE,	0																		},
3226 				{ "image_atomic",	InterCallTestCase::STORAGE_IMAGE,	InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT	},
3227 				{ "ssbo",			InterCallTestCase::STORAGE_BUFFER,	0																		},
3228 				{ "ssbo_atomic",	InterCallTestCase::STORAGE_BUFFER,	InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT	},
3229 			};
3230 
3231 			const int seed0 = 123;
3232 			const int seed1 = 457;
3233 
3234 			for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3235 			{
3236 				const char* const target = (configs[configNdx].storage == InterCallTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3237 
3238 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3239 																 (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3240 																 (std::string("Write to ") + target + " and read it").c_str(),
3241 																 configs[configNdx].storage,
3242 																 configs[configNdx].flags,
3243 																 InterCallOperations()
3244 																	<< op::WriteData::Generate(1, seed0)
3245 																	<< op::Barrier()
3246 																	<< op::ReadData::Generate(1, seed0)));
3247 
3248 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3249 																 (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3250 																 (std::string("Read from ") + target + " and then write to it").c_str(),
3251 																 configs[configNdx].storage,
3252 																 configs[configNdx].flags,
3253 																 InterCallOperations()
3254 																	<< op::ReadZeroData::Generate(1)
3255 																	<< op::Barrier()
3256 																	<< op::WriteData::Generate(1, seed0)));
3257 
3258 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3259 																 (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3260 																 (std::string("Write to ") + target + " twice and read it").c_str(),
3261 																 configs[configNdx].storage,
3262 																 configs[configNdx].flags,
3263 																 InterCallOperations()
3264 																	<< op::WriteData::Generate(1, seed0)
3265 																	<< op::Barrier()
3266 																	<< op::WriteData::Generate(1, seed1)
3267 																	<< op::Barrier()
3268 																	<< op::ReadData::Generate(1, seed1)));
3269 
3270 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3271 																 (std::string(configs[configNdx].namePrefix) + "_multiple_write_read").c_str(),
3272 																 (std::string("Write to multiple ") + target + "s and read them").c_str(),
3273 																 configs[configNdx].storage,
3274 																 configs[configNdx].flags,
3275 																 InterCallOperations()
3276 																	<< op::WriteData::Generate(1, seed0)
3277 																	<< op::WriteData::Generate(2, seed1)
3278 																	<< op::Barrier()
3279 																	<< op::ReadMultipleData::Generate(1, seed0, 2, seed1)));
3280 
3281 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3282 																 (std::string(configs[configNdx].namePrefix) + "_multiple_interleaved_write_read").c_str(),
3283 																 (std::string("Write to same ") + target + " in multiple calls and read it").c_str(),
3284 																 configs[configNdx].storage,
3285 																 configs[configNdx].flags,
3286 																 InterCallOperations()
3287 																	<< op::WriteDataInterleaved::Generate(1, seed0, true)
3288 																	<< op::WriteDataInterleaved::Generate(1, seed1, false)
3289 																	<< op::Barrier()
3290 																	<< op::ReadDataInterleaved::Generate(1, seed0, seed1)));
3291 
3292 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3293 																 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_ordered").c_str(),
3294 																 (std::string("Two unrelated ") + target + " write-reads").c_str(),
3295 																 configs[configNdx].storage,
3296 																 configs[configNdx].flags,
3297 																 InterCallOperations()
3298 																	<< op::WriteData::Generate(1, seed0)
3299 																	<< op::WriteData::Generate(2, seed1)
3300 																	<< op::Barrier()
3301 																	<< op::ReadData::Generate(1, seed0)
3302 																	<< op::ReadData::Generate(2, seed1)));
3303 
3304 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3305 																 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_non_ordered").c_str(),
3306 																 (std::string("Two unrelated ") + target + " write-reads").c_str(),
3307 																 configs[configNdx].storage,
3308 																 configs[configNdx].flags,
3309 																 InterCallOperations()
3310 																	<< op::WriteData::Generate(1, seed0)
3311 																	<< op::WriteData::Generate(2, seed1)
3312 																	<< op::Barrier()
3313 																	<< op::ReadData::Generate(2, seed1)
3314 																	<< op::ReadData::Generate(1, seed0)));
3315 			}
3316 
3317 			// .without_memory_barrier
3318 			{
3319 				struct InvocationConfig
3320 				{
3321 					const char*	name;
3322 					int			count;
3323 				};
3324 
3325 				static const InvocationConfig ssboInvocations[] =
3326 				{
3327 					{ "1k",		1024	},
3328 					{ "4k",		4096	},
3329 					{ "32k",	32768	},
3330 				};
3331 				static const InvocationConfig imageInvocations[] =
3332 				{
3333 					{ "8x8",		8	},
3334 					{ "32x32",		32	},
3335 					{ "128x128",	128	},
3336 				};
3337 				static const InvocationConfig counterInvocations[] =
3338 				{
3339 					{ "32",		32		},
3340 					{ "128",	128		},
3341 					{ "1k",		1024	},
3342 				};
3343 				static const int callCounts[] = { 2, 5, 100 };
3344 
3345 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(ssboInvocations); ++invocationNdx)
3346 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3347 						withoutBarrierGroup->addChild(new SSBOConcurrentAtomicCase(m_context, (std::string("ssbo_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + ssboInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], ssboInvocations[invocationNdx].count));
3348 
3349 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(imageInvocations); ++invocationNdx)
3350 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3351 						withoutBarrierGroup->addChild(new ConcurrentImageAtomicCase(m_context, (std::string("image_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + imageInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], imageInvocations[invocationNdx].count));
3352 
3353 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3354 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3355 						withoutBarrierGroup->addChild(new ConcurrentAtomicCounterCase(m_context, (std::string("atomic_counter_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3356 
3357 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3358 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3359 						withoutBarrierGroup->addChild(new ConcurrentSSBOAtomicCounterMixedCase(m_context, (std::string("ssbo_atomic_counter_mixed_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3360 			}
3361 		}
3362 	}
3363 }
3364 
3365 } // Functional
3366 } // gles31
3367 } // deqp
3368