1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.1 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Basic Compute Shader Tests.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es31fBasicComputeShaderTests.hpp"
25 #include "gluShaderProgram.hpp"
26 #include "gluObjectWrapper.hpp"
27 #include "gluRenderContext.hpp"
28 #include "gluProgramInterfaceQuery.hpp"
29 #include "gluContextInfo.hpp"
30 #include "glwFunctions.hpp"
31 #include "glwEnums.hpp"
32 #include "tcuTestLog.hpp"
33 #include "deRandom.hpp"
34 #include "deStringUtil.hpp"
35 #include "deMemory.h"
36 
37 namespace deqp
38 {
39 namespace gles31
40 {
41 namespace Functional
42 {
43 
44 using std::string;
45 using std::vector;
46 using tcu::TestLog;
47 using namespace glu;
48 
49 //! Utility for mapping buffers.
50 class BufferMemMap
51 {
52 public:
BufferMemMap(const glw::Functions & gl,deUint32 target,int offset,int size,deUint32 access)53 	BufferMemMap (const glw::Functions& gl, deUint32 target, int offset, int size, deUint32 access)
54 		: m_gl		(gl)
55 		, m_target	(target)
56 		, m_ptr		(DE_NULL)
57 	{
58 		m_ptr = gl.mapBufferRange(target, offset, size, access);
59 		GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
60 		TCU_CHECK(m_ptr);
61 	}
62 
~BufferMemMap(void)63 	~BufferMemMap (void)
64 	{
65 		m_gl.unmapBuffer(m_target);
66 	}
67 
getPtr(void) const68 	void*	getPtr		(void) const { return m_ptr; }
operator *(void) const69 	void*	operator*	(void) const { return m_ptr; }
70 
71 private:
72 							BufferMemMap			(const BufferMemMap& other);
73 	BufferMemMap&			operator=				(const BufferMemMap& other);
74 
75 	const glw::Functions&	m_gl;
76 	const deUint32			m_target;
77 	void*					m_ptr;
78 };
79 
80 namespace
81 {
82 
83 class EmptyComputeShaderCase : public TestCase
84 {
85 public:
EmptyComputeShaderCase(Context & context)86 	EmptyComputeShaderCase (Context& context)
87 		: TestCase(context, "empty", "Empty shader")
88 	{
89 	}
90 
iterate(void)91 	IterateResult iterate (void)
92 	{
93 		const ShaderProgram program(m_context.getRenderContext(),
94 			ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE,
95 				"#version 310 es\n"
96 				"layout (local_size_x = 1) in;\n"
97 				"void main (void) {}\n"
98 				));
99 
100 		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
101 
102 		m_testCtx.getLog() << program;
103 		if (!program.isOk())
104 			TCU_FAIL("Compile failed");
105 
106 		gl.useProgram(program.getProgram());
107 		gl.dispatchCompute(1, 1, 1);
108 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
109 
110 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
111 		return STOP;
112 	}
113 };
114 
115 class UBOToSSBOInvertCase : public TestCase
116 {
117 public:
UBOToSSBOInvertCase(Context & context,const char * name,const char * description,int numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)118 	UBOToSSBOInvertCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
119 		: TestCase		(context, name, description)
120 		, m_numValues	(numValues)
121 		, m_localSize	(localSize)
122 		, m_workSize	(workSize)
123 	{
124 		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
125 	}
126 
iterate(void)127 	IterateResult iterate (void)
128 	{
129 		std::ostringstream src;
130 		src << "#version 310 es\n"
131 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
132 			<< "uniform Input {\n"
133 			<< "    uint values[" << m_numValues << "];\n"
134 			<< "} ub_in;\n"
135 			<< "layout(binding = 1) buffer Output {\n"
136 			<< "    uint values[" << m_numValues << "];\n"
137 			<< "} sb_out;\n"
138 			<< "void main (void) {\n"
139 			<< "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
140 			<< "    uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
141 			<< "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
142 			<< "    uint offset          = numValuesPerInv*groupNdx;\n"
143 			<< "\n"
144 			<< "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
145 			<< "        sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
146 			<< "}\n";
147 
148 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
149 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
150 		const Buffer				inputBuffer		(m_context.getRenderContext());
151 		const Buffer				outputBuffer	(m_context.getRenderContext());
152 		std::vector<deUint32>		inputValues		(m_numValues);
153 
154 		// Compute input values.
155 		{
156 			de::Random rnd(0x111223f);
157 			for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
158 				inputValues[ndx] = rnd.getUint32();
159 		}
160 
161 		m_testCtx.getLog() << program;
162 		if (!program.isOk())
163 			TCU_FAIL("Compile failed");
164 
165 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
166 
167 		gl.useProgram(program.getProgram());
168 
169 		// Input buffer setup
170 		{
171 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM_BLOCK, "Input");
172 			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_UNIFORM_BLOCK, blockIndex);
173 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "Input.values");
174 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_UNIFORM, valueIndex);
175 
176 			gl.bindBuffer(GL_UNIFORM_BUFFER, *inputBuffer);
177 			gl.bufferData(GL_UNIFORM_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
178 
179 			{
180 				const BufferMemMap bufMap(gl, GL_UNIFORM_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
181 
182 				for (deUint32 ndx = 0; ndx < de::min(valueInfo.arraySize, (deUint32)inputValues.size()); ndx++)
183 					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
184 			}
185 
186 			gl.uniformBlockBinding(program.getProgram(), blockIndex, 0);
187 			gl.bindBufferBase(GL_UNIFORM_BUFFER, 0, *inputBuffer);
188 			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
189 		}
190 
191 		// Output buffer setup
192 		{
193 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
194 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
195 
196 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
197 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
198 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer);
199 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
200 		}
201 
202 		// Dispatch compute workload
203 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
204 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
205 
206 		// Read back and compare
207 		{
208 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
209 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
210 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
211 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
212 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
213 
214 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
215 			for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
216 			{
217 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
218 				const deUint32	ref		= ~inputValues[ndx];
219 
220 				if (res != ref)
221 					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
222 			}
223 		}
224 
225 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
226 		return STOP;
227 	}
228 
229 private:
230 	const int			m_numValues;
231 	const tcu::IVec3	m_localSize;
232 	const tcu::IVec3	m_workSize;
233 };
234 
235 class CopyInvertSSBOCase : public TestCase
236 {
237 public:
CopyInvertSSBOCase(Context & context,const char * name,const char * description,int numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)238 	CopyInvertSSBOCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
239 		: TestCase		(context, name, description)
240 		, m_numValues	(numValues)
241 		, m_localSize	(localSize)
242 		, m_workSize	(workSize)
243 	{
244 		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
245 	}
246 
iterate(void)247 	IterateResult iterate (void)
248 	{
249 		std::ostringstream src;
250 		src << "#version 310 es\n"
251 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
252 			<< "layout(binding = 0) buffer Input {\n"
253 			<< "    uint values[" << m_numValues << "];\n"
254 			<< "} sb_in;\n"
255 			<< "layout (binding = 1) buffer Output {\n"
256 			<< "    uint values[" << m_numValues << "];\n"
257 			<< "} sb_out;\n"
258 			<< "void main (void) {\n"
259 			<< "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
260 			<< "    uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
261 			<< "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
262 			<< "    uint offset          = numValuesPerInv*groupNdx;\n"
263 			<< "\n"
264 			<< "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
265 			<< "        sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
266 			<< "}\n";
267 
268 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
269 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
270 		const Buffer				inputBuffer		(m_context.getRenderContext());
271 		const Buffer				outputBuffer	(m_context.getRenderContext());
272 		std::vector<deUint32>		inputValues		(m_numValues);
273 
274 		// Compute input values.
275 		{
276 			de::Random rnd(0x124fef);
277 			for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
278 				inputValues[ndx] = rnd.getUint32();
279 		}
280 
281 		m_testCtx.getLog() << program;
282 		if (!program.isOk())
283 			TCU_FAIL("Compile failed");
284 
285 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
286 
287 		gl.useProgram(program.getProgram());
288 
289 		// Input buffer setup
290 		{
291 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
292 			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
293 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
294 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
295 
296 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
297 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
298 
299 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
300 
301 			{
302 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
303 
304 				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
305 					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
306 			}
307 
308 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
309 			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
310 		}
311 
312 		// Output buffer setup
313 		{
314 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
315 			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
316 
317 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
318 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, DE_NULL, GL_STREAM_READ);
319 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *outputBuffer);
320 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
321 		}
322 
323 		// Dispatch compute workload
324 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
325 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
326 
327 		// Read back and compare
328 		{
329 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
330 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
331 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
332 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
333 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
334 
335 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
336 			for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
337 			{
338 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
339 				const deUint32	ref		= ~inputValues[ndx];
340 
341 				if (res != ref)
342 					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
343 			}
344 		}
345 
346 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
347 		return STOP;
348 	}
349 
350 private:
351 	const int			m_numValues;
352 	const tcu::IVec3	m_localSize;
353 	const tcu::IVec3	m_workSize;
354 };
355 
356 class InvertSSBOInPlaceCase : public TestCase
357 {
358 public:
InvertSSBOInPlaceCase(Context & context,const char * name,const char * description,int numValues,bool isSized,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)359 	InvertSSBOInPlaceCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
360 		: TestCase		(context, name, description)
361 		, m_numValues	(numValues)
362 		, m_isSized		(isSized)
363 		, m_localSize	(localSize)
364 		, m_workSize	(workSize)
365 	{
366 		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
367 	}
368 
iterate(void)369 	IterateResult iterate (void)
370 	{
371 		std::ostringstream src;
372 		src << "#version 310 es\n"
373 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
374 			<< "layout(binding = 0) buffer InOut {\n"
375 			<< "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
376 			<< "} sb_inout;\n"
377 			<< "void main (void) {\n"
378 			<< "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
379 			<< "    uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
380 			<< "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
381 			<< "    uint offset          = numValuesPerInv*groupNdx;\n"
382 			<< "\n"
383 			<< "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
384 			<< "        sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
385 			<< "}\n";
386 
387 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
388 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
389 
390 		m_testCtx.getLog() << program;
391 		if (!program.isOk())
392 			TCU_FAIL("Compile failed");
393 
394 		const Buffer				outputBuffer	(m_context.getRenderContext());
395 		const deUint32				valueIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "InOut.values");
396 		const InterfaceVariableInfo	valueInfo		= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
397 		const deUint32				blockSize		= valueInfo.arrayStride*(deUint32)m_numValues;
398 		std::vector<deUint32>		inputValues		(m_numValues);
399 
400 		// Compute input values.
401 		{
402 			de::Random rnd(0x82ce7f);
403 			for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
404 				inputValues[ndx] = rnd.getUint32();
405 		}
406 
407 		TCU_CHECK(valueInfo.arraySize == (deUint32)(m_isSized ? m_numValues : 0));
408 
409 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
410 
411 		gl.useProgram(program.getProgram());
412 
413 		// Output buffer setup
414 		{
415 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
416 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_DRAW);
417 
418 			{
419 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockSize, GL_MAP_WRITE_BIT);
420 
421 				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
422 					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
423 			}
424 
425 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
426 			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
427 		}
428 
429 		// Dispatch compute workload
430 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
431 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
432 
433 		// Read back and compare
434 		{
435 			const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
436 
437 			for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
438 			{
439 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
440 				const deUint32	ref		= ~inputValues[ndx];
441 
442 				if (res != ref)
443 					throw tcu::TestError(string("Comparison failed for InOut.values[") + de::toString(ndx) + "]");
444 			}
445 		}
446 
447 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
448 		return STOP;
449 	}
450 
451 private:
452 	const int			m_numValues;
453 	const bool			m_isSized;
454 	const tcu::IVec3	m_localSize;
455 	const tcu::IVec3	m_workSize;
456 };
457 
458 class WriteToMultipleSSBOCase : public TestCase
459 {
460 public:
WriteToMultipleSSBOCase(Context & context,const char * name,const char * description,int numValues,bool isSized,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)461 	WriteToMultipleSSBOCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
462 		: TestCase		(context, name, description)
463 		, m_numValues	(numValues)
464 		, m_isSized		(isSized)
465 		, m_localSize	(localSize)
466 		, m_workSize	(workSize)
467 	{
468 		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
469 	}
470 
iterate(void)471 	IterateResult iterate (void)
472 	{
473 		std::ostringstream src;
474 		src << "#version 310 es\n"
475 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
476 			<< "layout(binding = 0) buffer Out0 {\n"
477 			<< "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
478 			<< "} sb_out0;\n"
479 			<< "layout(binding = 1) buffer Out1 {\n"
480 			<< "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
481 			<< "} sb_out1;\n"
482 			<< "void main (void) {\n"
483 			<< "    uvec3 size      = gl_NumWorkGroups * gl_WorkGroupSize;\n"
484 			<< "    uint groupNdx   = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
485 			<< "\n"
486 			<< "    {\n"
487 			<< "        uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
488 			<< "        uint offset          = numValuesPerInv*groupNdx;\n"
489 			<< "\n"
490 			<< "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
491 			<< "            sb_out0.values[offset + ndx] = offset + ndx;\n"
492 			<< "    }\n"
493 			<< "    {\n"
494 			<< "        uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
495 			<< "        uint offset          = numValuesPerInv*groupNdx;\n"
496 			<< "\n"
497 			<< "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
498 			<< "            sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
499 			<< "    }\n"
500 			<< "}\n";
501 
502 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
503 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
504 
505 		m_testCtx.getLog() << program;
506 		if (!program.isOk())
507 			TCU_FAIL("Compile failed");
508 
509 		const Buffer				outputBuffer0	(m_context.getRenderContext());
510 		const deUint32				value0Index		= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out0.values");
511 		const InterfaceVariableInfo	value0Info		= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value0Index);
512 		const deUint32				block0Size		= value0Info.arrayStride*(deUint32)m_numValues;
513 
514 		const Buffer				outputBuffer1	(m_context.getRenderContext());
515 		const deUint32				value1Index		= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out1.values");
516 		const InterfaceVariableInfo	value1Info		= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value1Index);
517 		const deUint32				block1Size		= value1Info.arrayStride*(deUint32)m_numValues;
518 
519 		TCU_CHECK(value0Info.arraySize == (deUint32)(m_isSized ? m_numValues : 0));
520 		TCU_CHECK(value1Info.arraySize == (deUint32)(m_isSized ? m_numValues : 0));
521 
522 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
523 
524 		gl.useProgram(program.getProgram());
525 
526 		// Output buffer setup
527 		{
528 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0);
529 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, block0Size, DE_NULL, GL_STREAM_DRAW);
530 
531 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer0);
532 			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
533 		}
534 		{
535 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1);
536 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, block1Size, DE_NULL, GL_STREAM_DRAW);
537 
538 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer1);
539 			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
540 		}
541 
542 		// Dispatch compute workload
543 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
544 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
545 
546 		// Read back and compare
547 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0);
548 		{
549 			const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block0Size, GL_MAP_READ_BIT);
550 
551 			for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++)
552 			{
553 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + value0Info.offset + value0Info.arrayStride*ndx));
554 				const deUint32	ref		= ndx;
555 
556 				if (res != ref)
557 					throw tcu::TestError(string("Comparison failed for Out0.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref));
558 			}
559 		}
560 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1);
561 		{
562 			const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block1Size, GL_MAP_READ_BIT);
563 
564 			for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++)
565 			{
566 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + value1Info.offset + value1Info.arrayStride*ndx));
567 				const deUint32	ref		= m_numValues - ndx;
568 
569 				if (res != ref)
570 					throw tcu::TestError(string("Comparison failed for Out1.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref));
571 			}
572 		}
573 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
574 		return STOP;
575 	}
576 
577 private:
578 	const int			m_numValues;
579 	const bool			m_isSized;
580 	const tcu::IVec3	m_localSize;
581 	const tcu::IVec3	m_workSize;
582 };
583 
584 class SSBOLocalBarrierCase : public TestCase
585 {
586 public:
SSBOLocalBarrierCase(Context & context,const char * name,const char * description,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)587 	SSBOLocalBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
588 		: TestCase		(context, name, description)
589 		, m_localSize	(localSize)
590 		, m_workSize	(workSize)
591 	{
592 	}
593 
iterate(void)594 	IterateResult iterate (void)
595 	{
596 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
597 		const Buffer				outputBuffer	(m_context.getRenderContext());
598 		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
599 		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
600 		const int					numValues		= workGroupSize*workGroupCount;
601 
602 		std::ostringstream src;
603 		src << "#version 310 es\n"
604 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
605 			<< "layout(binding = 0) buffer Output {\n"
606 			<< "    coherent uint values[" << numValues << "];\n"
607 			<< "} sb_out;\n\n"
608 			<< "shared uint offsets[" << workGroupSize << "];\n\n"
609 			<< "void main (void) {\n"
610 			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
611 			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
612 			<< "    uint globalOffs = localSize*globalNdx;\n"
613 			<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
614 			<< "\n"
615 			<< "    sb_out.values[globalOffs + localOffs] = globalOffs;\n"
616 			<< "    memoryBarrierBuffer();\n"
617 			<< "    barrier();\n"
618 			<< "    sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n"
619 			<< "    memoryBarrierBuffer();\n"
620 			<< "    barrier();\n"
621 			<< "    sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
622 			<< "}\n";
623 
624 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
625 
626 		m_testCtx.getLog() << program;
627 		if (!program.isOk())
628 			TCU_FAIL("Compile failed");
629 
630 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
631 
632 		gl.useProgram(program.getProgram());
633 
634 		// Output buffer setup
635 		{
636 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
637 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
638 
639 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
640 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
641 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
642 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
643 		}
644 
645 		// Dispatch compute workload
646 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
647 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
648 
649 		// Read back and compare
650 		{
651 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
652 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
653 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
654 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
655 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
656 
657 			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
658 			{
659 				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
660 				{
661 					const int		globalOffs	= groupNdx*workGroupSize;
662 					const deUint32	res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
663 					const int		offs0		= localOffs-1 < 0 ? ((localOffs+workGroupSize-1)%workGroupSize) : ((localOffs-1)%workGroupSize);
664 					const int		offs1		= localOffs-2 < 0 ? ((localOffs+workGroupSize-2)%workGroupSize) : ((localOffs-2)%workGroupSize);
665 					const deUint32	ref			= (deUint32)(globalOffs + offs0 + offs1);
666 
667 					if (res != ref)
668 						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
669 				}
670 			}
671 		}
672 
673 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
674 		return STOP;
675 	}
676 
677 private:
678 	const tcu::IVec3	m_localSize;
679 	const tcu::IVec3	m_workSize;
680 };
681 
682 class SSBOBarrierCase : public TestCase
683 {
684 public:
SSBOBarrierCase(Context & context,const char * name,const char * description,const tcu::IVec3 & workSize)685 	SSBOBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& workSize)
686 		: TestCase		(context, name, description)
687 		, m_workSize	(workSize)
688 	{
689 	}
690 
iterate(void)691 	IterateResult iterate (void)
692 	{
693 		const ShaderProgram program0(m_context.getRenderContext(), ProgramSources() <<
694 			ComputeSource("#version 310 es\n"
695 						  "layout (local_size_x = 1) in;\n"
696 						  "uniform uint u_baseVal;\n"
697 						  "layout(binding = 1) buffer Output {\n"
698 						  "    uint values[];\n"
699 						  "};\n"
700 						  "void main (void) {\n"
701 						  "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
702 						  "    values[offset] = u_baseVal+offset;\n"
703 						  "}\n"));
704 		const ShaderProgram program1(m_context.getRenderContext(), ProgramSources() <<
705 			ComputeSource("#version 310 es\n"
706 						  "layout (local_size_x = 1) in;\n"
707 						  "uniform uint u_baseVal;\n"
708 						  "layout(binding = 1) buffer Input {\n"
709 						  "    uint values[];\n"
710 						  "};\n"
711 						  "layout(binding = 0) buffer Output {\n"
712 						  "    coherent uint sum;\n"
713 						  "};\n"
714 						  "void main (void) {\n"
715 						  "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
716 						  "    uint value  = values[offset];\n"
717 						  "    atomicAdd(sum, value);\n"
718 						  "}\n"));
719 
720 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
721 		const Buffer				tempBuffer		(m_context.getRenderContext());
722 		const Buffer				outputBuffer	(m_context.getRenderContext());
723 		const deUint32				baseValue		= 127;
724 
725 		m_testCtx.getLog() << program0 << program1;
726 		if (!program0.isOk() || !program1.isOk())
727 			TCU_FAIL("Compile failed");
728 
729 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
730 
731 		// Temp buffer setup
732 		{
733 			const deUint32				valueIndex		= gl.getProgramResourceIndex(program0.getProgram(), GL_BUFFER_VARIABLE, "values[0]");
734 			const InterfaceVariableInfo	valueInfo		= getProgramInterfaceVariableInfo(gl, program0.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
735 			const deUint32				bufferSize		= valueInfo.arrayStride*m_workSize[0]*m_workSize[1]*m_workSize[2];
736 
737 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *tempBuffer);
738 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)bufferSize, DE_NULL, GL_STATIC_DRAW);
739 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *tempBuffer);
740 			GLU_EXPECT_NO_ERROR(gl.getError(), "Temp buffer setup failed");
741 		}
742 
743 		// Output buffer setup
744 		{
745 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
746 			const int			blockSize		= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
747 
748 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
749 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
750 
751 			{
752 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT);
753 				deMemset(bufMap.getPtr(), 0, blockSize);
754 			}
755 
756 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
757 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
758 		}
759 
760 		// Dispatch compute workload
761 		gl.useProgram(program0.getProgram());
762 		gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue);
763 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
764 		gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
765 		gl.useProgram(program1.getProgram());
766 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
767 		GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands");
768 
769 		// Read back and compare
770 		{
771 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
772 			const int					blockSize	= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
773 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum");
774 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
775 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
776 
777 			const deUint32				res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset));
778 			deUint32					ref			= 0;
779 
780 			for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]*m_workSize[2]; ndx++)
781 				ref += baseValue + (deUint32)ndx;
782 
783 			if (res != ref)
784 			{
785 				m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage;
786 				throw tcu::TestError("Comparison failed");
787 			}
788 		}
789 
790 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
791 		return STOP;
792 	}
793 
794 private:
795 	const tcu::IVec3	m_workSize;
796 };
797 
798 class BasicSharedVarCase : public TestCase
799 {
800 public:
BasicSharedVarCase(Context & context,const char * name,const char * description,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)801 	BasicSharedVarCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
802 		: TestCase		(context, name, description)
803 		, m_localSize	(localSize)
804 		, m_workSize	(workSize)
805 	{
806 	}
807 
iterate(void)808 	IterateResult iterate (void)
809 	{
810 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
811 		const Buffer				outputBuffer	(m_context.getRenderContext());
812 		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
813 		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
814 		const int					numValues		= workGroupSize*workGroupCount;
815 
816 		std::ostringstream src;
817 		src << "#version 310 es\n"
818 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
819 			<< "layout(binding = 0) buffer Output {\n"
820 			<< "    uint values[" << numValues << "];\n"
821 			<< "} sb_out;\n\n"
822 			<< "shared uint offsets[" << workGroupSize << "];\n\n"
823 			<< "void main (void) {\n"
824 			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
825 			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
826 			<< "    uint globalOffs = localSize*globalNdx;\n"
827 			<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
828 			<< "\n"
829 			<< "    offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
830 			<< "    barrier();\n"
831 			<< "    sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
832 			<< "}\n";
833 
834 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
835 
836 		m_testCtx.getLog() << program;
837 		if (!program.isOk())
838 			TCU_FAIL("Compile failed");
839 
840 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
841 
842 		gl.useProgram(program.getProgram());
843 
844 		// Output buffer setup
845 		{
846 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
847 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
848 
849 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
850 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
851 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
852 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
853 		}
854 
855 		// Dispatch compute workload
856 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
857 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
858 
859 		// Read back and compare
860 		{
861 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
862 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
863 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
864 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
865 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
866 
867 			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
868 			{
869 				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
870 				{
871 					const int		globalOffs	= groupNdx*workGroupSize;
872 					const deUint32	res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
873 					const deUint32	ref			= (deUint32)(globalOffs + (workGroupSize-localOffs-1)*(workGroupSize-localOffs-1));
874 
875 					if (res != ref)
876 						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
877 				}
878 			}
879 		}
880 
881 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
882 		return STOP;
883 	}
884 
885 private:
886 	const tcu::IVec3	m_localSize;
887 	const tcu::IVec3	m_workSize;
888 };
889 
890 class SharedVarAtomicOpCase : public TestCase
891 {
892 public:
SharedVarAtomicOpCase(Context & context,const char * name,const char * description,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)893 	SharedVarAtomicOpCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
894 		: TestCase		(context, name, description)
895 		, m_localSize	(localSize)
896 		, m_workSize	(workSize)
897 	{
898 	}
899 
iterate(void)900 	IterateResult iterate (void)
901 	{
902 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
903 		const Buffer				outputBuffer	(m_context.getRenderContext());
904 		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
905 		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
906 		const int					numValues		= workGroupSize*workGroupCount;
907 
908 		std::ostringstream src;
909 		src << "#version 310 es\n"
910 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
911 			<< "layout(binding = 0) buffer Output {\n"
912 			<< "    uint values[" << numValues << "];\n"
913 			<< "} sb_out;\n\n"
914 			<< "shared uint count;\n\n"
915 			<< "void main (void) {\n"
916 			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
917 			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
918 			<< "    uint globalOffs = localSize*globalNdx;\n"
919 			<< "\n"
920 			<< "    count = 0u;\n"
921 			<< "    barrier();\n"
922 			<< "    uint oldVal = atomicAdd(count, 1u);\n"
923 			<< "    sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
924 			<< "}\n";
925 
926 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
927 
928 		m_testCtx.getLog() << program;
929 		if (!program.isOk())
930 			TCU_FAIL("Compile failed");
931 
932 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
933 
934 		gl.useProgram(program.getProgram());
935 
936 		// Output buffer setup
937 		{
938 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
939 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
940 
941 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
942 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
943 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
944 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
945 		}
946 
947 		// Dispatch compute workload
948 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
949 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
950 
951 		// Read back and compare
952 		{
953 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
954 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
955 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
956 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
957 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
958 
959 			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
960 			{
961 				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
962 				{
963 					const int		globalOffs	= groupNdx*workGroupSize;
964 					const deUint32	res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
965 					const deUint32	ref			= (deUint32)(localOffs+1);
966 
967 					if (res != ref)
968 						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
969 				}
970 			}
971 		}
972 
973 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
974 		return STOP;
975 	}
976 
977 private:
978 	const tcu::IVec3	m_localSize;
979 	const tcu::IVec3	m_workSize;
980 };
981 
982 class CopyImageToSSBOCase : public TestCase
983 {
984 public:
CopyImageToSSBOCase(Context & context,const char * name,const char * description,const tcu::IVec2 & localSize,const tcu::IVec2 & imageSize)985 	CopyImageToSSBOCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
986 		: TestCase		(context, name, description)
987 		, m_localSize	(localSize)
988 		, m_imageSize	(imageSize)
989 	{
990 		DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0);
991 		DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0);
992 	}
993 
iterate(void)994 	IterateResult iterate (void)
995 	{
996 
997 		std::ostringstream src;
998 		src << "#version 310 es\n"
999 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n"
1000 			<< "layout(r32ui, binding = 1) readonly uniform highp uimage2D u_srcImg;\n"
1001 			<< "layout(binding = 0) buffer Output {\n"
1002 			<< "    uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n"
1003 			<< "} sb_out;\n\n"
1004 			<< "void main (void) {\n"
1005 			<< "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1006 			<< "    uint value  = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
1007 			<< "    sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
1008 			<< "}\n";
1009 
1010 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
1011 		const Buffer				outputBuffer	(m_context.getRenderContext());
1012 		const Texture				inputTexture	(m_context.getRenderContext());
1013 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1014 		const tcu::IVec2			workSize		= m_imageSize / m_localSize;
1015 		de::Random					rnd				(0xab2c7);
1016 		vector<deUint32>			inputValues		(m_imageSize[0]*m_imageSize[1]);
1017 
1018 		m_testCtx.getLog() << program;
1019 		if (!program.isOk())
1020 			TCU_FAIL("Compile failed");
1021 
1022 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage;
1023 
1024 		gl.useProgram(program.getProgram());
1025 
1026 		// Input values
1027 		for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
1028 			*i = rnd.getUint32();
1029 
1030 		// Input image setup
1031 		gl.bindTexture(GL_TEXTURE_2D, *inputTexture);
1032 		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
1033 		gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_imageSize[0], m_imageSize[1], GL_RED_INTEGER, GL_UNSIGNED_INT, &inputValues[0]);
1034 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1035 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1036 		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1037 
1038 		// Bind to unit 1
1039 		gl.bindImageTexture(1, *inputTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
1040 		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1041 
1042 		// Output buffer setup
1043 		{
1044 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1045 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1046 
1047 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1048 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1049 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1050 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1051 		}
1052 
1053 		// Dispatch compute workload
1054 		gl.dispatchCompute(workSize[0], workSize[1], 1);
1055 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1056 
1057 		// Read back and compare
1058 		{
1059 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1060 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1061 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
1062 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1063 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1064 
1065 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
1066 
1067 			for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
1068 			{
1069 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
1070 				const deUint32	ref		= inputValues[ndx];
1071 
1072 				if (res != ref)
1073 					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
1074 			}
1075 		}
1076 
1077 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1078 		return STOP;
1079 	}
1080 
1081 private:
1082 	const tcu::IVec2	m_localSize;
1083 	const tcu::IVec2	m_imageSize;
1084 };
1085 
1086 class CopySSBOToImageCase : public TestCase
1087 {
1088 public:
CopySSBOToImageCase(Context & context,const char * name,const char * description,const tcu::IVec2 & localSize,const tcu::IVec2 & imageSize)1089 	CopySSBOToImageCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
1090 		: TestCase		(context, name, description)
1091 		, m_localSize	(localSize)
1092 		, m_imageSize	(imageSize)
1093 	{
1094 		DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0);
1095 		DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0);
1096 	}
1097 
iterate(void)1098 	IterateResult iterate (void)
1099 	{
1100 
1101 		std::ostringstream src;
1102 		src << "#version 310 es\n"
1103 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n"
1104 			<< "layout(r32ui, binding = 1) writeonly uniform highp uimage2D u_dstImg;\n"
1105 			<< "buffer Input {\n"
1106 			<< "    uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n"
1107 			<< "} sb_in;\n\n"
1108 			<< "void main (void) {\n"
1109 			<< "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1110 			<< "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1111 			<< "    imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
1112 			<< "}\n";
1113 
1114 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
1115 		const Buffer				inputBuffer		(m_context.getRenderContext());
1116 		const Texture				outputTexture	(m_context.getRenderContext());
1117 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1118 		const tcu::IVec2			workSize		= m_imageSize / m_localSize;
1119 		de::Random					rnd				(0x77238ac2);
1120 		vector<deUint32>			inputValues		(m_imageSize[0]*m_imageSize[1]);
1121 
1122 		m_testCtx.getLog() << program;
1123 		if (!program.isOk())
1124 			TCU_FAIL("Compile failed");
1125 
1126 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage;
1127 
1128 		gl.useProgram(program.getProgram());
1129 
1130 		// Input values
1131 		for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
1132 			*i = rnd.getUint32();
1133 
1134 		// Input buffer setup
1135 		{
1136 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
1137 			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
1138 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
1139 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1140 
1141 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
1142 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
1143 
1144 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
1145 
1146 			{
1147 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
1148 
1149 				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
1150 					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
1151 			}
1152 
1153 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
1154 			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
1155 		}
1156 
1157 		// Output image setup
1158 		gl.bindTexture(GL_TEXTURE_2D, *outputTexture);
1159 		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
1160 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1161 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1162 		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1163 
1164 		// Bind to unit 1
1165 		gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
1166 		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1167 
1168 		// Dispatch compute workload
1169 		gl.dispatchCompute(workSize[0], workSize[1], 1);
1170 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1171 
1172 		// Read back and compare
1173 		{
1174 			Framebuffer			fbo			(m_context.getRenderContext());
1175 			vector<deUint32>	pixels		(inputValues.size()*4);
1176 
1177 			gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo);
1178 			gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0);
1179 			TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
1180 
1181 			// \note In ES3 we have to use GL_RGBA_INTEGER
1182 			gl.readBuffer(GL_COLOR_ATTACHMENT0);
1183 			gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]);
1184 			GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed");
1185 
1186 			for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
1187 			{
1188 				const deUint32	res		= pixels[ndx*4];
1189 				const deUint32	ref		= inputValues[ndx];
1190 
1191 				if (res != ref)
1192 					throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(ndx));
1193 			}
1194 		}
1195 
1196 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1197 		return STOP;
1198 	}
1199 
1200 private:
1201 	const tcu::IVec2	m_localSize;
1202 	const tcu::IVec2	m_imageSize;
1203 };
1204 
1205 class ImageAtomicOpCase : public TestCase
1206 {
1207 public:
ImageAtomicOpCase(Context & context,const char * name,const char * description,int localSize,const tcu::IVec2 & imageSize)1208 	ImageAtomicOpCase (Context& context, const char* name, const char* description, int localSize, const tcu::IVec2& imageSize)
1209 		: TestCase		(context, name, description)
1210 		, m_localSize	(localSize)
1211 		, m_imageSize	(imageSize)
1212 	{
1213 	}
1214 
init(void)1215 	void init (void)
1216 	{
1217 		if (!m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
1218 			throw tcu::NotSupportedError("Test requires OES_shader_image_atomic extension");
1219 	}
1220 
iterate(void)1221 	IterateResult iterate (void)
1222 	{
1223 
1224 		std::ostringstream src;
1225 		src << "#version 310 es\n"
1226 			<< "#extension GL_OES_shader_image_atomic : require\n"
1227 			<< "layout (local_size_x = " << m_localSize << ") in;\n"
1228 			<< "layout(r32ui, binding = 1) uniform highp uimage2D u_dstImg;\n"
1229 			<< "buffer Input {\n"
1230 			<< "    uint values[" << (m_imageSize[0]*m_imageSize[1]*m_localSize) << "];\n"
1231 			<< "} sb_in;\n\n"
1232 			<< "void main (void) {\n"
1233 			<< "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1234 			<< "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1235 			<< "\n"
1236 			<< "    if (gl_LocalInvocationIndex == 0u)\n"
1237 			<< "        imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
1238 			<< "    barrier();\n"
1239 			<< "    imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
1240 			<< "}\n";
1241 
1242 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
1243 		const Buffer				inputBuffer		(m_context.getRenderContext());
1244 		const Texture				outputTexture	(m_context.getRenderContext());
1245 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1246 		de::Random					rnd				(0x77238ac2);
1247 		vector<deUint32>			inputValues		(m_imageSize[0]*m_imageSize[1]*m_localSize);
1248 
1249 		m_testCtx.getLog() << program;
1250 		if (!program.isOk())
1251 			TCU_FAIL("Compile failed");
1252 
1253 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_imageSize << TestLog::EndMessage;
1254 
1255 		gl.useProgram(program.getProgram());
1256 
1257 		// Input values
1258 		for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
1259 			*i = rnd.getUint32();
1260 
1261 		// Input buffer setup
1262 		{
1263 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
1264 			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
1265 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
1266 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1267 
1268 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
1269 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
1270 
1271 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
1272 
1273 			{
1274 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
1275 
1276 				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
1277 					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
1278 			}
1279 
1280 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
1281 			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
1282 		}
1283 
1284 		// Output image setup
1285 		gl.bindTexture(GL_TEXTURE_2D, *outputTexture);
1286 		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
1287 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1288 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1289 		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1290 
1291 		// Bind to unit 1
1292 		gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
1293 		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1294 
1295 		// Dispatch compute workload
1296 		gl.dispatchCompute(m_imageSize[0], m_imageSize[1], 1);
1297 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1298 
1299 		// Read back and compare
1300 		{
1301 			Framebuffer			fbo			(m_context.getRenderContext());
1302 			vector<deUint32>	pixels		(m_imageSize[0]*m_imageSize[1]*4);
1303 
1304 			gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo);
1305 			gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0);
1306 			TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
1307 
1308 			// \note In ES3 we have to use GL_RGBA_INTEGER
1309 			gl.readBuffer(GL_COLOR_ATTACHMENT0);
1310 			gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]);
1311 			GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed");
1312 
1313 			for (int pixelNdx = 0; pixelNdx < (int)inputValues.size()/m_localSize; pixelNdx++)
1314 			{
1315 				const deUint32	res		= pixels[pixelNdx*4];
1316 				deUint32		ref		= 0;
1317 
1318 				for (int offs = 0; offs < m_localSize; offs++)
1319 					ref += inputValues[pixelNdx*m_localSize + offs];
1320 
1321 				if (res != ref)
1322 					throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(pixelNdx));
1323 			}
1324 		}
1325 
1326 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1327 		return STOP;
1328 	}
1329 
1330 private:
1331 	const int			m_localSize;
1332 	const tcu::IVec2	m_imageSize;
1333 };
1334 
1335 class ImageBarrierCase : public TestCase
1336 {
1337 public:
ImageBarrierCase(Context & context,const char * name,const char * description,const tcu::IVec2 & workSize)1338 	ImageBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec2& workSize)
1339 		: TestCase		(context, name, description)
1340 		, m_workSize	(workSize)
1341 	{
1342 	}
1343 
iterate(void)1344 	IterateResult iterate (void)
1345 	{
1346 		const ShaderProgram program0(m_context.getRenderContext(), ProgramSources() <<
1347 			ComputeSource("#version 310 es\n"
1348 						  "layout (local_size_x = 1) in;\n"
1349 						  "uniform uint u_baseVal;\n"
1350 						  "layout(r32ui, binding = 2) writeonly uniform highp uimage2D u_img;\n"
1351 						  "void main (void) {\n"
1352 						  "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1353 						  "    imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset+u_baseVal, 0, 0, 0));\n"
1354 						  "}\n"));
1355 		const ShaderProgram program1(m_context.getRenderContext(), ProgramSources() <<
1356 			ComputeSource("#version 310 es\n"
1357 						  "layout (local_size_x = 1) in;\n"
1358 						  "layout(r32ui, binding = 2) readonly uniform highp uimage2D u_img;\n"
1359 						  "layout(binding = 0) buffer Output {\n"
1360 						  "    coherent uint sum;\n"
1361 						  "};\n"
1362 						  "void main (void) {\n"
1363 						  "    uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
1364 						  "    atomicAdd(sum, value);\n"
1365 						  "}\n"));
1366 
1367 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
1368 		const Texture				tempTexture		(m_context.getRenderContext());
1369 		const Buffer				outputBuffer	(m_context.getRenderContext());
1370 		const deUint32				baseValue		= 127;
1371 
1372 		m_testCtx.getLog() << program0 << program1;
1373 		if (!program0.isOk() || !program1.isOk())
1374 			TCU_FAIL("Compile failed");
1375 
1376 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
1377 
1378 		// Temp texture setup
1379 		gl.bindTexture(GL_TEXTURE_2D, *tempTexture);
1380 		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize[0], m_workSize[1]);
1381 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1382 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1383 		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1384 
1385 		// Bind to unit 2
1386 		gl.bindImageTexture(2, *tempTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
1387 		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1388 
1389 		// Output buffer setup
1390 		{
1391 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1392 			const int			blockSize		= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1393 
1394 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1395 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1396 
1397 			{
1398 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT);
1399 				deMemset(bufMap.getPtr(), 0, blockSize);
1400 			}
1401 
1402 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1403 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1404 		}
1405 
1406 		// Dispatch compute workload
1407 		gl.useProgram(program0.getProgram());
1408 		gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue);
1409 		gl.dispatchCompute(m_workSize[0], m_workSize[1], 1);
1410 		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1411 		gl.useProgram(program1.getProgram());
1412 		gl.dispatchCompute(m_workSize[0], m_workSize[1], 1);
1413 		GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands");
1414 
1415 		// Read back and compare
1416 		{
1417 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1418 			const int					blockSize	= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1419 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum");
1420 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1421 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1422 
1423 			const deUint32				res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset));
1424 			deUint32					ref			= 0;
1425 
1426 			for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]; ndx++)
1427 				ref += baseValue + (deUint32)ndx;
1428 
1429 			if (res != ref)
1430 			{
1431 				m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage;
1432 				throw tcu::TestError("Comparison failed");
1433 			}
1434 		}
1435 
1436 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1437 		return STOP;
1438 	}
1439 
1440 private:
1441 	const tcu::IVec2	m_workSize;
1442 };
1443 
1444 class AtomicCounterCase : public TestCase
1445 {
1446 public:
AtomicCounterCase(Context & context,const char * name,const char * description,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)1447 	AtomicCounterCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
1448 		: TestCase		(context, name, description)
1449 		, m_localSize	(localSize)
1450 		, m_workSize	(workSize)
1451 	{
1452 	}
1453 
iterate(void)1454 	IterateResult iterate (void)
1455 	{
1456 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
1457 		const Buffer				outputBuffer	(m_context.getRenderContext());
1458 		const Buffer				counterBuffer	(m_context.getRenderContext());
1459 		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
1460 		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
1461 		const int					numValues		= workGroupSize*workGroupCount;
1462 
1463 		std::ostringstream src;
1464 		src << "#version 310 es\n"
1465 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
1466 			<< "layout(binding = 0) buffer Output {\n"
1467 			<< "    uint values[" << numValues << "];\n"
1468 			<< "} sb_out;\n\n"
1469 			<< "layout(binding = 0, offset = 0) uniform atomic_uint u_count;\n\n"
1470 			<< "void main (void) {\n"
1471 			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
1472 			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1473 			<< "    uint globalOffs = localSize*globalNdx;\n"
1474 			<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
1475 			<< "\n"
1476 			<< "    uint oldVal = atomicCounterIncrement(u_count);\n"
1477 			<< "    sb_out.values[globalOffs+localOffs] = oldVal;\n"
1478 			<< "}\n";
1479 
1480 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
1481 
1482 		m_testCtx.getLog() << program;
1483 		if (!program.isOk())
1484 			TCU_FAIL("Compile failed");
1485 
1486 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
1487 
1488 		gl.useProgram(program.getProgram());
1489 
1490 		// Atomic counter buffer setup
1491 		{
1492 			const deUint32	uniformIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count");
1493 			const deUint32	bufferIndex		= getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX);
1494 			const deUint32	bufferSize		= getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE);
1495 
1496 			gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, *counterBuffer);
1497 			gl.bufferData(GL_ATOMIC_COUNTER_BUFFER, bufferSize, DE_NULL, GL_STREAM_READ);
1498 
1499 			{
1500 				const BufferMemMap memMap(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_WRITE_BIT);
1501 				deMemset(memMap.getPtr(), 0, (int)bufferSize);
1502 			}
1503 
1504 			gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, *counterBuffer);
1505 			GLU_EXPECT_NO_ERROR(gl.getError(), "Atomic counter buffer setup failed");
1506 		}
1507 
1508 		// Output buffer setup
1509 		{
1510 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1511 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1512 
1513 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1514 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1515 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1516 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1517 		}
1518 
1519 		// Dispatch compute workload
1520 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
1521 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1522 
1523 		// Read back and compare atomic counter
1524 		{
1525 			const deUint32		uniformIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count");
1526 			const deUint32		uniformOffset	= getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_OFFSET);
1527 			const deUint32		bufferIndex		= getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX);
1528 			const deUint32		bufferSize		= getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE);
1529 			const BufferMemMap	bufMap			(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_READ_BIT);
1530 
1531 			const deUint32		resVal			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + uniformOffset));
1532 
1533 			if (resVal != (deUint32)numValues)
1534 				throw tcu::TestError("Invalid atomic counter value");
1535 		}
1536 
1537 		// Read back and compare SSBO
1538 		{
1539 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1540 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1541 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
1542 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1543 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1544 			deUint32					valSum		= 0;
1545 			deUint32					refSum		= 0;
1546 
1547 			for (int valNdx = 0; valNdx < numValues; valNdx++)
1548 			{
1549 				const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*valNdx));
1550 
1551 				valSum += res;
1552 				refSum += (deUint32)valNdx;
1553 
1554 				if (!de::inBounds<deUint32>(res, 0, (deUint32)numValues))
1555 					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(valNdx) + "]");
1556 			}
1557 
1558 			if (valSum != refSum)
1559 				throw tcu::TestError("Total sum of values in Output.values doesn't match");
1560 		}
1561 
1562 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1563 		return STOP;
1564 	}
1565 
1566 private:
1567 	const tcu::IVec3	m_localSize;
1568 	const tcu::IVec3	m_workSize;
1569 };
1570 
1571 } // anonymous
1572 
BasicComputeShaderTests(Context & context)1573 BasicComputeShaderTests::BasicComputeShaderTests (Context& context)
1574 	: TestCaseGroup(context, "basic", "Basic Compute Shader Tests")
1575 {
1576 }
1577 
~BasicComputeShaderTests(void)1578 BasicComputeShaderTests::~BasicComputeShaderTests (void)
1579 {
1580 }
1581 
init(void)1582 void BasicComputeShaderTests::init (void)
1583 {
1584 	addChild(new EmptyComputeShaderCase(m_context));
1585 
1586 	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_single_invocation",			"Copy from UBO to SSBO, inverting bits",	256,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1587 	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_single_group",					"Copy from UBO to SSBO, inverting bits",	1024,	tcu::IVec3(2,1,4),	tcu::IVec3(1,1,1)));
1588 	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_multiple_invocations",			"Copy from UBO to SSBO, inverting bits",	1024,	tcu::IVec3(1,1,1),	tcu::IVec3(2,4,1)));
1589 	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_multiple_groups",				"Copy from UBO to SSBO, inverting bits",	1024,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1590 
1591 	addChild(new CopyInvertSSBOCase		(m_context, "copy_ssbo_single_invocation",				"Copy between SSBOs, inverting bits",	256,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1592 	addChild(new CopyInvertSSBOCase		(m_context, "copy_ssbo_multiple_invocations",			"Copy between SSBOs, inverting bits",	1024,	tcu::IVec3(1,1,1),	tcu::IVec3(2,4,1)));
1593 	addChild(new CopyInvertSSBOCase		(m_context, "copy_ssbo_multiple_groups",				"Copy between SSBOs, inverting bits",	1024,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1594 
1595 	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_rw_single_invocation",				"Read and write same SSBO",				256,	true,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1596 	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_rw_multiple_groups",					"Read and write same SSBO",				1024,	true,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1597 
1598 	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_unsized_arr_single_invocation",		"Read and write same SSBO",				256,	false,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1599 	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_unsized_arr_multiple_groups",			"Read and write same SSBO",				1024,	false,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1600 
1601 	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_single_invocation",		"Write to multiple SSBOs",				256,	true,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1602 	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_multiple_groups",		"Write to multiple SSBOs",				1024,	true,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1603 
1604 	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_single_invocation",	"Write to multiple SSBOs",			256,	false,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1605 	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_multiple_groups",	"Write to multiple SSBOs",			1024,	false,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1606 
1607 	addChild(new SSBOLocalBarrierCase	(m_context, "ssbo_local_barrier_single_invocation",		"SSBO local barrier usage",				tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1608 	addChild(new SSBOLocalBarrierCase	(m_context, "ssbo_local_barrier_single_group",			"SSBO local barrier usage",				tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
1609 	addChild(new SSBOLocalBarrierCase	(m_context, "ssbo_local_barrier_multiple_groups",		"SSBO local barrier usage",				tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
1610 
1611 	addChild(new SSBOBarrierCase		(m_context, "ssbo_cmd_barrier_single",					"SSBO memory barrier usage",			tcu::IVec3(1,1,1)));
1612 	addChild(new SSBOBarrierCase		(m_context, "ssbo_cmd_barrier_multiple",				"SSBO memory barrier usage",			tcu::IVec3(11,5,7)));
1613 
1614 	addChild(new BasicSharedVarCase		(m_context, "shared_var_single_invocation",				"Basic shared variable usage",			tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1615 	addChild(new BasicSharedVarCase		(m_context, "shared_var_single_group",					"Basic shared variable usage",			tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
1616 	addChild(new BasicSharedVarCase		(m_context, "shared_var_multiple_invocations",			"Basic shared variable usage",			tcu::IVec3(1,1,1),	tcu::IVec3(2,5,4)));
1617 	addChild(new BasicSharedVarCase		(m_context, "shared_var_multiple_groups",				"Basic shared variable usage",			tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
1618 
1619 	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_single_invocation",		"Atomic operation with shared var",		tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1620 	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_single_group",			"Atomic operation with shared var",		tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
1621 	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_multiple_invocations",	"Atomic operation with shared var",		tcu::IVec3(1,1,1),	tcu::IVec3(2,5,4)));
1622 	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_multiple_groups",			"Atomic operation with shared var",		tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
1623 
1624 	addChild(new CopyImageToSSBOCase	(m_context, "copy_image_to_ssbo_small",					"Image to SSBO copy",					tcu::IVec2(1,1),	tcu::IVec2(64,64)));
1625 	addChild(new CopyImageToSSBOCase	(m_context, "copy_image_to_ssbo_large",					"Image to SSBO copy",					tcu::IVec2(2,4),	tcu::IVec2(512,512)));
1626 
1627 	addChild(new CopySSBOToImageCase	(m_context, "copy_ssbo_to_image_small",					"SSBO to image copy",					tcu::IVec2(1,1),	tcu::IVec2(64,64)));
1628 	addChild(new CopySSBOToImageCase	(m_context, "copy_ssbo_to_image_large",					"SSBO to image copy",					tcu::IVec2(2,4),	tcu::IVec2(512,512)));
1629 
1630 	addChild(new ImageAtomicOpCase		(m_context, "image_atomic_op_local_size_1",				"Atomic operation with image",			1,	tcu::IVec2(64,64)));
1631 	addChild(new ImageAtomicOpCase		(m_context, "image_atomic_op_local_size_8",				"Atomic operation with image",			8,	tcu::IVec2(64,64)));
1632 
1633 	addChild(new ImageBarrierCase		(m_context, "image_barrier_single",						"Image barrier",						tcu::IVec2(1,1)));
1634 	addChild(new ImageBarrierCase		(m_context, "image_barrier_multiple",					"Image barrier",						tcu::IVec2(64,64)));
1635 
1636 	addChild(new AtomicCounterCase		(m_context, "atomic_counter_single_invocation",			"Basic atomic counter test",			tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1637 	addChild(new AtomicCounterCase		(m_context, "atomic_counter_single_group",				"Basic atomic counter test",			tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
1638 	addChild(new AtomicCounterCase		(m_context, "atomic_counter_multiple_invocations",		"Basic atomic counter test",			tcu::IVec3(1,1,1),	tcu::IVec3(2,5,4)));
1639 	addChild(new AtomicCounterCase		(m_context, "atomic_counter_multiple_groups",			"Basic atomic counter test",			tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
1640 }
1641 
1642 } // Functional
1643 } // gles31
1644 } // deqp
1645