1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.1 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Basic Compute Shader Tests.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es31fBasicComputeShaderTests.hpp"
25 #include "gluShaderProgram.hpp"
26 #include "gluObjectWrapper.hpp"
27 #include "gluRenderContext.hpp"
28 #include "gluProgramInterfaceQuery.hpp"
29 #include "gluContextInfo.hpp"
30 #include "glwFunctions.hpp"
31 #include "glwEnums.hpp"
32 #include "tcuTestLog.hpp"
33 #include "deRandom.hpp"
34 #include "deStringUtil.hpp"
35 #include "deMemory.h"
36 
37 namespace deqp
38 {
39 namespace gles31
40 {
41 namespace Functional
42 {
43 
44 using std::string;
45 using std::vector;
46 using tcu::TestLog;
47 using namespace glu;
48 
49 //! Utility for mapping buffers.
50 class BufferMemMap
51 {
52 public:
BufferMemMap(const glw::Functions & gl,deUint32 target,int offset,int size,deUint32 access)53 	BufferMemMap (const glw::Functions& gl, deUint32 target, int offset, int size, deUint32 access)
54 		: m_gl		(gl)
55 		, m_target	(target)
56 		, m_ptr		(DE_NULL)
57 	{
58 		m_ptr = gl.mapBufferRange(target, offset, size, access);
59 		GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
60 		TCU_CHECK(m_ptr);
61 	}
62 
~BufferMemMap(void)63 	~BufferMemMap (void)
64 	{
65 		m_gl.unmapBuffer(m_target);
66 	}
67 
getPtr(void) const68 	void*	getPtr		(void) const { return m_ptr; }
operator *(void) const69 	void*	operator*	(void) const { return m_ptr; }
70 
71 private:
72 							BufferMemMap			(const BufferMemMap& other);
73 	BufferMemMap&			operator=				(const BufferMemMap& other);
74 
75 	const glw::Functions&	m_gl;
76 	const deUint32			m_target;
77 	void*					m_ptr;
78 };
79 
80 namespace
81 {
82 
83 class EmptyComputeShaderCase : public TestCase
84 {
85 public:
EmptyComputeShaderCase(Context & context)86 	EmptyComputeShaderCase (Context& context)
87 		: TestCase(context, "empty", "Empty shader")
88 	{
89 	}
90 
iterate(void)91 	IterateResult iterate (void)
92 	{
93 		const GLSLVersion	glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
94 		std::ostringstream	src;
95 
96 		src << getGLSLVersionDeclaration(glslVersion) << "\n"
97 			<< "layout (local_size_x = 1) in;\n"
98 			   "void main (void) {}\n";
99 
100 		const ShaderProgram program(m_context.getRenderContext(),
101 			ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
102 
103 		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
104 
105 		m_testCtx.getLog() << program;
106 		if (!program.isOk())
107 			TCU_FAIL("Compile failed");
108 
109 		gl.useProgram(program.getProgram());
110 		gl.dispatchCompute(1, 1, 1);
111 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
112 
113 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
114 		return STOP;
115 	}
116 };
117 
118 class UBOToSSBOInvertCase : public TestCase
119 {
120 public:
UBOToSSBOInvertCase(Context & context,const char * name,const char * description,int numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)121 	UBOToSSBOInvertCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
122 		: TestCase		(context, name, description)
123 		, m_numValues	(numValues)
124 		, m_localSize	(localSize)
125 		, m_workSize	(workSize)
126 	{
127 		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
128 	}
129 
iterate(void)130 	IterateResult iterate (void)
131 	{
132 		const GLSLVersion	glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
133 		std::ostringstream	src;
134 
135 		src << getGLSLVersionDeclaration(glslVersion) << "\n"
136 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
137 			<< "uniform Input {\n"
138 			<< "    uint values[" << m_numValues << "];\n"
139 			<< "} ub_in;\n"
140 			<< "layout(binding = 1) buffer Output {\n"
141 			<< "    uint values[" << m_numValues << "];\n"
142 			<< "} sb_out;\n"
143 			<< "void main (void) {\n"
144 			<< "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
145 			<< "    uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
146 			<< "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
147 			<< "    uint offset          = numValuesPerInv*groupNdx;\n"
148 			<< "\n"
149 			<< "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
150 			<< "        sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
151 			<< "}\n";
152 
153 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
154 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
155 		const Buffer				inputBuffer		(m_context.getRenderContext());
156 		const Buffer				outputBuffer	(m_context.getRenderContext());
157 		std::vector<deUint32>		inputValues		(m_numValues);
158 
159 		// Compute input values.
160 		{
161 			de::Random rnd(0x111223f);
162 			for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
163 				inputValues[ndx] = rnd.getUint32();
164 		}
165 
166 		m_testCtx.getLog() << program;
167 		if (!program.isOk())
168 			TCU_FAIL("Compile failed");
169 
170 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
171 
172 		gl.useProgram(program.getProgram());
173 
174 		// Input buffer setup
175 		{
176 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM_BLOCK, "Input");
177 			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_UNIFORM_BLOCK, blockIndex);
178 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "Input.values");
179 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_UNIFORM, valueIndex);
180 
181 			gl.bindBuffer(GL_UNIFORM_BUFFER, *inputBuffer);
182 			gl.bufferData(GL_UNIFORM_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
183 
184 			{
185 				const BufferMemMap bufMap(gl, GL_UNIFORM_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
186 
187 				for (deUint32 ndx = 0; ndx < de::min(valueInfo.arraySize, (deUint32)inputValues.size()); ndx++)
188 					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
189 			}
190 
191 			gl.uniformBlockBinding(program.getProgram(), blockIndex, 0);
192 			gl.bindBufferBase(GL_UNIFORM_BUFFER, 0, *inputBuffer);
193 			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
194 		}
195 
196 		// Output buffer setup
197 		{
198 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
199 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
200 
201 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
202 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
203 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer);
204 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
205 		}
206 
207 		// Dispatch compute workload
208 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
209 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
210 
211 		// Read back and compare
212 		{
213 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
214 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
215 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
216 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
217 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
218 
219 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
220 			for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
221 			{
222 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
223 				const deUint32	ref		= ~inputValues[ndx];
224 
225 				if (res != ref)
226 					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
227 			}
228 		}
229 
230 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
231 		return STOP;
232 	}
233 
234 private:
235 	const int			m_numValues;
236 	const tcu::IVec3	m_localSize;
237 	const tcu::IVec3	m_workSize;
238 };
239 
240 class CopyInvertSSBOCase : public TestCase
241 {
242 public:
CopyInvertSSBOCase(Context & context,const char * name,const char * description,int numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)243 	CopyInvertSSBOCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
244 		: TestCase		(context, name, description)
245 		, m_numValues	(numValues)
246 		, m_localSize	(localSize)
247 		, m_workSize	(workSize)
248 	{
249 		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
250 	}
251 
iterate(void)252 	IterateResult iterate (void)
253 	{
254 		const GLSLVersion	glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
255 		std::ostringstream	src;
256 
257 		src << getGLSLVersionDeclaration(glslVersion) << "\n"
258 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
259 			<< "layout(binding = 0) buffer Input {\n"
260 			<< "    uint values[" << m_numValues << "];\n"
261 			<< "} sb_in;\n"
262 			<< "layout (binding = 1) buffer Output {\n"
263 			<< "    uint values[" << m_numValues << "];\n"
264 			<< "} sb_out;\n"
265 			<< "void main (void) {\n"
266 			<< "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
267 			<< "    uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
268 			<< "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
269 			<< "    uint offset          = numValuesPerInv*groupNdx;\n"
270 			<< "\n"
271 			<< "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
272 			<< "        sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
273 			<< "}\n";
274 
275 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
276 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
277 		const Buffer				inputBuffer		(m_context.getRenderContext());
278 		const Buffer				outputBuffer	(m_context.getRenderContext());
279 		std::vector<deUint32>		inputValues		(m_numValues);
280 
281 		// Compute input values.
282 		{
283 			de::Random rnd(0x124fef);
284 			for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
285 				inputValues[ndx] = rnd.getUint32();
286 		}
287 
288 		m_testCtx.getLog() << program;
289 		if (!program.isOk())
290 			TCU_FAIL("Compile failed");
291 
292 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
293 
294 		gl.useProgram(program.getProgram());
295 
296 		// Input buffer setup
297 		{
298 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
299 			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
300 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
301 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
302 
303 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
304 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
305 
306 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
307 
308 			{
309 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
310 
311 				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
312 					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
313 			}
314 
315 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
316 			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
317 		}
318 
319 		// Output buffer setup
320 		{
321 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
322 			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
323 
324 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
325 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, DE_NULL, GL_STREAM_READ);
326 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *outputBuffer);
327 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
328 		}
329 
330 		// Dispatch compute workload
331 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
332 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
333 
334 		// Read back and compare
335 		{
336 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
337 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
338 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
339 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
340 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
341 
342 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
343 			for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
344 			{
345 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
346 				const deUint32	ref		= ~inputValues[ndx];
347 
348 				if (res != ref)
349 					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
350 			}
351 		}
352 
353 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
354 		return STOP;
355 	}
356 
357 private:
358 	const int			m_numValues;
359 	const tcu::IVec3	m_localSize;
360 	const tcu::IVec3	m_workSize;
361 };
362 
363 class InvertSSBOInPlaceCase : public TestCase
364 {
365 public:
InvertSSBOInPlaceCase(Context & context,const char * name,const char * description,int numValues,bool isSized,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)366 	InvertSSBOInPlaceCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
367 		: TestCase		(context, name, description)
368 		, m_numValues	(numValues)
369 		, m_isSized		(isSized)
370 		, m_localSize	(localSize)
371 		, m_workSize	(workSize)
372 	{
373 		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
374 	}
375 
iterate(void)376 	IterateResult iterate (void)
377 	{
378 		const GLSLVersion	glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
379 		std::ostringstream	src;
380 
381 		src << getGLSLVersionDeclaration(glslVersion) << "\n"
382 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
383 			<< "layout(binding = 0) buffer InOut {\n"
384 			<< "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
385 			<< "} sb_inout;\n"
386 			<< "void main (void) {\n"
387 			<< "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
388 			<< "    uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
389 			<< "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
390 			<< "    uint offset          = numValuesPerInv*groupNdx;\n"
391 			<< "\n"
392 			<< "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
393 			<< "        sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
394 			<< "}\n";
395 
396 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
397 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
398 
399 		m_testCtx.getLog() << program;
400 		if (!program.isOk())
401 			TCU_FAIL("Compile failed");
402 
403 		const Buffer				outputBuffer	(m_context.getRenderContext());
404 		const deUint32				valueIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "InOut.values");
405 		const InterfaceVariableInfo	valueInfo		= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
406 		const deUint32				blockSize		= valueInfo.arrayStride*(deUint32)m_numValues;
407 		std::vector<deUint32>		inputValues		(m_numValues);
408 
409 		// Compute input values.
410 		{
411 			de::Random rnd(0x82ce7f);
412 			for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
413 				inputValues[ndx] = rnd.getUint32();
414 		}
415 
416 		TCU_CHECK(valueInfo.arraySize == (deUint32)(m_isSized ? m_numValues : 0));
417 
418 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
419 
420 		gl.useProgram(program.getProgram());
421 
422 		// Output buffer setup
423 		{
424 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
425 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_DRAW);
426 
427 			{
428 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockSize, GL_MAP_WRITE_BIT);
429 
430 				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
431 					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
432 			}
433 
434 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
435 			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
436 		}
437 
438 		// Dispatch compute workload
439 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
440 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
441 
442 		// Read back and compare
443 		{
444 			const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
445 
446 			for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
447 			{
448 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
449 				const deUint32	ref		= ~inputValues[ndx];
450 
451 				if (res != ref)
452 					throw tcu::TestError(string("Comparison failed for InOut.values[") + de::toString(ndx) + "]");
453 			}
454 		}
455 
456 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
457 		return STOP;
458 	}
459 
460 private:
461 	const int			m_numValues;
462 	const bool			m_isSized;
463 	const tcu::IVec3	m_localSize;
464 	const tcu::IVec3	m_workSize;
465 };
466 
467 class WriteToMultipleSSBOCase : public TestCase
468 {
469 public:
WriteToMultipleSSBOCase(Context & context,const char * name,const char * description,int numValues,bool isSized,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)470 	WriteToMultipleSSBOCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
471 		: TestCase		(context, name, description)
472 		, m_numValues	(numValues)
473 		, m_isSized		(isSized)
474 		, m_localSize	(localSize)
475 		, m_workSize	(workSize)
476 	{
477 		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
478 	}
479 
iterate(void)480 	IterateResult iterate (void)
481 	{
482 		const GLSLVersion	glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
483 		std::ostringstream	src;
484 
485 		src << getGLSLVersionDeclaration(glslVersion) << "\n"
486 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
487 			<< "layout(binding = 0) buffer Out0 {\n"
488 			<< "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
489 			<< "} sb_out0;\n"
490 			<< "layout(binding = 1) buffer Out1 {\n"
491 			<< "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
492 			<< "} sb_out1;\n"
493 			<< "void main (void) {\n"
494 			<< "    uvec3 size      = gl_NumWorkGroups * gl_WorkGroupSize;\n"
495 			<< "    uint groupNdx   = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
496 			<< "\n"
497 			<< "    {\n"
498 			<< "        uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
499 			<< "        uint offset          = numValuesPerInv*groupNdx;\n"
500 			<< "\n"
501 			<< "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
502 			<< "            sb_out0.values[offset + ndx] = offset + ndx;\n"
503 			<< "    }\n"
504 			<< "    {\n"
505 			<< "        uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
506 			<< "        uint offset          = numValuesPerInv*groupNdx;\n"
507 			<< "\n"
508 			<< "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
509 			<< "            sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
510 			<< "    }\n"
511 			<< "}\n";
512 
513 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
514 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
515 
516 		m_testCtx.getLog() << program;
517 		if (!program.isOk())
518 			TCU_FAIL("Compile failed");
519 
520 		const Buffer				outputBuffer0	(m_context.getRenderContext());
521 		const deUint32				value0Index		= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out0.values");
522 		const InterfaceVariableInfo	value0Info		= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value0Index);
523 		const deUint32				block0Size		= value0Info.arrayStride*(deUint32)m_numValues;
524 
525 		const Buffer				outputBuffer1	(m_context.getRenderContext());
526 		const deUint32				value1Index		= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out1.values");
527 		const InterfaceVariableInfo	value1Info		= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value1Index);
528 		const deUint32				block1Size		= value1Info.arrayStride*(deUint32)m_numValues;
529 
530 		TCU_CHECK(value0Info.arraySize == (deUint32)(m_isSized ? m_numValues : 0));
531 		TCU_CHECK(value1Info.arraySize == (deUint32)(m_isSized ? m_numValues : 0));
532 
533 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
534 
535 		gl.useProgram(program.getProgram());
536 
537 		// Output buffer setup
538 		{
539 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0);
540 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, block0Size, DE_NULL, GL_STREAM_DRAW);
541 
542 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer0);
543 			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
544 		}
545 		{
546 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1);
547 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, block1Size, DE_NULL, GL_STREAM_DRAW);
548 
549 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer1);
550 			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
551 		}
552 
553 		// Dispatch compute workload
554 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
555 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
556 
557 		// Read back and compare
558 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0);
559 		{
560 			const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block0Size, GL_MAP_READ_BIT);
561 
562 			for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++)
563 			{
564 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + value0Info.offset + value0Info.arrayStride*ndx));
565 				const deUint32	ref		= ndx;
566 
567 				if (res != ref)
568 					throw tcu::TestError(string("Comparison failed for Out0.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref));
569 			}
570 		}
571 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1);
572 		{
573 			const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block1Size, GL_MAP_READ_BIT);
574 
575 			for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++)
576 			{
577 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + value1Info.offset + value1Info.arrayStride*ndx));
578 				const deUint32	ref		= m_numValues - ndx;
579 
580 				if (res != ref)
581 					throw tcu::TestError(string("Comparison failed for Out1.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref));
582 			}
583 		}
584 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
585 		return STOP;
586 	}
587 
588 private:
589 	const int			m_numValues;
590 	const bool			m_isSized;
591 	const tcu::IVec3	m_localSize;
592 	const tcu::IVec3	m_workSize;
593 };
594 
595 class SSBOLocalBarrierCase : public TestCase
596 {
597 public:
SSBOLocalBarrierCase(Context & context,const char * name,const char * description,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)598 	SSBOLocalBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
599 		: TestCase		(context, name, description)
600 		, m_localSize	(localSize)
601 		, m_workSize	(workSize)
602 	{
603 	}
604 
iterate(void)605 	IterateResult iterate (void)
606 	{
607 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
608 		const Buffer				outputBuffer	(m_context.getRenderContext());
609 		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
610 		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
611 		const int					numValues		= workGroupSize*workGroupCount;
612 
613 		const GLSLVersion			glslVersion		= glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
614 		std::ostringstream			src;
615 
616 		src << getGLSLVersionDeclaration(glslVersion) << "\n"
617 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
618 			<< "layout(binding = 0) buffer Output {\n"
619 			<< "    coherent uint values[" << numValues << "];\n"
620 			<< "} sb_out;\n\n"
621 			<< "shared uint offsets[" << workGroupSize << "];\n\n"
622 			<< "void main (void) {\n"
623 			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
624 			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
625 			<< "    uint globalOffs = localSize*globalNdx;\n"
626 			<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
627 			<< "\n"
628 			<< "    sb_out.values[globalOffs + localOffs] = globalOffs;\n"
629 			<< "    memoryBarrierBuffer();\n"
630 			<< "    barrier();\n"
631 			<< "    sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n"
632 			<< "    memoryBarrierBuffer();\n"
633 			<< "    barrier();\n"
634 			<< "    sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
635 			<< "}\n";
636 
637 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
638 
639 		m_testCtx.getLog() << program;
640 		if (!program.isOk())
641 			TCU_FAIL("Compile failed");
642 
643 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
644 
645 		gl.useProgram(program.getProgram());
646 
647 		// Output buffer setup
648 		{
649 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
650 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
651 
652 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
653 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
654 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
655 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
656 		}
657 
658 		// Dispatch compute workload
659 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
660 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
661 
662 		// Read back and compare
663 		{
664 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
665 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
666 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
667 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
668 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
669 
670 			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
671 			{
672 				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
673 				{
674 					const int		globalOffs	= groupNdx*workGroupSize;
675 					const deUint32	res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
676 					const int		offs0		= localOffs-1 < 0 ? ((localOffs+workGroupSize-1)%workGroupSize) : ((localOffs-1)%workGroupSize);
677 					const int		offs1		= localOffs-2 < 0 ? ((localOffs+workGroupSize-2)%workGroupSize) : ((localOffs-2)%workGroupSize);
678 					const deUint32	ref			= (deUint32)(globalOffs + offs0 + offs1);
679 
680 					if (res != ref)
681 						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
682 				}
683 			}
684 		}
685 
686 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
687 		return STOP;
688 	}
689 
690 private:
691 	const tcu::IVec3	m_localSize;
692 	const tcu::IVec3	m_workSize;
693 };
694 
695 class SSBOBarrierCase : public TestCase
696 {
697 public:
SSBOBarrierCase(Context & context,const char * name,const char * description,const tcu::IVec3 & workSize)698 	SSBOBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& workSize)
699 		: TestCase		(context, name, description)
700 		, m_workSize	(workSize)
701 	{
702 	}
703 
iterate(void)704 	IterateResult iterate (void)
705 	{
706 		const GLSLVersion	glslVersion				= glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
707 		const char* const	glslVersionDeclaration	= getGLSLVersionDeclaration(glslVersion);
708 
709 		std::ostringstream src0;
710 		src0 << glslVersionDeclaration << "\n"
711 			 << "layout (local_size_x = 1) in;\n"
712 						  "uniform uint u_baseVal;\n"
713 						  "layout(binding = 1) buffer Output {\n"
714 						  "    uint values[];\n"
715 						  "};\n"
716 						  "void main (void) {\n"
717 						  "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
718 						  "    values[offset] = u_baseVal+offset;\n"
719 				"}\n";
720 
721 		std::ostringstream src1;
722 		src1 << glslVersionDeclaration << "\n"
723 			 << "layout (local_size_x = 1) in;\n"
724 						  "uniform uint u_baseVal;\n"
725 						  "layout(binding = 1) buffer Input {\n"
726 						  "    uint values[];\n"
727 						  "};\n"
728 						  "layout(binding = 0) buffer Output {\n"
729 						  "    coherent uint sum;\n"
730 						  "};\n"
731 						  "void main (void) {\n"
732 						  "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
733 						  "    uint value  = values[offset];\n"
734 						  "    atomicAdd(sum, value);\n"
735 				"}\n";
736 
737 		const ShaderProgram			program0		(m_context.getRenderContext(), ProgramSources() << ComputeSource(src0.str()));
738 		const ShaderProgram			program1		(m_context.getRenderContext(), ProgramSources() << ComputeSource(src1.str()));
739 
740 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
741 		const Buffer				tempBuffer		(m_context.getRenderContext());
742 		const Buffer				outputBuffer	(m_context.getRenderContext());
743 		const deUint32				baseValue		= 127;
744 
745 		m_testCtx.getLog() << program0 << program1;
746 		if (!program0.isOk() || !program1.isOk())
747 			TCU_FAIL("Compile failed");
748 
749 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
750 
751 		// Temp buffer setup
752 		{
753 			const deUint32				valueIndex		= gl.getProgramResourceIndex(program0.getProgram(), GL_BUFFER_VARIABLE, "values[0]");
754 			const InterfaceVariableInfo	valueInfo		= getProgramInterfaceVariableInfo(gl, program0.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
755 			const deUint32				bufferSize		= valueInfo.arrayStride*m_workSize[0]*m_workSize[1]*m_workSize[2];
756 
757 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *tempBuffer);
758 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)bufferSize, DE_NULL, GL_STATIC_DRAW);
759 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *tempBuffer);
760 			GLU_EXPECT_NO_ERROR(gl.getError(), "Temp buffer setup failed");
761 		}
762 
763 		// Output buffer setup
764 		{
765 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
766 			const int			blockSize		= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
767 
768 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
769 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
770 
771 			{
772 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT);
773 				deMemset(bufMap.getPtr(), 0, blockSize);
774 			}
775 
776 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
777 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
778 		}
779 
780 		// Dispatch compute workload
781 		gl.useProgram(program0.getProgram());
782 		gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue);
783 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
784 		gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
785 		gl.useProgram(program1.getProgram());
786 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
787 		GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands");
788 
789 		// Read back and compare
790 		{
791 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
792 			const int					blockSize	= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
793 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum");
794 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
795 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
796 
797 			const deUint32				res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset));
798 			deUint32					ref			= 0;
799 
800 			for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]*m_workSize[2]; ndx++)
801 				ref += baseValue + (deUint32)ndx;
802 
803 			if (res != ref)
804 			{
805 				m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage;
806 				throw tcu::TestError("Comparison failed");
807 			}
808 		}
809 
810 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
811 		return STOP;
812 	}
813 
814 private:
815 	const tcu::IVec3	m_workSize;
816 };
817 
818 class BasicSharedVarCase : public TestCase
819 {
820 public:
BasicSharedVarCase(Context & context,const char * name,const char * description,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)821 	BasicSharedVarCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
822 		: TestCase		(context, name, description)
823 		, m_localSize	(localSize)
824 		, m_workSize	(workSize)
825 	{
826 	}
827 
iterate(void)828 	IterateResult iterate (void)
829 	{
830 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
831 		const Buffer				outputBuffer	(m_context.getRenderContext());
832 		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
833 		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
834 		const int					numValues		= workGroupSize*workGroupCount;
835 
836 		const GLSLVersion			glslVersion		= glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
837 		std::ostringstream			src;
838 
839 		src << getGLSLVersionDeclaration(glslVersion) << "\n"
840 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
841 			<< "layout(binding = 0) buffer Output {\n"
842 			<< "    uint values[" << numValues << "];\n"
843 			<< "} sb_out;\n\n"
844 			<< "shared uint offsets[" << workGroupSize << "];\n\n"
845 			<< "void main (void) {\n"
846 			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
847 			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
848 			<< "    uint globalOffs = localSize*globalNdx;\n"
849 			<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
850 			<< "\n"
851 			<< "    offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
852 			<< "    barrier();\n"
853 			<< "    sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
854 			<< "}\n";
855 
856 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
857 
858 		m_testCtx.getLog() << program;
859 		if (!program.isOk())
860 			TCU_FAIL("Compile failed");
861 
862 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
863 
864 		gl.useProgram(program.getProgram());
865 
866 		// Output buffer setup
867 		{
868 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
869 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
870 
871 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
872 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
873 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
874 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
875 		}
876 
877 		// Dispatch compute workload
878 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
879 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
880 
881 		// Read back and compare
882 		{
883 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
884 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
885 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
886 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
887 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
888 
889 			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
890 			{
891 				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
892 				{
893 					const int		globalOffs	= groupNdx*workGroupSize;
894 					const deUint32	res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
895 					const deUint32	ref			= (deUint32)(globalOffs + (workGroupSize-localOffs-1)*(workGroupSize-localOffs-1));
896 
897 					if (res != ref)
898 						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
899 				}
900 			}
901 		}
902 
903 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
904 		return STOP;
905 	}
906 
907 private:
908 	const tcu::IVec3	m_localSize;
909 	const tcu::IVec3	m_workSize;
910 };
911 
912 class SharedVarAtomicOpCase : public TestCase
913 {
914 public:
SharedVarAtomicOpCase(Context & context,const char * name,const char * description,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)915 	SharedVarAtomicOpCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
916 		: TestCase		(context, name, description)
917 		, m_localSize	(localSize)
918 		, m_workSize	(workSize)
919 	{
920 	}
921 
iterate(void)922 	IterateResult iterate (void)
923 	{
924 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
925 		const Buffer				outputBuffer	(m_context.getRenderContext());
926 		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
927 		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
928 		const int					numValues		= workGroupSize*workGroupCount;
929 
930 		const GLSLVersion			glslVersion		= glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
931 		std::ostringstream			src;
932 
933 		src << getGLSLVersionDeclaration(glslVersion) << "\n"
934 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
935 			<< "layout(binding = 0) buffer Output {\n"
936 			<< "    uint values[" << numValues << "];\n"
937 			<< "} sb_out;\n\n"
938 			<< "shared uint count;\n\n"
939 			<< "void main (void) {\n"
940 			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
941 			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
942 			<< "    uint globalOffs = localSize*globalNdx;\n"
943 			<< "\n"
944 			<< "    count = 0u;\n"
945 			<< "    barrier();\n"
946 			<< "    uint oldVal = atomicAdd(count, 1u);\n"
947 			<< "    sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
948 			<< "}\n";
949 
950 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
951 
952 		m_testCtx.getLog() << program;
953 		if (!program.isOk())
954 			TCU_FAIL("Compile failed");
955 
956 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
957 
958 		gl.useProgram(program.getProgram());
959 
960 		// Output buffer setup
961 		{
962 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
963 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
964 
965 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
966 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
967 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
968 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
969 		}
970 
971 		// Dispatch compute workload
972 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
973 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
974 
975 		// Read back and compare
976 		{
977 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
978 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
979 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
980 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
981 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
982 
983 			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
984 			{
985 				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
986 				{
987 					const int		globalOffs	= groupNdx*workGroupSize;
988 					const deUint32	res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
989 					const deUint32	ref			= (deUint32)(localOffs+1);
990 
991 					if (res != ref)
992 						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
993 				}
994 			}
995 		}
996 
997 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
998 		return STOP;
999 	}
1000 
1001 private:
1002 	const tcu::IVec3	m_localSize;
1003 	const tcu::IVec3	m_workSize;
1004 };
1005 
1006 class CopyImageToSSBOCase : public TestCase
1007 {
1008 public:
CopyImageToSSBOCase(Context & context,const char * name,const char * description,const tcu::IVec2 & localSize,const tcu::IVec2 & imageSize)1009 	CopyImageToSSBOCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
1010 		: TestCase		(context, name, description)
1011 		, m_localSize	(localSize)
1012 		, m_imageSize	(imageSize)
1013 	{
1014 		DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0);
1015 		DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0);
1016 	}
1017 
iterate(void)1018 	IterateResult iterate (void)
1019 	{
1020 		const GLSLVersion			glslVersion		= glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
1021 		std::ostringstream			src;
1022 
1023 		src << getGLSLVersionDeclaration(glslVersion) << "\n"
1024 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n"
1025 			<< "layout(r32ui, binding = 1) readonly uniform highp uimage2D u_srcImg;\n"
1026 			<< "layout(binding = 0) buffer Output {\n"
1027 			<< "    uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n"
1028 			<< "} sb_out;\n\n"
1029 			<< "void main (void) {\n"
1030 			<< "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1031 			<< "    uint value  = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
1032 			<< "    sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
1033 			<< "}\n";
1034 
1035 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
1036 		const Buffer				outputBuffer	(m_context.getRenderContext());
1037 		const Texture				inputTexture	(m_context.getRenderContext());
1038 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1039 		const tcu::IVec2			workSize		= m_imageSize / m_localSize;
1040 		de::Random					rnd				(0xab2c7);
1041 		vector<deUint32>			inputValues		(m_imageSize[0]*m_imageSize[1]);
1042 
1043 		m_testCtx.getLog() << program;
1044 		if (!program.isOk())
1045 			TCU_FAIL("Compile failed");
1046 
1047 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage;
1048 
1049 		gl.useProgram(program.getProgram());
1050 
1051 		// Input values
1052 		for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
1053 			*i = rnd.getUint32();
1054 
1055 		// Input image setup
1056 		gl.bindTexture(GL_TEXTURE_2D, *inputTexture);
1057 		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
1058 		gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_imageSize[0], m_imageSize[1], GL_RED_INTEGER, GL_UNSIGNED_INT, &inputValues[0]);
1059 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1060 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1061 		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1062 
1063 		// Bind to unit 1
1064 		gl.bindImageTexture(1, *inputTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
1065 		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1066 
1067 		// Output buffer setup
1068 		{
1069 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1070 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1071 
1072 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1073 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1074 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1075 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1076 		}
1077 
1078 		// Dispatch compute workload
1079 		gl.dispatchCompute(workSize[0], workSize[1], 1);
1080 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1081 
1082 		// Read back and compare
1083 		{
1084 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1085 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1086 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
1087 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1088 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1089 
1090 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
1091 
1092 			for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
1093 			{
1094 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
1095 				const deUint32	ref		= inputValues[ndx];
1096 
1097 				if (res != ref)
1098 					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
1099 			}
1100 		}
1101 
1102 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1103 		return STOP;
1104 	}
1105 
1106 private:
1107 	const tcu::IVec2	m_localSize;
1108 	const tcu::IVec2	m_imageSize;
1109 };
1110 
1111 class CopySSBOToImageCase : public TestCase
1112 {
1113 public:
CopySSBOToImageCase(Context & context,const char * name,const char * description,const tcu::IVec2 & localSize,const tcu::IVec2 & imageSize)1114 	CopySSBOToImageCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
1115 		: TestCase		(context, name, description)
1116 		, m_localSize	(localSize)
1117 		, m_imageSize	(imageSize)
1118 	{
1119 		DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0);
1120 		DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0);
1121 	}
1122 
iterate(void)1123 	IterateResult iterate (void)
1124 	{
1125 		const GLSLVersion			glslVersion		= glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
1126 		std::ostringstream			src;
1127 
1128 		src << getGLSLVersionDeclaration(glslVersion) << "\n"
1129 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n"
1130 			<< "layout(r32ui, binding = 1) writeonly uniform highp uimage2D u_dstImg;\n"
1131 			<< "buffer Input {\n"
1132 			<< "    uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n"
1133 			<< "} sb_in;\n\n"
1134 			<< "void main (void) {\n"
1135 			<< "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1136 			<< "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1137 			<< "    imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
1138 			<< "}\n";
1139 
1140 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
1141 		const Buffer				inputBuffer		(m_context.getRenderContext());
1142 		const Texture				outputTexture	(m_context.getRenderContext());
1143 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1144 		const tcu::IVec2			workSize		= m_imageSize / m_localSize;
1145 		de::Random					rnd				(0x77238ac2);
1146 		vector<deUint32>			inputValues		(m_imageSize[0]*m_imageSize[1]);
1147 
1148 		m_testCtx.getLog() << program;
1149 		if (!program.isOk())
1150 			TCU_FAIL("Compile failed");
1151 
1152 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage;
1153 
1154 		gl.useProgram(program.getProgram());
1155 
1156 		// Input values
1157 		for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
1158 			*i = rnd.getUint32();
1159 
1160 		// Input buffer setup
1161 		{
1162 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
1163 			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
1164 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
1165 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1166 
1167 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
1168 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
1169 
1170 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
1171 
1172 			{
1173 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
1174 
1175 				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
1176 					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
1177 			}
1178 
1179 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
1180 			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
1181 		}
1182 
1183 		// Output image setup
1184 		gl.bindTexture(GL_TEXTURE_2D, *outputTexture);
1185 		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
1186 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1187 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1188 		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1189 
1190 		// Bind to unit 1
1191 		gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
1192 		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1193 
1194 		// Dispatch compute workload
1195 		gl.dispatchCompute(workSize[0], workSize[1], 1);
1196 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1197 
1198 		// Read back and compare
1199 		{
1200 			Framebuffer			fbo			(m_context.getRenderContext());
1201 			vector<deUint32>	pixels		(inputValues.size()*4);
1202 
1203 			gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo);
1204 			gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0);
1205 			TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
1206 
1207 			// \note In ES3 we have to use GL_RGBA_INTEGER
1208 			gl.readBuffer(GL_COLOR_ATTACHMENT0);
1209 			gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]);
1210 			GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed");
1211 
1212 			for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
1213 			{
1214 				const deUint32	res		= pixels[ndx*4];
1215 				const deUint32	ref		= inputValues[ndx];
1216 
1217 				if (res != ref)
1218 					throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(ndx));
1219 			}
1220 		}
1221 
1222 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1223 		return STOP;
1224 	}
1225 
1226 private:
1227 	const tcu::IVec2	m_localSize;
1228 	const tcu::IVec2	m_imageSize;
1229 };
1230 
1231 class ImageAtomicOpCase : public TestCase
1232 {
1233 public:
ImageAtomicOpCase(Context & context,const char * name,const char * description,int localSize,const tcu::IVec2 & imageSize)1234 	ImageAtomicOpCase (Context& context, const char* name, const char* description, int localSize, const tcu::IVec2& imageSize)
1235 		: TestCase		(context, name, description)
1236 		, m_localSize	(localSize)
1237 		, m_imageSize	(imageSize)
1238 	{
1239 	}
1240 
init(void)1241 	void init (void)
1242 	{
1243 		if (!glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2)))
1244 			if (!m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
1245 				throw tcu::NotSupportedError("Test requires OES_shader_image_atomic extension");
1246 	}
1247 
iterate(void)1248 	IterateResult iterate (void)
1249 	{
1250 		const GLSLVersion			glslVersion		= glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
1251 		const bool					supportsES32	= glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2));
1252 		std::ostringstream			src;
1253 
1254 		src << getGLSLVersionDeclaration(glslVersion) << "\n"
1255 			<< (supportsES32 ? "\n" : "#extension GL_OES_shader_image_atomic : require\n")
1256 			<< "layout (local_size_x = " << m_localSize << ") in;\n"
1257 			<< "layout(r32ui, binding = 1) uniform highp uimage2D u_dstImg;\n"
1258 			<< "buffer Input {\n"
1259 			<< "    uint values[" << (m_imageSize[0]*m_imageSize[1]*m_localSize) << "];\n"
1260 			<< "} sb_in;\n\n"
1261 			<< "void main (void) {\n"
1262 			<< "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1263 			<< "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1264 			<< "\n"
1265 			<< "    if (gl_LocalInvocationIndex == 0u)\n"
1266 			<< "        imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
1267 			<< "    barrier();\n"
1268 			<< "    imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
1269 			<< "}\n";
1270 
1271 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
1272 		const Buffer				inputBuffer		(m_context.getRenderContext());
1273 		const Texture				outputTexture	(m_context.getRenderContext());
1274 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1275 		de::Random					rnd				(0x77238ac2);
1276 		vector<deUint32>			inputValues		(m_imageSize[0]*m_imageSize[1]*m_localSize);
1277 
1278 		m_testCtx.getLog() << program;
1279 		if (!program.isOk())
1280 			TCU_FAIL("Compile failed");
1281 
1282 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_imageSize << TestLog::EndMessage;
1283 
1284 		gl.useProgram(program.getProgram());
1285 
1286 		// Input values
1287 		for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
1288 			*i = rnd.getUint32();
1289 
1290 		// Input buffer setup
1291 		{
1292 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
1293 			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
1294 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
1295 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1296 
1297 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
1298 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
1299 
1300 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
1301 
1302 			{
1303 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
1304 
1305 				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
1306 					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
1307 			}
1308 
1309 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
1310 			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
1311 		}
1312 
1313 		// Output image setup
1314 		gl.bindTexture(GL_TEXTURE_2D, *outputTexture);
1315 		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
1316 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1317 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1318 		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1319 
1320 		// Bind to unit 1
1321 		gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
1322 		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1323 
1324 		// Dispatch compute workload
1325 		gl.dispatchCompute(m_imageSize[0], m_imageSize[1], 1);
1326 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1327 
1328 		// Read back and compare
1329 		{
1330 			Framebuffer			fbo			(m_context.getRenderContext());
1331 			vector<deUint32>	pixels		(m_imageSize[0]*m_imageSize[1]*4);
1332 
1333 			gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo);
1334 			gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0);
1335 			TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
1336 
1337 			// \note In ES3 we have to use GL_RGBA_INTEGER
1338 			gl.readBuffer(GL_COLOR_ATTACHMENT0);
1339 			gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]);
1340 			GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed");
1341 
1342 			for (int pixelNdx = 0; pixelNdx < (int)inputValues.size()/m_localSize; pixelNdx++)
1343 			{
1344 				const deUint32	res		= pixels[pixelNdx*4];
1345 				deUint32		ref		= 0;
1346 
1347 				for (int offs = 0; offs < m_localSize; offs++)
1348 					ref += inputValues[pixelNdx*m_localSize + offs];
1349 
1350 				if (res != ref)
1351 					throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(pixelNdx));
1352 			}
1353 		}
1354 
1355 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1356 		return STOP;
1357 	}
1358 
1359 private:
1360 	const int			m_localSize;
1361 	const tcu::IVec2	m_imageSize;
1362 };
1363 
1364 class ImageBarrierCase : public TestCase
1365 {
1366 public:
ImageBarrierCase(Context & context,const char * name,const char * description,const tcu::IVec2 & workSize)1367 	ImageBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec2& workSize)
1368 		: TestCase		(context, name, description)
1369 		, m_workSize	(workSize)
1370 	{
1371 	}
1372 
iterate(void)1373 	IterateResult iterate (void)
1374 	{
1375 		const GLSLVersion			glslVersion				= glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
1376 		const char* const			glslVersionDeclaration	= getGLSLVersionDeclaration(glslVersion);
1377 
1378 		std::ostringstream src0;
1379 		src0 << glslVersionDeclaration << "\n"
1380 			 << "layout (local_size_x = 1) in;\n"
1381 						  "uniform uint u_baseVal;\n"
1382 						  "layout(r32ui, binding = 2) writeonly uniform highp uimage2D u_img;\n"
1383 						  "void main (void) {\n"
1384 						  "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1385 						  "    imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset+u_baseVal, 0, 0, 0));\n"
1386 				"}\n";
1387 
1388 		std::ostringstream src1;
1389 		src1 << glslVersionDeclaration << "\n"
1390 			 << "layout (local_size_x = 1) in;\n"
1391 						  "layout(r32ui, binding = 2) readonly uniform highp uimage2D u_img;\n"
1392 						  "layout(binding = 0) buffer Output {\n"
1393 						  "    coherent uint sum;\n"
1394 						  "};\n"
1395 						  "void main (void) {\n"
1396 						  "    uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
1397 						  "    atomicAdd(sum, value);\n"
1398 				"}\n";
1399 
1400 		const ShaderProgram			program0		(m_context.getRenderContext(), ProgramSources() << ComputeSource(src0.str()));
1401 		const ShaderProgram			program1		(m_context.getRenderContext(), ProgramSources() << ComputeSource(src1.str()));
1402 
1403 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
1404 		const Texture				tempTexture		(m_context.getRenderContext());
1405 		const Buffer				outputBuffer	(m_context.getRenderContext());
1406 		const deUint32				baseValue		= 127;
1407 
1408 		m_testCtx.getLog() << program0 << program1;
1409 		if (!program0.isOk() || !program1.isOk())
1410 			TCU_FAIL("Compile failed");
1411 
1412 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
1413 
1414 		// Temp texture setup
1415 		gl.bindTexture(GL_TEXTURE_2D, *tempTexture);
1416 		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize[0], m_workSize[1]);
1417 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1418 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1419 		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1420 
1421 		// Bind to unit 2
1422 		gl.bindImageTexture(2, *tempTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
1423 		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1424 
1425 		// Output buffer setup
1426 		{
1427 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1428 			const int			blockSize		= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1429 
1430 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1431 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1432 
1433 			{
1434 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT);
1435 				deMemset(bufMap.getPtr(), 0, blockSize);
1436 			}
1437 
1438 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1439 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1440 		}
1441 
1442 		// Dispatch compute workload
1443 		gl.useProgram(program0.getProgram());
1444 		gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue);
1445 		gl.dispatchCompute(m_workSize[0], m_workSize[1], 1);
1446 		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1447 		gl.useProgram(program1.getProgram());
1448 		gl.dispatchCompute(m_workSize[0], m_workSize[1], 1);
1449 		GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands");
1450 
1451 		// Read back and compare
1452 		{
1453 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1454 			const int					blockSize	= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1455 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum");
1456 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1457 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1458 
1459 			const deUint32				res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset));
1460 			deUint32					ref			= 0;
1461 
1462 			for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]; ndx++)
1463 				ref += baseValue + (deUint32)ndx;
1464 
1465 			if (res != ref)
1466 			{
1467 				m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage;
1468 				throw tcu::TestError("Comparison failed");
1469 			}
1470 		}
1471 
1472 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1473 		return STOP;
1474 	}
1475 
1476 private:
1477 	const tcu::IVec2	m_workSize;
1478 };
1479 
1480 class AtomicCounterCase : public TestCase
1481 {
1482 public:
AtomicCounterCase(Context & context,const char * name,const char * description,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)1483 	AtomicCounterCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
1484 		: TestCase		(context, name, description)
1485 		, m_localSize	(localSize)
1486 		, m_workSize	(workSize)
1487 	{
1488 	}
1489 
iterate(void)1490 	IterateResult iterate (void)
1491 	{
1492 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
1493 		const Buffer				outputBuffer	(m_context.getRenderContext());
1494 		const Buffer				counterBuffer	(m_context.getRenderContext());
1495 		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
1496 		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
1497 		const int					numValues		= workGroupSize*workGroupCount;
1498 
1499 		const GLSLVersion			glslVersion		= glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
1500 		std::ostringstream			src;
1501 
1502 		src << getGLSLVersionDeclaration(glslVersion) << "\n"
1503 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
1504 			<< "layout(binding = 0) buffer Output {\n"
1505 			<< "    uint values[" << numValues << "];\n"
1506 			<< "} sb_out;\n\n"
1507 			<< "layout(binding = 0, offset = 0) uniform atomic_uint u_count;\n\n"
1508 			<< "void main (void) {\n"
1509 			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
1510 			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1511 			<< "    uint globalOffs = localSize*globalNdx;\n"
1512 			<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
1513 			<< "\n"
1514 			<< "    uint oldVal = atomicCounterIncrement(u_count);\n"
1515 			<< "    sb_out.values[globalOffs+localOffs] = oldVal;\n"
1516 			<< "}\n";
1517 
1518 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
1519 
1520 		m_testCtx.getLog() << program;
1521 		if (!program.isOk())
1522 			TCU_FAIL("Compile failed");
1523 
1524 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
1525 
1526 		gl.useProgram(program.getProgram());
1527 
1528 		// Atomic counter buffer setup
1529 		{
1530 			const deUint32	uniformIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count");
1531 			const deUint32	bufferIndex		= getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX);
1532 			const deUint32	bufferSize		= getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE);
1533 
1534 			gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, *counterBuffer);
1535 			gl.bufferData(GL_ATOMIC_COUNTER_BUFFER, bufferSize, DE_NULL, GL_STREAM_READ);
1536 
1537 			{
1538 				const BufferMemMap memMap(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_WRITE_BIT);
1539 				deMemset(memMap.getPtr(), 0, (int)bufferSize);
1540 			}
1541 
1542 			gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, *counterBuffer);
1543 			GLU_EXPECT_NO_ERROR(gl.getError(), "Atomic counter buffer setup failed");
1544 		}
1545 
1546 		// Output buffer setup
1547 		{
1548 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1549 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1550 
1551 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1552 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1553 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1554 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1555 		}
1556 
1557 		// Dispatch compute workload
1558 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
1559 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1560 
1561 		// Read back and compare atomic counter
1562 		{
1563 			const deUint32		uniformIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count");
1564 			const deUint32		uniformOffset	= getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_OFFSET);
1565 			const deUint32		bufferIndex		= getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX);
1566 			const deUint32		bufferSize		= getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE);
1567 			const BufferMemMap	bufMap			(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_READ_BIT);
1568 
1569 			const deUint32		resVal			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + uniformOffset));
1570 
1571 			if (resVal != (deUint32)numValues)
1572 				throw tcu::TestError("Invalid atomic counter value");
1573 		}
1574 
1575 		// Read back and compare SSBO
1576 		{
1577 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1578 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1579 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
1580 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1581 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1582 			deUint32					valSum		= 0;
1583 			deUint32					refSum		= 0;
1584 
1585 			for (int valNdx = 0; valNdx < numValues; valNdx++)
1586 			{
1587 				const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*valNdx));
1588 
1589 				valSum += res;
1590 				refSum += (deUint32)valNdx;
1591 
1592 				if (!de::inBounds<deUint32>(res, 0, (deUint32)numValues))
1593 					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(valNdx) + "]");
1594 			}
1595 
1596 			if (valSum != refSum)
1597 				throw tcu::TestError("Total sum of values in Output.values doesn't match");
1598 		}
1599 
1600 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1601 		return STOP;
1602 	}
1603 
1604 private:
1605 	const tcu::IVec3	m_localSize;
1606 	const tcu::IVec3	m_workSize;
1607 };
1608 
1609 } // anonymous
1610 
BasicComputeShaderTests(Context & context)1611 BasicComputeShaderTests::BasicComputeShaderTests (Context& context)
1612 	: TestCaseGroup(context, "basic", "Basic Compute Shader Tests")
1613 {
1614 }
1615 
~BasicComputeShaderTests(void)1616 BasicComputeShaderTests::~BasicComputeShaderTests (void)
1617 {
1618 }
1619 
init(void)1620 void BasicComputeShaderTests::init (void)
1621 {
1622 	addChild(new EmptyComputeShaderCase(m_context));
1623 
1624 	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_single_invocation",			"Copy from UBO to SSBO, inverting bits",	256,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1625 	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_single_group",					"Copy from UBO to SSBO, inverting bits",	1024,	tcu::IVec3(2,1,4),	tcu::IVec3(1,1,1)));
1626 	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_multiple_invocations",			"Copy from UBO to SSBO, inverting bits",	1024,	tcu::IVec3(1,1,1),	tcu::IVec3(2,4,1)));
1627 	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_multiple_groups",				"Copy from UBO to SSBO, inverting bits",	1024,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1628 
1629 	addChild(new CopyInvertSSBOCase		(m_context, "copy_ssbo_single_invocation",				"Copy between SSBOs, inverting bits",	256,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1630 	addChild(new CopyInvertSSBOCase		(m_context, "copy_ssbo_multiple_invocations",			"Copy between SSBOs, inverting bits",	1024,	tcu::IVec3(1,1,1),	tcu::IVec3(2,4,1)));
1631 	addChild(new CopyInvertSSBOCase		(m_context, "copy_ssbo_multiple_groups",				"Copy between SSBOs, inverting bits",	1024,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1632 
1633 	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_rw_single_invocation",				"Read and write same SSBO",				256,	true,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1634 	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_rw_multiple_groups",					"Read and write same SSBO",				1024,	true,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1635 
1636 	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_unsized_arr_single_invocation",		"Read and write same SSBO",				256,	false,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1637 	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_unsized_arr_multiple_groups",			"Read and write same SSBO",				1024,	false,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1638 
1639 	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_single_invocation",		"Write to multiple SSBOs",				256,	true,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1640 	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_multiple_groups",		"Write to multiple SSBOs",				1024,	true,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1641 
1642 	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_single_invocation",	"Write to multiple SSBOs",			256,	false,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1643 	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_multiple_groups",	"Write to multiple SSBOs",			1024,	false,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1644 
1645 	addChild(new SSBOLocalBarrierCase	(m_context, "ssbo_local_barrier_single_invocation",		"SSBO local barrier usage",				tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1646 	addChild(new SSBOLocalBarrierCase	(m_context, "ssbo_local_barrier_single_group",			"SSBO local barrier usage",				tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
1647 	addChild(new SSBOLocalBarrierCase	(m_context, "ssbo_local_barrier_multiple_groups",		"SSBO local barrier usage",				tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
1648 
1649 	addChild(new SSBOBarrierCase		(m_context, "ssbo_cmd_barrier_single",					"SSBO memory barrier usage",			tcu::IVec3(1,1,1)));
1650 	addChild(new SSBOBarrierCase		(m_context, "ssbo_cmd_barrier_multiple",				"SSBO memory barrier usage",			tcu::IVec3(11,5,7)));
1651 
1652 	addChild(new BasicSharedVarCase		(m_context, "shared_var_single_invocation",				"Basic shared variable usage",			tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1653 	addChild(new BasicSharedVarCase		(m_context, "shared_var_single_group",					"Basic shared variable usage",			tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
1654 	addChild(new BasicSharedVarCase		(m_context, "shared_var_multiple_invocations",			"Basic shared variable usage",			tcu::IVec3(1,1,1),	tcu::IVec3(2,5,4)));
1655 	addChild(new BasicSharedVarCase		(m_context, "shared_var_multiple_groups",				"Basic shared variable usage",			tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
1656 
1657 	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_single_invocation",		"Atomic operation with shared var",		tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1658 	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_single_group",			"Atomic operation with shared var",		tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
1659 	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_multiple_invocations",	"Atomic operation with shared var",		tcu::IVec3(1,1,1),	tcu::IVec3(2,5,4)));
1660 	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_multiple_groups",			"Atomic operation with shared var",		tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
1661 
1662 	addChild(new CopyImageToSSBOCase	(m_context, "copy_image_to_ssbo_small",					"Image to SSBO copy",					tcu::IVec2(1,1),	tcu::IVec2(64,64)));
1663 	addChild(new CopyImageToSSBOCase	(m_context, "copy_image_to_ssbo_large",					"Image to SSBO copy",					tcu::IVec2(2,4),	tcu::IVec2(512,512)));
1664 
1665 	addChild(new CopySSBOToImageCase	(m_context, "copy_ssbo_to_image_small",					"SSBO to image copy",					tcu::IVec2(1,1),	tcu::IVec2(64,64)));
1666 	addChild(new CopySSBOToImageCase	(m_context, "copy_ssbo_to_image_large",					"SSBO to image copy",					tcu::IVec2(2,4),	tcu::IVec2(512,512)));
1667 
1668 	addChild(new ImageAtomicOpCase		(m_context, "image_atomic_op_local_size_1",				"Atomic operation with image",			1,	tcu::IVec2(64,64)));
1669 	addChild(new ImageAtomicOpCase		(m_context, "image_atomic_op_local_size_8",				"Atomic operation with image",			8,	tcu::IVec2(64,64)));
1670 
1671 	addChild(new ImageBarrierCase		(m_context, "image_barrier_single",						"Image barrier",						tcu::IVec2(1,1)));
1672 	addChild(new ImageBarrierCase		(m_context, "image_barrier_multiple",					"Image barrier",						tcu::IVec2(64,64)));
1673 
1674 	addChild(new AtomicCounterCase		(m_context, "atomic_counter_single_invocation",			"Basic atomic counter test",			tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1675 	addChild(new AtomicCounterCase		(m_context, "atomic_counter_single_group",				"Basic atomic counter test",			tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
1676 	addChild(new AtomicCounterCase		(m_context, "atomic_counter_multiple_invocations",		"Basic atomic counter test",			tcu::IVec3(1,1,1),	tcu::IVec3(2,5,4)));
1677 	addChild(new AtomicCounterCase		(m_context, "atomic_counter_multiple_groups",			"Basic atomic counter test",			tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
1678 }
1679 
1680 } // Functional
1681 } // gles31
1682 } // deqp
1683