1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.1 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Synchronization Tests
22 *//*--------------------------------------------------------------------*/
23
#include "es31fSynchronizationTests.hpp"
#include "tcuTestLog.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuSurface.hpp"
#include "tcuRenderTarget.hpp"
#include "gluRenderContext.hpp"
#include "gluShaderProgram.hpp"
#include "gluObjectWrapper.hpp"
#include "gluPixelTransfer.hpp"
#include "gluContextInfo.hpp"
#include "glwFunctions.hpp"
#include "glwEnums.hpp"
#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deMemory.h"
#include "deRandom.hpp"

#include <algorithm>
#include <map>
#include <sstream>
#include <string>
#include <vector>
42
43 namespace deqp
44 {
45 namespace gles31
46 {
47 namespace Functional
48 {
49 namespace
50 {
51
validateSortedAtomicRampAdditionValueChain(const std::vector<deUint32> & valueChain,deUint32 sumValue,int & invalidOperationNdx,deUint32 & errorDelta,deUint32 & errorExpected)52 static bool validateSortedAtomicRampAdditionValueChain (const std::vector<deUint32>& valueChain, deUint32 sumValue, int& invalidOperationNdx, deUint32& errorDelta, deUint32& errorExpected)
53 {
54 std::vector<deUint32> chainDelta(valueChain.size());
55
56 for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
57 chainDelta[callNdx] = ((callNdx + 1 == (int)valueChain.size()) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
58
59 // chainDelta contains now the actual additions applied to the value
60 // check there exists an addition ramp form 1 to ...
61 std::sort(chainDelta.begin(), chainDelta.end());
62
63 for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
64 {
65 if ((int)chainDelta[callNdx] != callNdx+1)
66 {
67 invalidOperationNdx = callNdx;
68 errorDelta = chainDelta[callNdx];
69 errorExpected = callNdx+1;
70
71 return false;
72 }
73 }
74
75 return true;
76 }
77
readBuffer(const glw::Functions & gl,deUint32 target,int numElements,std::vector<deUint32> & result)78 static void readBuffer (const glw::Functions& gl, deUint32 target, int numElements, std::vector<deUint32>& result)
79 {
80 const void* ptr = gl.mapBufferRange(target, 0, (int)(sizeof(deUint32) * numElements), GL_MAP_READ_BIT);
81 GLU_EXPECT_NO_ERROR(gl.getError(), "map");
82
83 if (!ptr)
84 throw tcu::TestError("mapBufferRange returned NULL");
85
86 result.resize(numElements);
87 memcpy(&result[0], ptr, sizeof(deUint32) * numElements);
88
89 if (gl.unmapBuffer(target) == GL_FALSE)
90 throw tcu::TestError("unmapBuffer returned false");
91 }
92
readBufferUint32(const glw::Functions & gl,deUint32 target)93 static deUint32 readBufferUint32 (const glw::Functions& gl, deUint32 target)
94 {
95 std::vector<deUint32> vec;
96
97 readBuffer(gl, target, 1, vec);
98
99 return vec[0];
100 }
101
102 //! Generate a ramp of values from 1 to numElements, and shuffle it
generateShuffledRamp(int numElements,std::vector<int> & ramp)103 void generateShuffledRamp (int numElements, std::vector<int>& ramp)
104 {
105 de::Random rng(0xabcd);
106
107 // some positive (non-zero) unique values
108 ramp.resize(numElements);
109 for (int callNdx = 0; callNdx < numElements; ++callNdx)
110 ramp[callNdx] = callNdx + 1;
111
112 rng.shuffle(ramp.begin(), ramp.end());
113 }
114
specializeShader(Context & context,const char * code)115 static std::string specializeShader(Context& context, const char* code)
116 {
117 const glu::GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(context.getRenderContext().getType());
118 std::map<std::string, std::string> specializationMap;
119
120 specializationMap["GLSL_VERSION_DECL"] = glu::getGLSLVersionDeclaration(glslVersion);
121
122 if (glu::contextSupports(context.getRenderContext().getType(), glu::ApiType::es(3, 2)))
123 specializationMap["SHADER_IMAGE_ATOMIC_REQUIRE"] = "";
124 else
125 specializationMap["SHADER_IMAGE_ATOMIC_REQUIRE"] = "#extension GL_OES_shader_image_atomic : require";
126
127 return tcu::StringTemplate(code).specialize(specializationMap);
128 }
129
//! Base class for inter-invocation synchronization tests. A compute dispatch
//! communicates through a shared buffer or image (see genShaderMainBlock in
//! the subclasses) and writes a per-invocation pass/fail flag to a result
//! buffer, which verifyResults() then checks to be all-ones.
class InterInvocationTestCase : public TestCase
{
public:
	//! Type of storage the invocations communicate through.
	enum StorageType
	{
		STORAGE_BUFFER = 0,
		STORAGE_IMAGE,

		STORAGE_LAST
	};
	//! Flags modifying how the storage is accessed and synchronized.
	enum CaseFlags
	{
		FLAG_ATOMIC				= 0x1,	//!< access the storage with atomic operations
		FLAG_ALIASING_STORAGES	= 0x2,	//!< bind one storage object to two binding points
		FLAG_IN_GROUP			= 0x4,	//!< synchronize with other invocations of the same work group
	};

							InterInvocationTestCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags = 0);
							~InterInvocationTestCase	(void);

private:
	void					init						(void);
	void					deinit						(void);
	IterateResult			iterate						(void);

	void					runCompute					(void);
	bool					verifyResults				(void);
	virtual std::string		genShaderSource				(void) const = 0;	//!< complete compute shader, supplied by subclass

protected:
	std::string				genBarrierSource			(void) const;

	const StorageType		m_storage;
	const bool				m_useAtomic;
	const bool				m_aliasingStorages;
	const bool				m_syncWithGroup;
	const int				m_workWidth;				// !< total work width
	const int				m_workHeight;				// !< ... height
	const int				m_localWidth;				// !< group width
	const int				m_localHeight;				// !< group height
	const int				m_elementsPerInvocation;	// !< elements accessed by a single invocation

private:
	glw::GLuint				m_storageBuf;				// !< storage buffer object (STORAGE_BUFFER only)
	glw::GLuint				m_storageTex;				// !< storage texture object (STORAGE_IMAGE only)
	glw::GLuint				m_resultBuf;				// !< per-invocation pass/fail result buffer
	glu::ShaderProgram*		m_program;
};
178
// Decodes the CaseFlags bitfield into per-feature booleans and fixes the
// dispatch geometry: 256x256 total invocations in 16x8 work groups, each
// invocation touching 8 storage elements. GL objects are created in init().
InterInvocationTestCase::InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: TestCase					(context, name, desc)
	, m_storage					(storage)
	, m_useAtomic				((flags & FLAG_ATOMIC) != 0)
	, m_aliasingStorages		((flags & FLAG_ALIASING_STORAGES) != 0)
	, m_syncWithGroup			((flags & FLAG_IN_GROUP) != 0)
	, m_workWidth				(256)
	, m_workHeight				(256)
	, m_localWidth				(16)
	, m_localHeight				(8)
	, m_elementsPerInvocation	(8)
	, m_storageBuf				(0)
	, m_storageTex				(0)
	, m_resultBuf				(0)
	, m_program					(DE_NULL)
{
	DE_ASSERT(m_storage < STORAGE_LAST);
	DE_ASSERT(m_localWidth*m_localHeight <= 128); // minimum MAX_COMPUTE_WORK_GROUP_INVOCATIONS value
}
198
InterInvocationTestCase::~InterInvocationTestCase (void)
{
	// release GL objects and the program in case deinit() was not run by the framework
	deinit();
}
203
// Checks feature requirements, builds the compute program and allocates the
// storage (zero-filled buffer or r32i image) and the result buffer (filled
// with -1 so untouched elements are detectable). Throws NotSupportedError /
// TestError on failure.
void InterInvocationTestCase::init (void)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
	const bool supportsES32 = glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2));

	// requirements

	// image atomics are core in ES 3.2; otherwise the extension is required
	if (m_useAtomic && m_storage == STORAGE_IMAGE && !supportsES32 && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");

	// program

	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genShaderSource()));
	m_testCtx.getLog() << *m_program;
	if (!m_program->isOk())
		throw tcu::TestError("could not build program");

	// source

	if (m_storage == STORAGE_BUFFER)
	{
		const int				bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
		const int				bufferSize		= bufferElements * (int)sizeof(deUint32);
		std::vector<deUint32>	zeroBuffer		(bufferElements, 0);

		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating zero-filled buffer for storage, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

		gl.genBuffers(1, &m_storageBuf);
		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_storageBuf);
		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		const int	bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
		const int	bufferSize		= bufferElements * (int)sizeof(deUint32);

		// the per-invocation elements are stacked vertically: image height = workHeight * elementsPerInvocation
		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating image for storage, size " << m_workWidth << "x" << m_workHeight * m_elementsPerInvocation << ", " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

		gl.genTextures(1, &m_storageTex);
		gl.bindTexture(GL_TEXTURE_2D, m_storageTex);
		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, m_workWidth, m_workHeight * m_elementsPerInvocation);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage image");

		// Zero-fill
		m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0." << tcu::TestLog::EndMessage;

		{
			const std::vector<deInt32> zeroBuffer(m_workWidth * m_workHeight * m_elementsPerInvocation, 0);
			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_workWidth, m_workHeight * m_elementsPerInvocation, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
			GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
		}
	}
	else
		DE_ASSERT(DE_FALSE);

	// destination

	{
		const int				bufferElements	= m_workWidth * m_workHeight;
		const int				bufferSize		= bufferElements * (int)sizeof(deUint32);
		// -1 fill: any element the shader fails to write stays clearly invalid
		std::vector<deInt32>	negativeBuffer	(bufferElements, -1);

		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating -1 filled buffer for results, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

		gl.genBuffers(1, &m_resultBuf);
		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &negativeBuffer[0], GL_STATIC_DRAW);
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
	}
}
277
deinit(void)278 void InterInvocationTestCase::deinit (void)
279 {
280 if (m_storageBuf)
281 {
282 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_storageBuf);
283 m_storageBuf = DE_NULL;
284 }
285
286 if (m_storageTex)
287 {
288 m_context.getRenderContext().getFunctions().deleteTextures(1, &m_storageTex);
289 m_storageTex = DE_NULL;
290 }
291
292 if (m_resultBuf)
293 {
294 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_resultBuf);
295 m_resultBuf = DE_NULL;
296 }
297
298 delete m_program;
299 m_program = DE_NULL;
300 }
301
iterate(void)302 InterInvocationTestCase::IterateResult InterInvocationTestCase::iterate (void)
303 {
304 // Dispatch
305 runCompute();
306
307 // Verify buffer contents
308 if (verifyResults())
309 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
310 else
311 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
312
313 return STOP;
314 }
315
runCompute(void)316 void InterInvocationTestCase::runCompute (void)
317 {
318 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
319 const int groupsX = m_workWidth / m_localWidth;
320 const int groupsY = m_workHeight / m_localHeight;
321
322 DE_ASSERT((m_workWidth % m_localWidth) == 0);
323 DE_ASSERT((m_workHeight % m_localHeight) == 0);
324
325 m_testCtx.getLog()
326 << tcu::TestLog::Message
327 << "Dispatching compute.\n"
328 << " group size: " << m_localWidth << "x" << m_localHeight << "\n"
329 << " dispatch size: " << groupsX << "x" << groupsY << "\n"
330 << " total work size: " << m_workWidth << "x" << m_workHeight << "\n"
331 << tcu::TestLog::EndMessage;
332
333 gl.useProgram(m_program->getProgram());
334
335 // source
336 if (m_storage == STORAGE_BUFFER && !m_aliasingStorages)
337 {
338 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
339 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
340 }
341 else if (m_storage == STORAGE_BUFFER && m_aliasingStorages)
342 {
343 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
344 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageBuf);
345 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
346
347 m_testCtx.getLog() << tcu::TestLog::Message << "Binding same buffer object to buffer storages." << tcu::TestLog::EndMessage;
348 }
349 else if (m_storage == STORAGE_IMAGE && !m_aliasingStorages)
350 {
351 gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
352 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
353 }
354 else if (m_storage == STORAGE_IMAGE && m_aliasingStorages)
355 {
356 gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
357 gl.bindImageTexture(2, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
358
359 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
360
361 m_testCtx.getLog() << tcu::TestLog::Message << "Binding same texture level to image storages." << tcu::TestLog::EndMessage;
362 }
363 else
364 DE_ASSERT(DE_FALSE);
365
366 // destination
367 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_resultBuf);
368 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
369
370 // dispatch
371 gl.dispatchCompute(groupsX, groupsY, 1);
372 GLU_EXPECT_NO_ERROR(gl.getError(), "dispatchCompute");
373 }
374
verifyResults(void)375 bool InterInvocationTestCase::verifyResults (void)
376 {
377 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
378 const int errorFloodThreshold = 5;
379 int numErrorsLogged = 0;
380 const void* mapped = DE_NULL;
381 std::vector<deInt32> results (m_workWidth * m_workHeight);
382 bool error = false;
383
384 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
385 gl.memoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
386 mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_workWidth * m_workHeight * sizeof(deInt32), GL_MAP_READ_BIT);
387 GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
388
389 // copy to properly aligned array
390 deMemcpy(&results[0], mapped, m_workWidth * m_workHeight * sizeof(deUint32));
391
392 if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
393 throw tcu::TestError("memory map store corrupted");
394
395 // check the results
396 for (int ndx = 0; ndx < (int)results.size(); ++ndx)
397 {
398 if (results[ndx] != 1)
399 {
400 error = true;
401
402 if (numErrorsLogged == 0)
403 m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
404 if (numErrorsLogged++ < errorFloodThreshold)
405 m_testCtx.getLog() << tcu::TestLog::Message << " Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
406 else
407 {
408 // after N errors, no point continuing verification
409 m_testCtx.getLog() << tcu::TestLog::Message << " -- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
410 break;
411 }
412 }
413 }
414
415 if (!error)
416 m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer ok." << tcu::TestLog::EndMessage;
417 return !error;
418 }
419
genBarrierSource(void) const420 std::string InterInvocationTestCase::genBarrierSource (void) const
421 {
422 std::ostringstream buf;
423
424 if (m_syncWithGroup)
425 {
426 // Wait until all invocations in this work group have their texture/buffer read/write operations complete
427 // \note We could also use memoryBarrierBuffer() or memoryBarrierImage() in place of groupMemoryBarrier() but
428 // we only require intra-workgroup synchronization.
429 buf << "\n"
430 << " groupMemoryBarrier();\n"
431 << " barrier();\n"
432 << "\n";
433 }
434 else if (m_storage == STORAGE_BUFFER)
435 {
436 DE_ASSERT(!m_syncWithGroup);
437
438 // Waiting only for data written by this invocation. Since all buffer reads and writes are
439 // processed in order (within a single invocation), we don't have to do anything.
440 buf << "\n";
441 }
442 else if (m_storage == STORAGE_IMAGE)
443 {
444 DE_ASSERT(!m_syncWithGroup);
445
446 // Waiting only for data written by this invocation. But since operations complete in undefined
447 // order, we have to wait for them to complete.
448 buf << "\n"
449 << " memoryBarrierImage();\n"
450 << "\n";
451 }
452 else
453 DE_ASSERT(DE_FALSE);
454
455 return buf.str();
456 }
457
//! Intermediate base that provides the common shader skeleton (declarations,
//! index/coordinate helpers, result write); subclasses supply only the main
//! block body via genShaderMainBlock().
class InvocationBasicCase : public InterInvocationTestCase
{
public:
						InvocationBasicCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	std::string			genShaderSource			(void) const;
	virtual std::string	genShaderMainBlock		(void) const = 0;
};
466
// Forwards all construction parameters to the base class; no state of its own.
InvocationBasicCase::InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InterInvocationTestCase(context, name, desc, storage, flags)
{
}
471
// Builds the complete compute shader: version/extension header, result SSBO
// (binding 0), the storage declaration with its getIndex/getCoord helper
// (binding 1), and a main() that runs the subclass-provided block and writes
// 1/0 per invocation into the result buffer.
std::string InvocationBasicCase::genShaderSource (void) const
{
	const bool			useImageAtomics	= m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
		<< "layout(binding=0, std430) buffer Output\n"
		<< "{\n"
		<< "	highp int values[];\n"
		<< "} sb_result;\n";

	if (m_storage == STORAGE_BUFFER)
		// getIndex maps (local invocation, element) to a unique buffer index
		buf << "layout(binding=1, std430) coherent buffer Storage\n"
			<< "{\n"
			<< "	highp int values[];\n"
			<< "} sb_store;\n"
			<< "\n"
			<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
			<< "{\n"
			<< "	highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
			<< "	return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
			<< "}\n";
	else if (m_storage == STORAGE_IMAGE)
		// getCoord maps (local invocation, element) to a unique texel; elements are stacked vertically
		buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image;\n"
			<< "\n"
			<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
			<< "{\n"
			<< "	return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
			<< "}\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
		<< "	int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
		<< "	bool allOk      = true;\n"
		<< "\n"
		<< genShaderMainBlock()
		<< "\n"
		<< "	sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return specializeShader(m_context, buf.str().c_str());
}
520
//! Tests write -> barrier -> read: each invocation writes its group index to
//! its storage elements and then reads them (or, when syncing with the group,
//! another invocation's elements) back.
class InvocationWriteReadCase : public InvocationBasicCase
{
public:
					InvocationWriteReadCase	(Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	std::string		genShaderMainBlock		(void) const;
};
528
// Forwards all construction parameters to the base class; no state of its own.
InvocationWriteReadCase::InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InvocationBasicCase(context, name, desc, storage, flags)
{
}
533
// Emits GLSL: write groupNdx to each element, barrier, then verify the value
// read back equals groupNdx. Atomic reads use atomicExchange(..., 0) so the
// check also resets the element.
std::string InvocationWriteReadCase::genShaderMainBlock (void) const
{
	std::ostringstream buf;

	// write

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	buf << genBarrierSource();

	// read

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		// when syncing with the group, read an element written by a different invocation of the same group
		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");

		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 0) == groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 0) == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	return buf.str();
}
578
//! Tests read -> barrier -> write: each invocation first verifies its (or,
//! when syncing with the group, another invocation's) elements are still at
//! the initial zero value, then writes its group index.
class InvocationReadWriteCase : public InvocationBasicCase
{
public:
					InvocationReadWriteCase	(Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	std::string		genShaderMainBlock		(void) const;
};
586
// Forwards all construction parameters to the base class; no state of its own.
InvocationReadWriteCase::InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InvocationBasicCase(context, name, desc, storage, flags)
{
}
591
// Emits GLSL: verify each element still holds the initial 0 (atomic checks
// use atomicExchange(..., 123) to both read and dirty the slot), barrier,
// then write groupNdx.
std::string InvocationReadWriteCase::genShaderMainBlock (void) const
{
	std::ostringstream buf;

	// read

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		// when syncing with the group, read an element belonging to a different invocation of the same group
		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");

		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == 0);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == 0);\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == 0);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == 0);\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	buf << genBarrierSource();

	// write

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	return buf.str();
}
636
//! Tests write -> barrier -> overwrite -> barrier -> read: a dummy value is
//! written first, then overwritten with the group index (possibly by another
//! invocation of the group), and finally verified.
class InvocationOverWriteCase : public InvocationBasicCase
{
public:
					InvocationOverWriteCase	(Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	std::string		genShaderMainBlock		(void) const;
};
644
// Forwards all construction parameters to the base class; no state of its own.
InvocationOverWriteCase::InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InvocationBasicCase(context, name, desc, storage, flags)
{
}
649
// Emits GLSL in three phases separated by barriers: (1) write the dummy
// value 456 to each element, (2) overwrite with groupNdx -- possibly into a
// different invocation's elements when syncing with the group, (3) read back
// and check the overwrite (not the dummy value) is observed.
std::string InvocationOverWriteCase::genShaderMainBlock (void) const
{
	std::ostringstream buf;

	// write

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	buf << genBarrierSource();

	// write over

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		// write another invocation's value or our own value depending on test type
		// \note offsets (ndx+4, 3*ndx) deliberately differ from the read phase below
		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+4) + ", " + de::toString(3*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");

		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tatomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tsb_store.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\timageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\timageStore(u_image, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	buf << genBarrierSource();

	// read

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		// check another invocation's value or our own value depending on test type
		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");

		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	return buf.str();
}
718
//! Tests that writes through one binding of an aliased storage object are
//! visible when reading through the other binding (FLAG_ALIASING_STORAGES is
//! forced on in the constructor).
class InvocationAliasWriteCase : public InterInvocationTestCase
{
public:
	enum TestType
	{
		TYPE_WRITE = 0,		//!< write via one alias, read via the other
		TYPE_OVERWRITE,		//!< write a dummy value first, then overwrite via the other alias

		TYPE_LAST
	};

					InvocationAliasWriteCase	(Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags);
private:
	std::string		genShaderSource				(void) const;

	const TestType	m_type;
};
736
// Forces FLAG_ALIASING_STORAGES on so the same buffer/texture is bound to
// both storage binding points; stores the write/overwrite variant.
InvocationAliasWriteCase::InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags)
	: InterInvocationTestCase	(context, name, desc, storage, flags | FLAG_ALIASING_STORAGES)
	, m_type					(type)
{
	DE_ASSERT(type < TYPE_LAST);
}
743
genShaderSource(void) const744 std::string InvocationAliasWriteCase::genShaderSource (void) const
745 {
746 const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
747 std::ostringstream buf;
748
749 buf << "${GLSL_VERSION_DECL}\n"
750 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
751 << "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
752 << "layout(binding=0, std430) buffer Output\n"
753 << "{\n"
754 << " highp int values[];\n"
755 << "} sb_result;\n";
756
757 if (m_storage == STORAGE_BUFFER)
758 buf << "layout(binding=1, std430) coherent buffer Storage0\n"
759 << "{\n"
760 << " highp int values[];\n"
761 << "} sb_store0;\n"
762 << "layout(binding=2, std430) coherent buffer Storage1\n"
763 << "{\n"
764 << " highp int values[];\n"
765 << "} sb_store1;\n"
766 << "\n"
767 << "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
768 << "{\n"
769 << " highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
770 << " return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
771 << "}\n";
772 else if (m_storage == STORAGE_IMAGE)
773 buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image0;\n"
774 << "layout(r32i, binding=2) coherent uniform highp iimage2D u_image1;\n"
775 << "\n"
776 << "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
777 << "{\n"
778 << " return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
779 << "}\n";
780 else
781 DE_ASSERT(DE_FALSE);
782
783 buf << "\n"
784 << "void main (void)\n"
785 << "{\n"
786 << " int resultNdx = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
787 << " int groupNdx = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
788 << " bool allOk = true;\n"
789 << "\n";
790
791 if (m_type == TYPE_OVERWRITE)
792 {
793 // write
794
795 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
796 {
797 if (m_storage == STORAGE_BUFFER && m_useAtomic)
798 buf << "\tatomicAdd(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
799 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
800 buf << "\tsb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
801 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
802 buf << "\timageAtomicAdd(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
803 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
804 buf << "\timageStore(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
805 else
806 DE_ASSERT(DE_FALSE);
807 }
808
809 // barrier
810
811 buf << genBarrierSource();
812 }
813 else
814 DE_ASSERT(m_type == TYPE_WRITE);
815
816 // write (again)
817
818 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
819 {
820 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+2) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
821
822 if (m_storage == STORAGE_BUFFER && m_useAtomic)
823 buf << "\tatomicExchange(sb_store1.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
824 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
825 buf << "\tsb_store1.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
826 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
827 buf << "\timageAtomicExchange(u_image1, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
828 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
829 buf << "\timageStore(u_image1, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
830 else
831 DE_ASSERT(DE_FALSE);
832 }
833
834 // barrier
835
836 buf << genBarrierSource();
837
838 // read
839
840 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
841 {
842 if (m_storage == STORAGE_BUFFER && m_useAtomic)
843 buf << "\tallOk = allOk && (atomicExchange(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 123) == groupNdx);\n";
844 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
845 buf << "\tallOk = allOk && (sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] == groupNdx);\n";
846 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
847 buf << "\tallOk = allOk && (imageAtomicExchange(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 123) == groupNdx);\n";
848 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
849 buf << "\tallOk = allOk && (imageLoad(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << ")).x == groupNdx);\n";
850 else
851 DE_ASSERT(DE_FALSE);
852 }
853
854 // return result
855
856 buf << "\n"
857 << " sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
858 << "}\n";
859
860 return specializeShader(m_context, buf.str().c_str());
861 }
862
namespace op
{

// Command payloads for InterCallOperations. Each is a plain aggregate with a
// Generate() factory so that command lists can be composed inline.

struct WriteData
{
	int targetHandle;
	int seed;

	static WriteData Generate (int targetHandle, int seed)
	{
		const WriteData data = { targetHandle, seed };
		return data;
	}
};

struct ReadData
{
	int targetHandle;
	int seed;

	static ReadData Generate (int targetHandle, int seed)
	{
		const ReadData data = { targetHandle, seed };
		return data;
	}
};

// Marker command: insert a memory barrier between calls.
struct Barrier
{
};

struct WriteDataInterleaved
{
	int		targetHandle;
	int		seed;
	bool	evenOdd;

	static WriteDataInterleaved Generate (int targetHandle, int seed, bool evenOdd)
	{
		const WriteDataInterleaved data = { targetHandle, seed, evenOdd };
		return data;
	}
};

struct ReadDataInterleaved
{
	int targetHandle;
	int seed0;
	int seed1;

	static ReadDataInterleaved Generate (int targetHandle, int seed0, int seed1)
	{
		const ReadDataInterleaved data = { targetHandle, seed0, seed1 };
		return data;
	}
};

struct ReadMultipleData
{
	int targetHandle0;
	int seed0;
	int targetHandle1;
	int seed1;

	static ReadMultipleData Generate (int targetHandle0, int seed0, int targetHandle1, int seed1)
	{
		const ReadMultipleData data = { targetHandle0, seed0, targetHandle1, seed1 };
		return data;
	}
};

struct ReadZeroData
{
	int targetHandle;

	static ReadZeroData Generate (int targetHandle)
	{
		const ReadZeroData data = { targetHandle };
		return data;
	}
};

} // namespace op
973
class InterCallTestCase;

// Builder for a sequence of inter-call commands. Commands are appended with
// operator<< and the recorded list is later consumed by InterCallTestCase
// (declared friend below), which generates the programs and storages needed
// to execute it.
class InterCallOperations
{
public:
	InterCallOperations& operator<< (const op::WriteData&);
	InterCallOperations& operator<< (const op::ReadData&);
	InterCallOperations& operator<< (const op::Barrier&);
	InterCallOperations& operator<< (const op::ReadMultipleData&);
	InterCallOperations& operator<< (const op::WriteDataInterleaved&);
	InterCallOperations& operator<< (const op::ReadDataInterleaved&);
	InterCallOperations& operator<< (const op::ReadZeroData&);

private:
	// One recorded command: a type tag plus a union of the possible payloads
	// (all payloads are plain aggregates, so the union is safe to copy).
	struct Command
	{
		enum CommandType
		{
			TYPE_WRITE = 0,
			TYPE_READ,
			TYPE_BARRIER,
			TYPE_READ_MULTIPLE,
			TYPE_WRITE_INTERLEAVE,
			TYPE_READ_INTERLEAVE,
			TYPE_READ_ZERO,

			TYPE_LAST
		};

		CommandType type;

		union CommandUnion
		{
			op::WriteData write;
			op::ReadData read;
			op::Barrier barrier;
			op::ReadMultipleData readMulti;
			op::WriteDataInterleaved writeInterleave;
			op::ReadDataInterleaved readInterleave;
			op::ReadZeroData readZero;
		} u_cmd;
	};

	friend class InterCallTestCase;

	std::vector<Command> m_cmds;
};
1021
operator <<(const op::WriteData & cmd)1022 InterCallOperations& InterCallOperations::operator<< (const op::WriteData& cmd)
1023 {
1024 m_cmds.push_back(Command());
1025 m_cmds.back().type = Command::TYPE_WRITE;
1026 m_cmds.back().u_cmd.write = cmd;
1027
1028 return *this;
1029 }
1030
operator <<(const op::ReadData & cmd)1031 InterCallOperations& InterCallOperations::operator<< (const op::ReadData& cmd)
1032 {
1033 m_cmds.push_back(Command());
1034 m_cmds.back().type = Command::TYPE_READ;
1035 m_cmds.back().u_cmd.read = cmd;
1036
1037 return *this;
1038 }
1039
operator <<(const op::Barrier & cmd)1040 InterCallOperations& InterCallOperations::operator<< (const op::Barrier& cmd)
1041 {
1042 m_cmds.push_back(Command());
1043 m_cmds.back().type = Command::TYPE_BARRIER;
1044 m_cmds.back().u_cmd.barrier = cmd;
1045
1046 return *this;
1047 }
1048
operator <<(const op::ReadMultipleData & cmd)1049 InterCallOperations& InterCallOperations::operator<< (const op::ReadMultipleData& cmd)
1050 {
1051 m_cmds.push_back(Command());
1052 m_cmds.back().type = Command::TYPE_READ_MULTIPLE;
1053 m_cmds.back().u_cmd.readMulti = cmd;
1054
1055 return *this;
1056 }
1057
operator <<(const op::WriteDataInterleaved & cmd)1058 InterCallOperations& InterCallOperations::operator<< (const op::WriteDataInterleaved& cmd)
1059 {
1060 m_cmds.push_back(Command());
1061 m_cmds.back().type = Command::TYPE_WRITE_INTERLEAVE;
1062 m_cmds.back().u_cmd.writeInterleave = cmd;
1063
1064 return *this;
1065 }
1066
operator <<(const op::ReadDataInterleaved & cmd)1067 InterCallOperations& InterCallOperations::operator<< (const op::ReadDataInterleaved& cmd)
1068 {
1069 m_cmds.push_back(Command());
1070 m_cmds.back().type = Command::TYPE_READ_INTERLEAVE;
1071 m_cmds.back().u_cmd.readInterleave = cmd;
1072
1073 return *this;
1074 }
1075
operator <<(const op::ReadZeroData & cmd)1076 InterCallOperations& InterCallOperations::operator<< (const op::ReadZeroData& cmd)
1077 {
1078 m_cmds.push_back(Command());
1079 m_cmds.back().type = Command::TYPE_READ_ZERO;
1080 m_cmds.back().u_cmd.readZero = cmd;
1081
1082 return *this;
1083 }
1084
// Executes a recorded list of inter-call commands (writes, reads, barriers)
// against buffer or image storage and verifies that results written by one
// dispatch are observed by later dispatches.
class InterCallTestCase : public TestCase
{
public:
	// Backing storage accessed by the generated compute programs.
	enum StorageType
	{
		STORAGE_BUFFER = 0,	// !< shader storage buffer objects (std430)
		STORAGE_IMAGE,		// !< r32i / r32f 2D images

		STORAGE_LAST
	};
	enum Flags
	{
		FLAG_USE_ATOMIC = 1,	// !< access storage with atomic operations
		FLAG_USE_INT = 2,		// !< use r32i integer format instead of r32f
	};
	InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops);
	~InterCallTestCase (void);

private:
	void init (void);
	void deinit (void);
	IterateResult iterate (void);
	bool verifyResults (void);

	// One runCommand overload per command payload type; friendly-name
	// counters are shared across commands for readable log output.
	void runCommand (const op::WriteData& cmd, int stepNdx, int& programFriendlyName);
	void runCommand (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void runCommand (const op::Barrier&);
	void runCommand (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void runCommand (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName);
	void runCommand (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void runCommand (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void runSingleRead (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);

	// Resource and program generation helpers used by init().
	glw::GLuint genStorage (int friendlyName);
	glw::GLuint genResultStorage (void);
	glu::ShaderProgram* genWriteProgram (int seed);
	glu::ShaderProgram* genReadProgram (int seed);
	glu::ShaderProgram* genReadMultipleProgram (int seed0, int seed1);
	glu::ShaderProgram* genWriteInterleavedProgram (int seed, bool evenOdd);
	glu::ShaderProgram* genReadInterleavedProgram (int seed0, int seed1);
	glu::ShaderProgram* genReadZeroProgram (void);

	const StorageType m_storage;
	const int m_invocationGridSize; // !< width and height of the two dimensional work dispatch
	const int m_perInvocationSize; // !< number of elements accessed in single invocation
	const std::vector<InterCallOperations::Command> m_cmds;
	const bool m_useAtomic;
	const bool m_formatInteger;

	// Per-step resources; indices parallel m_cmds. A result storage of 0
	// means the step produces no verifiable output (e.g. writes, barriers).
	std::vector<glu::ShaderProgram*> m_operationPrograms;
	std::vector<glw::GLuint> m_operationResultStorages;
	std::map<int, glw::GLuint> m_storageIDs;
};
1138
// Constructor only records the configuration and copies the command list;
// GL resources and programs are created in init().
InterCallTestCase::InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops)
	: TestCase (context, name, desc)
	, m_storage (storage)
	, m_invocationGridSize (512)	// 512x512 invocations per dispatch
	, m_perInvocationSize (2)		// each invocation touches 2 elements
	, m_cmds (ops.m_cmds)
	, m_useAtomic ((flags & FLAG_USE_ATOMIC) != 0)
	, m_formatInteger ((flags & FLAG_USE_INT) != 0)
{
}
1149
InterCallTestCase::~InterCallTestCase (void)
{
	// deinit() releases programs and GL objects; safe to call even if
	// init() threw part-way through or deinit() already ran.
	deinit();
}
1154
init(void)1155 void InterCallTestCase::init (void)
1156 {
1157 int programFriendlyName = 0;
1158 const bool supportsES32 = glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2));
1159
1160 // requirements
1161
1162 if (m_useAtomic && m_storage == STORAGE_IMAGE && !supportsES32 && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
1163 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
1164
1165 // generate resources and validate command list
1166
1167 m_operationPrograms.resize(m_cmds.size(), DE_NULL);
1168 m_operationResultStorages.resize(m_cmds.size(), 0);
1169
1170 for (int step = 0; step < (int)m_cmds.size(); ++step)
1171 {
1172 switch (m_cmds[step].type)
1173 {
1174 case InterCallOperations::Command::TYPE_WRITE:
1175 {
1176 const op::WriteData& cmd = m_cmds[step].u_cmd.write;
1177
1178 // new storage handle?
1179 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1180 m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1181
1182 // program
1183 {
1184 glu::ShaderProgram* program = genWriteProgram(cmd.seed);
1185
1186 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1187 m_testCtx.getLog() << *program;
1188
1189 if (!program->isOk())
1190 throw tcu::TestError("could not build program");
1191
1192 m_operationPrograms[step] = program;
1193 }
1194 break;
1195 }
1196
1197 case InterCallOperations::Command::TYPE_READ:
1198 {
1199 const op::ReadData& cmd = m_cmds[step].u_cmd.read;
1200 DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1201
1202 // program and result storage
1203 {
1204 glu::ShaderProgram* program = genReadProgram(cmd.seed);
1205
1206 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1207 m_testCtx.getLog() << *program;
1208
1209 if (!program->isOk())
1210 throw tcu::TestError("could not build program");
1211
1212 m_operationPrograms[step] = program;
1213 m_operationResultStorages[step] = genResultStorage();
1214 }
1215 break;
1216 }
1217
1218 case InterCallOperations::Command::TYPE_BARRIER:
1219 {
1220 break;
1221 }
1222
1223 case InterCallOperations::Command::TYPE_READ_MULTIPLE:
1224 {
1225 const op::ReadMultipleData& cmd = m_cmds[step].u_cmd.readMulti;
1226 DE_ASSERT(m_storageIDs.find(cmd.targetHandle0) != m_storageIDs.end());
1227 DE_ASSERT(m_storageIDs.find(cmd.targetHandle1) != m_storageIDs.end());
1228
1229 // program
1230 {
1231 glu::ShaderProgram* program = genReadMultipleProgram(cmd.seed0, cmd.seed1);
1232
1233 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1234 m_testCtx.getLog() << *program;
1235
1236 if (!program->isOk())
1237 throw tcu::TestError("could not build program");
1238
1239 m_operationPrograms[step] = program;
1240 m_operationResultStorages[step] = genResultStorage();
1241 }
1242 break;
1243 }
1244
1245 case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:
1246 {
1247 const op::WriteDataInterleaved& cmd = m_cmds[step].u_cmd.writeInterleave;
1248
1249 // new storage handle?
1250 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1251 m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1252
1253 // program
1254 {
1255 glu::ShaderProgram* program = genWriteInterleavedProgram(cmd.seed, cmd.evenOdd);
1256
1257 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1258 m_testCtx.getLog() << *program;
1259
1260 if (!program->isOk())
1261 throw tcu::TestError("could not build program");
1262
1263 m_operationPrograms[step] = program;
1264 }
1265 break;
1266 }
1267
1268 case InterCallOperations::Command::TYPE_READ_INTERLEAVE:
1269 {
1270 const op::ReadDataInterleaved& cmd = m_cmds[step].u_cmd.readInterleave;
1271 DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1272
1273 // program
1274 {
1275 glu::ShaderProgram* program = genReadInterleavedProgram(cmd.seed0, cmd.seed1);
1276
1277 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1278 m_testCtx.getLog() << *program;
1279
1280 if (!program->isOk())
1281 throw tcu::TestError("could not build program");
1282
1283 m_operationPrograms[step] = program;
1284 m_operationResultStorages[step] = genResultStorage();
1285 }
1286 break;
1287 }
1288
1289 case InterCallOperations::Command::TYPE_READ_ZERO:
1290 {
1291 const op::ReadZeroData& cmd = m_cmds[step].u_cmd.readZero;
1292
1293 // new storage handle?
1294 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1295 m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1296
1297 // program
1298 {
1299 glu::ShaderProgram* program = genReadZeroProgram();
1300
1301 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1302 m_testCtx.getLog() << *program;
1303
1304 if (!program->isOk())
1305 throw tcu::TestError("could not build program");
1306
1307 m_operationPrograms[step] = program;
1308 m_operationResultStorages[step] = genResultStorage();
1309 }
1310 break;
1311 }
1312
1313 default:
1314 DE_ASSERT(DE_FALSE);
1315 }
1316 }
1317 }
1318
deinit(void)1319 void InterCallTestCase::deinit (void)
1320 {
1321 // programs
1322 for (int ndx = 0; ndx < (int)m_operationPrograms.size(); ++ndx)
1323 delete m_operationPrograms[ndx];
1324 m_operationPrograms.clear();
1325
1326 // result storages
1327 for (int ndx = 0; ndx < (int)m_operationResultStorages.size(); ++ndx)
1328 {
1329 if (m_operationResultStorages[ndx])
1330 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_operationResultStorages[ndx]);
1331 }
1332 m_operationResultStorages.clear();
1333
1334 // storage
1335 for (std::map<int, glw::GLuint>::const_iterator it = m_storageIDs.begin(); it != m_storageIDs.end(); ++it)
1336 {
1337 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1338
1339 if (m_storage == STORAGE_BUFFER)
1340 gl.deleteBuffers(1, &it->second);
1341 else if (m_storage == STORAGE_IMAGE)
1342 gl.deleteTextures(1, &it->second);
1343 else
1344 DE_ASSERT(DE_FALSE);
1345 }
1346 m_storageIDs.clear();
1347 }
1348
// Dispatches each recorded command in order and then verifies all result
// buffers. Sets PASS/FAIL on the test context and always returns STOP
// (single-iteration test).
InterCallTestCase::IterateResult InterCallTestCase::iterate (void)
{
	// 1-based counters used only to keep log messages readable.
	int programFriendlyName = 0;
	int resultStorageFriendlyName = 0;

	m_testCtx.getLog() << tcu::TestLog::Message << "Running operations:" << tcu::TestLog::EndMessage;

	// run steps

	for (int step = 0; step < (int)m_cmds.size(); ++step)
	{
		switch (m_cmds[step].type)
		{
			case InterCallOperations::Command::TYPE_WRITE: runCommand(m_cmds[step].u_cmd.write, step, programFriendlyName); break;
			case InterCallOperations::Command::TYPE_READ: runCommand(m_cmds[step].u_cmd.read, step, programFriendlyName, resultStorageFriendlyName); break;
			case InterCallOperations::Command::TYPE_BARRIER: runCommand(m_cmds[step].u_cmd.barrier); break;
			case InterCallOperations::Command::TYPE_READ_MULTIPLE: runCommand(m_cmds[step].u_cmd.readMulti, step, programFriendlyName, resultStorageFriendlyName); break;
			case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE: runCommand(m_cmds[step].u_cmd.writeInterleave, step, programFriendlyName); break;
			case InterCallOperations::Command::TYPE_READ_INTERLEAVE: runCommand(m_cmds[step].u_cmd.readInterleave, step, programFriendlyName, resultStorageFriendlyName); break;
			case InterCallOperations::Command::TYPE_READ_ZERO: runCommand(m_cmds[step].u_cmd.readZero, step, programFriendlyName, resultStorageFriendlyName); break;
			default:
				DE_ASSERT(DE_FALSE);
		}
	}

	// read results from result buffers
	if (verifyResults())
		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
	else
		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());

	return STOP;
}
1382
verifyResults(void)1383 bool InterCallTestCase::verifyResults (void)
1384 {
1385 int resultBufferFriendlyName = 0;
1386 bool allResultsOk = true;
1387 bool anyResult = false;
1388
1389 m_testCtx.getLog() << tcu::TestLog::Message << "Reading verifier program results" << tcu::TestLog::EndMessage;
1390
1391 for (int step = 0; step < (int)m_cmds.size(); ++step)
1392 {
1393 const int errorFloodThreshold = 5;
1394 int numErrorsLogged = 0;
1395
1396 if (m_operationResultStorages[step])
1397 {
1398 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1399 const void* mapped = DE_NULL;
1400 std::vector<deInt32> results (m_invocationGridSize * m_invocationGridSize);
1401 bool error = false;
1402
1403 anyResult = true;
1404
1405 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_operationResultStorages[step]);
1406 mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), GL_MAP_READ_BIT);
1407 GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
1408
1409 // copy to properly aligned array
1410 deMemcpy(&results[0], mapped, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32));
1411
1412 if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
1413 throw tcu::TestError("memory map store corrupted");
1414
1415 // check the results
1416 for (int ndx = 0; ndx < (int)results.size(); ++ndx)
1417 {
1418 if (results[ndx] != 1)
1419 {
1420 error = true;
1421
1422 if (numErrorsLogged == 0)
1423 m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
1424 if (numErrorsLogged++ < errorFloodThreshold)
1425 m_testCtx.getLog() << tcu::TestLog::Message << " Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
1426 else
1427 {
1428 // after N errors, no point continuing verification
1429 m_testCtx.getLog() << tcu::TestLog::Message << " -- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
1430 break;
1431 }
1432 }
1433 }
1434
1435 if (error)
1436 {
1437 allResultsOk = false;
1438 }
1439 else
1440 m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " ok." << tcu::TestLog::EndMessage;
1441 }
1442 }
1443
1444 DE_ASSERT(anyResult);
1445 DE_UNREF(anyResult);
1446
1447 return allResultsOk;
1448 }
1449
runCommand(const op::WriteData & cmd,int stepNdx,int & programFriendlyName)1450 void InterCallTestCase::runCommand (const op::WriteData& cmd, int stepNdx, int& programFriendlyName)
1451 {
1452 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1453
1454 m_testCtx.getLog()
1455 << tcu::TestLog::Message
1456 << "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1457 << " Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1458 << tcu::TestLog::EndMessage;
1459
1460 gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1461
1462 // set destination
1463 if (m_storage == STORAGE_BUFFER)
1464 {
1465 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1466
1467 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1468 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1469 }
1470 else if (m_storage == STORAGE_IMAGE)
1471 {
1472 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1473
1474 gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1475 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1476 }
1477 else
1478 DE_ASSERT(DE_FALSE);
1479
1480 // calc
1481 gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1482 GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1483 }
1484
// A plain read verifies a single target; delegates to the shared
// single-target read path.
void InterCallTestCase::runCommand (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
{
	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
}
1489
runCommand(const op::Barrier & cmd)1490 void InterCallTestCase::runCommand (const op::Barrier& cmd)
1491 {
1492 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1493
1494 DE_UNREF(cmd);
1495
1496 if (m_storage == STORAGE_BUFFER)
1497 {
1498 m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_STORAGE_BARRIER_BIT" << tcu::TestLog::EndMessage;
1499 gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
1500 }
1501 else if (m_storage == STORAGE_IMAGE)
1502 {
1503 m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_IMAGE_ACCESS_BARRIER_BIT" << tcu::TestLog::EndMessage;
1504 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1505 }
1506 else
1507 DE_ASSERT(DE_FALSE);
1508 }
1509
runCommand(const op::ReadMultipleData & cmd,int stepNdx,int & programFriendlyName,int & resultStorageFriendlyName)1510 void InterCallTestCase::runCommand (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1511 {
1512 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1513
1514 m_testCtx.getLog()
1515 << tcu::TestLog::Message
1516 << "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffers") : ("images")) << " #" << cmd.targetHandle0 << " and #" << cmd.targetHandle1 << ".\n"
1517 << " Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1518 << " Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1519 << tcu::TestLog::EndMessage;
1520
1521 gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1522
1523 // set sources
1524 if (m_storage == STORAGE_BUFFER)
1525 {
1526 DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1527 DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1528
1529 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[cmd.targetHandle0]);
1530 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageIDs[cmd.targetHandle1]);
1531 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffers");
1532 }
1533 else if (m_storage == STORAGE_IMAGE)
1534 {
1535 DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1536 DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1537
1538 gl.bindImageTexture(1, m_storageIDs[cmd.targetHandle0], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1539 gl.bindImageTexture(2, m_storageIDs[cmd.targetHandle1], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1540 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source images");
1541 }
1542 else
1543 DE_ASSERT(DE_FALSE);
1544
1545 // set destination
1546 DE_ASSERT(m_operationResultStorages[stepNdx]);
1547 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1548 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1549
1550 // calc
1551 gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1552 GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read multi");
1553 }
1554
runCommand(const op::WriteDataInterleaved & cmd,int stepNdx,int & programFriendlyName)1555 void InterCallTestCase::runCommand (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName)
1556 {
1557 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1558
1559 m_testCtx.getLog()
1560 << tcu::TestLog::Message
1561 << "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1562 << " Writing to every " << ((cmd.evenOdd) ? ("even") : ("odd")) << " " << ((m_storage == STORAGE_BUFFER) ? ("element") : ("column")) << ".\n"
1563 << " Dispatch size: " << m_invocationGridSize / 2 << "x" << m_invocationGridSize << "."
1564 << tcu::TestLog::EndMessage;
1565
1566 gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1567
1568 // set destination
1569 if (m_storage == STORAGE_BUFFER)
1570 {
1571 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1572
1573 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1574 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1575 }
1576 else if (m_storage == STORAGE_IMAGE)
1577 {
1578 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1579
1580 gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1581 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1582 }
1583 else
1584 DE_ASSERT(DE_FALSE);
1585
1586 // calc
1587 gl.dispatchCompute(m_invocationGridSize / 2, m_invocationGridSize, 1);
1588 GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1589 }
1590
// An interleaved read still verifies a single target; delegates to the
// shared single-target read path.
void InterCallTestCase::runCommand (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
{
	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
}
1595
// A read-zero command verifies a single (untouched) target; delegates to the
// shared single-target read path.
void InterCallTestCase::runCommand (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
{
	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
}
1600
runSingleRead(int targetHandle,int stepNdx,int & programFriendlyName,int & resultStorageFriendlyName)1601 void InterCallTestCase::runSingleRead (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1602 {
1603 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1604
1605 m_testCtx.getLog()
1606 << tcu::TestLog::Message
1607 << "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << targetHandle << ".\n"
1608 << " Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1609 << " Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1610 << tcu::TestLog::EndMessage;
1611
1612 gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1613
1614 // set source
1615 if (m_storage == STORAGE_BUFFER)
1616 {
1617 DE_ASSERT(m_storageIDs[targetHandle]);
1618
1619 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[targetHandle]);
1620 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffer");
1621 }
1622 else if (m_storage == STORAGE_IMAGE)
1623 {
1624 DE_ASSERT(m_storageIDs[targetHandle]);
1625
1626 gl.bindImageTexture(1, m_storageIDs[targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1627 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source image");
1628 }
1629 else
1630 DE_ASSERT(DE_FALSE);
1631
1632 // set destination
1633 DE_ASSERT(m_operationResultStorages[stepNdx]);
1634 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1635 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1636
1637 // calc
1638 gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1639 GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read");
1640 }
1641
// Creates and zero-fills the backing storage (an SSBO or an r32i/r32f texture,
// selected by m_storage) used to pass data between the test's dispatches.
// 'friendlyName' is only used in log messages. Returns the GL object name.
glw::GLuint InterCallTestCase::genStorage (int friendlyName)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	if (m_storage == STORAGE_BUFFER)
	{
		// one element for every value the write pass produces
		const int numElements = m_invocationGridSize * m_invocationGridSize * m_perInvocationSize;
		const int bufferSize = numElements * (int)((m_formatInteger) ? (sizeof(deInt32)) : (sizeof(glw::GLfloat)));
		glw::GLuint retVal = 0;

		m_testCtx.getLog() << tcu::TestLog::Message << "Creating buffer #" << friendlyName << ", size " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

		gl.genBuffers(1, &retVal);
		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);

		// upload zeros in the element type matching the shader-side format
		if (m_formatInteger)
		{
			const std::vector<deUint32> zeroBuffer(numElements, 0);
			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
		}
		else
		{
			const std::vector<float> zeroBuffer(numElements, 0.0f);
			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
		}
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");

		return retVal;
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		// per-invocation values are laid out as m_perInvocationSize stacked grid-sized rows
		const int imageWidth = m_invocationGridSize;
		const int imageHeight = m_invocationGridSize * m_perInvocationSize;
		glw::GLuint retVal = 0;

		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Creating image #" << friendlyName << ", size " << imageWidth << "x" << imageHeight
			<< ", internalformat = " << ((m_formatInteger) ? ("r32i") : ("r32f"))
			<< ", size = " << (imageWidth*imageHeight*sizeof(deUint32)) << " bytes."
			<< tcu::TestLog::EndMessage;

		gl.genTextures(1, &retVal);
		gl.bindTexture(GL_TEXTURE_2D, retVal);

		// single-level immutable storage; image load/store requires texStorage
		if (m_formatInteger)
			gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, imageWidth, imageHeight);
		else
			gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32F, imageWidth, imageHeight);

		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen image");

		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Filling image with 0"
			<< tcu::TestLog::EndMessage;

		if (m_formatInteger)
		{
			const std::vector<deInt32> zeroBuffer(imageWidth * imageHeight, 0);
			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
		}
		else
		{
			const std::vector<float> zeroBuffer(imageWidth * imageHeight, 0.0f);
			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED, GL_FLOAT, &zeroBuffer[0]);
		}

		GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");

		return retVal;
	}
	else
	{
		DE_ASSERT(DE_FALSE);
		return 0;
	}
}
1722
genResultStorage(void)1723 glw::GLuint InterCallTestCase::genResultStorage (void)
1724 {
1725 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1726 glw::GLuint retVal = 0;
1727
1728 gl.genBuffers(1, &retVal);
1729 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1730 gl.bufferData(GL_SHADER_STORAGE_BUFFER, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), DE_NULL, GL_STATIC_DRAW);
1731 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1732
1733 return retVal;
1734 }
1735
// Builds the compute program for the write pass: every invocation writes (or
// atomically exchanges in) its groupNdx into m_perInvocationSize locations of
// the output buffer/image. 'seed' offsets the write addresses so different
// write programs target different permutations of the same storage.
glu::ShaderProgram* InterCallTestCase::genWriteProgram (int seed)
{
	const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	// output interface: SSBO at binding 0, or image unit 0
	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_out;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "\n";

	// Write to buffer/image m_perInvocationSize elements
	if (m_storage == STORAGE_BUFFER)
	{
		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
		{
			if (m_useAtomic)
				buf << "	atomicExchange(";
			else
				buf << "	";

			// seed-rotated target index, wrapped to the buffer size
			buf << "sb_out.values[(groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]";

			if (m_useAtomic)
				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << " = " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
		}
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
		{
			if (m_useAtomic)
				buf << "	imageAtomicExchange";
			else
				buf << "	imageStore";

			// x is seed-rotated within a row, y selects the writeNdx'th grid-sized band
			buf << "(u_imageOut, ivec2((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";

			if (m_useAtomic)
				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
		}
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
1804
// Builds the compute program for the read/verify pass: it recomputes the same
// seed-rotated addresses as genWriteProgram and checks each location contains
// groupNdx, writing a per-invocation pass/fail flag into the result SSBO at
// binding 0. With atomics, reads are atomicExchange(…, zero), clearing as it goes.
glu::ShaderProgram* InterCallTestCase::genReadProgram (int seed)
{
	const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	// input interface: SSBO or image at binding/unit 1 (0 is the result buffer)
	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_in;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
		<< "{\n"
		<< "	highp int resultOk[];\n"
		<< "} sb_result;\n"
		<< "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
		<< "	bool allOk = true;\n"
		<< "\n";

	// Verify data

	if (m_storage == STORAGE_BUFFER)
	{
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
		{
			if (!m_useAtomic)
				buf << "	allOk = allOk && (sb_in.values[(groupNdx + "
					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "] == "
					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << "	allOk = allOk && (atomicExchange(sb_in.values[(groupNdx + "
					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "], zero) == "
					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
		}
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
		{
			if (!m_useAtomic)
				buf << "	allOk = allOk && (imageLoad(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)).x == "
					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << "	allOk = allOk && (imageAtomicExchange(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u), zero) == "
					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
		}
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
1875
// Like genReadProgram, but verifies two sources at once (bindings/units 1 and 2),
// each written with its own seed (seed0/seed1). Per-invocation pass/fail goes to
// the result SSBO at binding 0.
glu::ShaderProgram* InterCallTestCase::genReadMultipleProgram (int seed0, int seed1)
{
	const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer0\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_in0;\n"
			<< "layout(binding=2, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer1\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_in1;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn0;\n"
			<< "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=2) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn1;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
		<< "{\n"
		<< "	highp int resultOk[];\n"
		<< "} sb_result;\n"
		<< "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
		<< "	bool allOk = true;\n"
		<< "\n";

	// Verify data

	if (m_storage == STORAGE_BUFFER)
	{
		// each check mirrors the corresponding write program's seed-rotated addressing
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in0.values[(groupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in1.values[(groupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn0, ivec2((gl_GlobalInvocationID.x + " << (seed0 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn1, ivec2((gl_GlobalInvocationID.x + " << (seed1 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
1935
// Variant of genWriteProgram that touches only every second element/column:
// 'evenOdd' selects even (true) or odd (false) indices, so two such programs
// can write disjoint halves of the same interleaved storage.
glu::ShaderProgram* InterCallTestCase::genWriteInterleavedProgram (int seed, bool evenOdd)
{
	const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_out;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "\n";

	// Write to buffer/image m_perInvocationSize elements
	if (m_storage == STORAGE_BUFFER)
	{
		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
		{
			if (m_useAtomic)
				buf << "	atomicExchange(";
			else
				buf << "	";

			// index is computed in the half-sized interleaved space, then expanded
			// back with "* 2 + 0/1" to hit only even or only odd elements
			buf << "sb_out.values[((groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize / 2 * m_perInvocationSize << ") * 2 + " << ((evenOdd) ? (0) : (1)) << "]";

			if (m_useAtomic)
				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << "= " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
		}
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
		{
			if (m_useAtomic)
				buf << "	imageAtomicExchange";
			else
				buf << "	imageStore";

			// only even or odd columns, y offset selects the writeNdx'th band
			buf << "(u_imageOut, ivec2(((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + " << ((evenOdd) ? (0) : (1)) << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";

			if (m_useAtomic)
				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
		}
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
2004
// Verify pass matching genWriteInterleavedProgram: even-indexed invocations
// check the even elements (written with seed0), odd invocations the odd
// elements (seed1). Expected values use interleavedGroupNdx, the invocation
// index recomputed in the half-sized interleaved space.
glu::ShaderProgram* InterCallTestCase::genReadInterleavedProgram (int seed0, int seed1)
{
	const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_in;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
		<< "{\n"
		<< "	highp int resultOk[];\n"
		<< "} sb_result;\n"
		<< "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "	int interleavedGroupNdx = int((size.x >> 1U) * size.y * gl_GlobalInvocationID.z + (size.x >> 1U) * gl_GlobalInvocationID.y + (gl_GlobalInvocationID.x >> 1U));\n"
		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
		<< "	bool allOk = true;\n"
		<< "\n";

	// Verify data

	if (m_storage == STORAGE_BUFFER)
	{
		// even invocations check even elements (seed0), odd invocations odd elements (seed1)
		buf << "	if (groupNdx % 2 == 0)\n"
			<< "	{\n";
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "		allOk = allOk && ("
				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 0]"
				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
		buf << "	}\n"
			<< "	else\n"
			<< "	{\n";
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "		allOk = allOk && ("
				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 1]"
				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
		buf << "	}\n";
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		buf << "	if (groupNdx % 2 == 0)\n"
			<< "	{\n";
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "		allOk = allOk && ("
				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed0 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 0, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
		buf << "	}\n"
			<< "	else\n"
			<< "	{\n";
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "		allOk = allOk && ("
				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed1 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 1, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
		buf << "	}\n";
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
2084
// Verify pass that expects the source storage to still contain its initial
// zeros (i.e. no write pass has touched it). With atomics the read is an
// exchange with a non-zero dummy ('anything') so the access itself is atomic.
glu::ShaderProgram* InterCallTestCase::genReadZeroProgram (void)
{
	const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_in;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
		<< "{\n"
		<< "	highp int resultOk[];\n"
		<< "} sb_result;\n"
		<< "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " anything = " << ((m_formatInteger) ? ("5") : ("5.0")) << ";\n"
		<< "	bool allOk = true;\n"
		<< "\n";

	// Verify data

	if (m_storage == STORAGE_BUFFER)
	{
		// consecutive elements per invocation; every one must still be zero
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "	allOk = allOk && ("
				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[groupNdx * " << m_perInvocationSize << " + " << readNdx << "]"
				<< ((m_useAtomic) ? (", anything)") : ("")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "	allOk = allOk && ("
				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn, ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y + " << (readNdx*m_invocationGridSize) << "u)"
				<< ((m_useAtomic) ? (", anything)") : (").x")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
2141
// Runs several back-to-back compute dispatches (no barriers between them) that
// all atomicAdd a unique per-call delta into the same work SSBO, then verifies
// both the final sums and that the per-call intermediate results form a valid
// addition chain.
class SSBOConcurrentAtomicCase : public TestCase
{
public:

												SSBOConcurrentAtomicCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
												~SSBOConcurrentAtomicCase	(void);

	void										init						(void);
	void										deinit						(void);
	IterateResult								iterate						(void);

private:
	std::string									genComputeSource			(void) const;

	const int									m_numCalls;					// number of dispatches, each with a unique delta
	const int									m_workSize;					// elements in the work buffer (== num work groups)
	glu::ShaderProgram*							m_program;
	deUint32									m_bufferID;					// shared work buffer, target of the atomic adds
	std::vector<deUint32>						m_intermediateResultBuffers;	// one pre-add snapshot buffer per call
};
2162
SSBOConcurrentAtomicCase(Context & context,const char * name,const char * description,int numCalls,int workSize)2163 SSBOConcurrentAtomicCase::SSBOConcurrentAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2164 : TestCase (context, name, description)
2165 , m_numCalls (numCalls)
2166 , m_workSize (workSize)
2167 , m_program (DE_NULL)
2168 , m_bufferID (DE_NULL)
2169 {
2170 }
2171
SSBOConcurrentAtomicCase::~SSBOConcurrentAtomicCase (void)
{
	// deinit() is safe to call even if init() never ran or already cleaned up
	deinit();
}
2176
// Creates the zero-filled work buffer, one zero-filled intermediate-result
// buffer per planned dispatch, and builds the compute program.
// Throws tcu::TestError if the program fails to build.
void SSBOConcurrentAtomicCase::init (void)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
	std::vector<deUint32> zeroData (m_workSize, 0);

	// gen buffers

	gl.genBuffers(1, &m_bufferID);
	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);

	// one intermediate-result buffer per call, same size as the work buffer
	for (int ndx = 0; ndx < m_numCalls; ++ndx)
	{
		deUint32 buffer = 0;

		gl.genBuffers(1, &buffer);
		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
		gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);

		m_intermediateResultBuffers.push_back(buffer);
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
	}

	// gen program

	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
	m_testCtx.getLog() << *m_program;
	if (!m_program->isOk())
		throw tcu::TestError("could not build program");
}
2207
deinit(void)2208 void SSBOConcurrentAtomicCase::deinit (void)
2209 {
2210 if (m_bufferID)
2211 {
2212 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
2213 m_bufferID = 0;
2214 }
2215
2216 for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2217 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2218 m_intermediateResultBuffers.clear();
2219
2220 delete m_program;
2221 m_program = DE_NULL;
2222 }
2223
// Dispatches the program m_numCalls times without any intervening sync, each
// call adding a unique delta (a shuffled 1..N ramp) to every work-buffer
// element and snapshotting the pre-add value. Verifies (1) every element ends
// at sum(1..N) and (2) the sorted per-call snapshots form a valid chain of the
// deltas, i.e. the adds were individually atomic.
TestCase::IterateResult SSBOConcurrentAtomicCase::iterate (void)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
	const deUint32 sumValue = (deUint32)(m_numCalls * (m_numCalls + 1) / 2); // sum of deltas 1..N
	std::vector<int> deltas;

	// generate unique deltas
	generateShuffledRamp(m_numCalls, deltas);

	// invoke program N times, each with a different delta
	{
		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");

		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Running shader " << m_numCalls << " times.\n"
			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
			<< "Setting u_atomicDelta to a unique value for each call.\n"
			<< tcu::TestLog::EndMessage;

		if (deltaLocation == -1)
			throw tcu::TestError("u_atomicDelta location was -1");

		gl.useProgram(m_program->getProgram());
		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_bufferID);

		// note: no memory barriers between the dispatches; only the atomics order the accesses
		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			m_testCtx.getLog()
				<< tcu::TestLog::Message
				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
				<< tcu::TestLog::EndMessage;

			gl.uniform1ui(deltaLocation, deltas[callNdx]);
			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
			gl.dispatchCompute(m_workSize, 1, 1);
		}

		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
	}

	// Verify result
	{
		std::vector<deUint32> result;

		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;

		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, result);

		for (int ndx = 0; ndx < m_workSize; ++ndx)
		{
			if (result[ndx] != sumValue)
			{
				m_testCtx.getLog()
					<< tcu::TestLog::Message
					<< "Work buffer error, at index " << ndx << " expected value " << (sumValue) << ", got " << result[ndx] << "\n"
					<< "Work buffer contains invalid values."
					<< tcu::TestLog::EndMessage;

				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
				return STOP;
			}
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Work buffer contents are valid." << tcu::TestLog::EndMessage;
	}

	// verify steps
	{
		std::vector<std::vector<deUint32> > intermediateResults (m_numCalls);
		std::vector<deUint32> valueChain (m_numCalls);

		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;

		// collect results

		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, intermediateResults[callNdx]);
		}

		// verify values

		for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
		{
			int invalidOperationNdx;
			deUint32 errorDelta;
			deUint32 errorExpected;

			// collect result chain for each element
			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];

			// check there exists a path from 0 to sumValue using each addition once
			// decompose cumulative results to addition operations (all additions positive => this works)

			std::sort(valueChain.begin(), valueChain.end());

			// validate chain
			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
			{
				m_testCtx.getLog()
					<< tcu::TestLog::Message
					<< "Intermediate buffer error, at value index " << valueNdx << ", applied operation index " << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
					<< "Intermediate buffer contains invalid values. Values at index " << valueNdx << "\n"
					<< tcu::TestLog::EndMessage;

				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;

				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
				return STOP;
			}
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
	}

	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
	return STOP;
}
2348
// Builds the compute shader: each invocation atomicAdds u_atomicDelta into its
// slot of the volatile work buffer (binding 2) and stores the value returned
// by the add (the pre-add contents) into the intermediate-result buffer
// (binding 1).
std::string SSBOConcurrentAtomicCase::genComputeSource (void) const
{
	std::ostringstream buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
		<< "{\n"
		<< "	highp uint values[" << m_workSize << "];\n"
		<< "} sb_ires;\n"
		<< "\n"
		<< "layout (binding = 2, std430) volatile buffer WorkBuffer\n"
		<< "{\n"
		<< "	highp uint values[" << m_workSize << "];\n"
		<< "} sb_work;\n"
		<< "uniform highp uint u_atomicDelta;\n"
		<< "\n"
		<< "void main ()\n"
		<< "{\n"
		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x;\n"
		<< "	sb_ires.values[invocationIndex] = atomicAdd(sb_work.values[invocationIndex], u_atomicDelta);\n"
		<< "}";

	return specializeShader(m_context, buf.str().c_str());
}
2374
// Interleaves dispatches of two programs ("even" and "odd" variants) that both
// bump a shared atomic counter, checking the counter stays consistent across
// concurrently issued, unsynchronized dispatches.
class ConcurrentAtomicCounterCase : public TestCase
{
public:

												ConcurrentAtomicCounterCase		(Context& context, const char* name, const char* description, int numCalls, int workSize);
												~ConcurrentAtomicCounterCase	(void);

	void										init							(void);
	void										deinit							(void);
	IterateResult								iterate							(void);

private:
	// evenOdd selects which of the two program variants to generate
	std::string									genComputeSource				(bool evenOdd) const;

	const int									m_numCalls;						// number of dispatches per program
	const int									m_workSize;						// invocations per dispatch
	glu::ShaderProgram*							m_evenProgram;
	glu::ShaderProgram*							m_oddProgram;
	deUint32									m_counterBuffer;				// shared atomic-counter backing buffer
	deUint32									m_intermediateResultBuffer;		// per-call result slots (m_numCalls * m_workSize)
};
2396
// All GL objects and programs are created lazily in init(); the constructor
// only records the test parameters.
ConcurrentAtomicCounterCase::ConcurrentAtomicCounterCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
	: TestCase					(context, name, description)
	, m_numCalls				(numCalls)
	, m_workSize				(workSize)
	, m_evenProgram				(DE_NULL)
	, m_oddProgram				(DE_NULL)
	, m_counterBuffer			(DE_NULL)
	, m_intermediateResultBuffer(DE_NULL)
{
}
2407
// deinit() is idempotent, so calling it here is safe even if the framework
// already tore the case down.
ConcurrentAtomicCounterCase::~ConcurrentAtomicCounterCase (void)
{
	deinit();
}
2412
// Creates the counter buffer, the intermediate result buffer and both compute
// programs. Throws TestError if either program fails to build.
void ConcurrentAtomicCounterCase::init (void)
{
	const glw::Functions&		gl			= m_context.getRenderContext().getFunctions();
	// sized for the intermediate buffer; only the first element is used to zero the counter
	const std::vector<deUint32>	zeroData	(m_numCalls * m_workSize, 0);

	// gen buffer

	gl.genBuffers(1, &m_counterBuffer);
	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_counterBuffer);
	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32), &zeroData[0], GL_DYNAMIC_COPY);

	gl.genBuffers(1, &m_intermediateResultBuffer);
	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_numCalls * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);

	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");

	// gen programs

	{
		const tcu::ScopedLogSection section(m_testCtx.getLog(), "EvenProgram", "Even program");

		m_evenProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(true)));
		m_testCtx.getLog() << *m_evenProgram;
		if (!m_evenProgram->isOk())
			throw tcu::TestError("could not build program");
	}
	{
		const tcu::ScopedLogSection section(m_testCtx.getLog(), "OddProgram", "Odd program");

		m_oddProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(false)));
		m_testCtx.getLog() << *m_oddProgram;
		if (!m_oddProgram->isOk())
			throw tcu::TestError("could not build program");
	}
}
2449
deinit(void)2450 void ConcurrentAtomicCounterCase::deinit (void)
2451 {
2452 if (m_counterBuffer)
2453 {
2454 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_counterBuffer);
2455 m_counterBuffer = 0;
2456 }
2457 if (m_intermediateResultBuffer)
2458 {
2459 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffer);
2460 m_intermediateResultBuffer = 0;
2461 }
2462
2463 delete m_evenProgram;
2464 m_evenProgram = DE_NULL;
2465
2466 delete m_oddProgram;
2467 m_oddProgram = DE_NULL;
2468 }
2469
// Dispatches the even/odd program pair m_numCalls times without intervening
// barriers, then checks a) the counter equals the total number of increments
// and b) the recorded pre-increment values, once sorted, form the exact ramp
// 0..N-1 (i.e. every increment returned a unique value => atomicity held).
TestCase::IterateResult ConcurrentAtomicCounterCase::iterate (void)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	// invoke program N times, each with a different delta
	{
		const int evenCallNdxLocation	= gl.getUniformLocation(m_evenProgram->getProgram(), "u_callNdx");
		const int oddCallNdxLocation	= gl.getUniformLocation(m_oddProgram->getProgram(), "u_callNdx");

		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Running shader pair (even & odd) " << m_numCalls << " times.\n"
			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
			<< tcu::TestLog::EndMessage;

		if (evenCallNdxLocation == -1)
			throw tcu::TestError("u_callNdx location was -1");
		if (oddCallNdxLocation == -1)
			throw tcu::TestError("u_callNdx location was -1");

		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffer);
		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_counterBuffer);

		// deliberately no memoryBarrier between dispatches: the test relies on
		// atomic counter operations alone being coherent across the dispatches
		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			gl.useProgram(m_evenProgram->getProgram());
			gl.uniform1ui(evenCallNdxLocation, (deUint32)callNdx);
			gl.dispatchCompute(m_workSize, 1, 1);

			gl.useProgram(m_oddProgram->getProgram());
			gl.uniform1ui(oddCallNdxLocation, (deUint32)callNdx);
			gl.dispatchCompute(m_workSize, 1, 1);
		}

		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
	}

	// Verify result
	{
		deUint32 result;

		// every invocation (even or odd branch) performed exactly one increment
		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;

		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counterBuffer);
		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);

		if ((int)result != m_numCalls*m_workSize)
		{
			m_testCtx.getLog()
				<< tcu::TestLog::Message
				<< "Counter buffer error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
				<< tcu::TestLog::EndMessage;

			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
			return STOP;
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Counter buffer is valid." << tcu::TestLog::EndMessage;
	}

	// verify steps
	{
		std::vector<deUint32> intermediateResults;

		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;

		// collect results

		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_numCalls * m_workSize, intermediateResults);

		// verify values

		// if each increment returned a unique value, the sorted pre-values are
		// exactly 0, 1, ..., numCalls*workSize-1
		std::sort(intermediateResults.begin(), intermediateResults.end());

		for (int valueNdx = 0; valueNdx < m_workSize * m_numCalls; ++valueNdx)
		{
			if ((int)intermediateResults[valueNdx] != valueNdx)
			{
				m_testCtx.getLog()
					<< tcu::TestLog::Message
					<< "Intermediate buffer error, at value index " << valueNdx << ", expected " << valueNdx << ", got " << intermediateResults[valueNdx] << ".\n"
					<< "Intermediate buffer contains invalid values. Intermediate results:\n"
					<< tcu::TestLog::EndMessage;

				for (int logCallNdx = 0; logCallNdx < m_workSize * m_numCalls; ++logCallNdx)
					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx] << tcu::TestLog::EndMessage;

				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
				return STOP;
			}
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
	}

	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
	return STOP;
}
2569
genComputeSource(bool evenOdd) const2570 std::string ConcurrentAtomicCounterCase::genComputeSource (bool evenOdd) const
2571 {
2572 std::ostringstream buf;
2573
2574 buf << "${GLSL_VERSION_DECL}\n"
2575 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2576 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2577 << "{\n"
2578 << " highp uint values[" << m_workSize * m_numCalls << "];\n"
2579 << "} sb_ires;\n"
2580 << "\n"
2581 << "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
2582 << "uniform highp uint u_callNdx;\n"
2583 << "\n"
2584 << "void main ()\n"
2585 << "{\n"
2586 << " highp uint dataNdx = u_callNdx * " << m_workSize << "u + gl_GlobalInvocationID.x;\n"
2587 << " if ((dataNdx % 2u) == " << ((evenOdd) ? (0) : (1)) << "u)\n"
2588 << " sb_ires.values[dataNdx] = atomicCounterIncrement(u_counter);\n"
2589 << "}";
2590
2591 return specializeShader(m_context, buf.str().c_str());
2592 }
2593
// Tests concurrent image atomic operations: the same program is dispatched
// numCalls times, each call adding a unique delta to every texel of a
// workSize x workSize r32ui image and recording the pre-add value in a
// per-call SSBO. Requires GL_OES_shader_image_atomic (or ES 3.2).
class ConcurrentImageAtomicCase : public TestCase
{
public:

	ConcurrentImageAtomicCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
	~ConcurrentImageAtomicCase	(void);

	void			init		(void);
	void			deinit		(void);
	IterateResult	iterate		(void);

private:
	// Copies the work image contents into result via the image read program.
	void			readWorkImage		(std::vector<deUint32>& result);

	std::string		genComputeSource	(void) const;	// imageAtomicAdd kernel
	std::string		genImageReadSource	(void) const;	// image -> ssbo copy kernel
	std::string		genImageClearSource	(void) const;	// image zero-fill kernel

	const int				m_numCalls;					// number of dispatches (and unique deltas)
	const int				m_workSize;					// image side length and dispatch dimensions
	glu::ShaderProgram*		m_program;
	glu::ShaderProgram*		m_imageReadProgram;
	glu::ShaderProgram*		m_imageClearProgram;
	deUint32				m_imageID;
	std::vector<deUint32>	m_intermediateResultBuffers;	// one SSBO per call
};
2620
// GL resources are created in init(); the constructor only stores parameters.
ConcurrentImageAtomicCase::ConcurrentImageAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
	: TestCase				(context, name, description)
	, m_numCalls			(numCalls)
	, m_workSize			(workSize)
	, m_program				(DE_NULL)
	, m_imageReadProgram	(DE_NULL)
	, m_imageClearProgram	(DE_NULL)
	, m_imageID				(DE_NULL)
{
}
2631
// deinit() is idempotent; safe to call even after framework teardown.
ConcurrentImageAtomicCase::~ConcurrentImageAtomicCase (void)
{
	deinit();
}
2636
init(void)2637 void ConcurrentImageAtomicCase::init (void)
2638 {
2639 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2640 std::vector<deUint32> zeroData (m_workSize * m_workSize, 0);
2641 const bool supportsES32 = glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2));
2642
2643 if (!supportsES32 && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
2644 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic");
2645
2646 // gen image
2647
2648 gl.genTextures(1, &m_imageID);
2649 gl.bindTexture(GL_TEXTURE_2D, m_imageID);
2650 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize, m_workSize);
2651 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2652 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2653 GLU_EXPECT_NO_ERROR(gl.getError(), "gen tex");
2654
2655 // gen buffers
2656
2657 for (int ndx = 0; ndx < m_numCalls; ++ndx)
2658 {
2659 deUint32 buffer = 0;
2660
2661 gl.genBuffers(1, &buffer);
2662 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2663 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2664
2665 m_intermediateResultBuffers.push_back(buffer);
2666 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2667 }
2668
2669 // gen programs
2670
2671 m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2672 m_testCtx.getLog() << *m_program;
2673 if (!m_program->isOk())
2674 throw tcu::TestError("could not build program");
2675
2676 m_imageReadProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageReadSource()));
2677 if (!m_imageReadProgram->isOk())
2678 {
2679 const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageReadProgram", "Image read program");
2680
2681 m_testCtx.getLog() << *m_imageReadProgram;
2682 throw tcu::TestError("could not build program");
2683 }
2684
2685 m_imageClearProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageClearSource()));
2686 if (!m_imageClearProgram->isOk())
2687 {
2688 const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageClearProgram", "Image read program");
2689
2690 m_testCtx.getLog() << *m_imageClearProgram;
2691 throw tcu::TestError("could not build program");
2692 }
2693 }
2694
deinit(void)2695 void ConcurrentImageAtomicCase::deinit (void)
2696 {
2697 if (m_imageID)
2698 {
2699 m_context.getRenderContext().getFunctions().deleteTextures(1, &m_imageID);
2700 m_imageID = 0;
2701 }
2702
2703 for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2704 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2705 m_intermediateResultBuffers.clear();
2706
2707 delete m_program;
2708 m_program = DE_NULL;
2709
2710 delete m_imageReadProgram;
2711 m_imageReadProgram = DE_NULL;
2712
2713 delete m_imageClearProgram;
2714 m_imageClearProgram = DE_NULL;
2715 }
2716
// Clears the image, dispatches the atomic-add program once per unique delta
// (no barriers between dispatches), then verifies a) every texel equals the
// sum of all deltas and b) for each texel the recorded pre-add values can be
// ordered into a valid chain of the applied additions from 0 to the sum.
TestCase::IterateResult ConcurrentImageAtomicCase::iterate (void)
{
	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
	// deltas are a shuffled ramp 1..numCalls => total sum is n(n+1)/2
	const deUint32			sumValue	= (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
	std::vector<int>		deltas;

	// generate unique deltas
	generateShuffledRamp(m_numCalls, deltas);

	// clear image
	{
		m_testCtx.getLog() << tcu::TestLog::Message << "Clearing image contents" << tcu::TestLog::EndMessage;

		gl.useProgram(m_imageClearProgram->getProgram());
		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
		gl.dispatchCompute(m_workSize, m_workSize, 1);
		// make the clear visible to the subsequent atomic-add dispatches
		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);

		GLU_EXPECT_NO_ERROR(gl.getError(), "clear");
	}

	// invoke program N times, each with a different delta
	{
		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");

		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Running shader " << m_numCalls << " times.\n"
			<< "Num groups = (" << m_workSize << ", " << m_workSize << ", 1)\n"
			<< "Setting u_atomicDelta to a unique value for each call.\n"
			<< tcu::TestLog::EndMessage;

		if (deltaLocation == -1)
			throw tcu::TestError("u_atomicDelta location was -1");

		gl.useProgram(m_program->getProgram());
		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);

		// deliberately no barriers between dispatches: image atomics alone
		// must keep the texel values coherent
		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			m_testCtx.getLog()
				<< tcu::TestLog::Message
				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
				<< tcu::TestLog::EndMessage;

			gl.uniform1ui(deltaLocation, deltas[callNdx]);
			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
			gl.dispatchCompute(m_workSize, m_workSize, 1);
		}

		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
	}

	// Verify result
	{
		std::vector<deUint32> result;

		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work image, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;

		readWorkImage(result);

		for (int ndx = 0; ndx < m_workSize * m_workSize; ++ndx)
		{
			if (result[ndx] != sumValue)
			{
				m_testCtx.getLog()
					<< tcu::TestLog::Message
					<< "Work image error, at index (" << ndx % m_workSize << ", " << ndx / m_workSize << ") expected value " << (sumValue) << ", got " << result[ndx] << "\n"
					<< "Work image contains invalid values."
					<< tcu::TestLog::EndMessage;

				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Image contents invalid");
				return STOP;
			}
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Work image contents are valid." << tcu::TestLog::EndMessage;
	}

	// verify steps
	{
		std::vector<std::vector<deUint32> >	intermediateResults	(m_numCalls);
		std::vector<deUint32>				valueChain			(m_numCalls);
		std::vector<deUint32>				chainDelta			(m_numCalls);

		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;

		// collect results

		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize * m_workSize, intermediateResults[callNdx]);
		}

		// verify values

		for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
		{
			int			invalidOperationNdx;
			deUint32	errorDelta;
			deUint32	errorExpected;

			// collect result chain for each element
			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];

			// check there exists a path from 0 to sumValue using each addition once
			// decompose cumulative results to addition operations (all additions positive => this works)

			std::sort(valueChain.begin(), valueChain.end());

			// delta between consecutive sorted pre-values is the size of the
			// addition applied at that step (final step reaches sumValue)
			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
				chainDelta[callNdx] = ((callNdx + 1 == m_numCalls) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];

			// chainDelta contains now the actual additions applied to the value
			std::sort(chainDelta.begin(), chainDelta.end());

			// validate chain
			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
			{
				m_testCtx.getLog()
					<< tcu::TestLog::Message
					<< "Intermediate buffer error, at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << "), applied operation index "
					<< invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
					<< "Intermediate buffer contains invalid values. Values at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << ")\n"
					<< tcu::TestLog::EndMessage;

				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;

				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
				return STOP;
			}
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
	}

	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
	return STOP;
}
2860
readWorkImage(std::vector<deUint32> & result)2861 void ConcurrentImageAtomicCase::readWorkImage (std::vector<deUint32>& result)
2862 {
2863 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2864 glu::Buffer resultBuffer (m_context.getRenderContext());
2865
2866 // Read image to an ssbo
2867
2868 {
2869 const std::vector<deUint32> zeroData(m_workSize*m_workSize, 0);
2870
2871 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *resultBuffer);
2872 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * m_workSize * m_workSize), &zeroData[0], GL_DYNAMIC_COPY);
2873
2874 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2875 gl.useProgram(m_imageReadProgram->getProgram());
2876
2877 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *resultBuffer);
2878 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
2879 gl.dispatchCompute(m_workSize, m_workSize, 1);
2880
2881 GLU_EXPECT_NO_ERROR(gl.getError(), "read");
2882 }
2883
2884 // Read ssbo
2885 {
2886 const void* ptr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, (int)(sizeof(deUint32) * m_workSize * m_workSize), GL_MAP_READ_BIT);
2887 GLU_EXPECT_NO_ERROR(gl.getError(), "map");
2888
2889 if (!ptr)
2890 throw tcu::TestError("mapBufferRange returned NULL");
2891
2892 result.resize(m_workSize * m_workSize);
2893 memcpy(&result[0], ptr, sizeof(deUint32) * m_workSize * m_workSize);
2894
2895 if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) == GL_FALSE)
2896 throw tcu::TestError("unmapBuffer returned false");
2897 }
2898 }
2899
genComputeSource(void) const2900 std::string ConcurrentImageAtomicCase::genComputeSource (void) const
2901 {
2902 std::ostringstream buf;
2903
2904 buf << "${GLSL_VERSION_DECL}\n"
2905 << "${SHADER_IMAGE_ATOMIC_REQUIRE}\n"
2906 << "\n"
2907 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2908 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2909 << "{\n"
2910 << " highp uint values[" << m_workSize * m_workSize << "];\n"
2911 << "} sb_ires;\n"
2912 << "\n"
2913 << "layout (binding = 2, r32ui) volatile uniform highp uimage2D u_workImage;\n"
2914 << "uniform highp uint u_atomicDelta;\n"
2915 << "\n"
2916 << "void main ()\n"
2917 << "{\n"
2918 << " highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2919 << " sb_ires.values[invocationIndex] = imageAtomicAdd(u_workImage, ivec2(gl_GlobalInvocationID.xy), u_atomicDelta);\n"
2920 << "}";
2921
2922 return specializeShader(m_context, buf.str().c_str());
2923 }
2924
genImageReadSource(void) const2925 std::string ConcurrentImageAtomicCase::genImageReadSource (void) const
2926 {
2927 std::ostringstream buf;
2928
2929 buf << "${GLSL_VERSION_DECL}\n"
2930 << "\n"
2931 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2932 << "layout (binding = 1, std430) writeonly buffer ImageValues\n"
2933 << "{\n"
2934 << " highp uint values[" << m_workSize * m_workSize << "];\n"
2935 << "} sb_res;\n"
2936 << "\n"
2937 << "layout (binding = 2, r32ui) readonly uniform highp uimage2D u_workImage;\n"
2938 << "\n"
2939 << "void main ()\n"
2940 << "{\n"
2941 << " highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2942 << " sb_res.values[invocationIndex] = imageLoad(u_workImage, ivec2(gl_GlobalInvocationID.xy)).x;\n"
2943 << "}";
2944
2945 return specializeShader(m_context, buf.str().c_str());
2946 }
2947
genImageClearSource(void) const2948 std::string ConcurrentImageAtomicCase::genImageClearSource (void) const
2949 {
2950 std::ostringstream buf;
2951
2952 buf << "${GLSL_VERSION_DECL}\n"
2953 << "\n"
2954 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2955 << "layout (binding = 2, r32ui) writeonly uniform highp uimage2D u_workImage;\n"
2956 << "\n"
2957 << "void main ()\n"
2958 << "{\n"
2959 << " imageStore(u_workImage, ivec2(gl_GlobalInvocationID.xy), uvec4(0, 0, 0, 0));\n"
2960 << "}";
2961
2962 return specializeShader(m_context, buf.str().c_str());
2963 }
2964
// Tests mixing SSBO atomic ops and atomic counter ops that are backed by the
// same buffer: one program atomicXors the high bits (XORs cancel out over the
// run), the other increments via an atomic counter. After all dispatches only
// the additive contribution must remain.
class ConcurrentSSBOAtomicCounterMixedCase : public TestCase
{
public:
	ConcurrentSSBOAtomicCounterMixedCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
	~ConcurrentSSBOAtomicCounterMixedCase	(void);

	void			init		(void);
	void			deinit		(void);
	IterateResult	iterate		(void);

private:
	std::string		genSSBOComputeSource			(void) const;	// atomicXor kernel
	std::string		genAtomicCounterComputeSource	(void) const;	// atomicCounterIncrement kernel

	const int				m_numCalls;				// number of interleaved dispatch pairs
	const int				m_workSize;				// work group count per dispatch
	deUint32				m_bufferID;				// shared backing buffer (counter + SSBO target)
	glu::ShaderProgram*		m_ssboAtomicProgram;
	glu::ShaderProgram*		m_atomicCounterProgram;
};
2985
// GL resources are created in init(); the constructor only validates and
// stores the parameters.
ConcurrentSSBOAtomicCounterMixedCase::ConcurrentSSBOAtomicCounterMixedCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
	: TestCase				(context, name, description)
	, m_numCalls			(numCalls)
	, m_workSize			(workSize)
	, m_bufferID			(DE_NULL)
	, m_ssboAtomicProgram	(DE_NULL)
	, m_atomicCounterProgram(DE_NULL)
{
	// SSBO atomic XORs cancel out
	// (each of the 8 XOR masks must be applied an even number of times; the
	// shader cycles masks with period 8, and XOR pairs need a factor of 2,
	// hence the total invocation count must be a multiple of 16*2)
	DE_ASSERT((workSize * numCalls) % (16 * 2) == 0);
}
2997
// deinit() is idempotent; safe to call even after framework teardown.
ConcurrentSSBOAtomicCounterMixedCase::~ConcurrentSSBOAtomicCounterMixedCase (void)
{
	deinit();
}
3002
// Creates the shared two-uint buffer (counter target + dummy slot) and builds
// both compute programs. Throws TestError on program build failure.
void ConcurrentSSBOAtomicCounterMixedCase::init (void)
{
	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
	// word 0: atomic target, word 1: dummy sink for the SSBO program's XOR results
	const deUint32			zeroBuf[2]	= { 0, 0 };

	// gen buffer

	gl.genBuffers(1, &m_bufferID);
	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
	gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * 2), zeroBuf, GL_DYNAMIC_COPY);

	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");

	// gen programs

	{
		const tcu::ScopedLogSection section(m_testCtx.getLog(), "SSBOProgram", "SSBO atomic program");

		m_ssboAtomicProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genSSBOComputeSource()));
		m_testCtx.getLog() << *m_ssboAtomicProgram;
		if (!m_ssboAtomicProgram->isOk())
			throw tcu::TestError("could not build program");
	}
	{
		const tcu::ScopedLogSection section(m_testCtx.getLog(), "AtomicCounterProgram", "Atomic counter program");

		m_atomicCounterProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genAtomicCounterComputeSource()));
		m_testCtx.getLog() << *m_atomicCounterProgram;
		if (!m_atomicCounterProgram->isOk())
			throw tcu::TestError("could not build program");
	}
}
3035
deinit(void)3036 void ConcurrentSSBOAtomicCounterMixedCase::deinit (void)
3037 {
3038 if (m_bufferID)
3039 {
3040 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
3041 m_bufferID = 0;
3042 }
3043
3044 delete m_ssboAtomicProgram;
3045 m_ssboAtomicProgram = DE_NULL;
3046
3047 delete m_atomicCounterProgram;
3048 m_atomicCounterProgram = DE_NULL;
3049 }
3050
// Interleaves the atomic-counter and SSBO-atomic programs m_numCalls times
// against the same backing buffer, then verifies the final word: all XORs
// cancel pairwise, so only the counter increments (numCalls * workSize) remain.
TestCase::IterateResult ConcurrentSSBOAtomicCounterMixedCase::iterate (void)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	m_testCtx.getLog() << tcu::TestLog::Message << "Testing atomic counters and SSBO atomic operations with both backed by the same buffer." << tcu::TestLog::EndMessage;

	// invoke programs N times
	{
		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Running SSBO atomic program and atomic counter program " << m_numCalls << " times. (interleaved)\n"
			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
			<< tcu::TestLog::EndMessage;

		// the same buffer is bound both as an SSBO and as an atomic counter buffer
		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_bufferID);
		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_bufferID);

		// deliberately no memory barriers between dispatches
		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			gl.useProgram(m_atomicCounterProgram->getProgram());
			gl.dispatchCompute(m_workSize, 1, 1);

			gl.useProgram(m_ssboAtomicProgram->getProgram());
			gl.dispatchCompute(m_workSize, 1, 1);
		}

		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
	}

	// Verify result
	{
		deUint32 result;

		// XORs cancel out, only addition is left
		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;

		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_bufferID);
		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);

		if ((int)result != m_numCalls*m_workSize)
		{
			m_testCtx.getLog()
				<< tcu::TestLog::Message
				<< "Buffer value error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
				<< tcu::TestLog::EndMessage;

			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
			return STOP;
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Buffer is valid." << tcu::TestLog::EndMessage;
	}

	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
	return STOP;
}
3107
genSSBOComputeSource(void) const3108 std::string ConcurrentSSBOAtomicCounterMixedCase::genSSBOComputeSource (void) const
3109 {
3110 std::ostringstream buf;
3111
3112 buf << "${GLSL_VERSION_DECL}\n"
3113 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3114 << "layout (binding = 1, std430) volatile buffer WorkBuffer\n"
3115 << "{\n"
3116 << " highp uint targetValue;\n"
3117 << " highp uint dummy;\n"
3118 << "} sb_work;\n"
3119 << "\n"
3120 << "void main ()\n"
3121 << "{\n"
3122 << " // flip high bits\n"
3123 << " highp uint mask = uint(1) << (24u + (gl_GlobalInvocationID.x % 8u));\n"
3124 << " sb_work.dummy = atomicXor(sb_work.targetValue, mask);\n"
3125 << "}";
3126
3127 return specializeShader(m_context, buf.str().c_str());
3128 }
3129
genAtomicCounterComputeSource(void) const3130 std::string ConcurrentSSBOAtomicCounterMixedCase::genAtomicCounterComputeSource (void) const
3131 {
3132 std::ostringstream buf;
3133
3134 buf << "${GLSL_VERSION_DECL}\n"
3135 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3136 << "\n"
3137 << "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
3138 << "\n"
3139 << "void main ()\n"
3140 << "{\n"
3141 << " atomicCounterIncrement(u_counter);\n"
3142 << "}";
3143
3144 return specializeShader(m_context, buf.str().c_str());
3145 }
3146
3147 } // anonymous
3148
// Root group for all synchronization tests; children are added in init().
SynchronizationTests::SynchronizationTests (Context& context)
	: TestCaseGroup(context, "synchronization", "Synchronization tests")
{
}
3153
// Child cases are owned and destroyed by the TestCaseGroup base class.
SynchronizationTests::~SynchronizationTests (void)
{
}
3157
init(void)3158 void SynchronizationTests::init (void)
3159 {
3160 tcu::TestCaseGroup* const inInvocationGroup = new tcu::TestCaseGroup(m_testCtx, "in_invocation", "Test intra-invocation synchronization");
3161 tcu::TestCaseGroup* const interInvocationGroup = new tcu::TestCaseGroup(m_testCtx, "inter_invocation", "Test inter-invocation synchronization");
3162 tcu::TestCaseGroup* const interCallGroup = new tcu::TestCaseGroup(m_testCtx, "inter_call", "Test inter-call synchronization");
3163
3164 addChild(inInvocationGroup);
3165 addChild(interInvocationGroup);
3166 addChild(interCallGroup);
3167
3168 // .in_invocation & .inter_invocation
3169 {
3170 static const struct CaseConfig
3171 {
3172 const char* namePrefix;
3173 const InterInvocationTestCase::StorageType storage;
3174 const int flags;
3175 } configs[] =
3176 {
3177 { "image", InterInvocationTestCase::STORAGE_IMAGE, 0 },
3178 { "image_atomic", InterInvocationTestCase::STORAGE_IMAGE, InterInvocationTestCase::FLAG_ATOMIC },
3179 { "ssbo", InterInvocationTestCase::STORAGE_BUFFER, 0 },
3180 { "ssbo_atomic", InterInvocationTestCase::STORAGE_BUFFER, InterInvocationTestCase::FLAG_ATOMIC },
3181 };
3182
3183 for (int groupNdx = 0; groupNdx < 2; ++groupNdx)
3184 {
3185 tcu::TestCaseGroup* const targetGroup = (groupNdx == 0) ? (inInvocationGroup) : (interInvocationGroup);
3186 const int extraFlags = (groupNdx == 0) ? (0) : (InterInvocationTestCase::FLAG_IN_GROUP);
3187
3188 for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3189 {
3190 const char* const target = (configs[configNdx].storage == InterInvocationTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3191
3192 targetGroup->addChild(new InvocationWriteReadCase(m_context,
3193 (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3194 (std::string("Write to ") + target + " and read it").c_str(),
3195 configs[configNdx].storage,
3196 configs[configNdx].flags | extraFlags));
3197
3198 targetGroup->addChild(new InvocationReadWriteCase(m_context,
3199 (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3200 (std::string("Read form ") + target + " and then write to it").c_str(),
3201 configs[configNdx].storage,
3202 configs[configNdx].flags | extraFlags));
3203
3204 targetGroup->addChild(new InvocationOverWriteCase(m_context,
3205 (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3206 (std::string("Write to ") + target + " twice and read it").c_str(),
3207 configs[configNdx].storage,
3208 configs[configNdx].flags | extraFlags));
3209
3210 targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3211 (std::string(configs[configNdx].namePrefix) + "_alias_write").c_str(),
3212 (std::string("Write to aliasing ") + target + " and read it").c_str(),
3213 InvocationAliasWriteCase::TYPE_WRITE,
3214 configs[configNdx].storage,
3215 configs[configNdx].flags | extraFlags));
3216
3217 targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3218 (std::string(configs[configNdx].namePrefix) + "_alias_overwrite").c_str(),
3219 (std::string("Write to aliasing ") + target + "s and read it").c_str(),
3220 InvocationAliasWriteCase::TYPE_OVERWRITE,
3221 configs[configNdx].storage,
3222 configs[configNdx].flags | extraFlags));
3223 }
3224 }
3225 }
3226
3227 // .inter_call
3228 {
3229 tcu::TestCaseGroup* const withBarrierGroup = new tcu::TestCaseGroup(m_testCtx, "with_memory_barrier", "Synchronize with memory barrier");
3230 tcu::TestCaseGroup* const withoutBarrierGroup = new tcu::TestCaseGroup(m_testCtx, "without_memory_barrier", "Synchronize without memory barrier");
3231
3232 interCallGroup->addChild(withBarrierGroup);
3233 interCallGroup->addChild(withoutBarrierGroup);
3234
3235 // .with_memory_barrier
3236 {
3237 static const struct CaseConfig
3238 {
3239 const char* namePrefix;
3240 const InterCallTestCase::StorageType storage;
3241 const int flags;
3242 } configs[] =
3243 {
3244 { "image", InterCallTestCase::STORAGE_IMAGE, 0 },
3245 { "image_atomic", InterCallTestCase::STORAGE_IMAGE, InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT },
3246 { "ssbo", InterCallTestCase::STORAGE_BUFFER, 0 },
3247 { "ssbo_atomic", InterCallTestCase::STORAGE_BUFFER, InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT },
3248 };
3249
3250 const int seed0 = 123;
3251 const int seed1 = 457;
3252
3253 for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3254 {
3255 const char* const target = (configs[configNdx].storage == InterCallTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3256
3257 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3258 (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3259 (std::string("Write to ") + target + " and read it").c_str(),
3260 configs[configNdx].storage,
3261 configs[configNdx].flags,
3262 InterCallOperations()
3263 << op::WriteData::Generate(1, seed0)
3264 << op::Barrier()
3265 << op::ReadData::Generate(1, seed0)));
3266
3267 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3268 (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3269 (std::string("Read from ") + target + " and then write to it").c_str(),
3270 configs[configNdx].storage,
3271 configs[configNdx].flags,
3272 InterCallOperations()
3273 << op::ReadZeroData::Generate(1)
3274 << op::Barrier()
3275 << op::WriteData::Generate(1, seed0)));
3276
3277 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3278 (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3279 (std::string("Write to ") + target + " twice and read it").c_str(),
3280 configs[configNdx].storage,
3281 configs[configNdx].flags,
3282 InterCallOperations()
3283 << op::WriteData::Generate(1, seed0)
3284 << op::Barrier()
3285 << op::WriteData::Generate(1, seed1)
3286 << op::Barrier()
3287 << op::ReadData::Generate(1, seed1)));
3288
3289 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3290 (std::string(configs[configNdx].namePrefix) + "_multiple_write_read").c_str(),
3291 (std::string("Write to multiple ") + target + "s and read them").c_str(),
3292 configs[configNdx].storage,
3293 configs[configNdx].flags,
3294 InterCallOperations()
3295 << op::WriteData::Generate(1, seed0)
3296 << op::WriteData::Generate(2, seed1)
3297 << op::Barrier()
3298 << op::ReadMultipleData::Generate(1, seed0, 2, seed1)));
3299
3300 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3301 (std::string(configs[configNdx].namePrefix) + "_multiple_interleaved_write_read").c_str(),
3302 (std::string("Write to same ") + target + " in multiple calls and read it").c_str(),
3303 configs[configNdx].storage,
3304 configs[configNdx].flags,
3305 InterCallOperations()
3306 << op::WriteDataInterleaved::Generate(1, seed0, true)
3307 << op::WriteDataInterleaved::Generate(1, seed1, false)
3308 << op::Barrier()
3309 << op::ReadDataInterleaved::Generate(1, seed0, seed1)));
3310
3311 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3312 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_ordered").c_str(),
3313 (std::string("Two unrelated ") + target + " write-reads").c_str(),
3314 configs[configNdx].storage,
3315 configs[configNdx].flags,
3316 InterCallOperations()
3317 << op::WriteData::Generate(1, seed0)
3318 << op::WriteData::Generate(2, seed1)
3319 << op::Barrier()
3320 << op::ReadData::Generate(1, seed0)
3321 << op::ReadData::Generate(2, seed1)));
3322
3323 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3324 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_non_ordered").c_str(),
3325 (std::string("Two unrelated ") + target + " write-reads").c_str(),
3326 configs[configNdx].storage,
3327 configs[configNdx].flags,
3328 InterCallOperations()
3329 << op::WriteData::Generate(1, seed0)
3330 << op::WriteData::Generate(2, seed1)
3331 << op::Barrier()
3332 << op::ReadData::Generate(2, seed1)
3333 << op::ReadData::Generate(1, seed0)));
3334 }
3335
3336 // .without_memory_barrier
3337 {
3338 struct InvocationConfig
3339 {
3340 const char* name;
3341 int count;
3342 };
3343
3344 static const InvocationConfig ssboInvocations[] =
3345 {
3346 { "1k", 1024 },
3347 { "4k", 4096 },
3348 { "32k", 32768 },
3349 };
3350 static const InvocationConfig imageInvocations[] =
3351 {
3352 { "8x8", 8 },
3353 { "32x32", 32 },
3354 { "128x128", 128 },
3355 };
3356 static const InvocationConfig counterInvocations[] =
3357 {
3358 { "32", 32 },
3359 { "128", 128 },
3360 { "1k", 1024 },
3361 };
3362 static const int callCounts[] = { 2, 5, 100 };
3363
3364 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(ssboInvocations); ++invocationNdx)
3365 for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3366 withoutBarrierGroup->addChild(new SSBOConcurrentAtomicCase(m_context, (std::string("ssbo_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + ssboInvocations[invocationNdx].name + "_invocations").c_str(), "", callCounts[callCountNdx], ssboInvocations[invocationNdx].count));
3367
3368 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(imageInvocations); ++invocationNdx)
3369 for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3370 withoutBarrierGroup->addChild(new ConcurrentImageAtomicCase(m_context, (std::string("image_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + imageInvocations[invocationNdx].name + "_invocations").c_str(), "", callCounts[callCountNdx], imageInvocations[invocationNdx].count));
3371
3372 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3373 for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3374 withoutBarrierGroup->addChild(new ConcurrentAtomicCounterCase(m_context, (std::string("atomic_counter_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(), "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3375
3376 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3377 for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3378 withoutBarrierGroup->addChild(new ConcurrentSSBOAtomicCounterMixedCase(m_context, (std::string("ssbo_atomic_counter_mixed_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(), "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3379 }
3380 }
3381 }
3382 }
3383
3384 } // Functional
3385 } // gles31
3386 } // deqp
3387