1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017-2019 The Khronos Group Inc.
6  * Copyright (c) 2018-2019 NVIDIA Corporation
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *	  http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Vulkan Memory Model tests
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktMemoryModelTests.hpp"
26 #include "vktMemoryModelPadding.hpp"
27 
28 #include "vkBufferWithMemory.hpp"
29 #include "vkImageWithMemory.hpp"
30 #include "vkQueryUtil.hpp"
31 #include "vkBuilderUtil.hpp"
32 #include "vkCmdUtil.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkObjUtil.hpp"
35 
36 #include "vktTestGroupUtil.hpp"
37 #include "vktTestCase.hpp"
38 
39 #include "deDefs.h"
40 #include "deMath.h"
41 #include "deSharedPtr.hpp"
42 #include "deString.h"
43 
44 #include "tcuTestCase.hpp"
45 #include "tcuTestLog.hpp"
46 
47 #include <string>
48 #include <sstream>
49 
50 namespace vkt
51 {
52 namespace MemoryModel
53 {
54 namespace
55 {
56 using namespace vk;
57 using namespace std;
58 
59 typedef enum
60 {
61 	TT_MP = 0,  // message passing
62 	TT_WAR, // write-after-read hazard
63 } TestType;
64 
65 typedef enum
66 {
67 	ST_FENCE_FENCE = 0,
68 	ST_FENCE_ATOMIC,
69 	ST_ATOMIC_FENCE,
70 	ST_ATOMIC_ATOMIC,
71 	ST_CONTROL_BARRIER,
72 	ST_CONTROL_AND_MEMORY_BARRIER,
73 } SyncType;
74 
75 typedef enum
76 {
77 	SC_BUFFER = 0,
78 	SC_IMAGE,
79 	SC_WORKGROUP,
80 	SC_PHYSBUFFER,
81 } StorageClass;
82 
83 typedef enum
84 {
85 	SCOPE_DEVICE = 0,
86 	SCOPE_QUEUEFAMILY,
87 	SCOPE_WORKGROUP,
88 	SCOPE_SUBGROUP,
89 } Scope;
90 
91 typedef enum
92 {
93 	STAGE_COMPUTE = 0,
94 	STAGE_VERTEX,
95 	STAGE_FRAGMENT,
96 } Stage;
97 
98 typedef enum
99 {
100 	DATA_TYPE_UINT = 0,
101 	DATA_TYPE_UINT64,
102 	DATA_TYPE_FLOAT32,
103 	DATA_TYPE_FLOAT64,
104 } DataType;
105 
106 const VkFlags allShaderStages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
107 const VkFlags allPipelineStages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
108 
109 struct CaseDef
110 {
111 	bool payloadMemLocal;
112 	bool guardMemLocal;
113 	bool coherent;
114 	bool core11;
115 	bool atomicRMW;
116 	TestType testType;
117 	StorageClass payloadSC;
118 	StorageClass guardSC;
119 	Scope scope;
120 	SyncType syncType;
121 	Stage stage;
122 	DataType dataType;
123 	bool transitive;
124 	bool transitiveVis;
125 };
126 
127 class MemoryModelTestInstance : public TestInstance
128 {
129 public:
130 						MemoryModelTestInstance	(Context& context, const CaseDef& data);
131 						~MemoryModelTestInstance	(void);
132 	tcu::TestStatus		iterate				(void);
133 private:
134 	CaseDef			m_data;
135 
136 	enum
137 	{
138 		WIDTH = 256,
139 		HEIGHT = 256
140 	};
141 };
142 
143 MemoryModelTestInstance::MemoryModelTestInstance (Context& context, const CaseDef& data)
144 	: vkt::TestInstance		(context)
145 	, m_data				(data)
146 {
147 }
148 
149 MemoryModelTestInstance::~MemoryModelTestInstance (void)
150 {
151 }
152 
153 class MemoryModelTestCase : public TestCase
154 {
155 	public:
156 								MemoryModelTestCase		(tcu::TestContext& context, const char* name, const char* desc, const CaseDef data);
157 								~MemoryModelTestCase	(void);
158 	virtual	void				initPrograms		(SourceCollections& programCollection) const;
159 	virtual	void				initProgramsTransitive(SourceCollections& programCollection) const;
160 	virtual TestInstance*		createInstance		(Context& context) const;
161 	virtual void				checkSupport		(Context& context) const;
162 
163 private:
164 	CaseDef					m_data;
165 };
166 
167 MemoryModelTestCase::MemoryModelTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data)
168 	: vkt::TestCase	(context, name, desc)
169 	, m_data		(data)
170 {
171 }
172 
173 MemoryModelTestCase::~MemoryModelTestCase	(void)
174 {
175 }
176 
177 void MemoryModelTestCase::checkSupport(Context& context) const
178 {
179 	if (!context.contextSupports(vk::ApiVersion(1, 1, 0)))
180 	{
181 		TCU_THROW(NotSupportedError, "Vulkan 1.1 not supported");
182 	}
183 
184 	if (!m_data.core11)
185 	{
186 		if (!context.getVulkanMemoryModelFeatures().vulkanMemoryModel)
187 		{
188 			TCU_THROW(NotSupportedError, "vulkanMemoryModel not supported");
189 		}
190 
191 		if (m_data.scope == SCOPE_DEVICE && !context.getVulkanMemoryModelFeatures().vulkanMemoryModelDeviceScope)
192 		{
193 			TCU_THROW(NotSupportedError, "vulkanMemoryModelDeviceScope not supported");
194 		}
195 	}
196 
197 	if (m_data.scope == SCOPE_SUBGROUP)
198 	{
199 		// Check for subgroup support for scope_subgroup tests.
200 		VkPhysicalDeviceSubgroupProperties subgroupProperties;
201 		subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
202 		subgroupProperties.pNext = DE_NULL;
203 		subgroupProperties.supportedOperations = 0;
204 
205 		VkPhysicalDeviceProperties2 properties;
206 		properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
207 		properties.pNext = &subgroupProperties;
208 
209 		context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
210 
211 		if (!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) ||
212 			!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) ||
213 			!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT))
214 		{
215 			TCU_THROW(NotSupportedError, "Subgroup features not supported");
216 		}
217 
218 		VkShaderStageFlags stage = VK_SHADER_STAGE_COMPUTE_BIT;
219 		if (m_data.stage == STAGE_VERTEX)
220 		{
221 			stage = VK_SHADER_STAGE_VERTEX_BIT;
222 		}
223 		else if (m_data.stage == STAGE_COMPUTE)
224 		{
225 			stage = VK_SHADER_STAGE_COMPUTE_BIT;
226 		}
227 		else if (m_data.stage == STAGE_FRAGMENT)
228 		{
229 			stage = VK_SHADER_STAGE_FRAGMENT_BIT;
230 		}
231 
232 		if ((subgroupProperties.supportedStages & stage) == 0)
233 		{
234 			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
235 		}
236 	}
237 	if (m_data.dataType == DATA_TYPE_UINT64)
238 	{
239 		if (!context.getDeviceFeatures().shaderInt64)
240 		{
241 			TCU_THROW(NotSupportedError, "64-bit integer in shaders not supported");
242 		}
243 		if (!context.getShaderAtomicInt64Features().shaderBufferInt64Atomics &&
244 			(m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER))
245 		{
246 			TCU_THROW(NotSupportedError, "64-bit integer buffer atomics not supported");
247 		}
248 		if (!context.getShaderAtomicInt64Features().shaderSharedInt64Atomics &&
249 			m_data.guardSC == SC_WORKGROUP)
250 		{
251 			TCU_THROW(NotSupportedError, "64-bit integer shared atomics not supported");
252 		}
253 	}
254 
255 	if (m_data.dataType == DATA_TYPE_FLOAT32)
256 	{
257 		if (!context.isDeviceFunctionalitySupported("VK_EXT_shader_atomic_float"))
258 			TCU_THROW(NotSupportedError, "Missing extension: VK_EXT_shader_atomic_float");
259 
260 		if ((m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER) &&
261 			(!context.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics))
262 		{
263 			TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
264 		}
265 
266 		if (m_data.guardSC == SC_IMAGE && (!context.getShaderAtomicFloatFeaturesEXT().shaderImageFloat32Atomics))
267 		{
268 			TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point image atomic operations not supported");
269 		}
270 
271 		if (m_data.guardSC == SC_WORKGROUP && (!context.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics))
272 		{
273 			TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
274 		}
275 	}
276 
277 	if (m_data.dataType == DATA_TYPE_FLOAT64)
278 	{
279 		if (!context.isDeviceFunctionalitySupported("VK_EXT_shader_atomic_float"))
280 			TCU_THROW(NotSupportedError, "Missing extension: VK_EXT_shader_atomic_float");
281 
282 		if ((m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER) &&
283 			(!context.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics))
284 		{
285 			TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
286 		}
287 
288 		if (m_data.guardSC == SC_IMAGE || m_data.payloadSC == SC_IMAGE)
289 		{
290 			TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point image atomic operations not supported");
291 		}
292 
293 		if (m_data.guardSC == SC_WORKGROUP && (!context.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics))
294 		{
295 			TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
296 		}
297 	}
298 
299 	if (m_data.transitive &&
300 		!context.getVulkanMemoryModelFeatures().vulkanMemoryModelAvailabilityVisibilityChains)
301 		TCU_THROW(NotSupportedError, "vulkanMemoryModelAvailabilityVisibilityChains not supported");
302 
303 	if ((m_data.payloadSC == SC_PHYSBUFFER || m_data.guardSC == SC_PHYSBUFFER) && !context.isBufferDeviceAddressSupported())
304 		TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported");
305 
306 	if (m_data.stage == STAGE_VERTEX)
307 	{
308 		if (!context.getDeviceFeatures().vertexPipelineStoresAndAtomics)
309 		{
310 			TCU_THROW(NotSupportedError, "vertexPipelineStoresAndAtomics not supported");
311 		}
312 	}
313 	if (m_data.stage == STAGE_FRAGMENT)
314 	{
315 		if (!context.getDeviceFeatures().fragmentStoresAndAtomics)
316 		{
317 			TCU_THROW(NotSupportedError, "fragmentStoresAndAtomics not supported");
318 		}
319 	}
320 }
321 
322 
323 void MemoryModelTestCase::initPrograms (SourceCollections& programCollection) const
324 {
325 	if (m_data.transitive)
326 	{
327 		initProgramsTransitive(programCollection);
328 		return;
329 	}
330 	DE_ASSERT(!m_data.transitiveVis);
331 
332 	Scope invocationMapping = m_data.scope;
333 	if ((m_data.scope == SCOPE_DEVICE || m_data.scope == SCOPE_QUEUEFAMILY) &&
334 		(m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP))
335 	{
336 		invocationMapping = SCOPE_WORKGROUP;
337 	}
338 
339 	const char *scopeStr;
340 	switch (m_data.scope)
341 	{
342 	default: DE_ASSERT(0); // fall through
343 	case SCOPE_DEVICE:		scopeStr = "gl_ScopeDevice"; break;
344 	case SCOPE_QUEUEFAMILY:	scopeStr = "gl_ScopeQueueFamily"; break;
345 	case SCOPE_WORKGROUP:	scopeStr = "gl_ScopeWorkgroup"; break;
346 	case SCOPE_SUBGROUP:	scopeStr = "gl_ScopeSubgroup"; break;
347 	}
348 
349 	const char *typeStr = (m_data.dataType == DATA_TYPE_UINT64) ? "uint64_t" : (m_data.dataType == DATA_TYPE_FLOAT32) ? "float" :
350 		(m_data.dataType == DATA_TYPE_FLOAT64) ? "double" : "uint";
351 	const bool intType = (m_data.dataType == DATA_TYPE_UINT || m_data.dataType == DATA_TYPE_UINT64);
352 
353 	// Construct storageSemantics strings. Both release and acquire
354 	// always have the payload storage class. They only include the
355 	// guard storage class if they're using FENCE for that side of the
356 	// sync.
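	//
	// For example, a TT_MP case with an SC_BUFFER payload, an SC_IMAGE guard and
	// ST_FENCE_FENCE ends up with
	//   release = acquire = gl_StorageSemanticsBuffer | gl_StorageSemanticsImage
	// whereas ST_ATOMIC_ATOMIC leaves both sides as gl_StorageSemanticsBuffer only,
	// since the guard is then synchronized through the atomic itself.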
357 	std::stringstream storageSemanticsRelease;
358 	switch (m_data.payloadSC)
359 	{
360 	default: DE_ASSERT(0); // fall through
361 	case SC_PHYSBUFFER: // fall through
362 	case SC_BUFFER:		storageSemanticsRelease << "gl_StorageSemanticsBuffer"; break;
363 	case SC_IMAGE:		storageSemanticsRelease << "gl_StorageSemanticsImage"; break;
364 	case SC_WORKGROUP:	storageSemanticsRelease << "gl_StorageSemanticsShared"; break;
365 	}
366 	std::stringstream storageSemanticsAcquire;
367 	storageSemanticsAcquire << storageSemanticsRelease.str();
368 	if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
369 	{
370 		switch (m_data.guardSC)
371 		{
372 		default: DE_ASSERT(0); // fall through
373 		case SC_PHYSBUFFER: // fall through
374 		case SC_BUFFER:		storageSemanticsRelease << " | gl_StorageSemanticsBuffer"; break;
375 		case SC_IMAGE:		storageSemanticsRelease << " | gl_StorageSemanticsImage"; break;
376 		case SC_WORKGROUP:	storageSemanticsRelease << " | gl_StorageSemanticsShared"; break;
377 		}
378 	}
379 	if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
380 	{
381 		switch (m_data.guardSC)
382 		{
383 		default: DE_ASSERT(0); // fall through
384 		case SC_PHYSBUFFER: // fall through
385 		case SC_BUFFER:		storageSemanticsAcquire << " | gl_StorageSemanticsBuffer"; break;
386 		case SC_IMAGE:		storageSemanticsAcquire << " | gl_StorageSemanticsImage"; break;
387 		case SC_WORKGROUP:	storageSemanticsAcquire << " | gl_StorageSemanticsShared"; break;
388 		}
389 	}
390 
391 	std::stringstream semanticsRelease, semanticsAcquire, semanticsAcquireRelease;
392 
393 	semanticsRelease << "gl_SemanticsRelease";
394 	semanticsAcquire << "gl_SemanticsAcquire";
395 	semanticsAcquireRelease << "gl_SemanticsAcquireRelease";
396 	if (!m_data.coherent && m_data.testType != TT_WAR)
397 	{
398 		DE_ASSERT(!m_data.core11);
399 		semanticsRelease << " | gl_SemanticsMakeAvailable";
400 		semanticsAcquire << " | gl_SemanticsMakeVisible";
401 		semanticsAcquireRelease << " | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible";
402 	}
403 
404 	std::stringstream css;
405 	css << "#version 450 core\n";
406 	if (!m_data.core11)
407 	{
408 		css << "#pragma use_vulkan_memory_model\n";
409 	}
410 	if (!intType)
411 	{
412 		css <<
413 			"#extension GL_EXT_shader_atomic_float : enable\n"
414 			"#extension GL_KHR_memory_scope_semantics : enable\n";
415 	}
416 	css <<
417 		"#extension GL_KHR_shader_subgroup_basic : enable\n"
418 		"#extension GL_KHR_shader_subgroup_shuffle : enable\n"
419 		"#extension GL_KHR_shader_subgroup_ballot : enable\n"
420 		"#extension GL_KHR_memory_scope_semantics : enable\n"
421 		"#extension GL_ARB_gpu_shader_int64 : enable\n"
422 		"#extension GL_EXT_buffer_reference : enable\n"
423 		"// DIM/NUM_WORKGROUP_EACH_DIM overridden by spec constants\n"
424 		"layout(constant_id = 0) const int DIM = 1;\n"
425 		"layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
426 		"struct S { " << typeStr << " x[DIM*DIM]; };\n";
427 
428 	if (m_data.stage == STAGE_COMPUTE)
429 	{
430 		css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
431 	}
432 
433 	const char *memqual = "";
434 	if (m_data.coherent)
435 	{
436 		if (m_data.core11)
437 		{
438 			// Vulkan 1.1 only has "coherent", use it regardless of scope
439 			memqual = "coherent";
440 		}
441 		else
442 		{
443 			switch (m_data.scope)
444 			{
445 			default: DE_ASSERT(0); // fall through
446 			case SCOPE_DEVICE:		memqual = "devicecoherent"; break;
447 			case SCOPE_QUEUEFAMILY:	memqual = "queuefamilycoherent"; break;
448 			case SCOPE_WORKGROUP:	memqual = "workgroupcoherent"; break;
449 			case SCOPE_SUBGROUP:	memqual = "subgroupcoherent"; break;
450 			}
451 		}
452 	}
453 	else
454 	{
455 		DE_ASSERT(!m_data.core11);
456 		memqual = "nonprivate";
457 	}
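	// For example, a coherent SCOPE_DEVICE case (without core11) uses "devicecoherent",
	// while non-coherent cases always use "nonprivate" and, for message-passing tests,
	// rely on the MakeAvailable/MakeVisible semantics added above.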
458 
459 	stringstream pushConstMembers;
460 
461 	// Declare payload, guard, and fail resources
462 	switch (m_data.payloadSC)
463 	{
464 	default: DE_ASSERT(0); // fall through
465 	case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer PayloadRef { " << typeStr << " x[]; };\n";
466 						pushConstMembers << "   layout(offset = 0) PayloadRef payloadref;\n"; break;
467 	case SC_BUFFER:		css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n"; break;
468 	case SC_IMAGE:
469 		if (intType)
470 			css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n";
471 		else
472 			css << "layout(set=0, binding=0, r32f) uniform " << memqual << " image2D payload;\n";
473 		break;
474 	case SC_WORKGROUP:	css << "shared S payload;\n"; break;
475 	}
476 	if (m_data.syncType != ST_CONTROL_AND_MEMORY_BARRIER && m_data.syncType != ST_CONTROL_BARRIER)
477 	{
478 		// The guard variable is only accessed with atomics and need not be declared coherent.
479 		switch (m_data.guardSC)
480 		{
481 		default: DE_ASSERT(0); // fall through
482 		case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer GuardRef { " << typeStr << " x[]; };\n";
483 							pushConstMembers << "layout(offset = 8) GuardRef guard;\n"; break;
484 		case SC_BUFFER:		css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n"; break;
485 		case SC_IMAGE:
486 			if (intType)
487 				css << "layout(set=0, binding=1, r32ui) uniform " << memqual << " uimage2D guard;\n";
488 			else
489 				css << "layout(set=0, binding=1, r32f) uniform " << memqual << " image2D guard;\n";
490 			break;
491 		case SC_WORKGROUP:	css << "shared S guard;\n"; break;
492 		}
493 	}
494 
495 	css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";
496 
497 	if (pushConstMembers.str().size() != 0) {
498 		css << "layout (push_constant, std430) uniform PC {\n" << pushConstMembers.str() << "};\n";
499 	}
500 
501 	css <<
502 		"void main()\n"
503 		"{\n"
504 		"   bool pass = true;\n"
505 		"   bool skip = false;\n";
506 
507 	if (m_data.payloadSC == SC_PHYSBUFFER)
508 		css << "   " << memqual << " PayloadRef payload = payloadref;\n";
509 
510 	if (m_data.stage == STAGE_FRAGMENT)
511 	{
512 		// Kill helper invocations so they don't load outside the bounds of the SSBO.
513 		// Helper pixels are also initially "active" and if a thread gets one as its
514 		// partner in SCOPE_SUBGROUP mode, it can't run the test.
515 		css << "   if (gl_HelperInvocation) { return; }\n";
516 	}
517 
518 	// Compute coordinates based on the storage class and scope.
519 	// For workgroup scope, we pair up LocalInvocationID and DIM-1-LocalInvocationID.
520 	// For device scope, we pair up GlobalInvocationID and DIM*NUMWORKGROUPS-1-GlobalInvocationID.
521 	// For subgroup scope, we pair up LocalInvocationID with the LocalInvocationID of the invocation at gl_SubgroupInvocationID ^ (gl_SubgroupSize-1).
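	//
	// For example, at workgroup scope with DIM=8, the invocation with
	// LocalInvocationID (1,2) is paired with (6,5); each invocation writes its own
	// payload element and then reads its partner's.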
522 	switch (invocationMapping)
523 	{
524 	default: DE_ASSERT(0); // fall through
525 	case SCOPE_SUBGROUP:
526 		// If the partner invocation isn't active, the shuffle below will be undefined. Bail.
527 		css << "   uvec4 ballot = subgroupBallot(true);\n"
528 			   "   if (!subgroupBallotBitExtract(ballot, gl_SubgroupInvocationID^(gl_SubgroupSize-1))) { return; }\n";
529 
530 		switch (m_data.stage)
531 		{
532 		default: DE_ASSERT(0); // fall through
533 		case STAGE_COMPUTE:
534 			css <<
535 			"   ivec2 localId           = ivec2(gl_LocalInvocationID.xy);\n"
536 			"   ivec2 partnerLocalId    = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
537 			"   uint sharedCoord        = localId.y * DIM + localId.x;\n"
538 			"   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
539 			"   uint bufferCoord        = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
540 			"   uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
541 			"   ivec2 imageCoord        = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
542 			"   ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
543 			break;
544 		case STAGE_VERTEX:
545 			css <<
546 			"   uint bufferCoord        = gl_VertexIndex;\n"
547 			"   uint partnerBufferCoord = subgroupShuffleXor(gl_VertexIndex, gl_SubgroupSize-1);\n"
548 			"   ivec2 imageCoord        = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
549 			"   ivec2 partnerImageCoord = subgroupShuffleXor(imageCoord, gl_SubgroupSize-1);\n"
550 			"   gl_PointSize            = 1.0f;\n"
551 			"   gl_Position             = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
552 			break;
553 		case STAGE_FRAGMENT:
554 			css <<
555 			"   ivec2 localId        = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
556 			"   ivec2 groupId        = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
557 			"   ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
558 			"   ivec2 partnerGroupId = subgroupShuffleXor(groupId, gl_SubgroupSize-1);\n"
559 			"   uint sharedCoord     = localId.y * DIM + localId.x;\n"
560 			"   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
561 			"   uint bufferCoord     = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
562 			"   uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
563 			"   ivec2 imageCoord     = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
564 			"   ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
565 			break;
566 		}
567 		break;
568 	case SCOPE_WORKGROUP:
569 		css <<
570 		"   ivec2 localId           = ivec2(gl_LocalInvocationID.xy);\n"
571 		"   ivec2 partnerLocalId    = ivec2(DIM-1)-ivec2(gl_LocalInvocationID.xy);\n"
572 		"   uint sharedCoord        = localId.y * DIM + localId.x;\n"
573 		"   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
574 		"   uint bufferCoord        = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
575 		"   uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
576 		"   ivec2 imageCoord        = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
577 		"   ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
578 		break;
579 	case SCOPE_QUEUEFAMILY:
580 	case SCOPE_DEVICE:
581 		switch (m_data.stage)
582 		{
583 		default: DE_ASSERT(0); // fall through
584 		case STAGE_COMPUTE:
585 			css <<
586 			"   ivec2 globalId          = ivec2(gl_GlobalInvocationID.xy);\n"
587 			"   ivec2 partnerGlobalId   = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n"
588 			"   uint bufferCoord        = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
589 			"   uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
590 			"   ivec2 imageCoord        = globalId;\n"
591 			"   ivec2 partnerImageCoord = partnerGlobalId;\n";
592 			break;
593 		case STAGE_VERTEX:
594 			css <<
595 			"   ivec2 globalId          = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
596 			"   ivec2 partnerGlobalId   = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - globalId;\n"
597 			"   uint bufferCoord        = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
598 			"   uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
599 			"   ivec2 imageCoord        = globalId;\n"
600 			"   ivec2 partnerImageCoord = partnerGlobalId;\n"
601 			"   gl_PointSize            = 1.0f;\n"
602 			"   gl_Position             = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
603 			break;
604 		case STAGE_FRAGMENT:
605 			css <<
606 			"   ivec2 localId       = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
607 			"   ivec2 groupId       = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
608 			"   ivec2 partnerLocalId = ivec2(DIM-1)-localId;\n"
609 			"   ivec2 partnerGroupId = groupId;\n"
610 			"   uint sharedCoord    = localId.y * DIM + localId.x;\n"
611 			"   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
612 			"   uint bufferCoord    = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
613 			"   uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
614 			"   ivec2 imageCoord    = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
615 			"   ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
616 			break;
617 		}
618 		break;
619 	}
620 
621 	// Initialize shared memory, followed by a barrier
622 	if (m_data.payloadSC == SC_WORKGROUP)
623 	{
624 		css << "   payload.x[sharedCoord] = 0;\n";
625 	}
626 	if (m_data.guardSC == SC_WORKGROUP)
627 	{
628 		css << "   guard.x[sharedCoord] = 0;\n";
629 	}
630 	if (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP)
631 	{
632 		switch (invocationMapping)
633 		{
634 		default: DE_ASSERT(0); // fall through
635 		case SCOPE_SUBGROUP:	css << "   subgroupBarrier();\n"; break;
636 		case SCOPE_WORKGROUP:	css << "   barrier();\n"; break;
637 		}
638 	}
639 
640 	if (m_data.testType == TT_MP)
641 	{
642 		if (intType)
643 		{
644 			// Store payload
645 			switch (m_data.payloadSC)
646 			{
647 			default: DE_ASSERT(0); // fall through
648 			case SC_PHYSBUFFER: // fall through
649 			case SC_BUFFER:		css << "   payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n"; break;
650 			case SC_IMAGE:		css << "   imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, partnerImageCoord).x>>31), 0, 0, 0));\n"; break;
651 			case SC_WORKGROUP:	css << "   payload.x[sharedCoord] = bufferCoord + (payload.x[partnerSharedCoord]>>31);\n"; break;
652 			}
653 		}
654 		else
655 		{
656 			// Store payload
657 			switch (m_data.payloadSC)
658 			{
659 			default: DE_ASSERT(0); // fall through
660 			case SC_PHYSBUFFER: // fall through
661 			case SC_BUFFER:		css << "   payload.x[bufferCoord] = " << typeStr << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerBufferCoord])))>>31);\n"; break;
662 			case SC_IMAGE:		css << "   imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord + (floatBitsToInt(float(imageLoad(payload, partnerImageCoord).x))>>31)), 0, 0, 0)); \n"; break;
663 			case SC_WORKGROUP:	css << "   payload.x[sharedCoord] = " << typeStr << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerSharedCoord])))>>31);\n"; break;
664 			}
665 		}
666 	}
667 	else
668 	{
669 		DE_ASSERT(m_data.testType == TT_WAR);
670 		// Load payload
671 		switch (m_data.payloadSC)
672 		{
673 		default: DE_ASSERT(0); // fall through
674 		case SC_PHYSBUFFER: // fall through
675 		case SC_BUFFER:		css << "   " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
676 		case SC_IMAGE:		css << "   " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
677 		case SC_WORKGROUP:	css << "   " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
678 		}
679 	}
680 	if (m_data.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
681 	{
682 		// Acquire and release separate from control barrier
683 		css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n"
684 			   "   controlBarrier(" << scopeStr << ", gl_ScopeInvocation, 0, 0);\n"
685 			   "   memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
686 	}
687 	else if (m_data.syncType == ST_CONTROL_BARRIER)
688 	{
689 		// Control barrier performs both acquire and release
690 		css << "   controlBarrier(" << scopeStr << ", " << scopeStr << ", "
691 									<< storageSemanticsRelease.str() << " | " << storageSemanticsAcquire.str() << ", "
692 									<< semanticsAcquireRelease.str() << ");\n";
693 	}
694 	else
695 	{
696 		// Don't type cast for 64-bit image atomics
697 		const char* typeCastStr = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ? "" : typeStr;
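		// e.g. DATA_TYPE_UINT emits imageAtomicExchange(guard, ..., uint(1u), ...),
		// while the 64-bit types emit the literal as (1u) with no constructor.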
698 		// Release barrier
699 		std::stringstream atomicReleaseSemantics;
700 		if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
701 		{
702 			css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n";
703 			atomicReleaseSemantics << ", 0, 0";
704 		}
705 		else
706 		{
707 			atomicReleaseSemantics << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str();
708 		}
709 		// Atomic store guard
710 		if (m_data.atomicRMW)
711 		{
712 			switch (m_data.guardSC)
713 			{
714 			default: DE_ASSERT(0); // fall through
715 			case SC_PHYSBUFFER: // fall through
716 			case SC_BUFFER:		css << "   atomicExchange(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
717 			case SC_IMAGE:		css << "   imageAtomicExchange(guard, imageCoord, " << typeCastStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
718 			case SC_WORKGROUP:	css << "   atomicExchange(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
719 			}
720 		}
721 		else
722 		{
723 			switch (m_data.guardSC)
724 			{
725 			default: DE_ASSERT(0); // fall through
726 			case SC_PHYSBUFFER: // fall through
727 			case SC_BUFFER:		css << "   atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
728 			case SC_IMAGE:		css << "   imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
729 			case SC_WORKGROUP:	css << "   atomicStore(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
730 			}
731 		}
732 
733 		std::stringstream atomicAcquireSemantics;
734 		if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
735 		{
736 			atomicAcquireSemantics << ", 0, 0";
737 		}
738 		else
739 		{
740 			atomicAcquireSemantics << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str();
741 		}
742 		// Atomic load guard
743 		if (m_data.atomicRMW)
744 		{
745 			switch (m_data.guardSC)
746 			{
747 			default: DE_ASSERT(0); // fall through
748 			case SC_PHYSBUFFER: // fall through
749 			case SC_BUFFER: css << "   skip = atomicExchange(guard.x[partnerBufferCoord], " << typeStr << "(2u), " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
750 			case SC_IMAGE:  css << "   skip = imageAtomicExchange(guard, partnerImageCoord, " << typeCastStr << "(2u), " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
751 			case SC_WORKGROUP: css << "   skip = atomicExchange(guard.x[partnerSharedCoord], " << typeStr << "(2u), " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
752 			}
753 		} else
754 		{
755 			switch (m_data.guardSC)
756 			{
757 			default: DE_ASSERT(0); // fall through
758 			case SC_PHYSBUFFER: // fall through
759 			case SC_BUFFER:		css << "   skip = atomicLoad(guard.x[partnerBufferCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
760 			case SC_IMAGE:		css << "   skip = imageAtomicLoad(guard, partnerImageCoord, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
761 			case SC_WORKGROUP:	css << "   skip = atomicLoad(guard.x[partnerSharedCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
762 			}
763 		}
764 		// Acquire barrier
765 		if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
766 		{
767 			css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
768 		}
769 	}
770 	if (m_data.testType == TT_MP)
771 	{
772 		// Load payload
773 		switch (m_data.payloadSC)
774 		{
775 		default: DE_ASSERT(0); // fall through
776 		case SC_PHYSBUFFER: // fall through
777 		case SC_BUFFER:		css << "   " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
778 		case SC_IMAGE:		css << "   " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
779 		case SC_WORKGROUP:	css << "   " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
780 		}
781 		css <<
782 			"   if (!skip && r != " << typeStr << "(partnerBufferCoord)) { fail.x[bufferCoord] = 1; }\n"
783 			"}\n";
784 	}
785 	else
786 	{
787 		DE_ASSERT(m_data.testType == TT_WAR);
788 		// Store payload, only if the partner invocation has already done its read
789 		css << "   if (!skip) {\n   ";
790 		switch (m_data.payloadSC)
791 		{
792 		default: DE_ASSERT(0); // fall through
793 		case SC_PHYSBUFFER: // fall through
794 		case SC_BUFFER:		css << "   payload.x[bufferCoord] = " << typeStr << "(bufferCoord);\n"; break;
795 		case SC_IMAGE:
796 			if (intType) {
797 				css << "   imageStore(payload, imageCoord, uvec4(bufferCoord, 0, 0, 0));\n";
798 			}
799 			else {
800 				css << "   imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord), 0, 0, 0));\n";
801 			}
802 			break;
803 		case SC_WORKGROUP:	css << "   payload.x[sharedCoord] = " << typeStr << "(bufferCoord);\n"; break;
804 		}
805 		css <<
806 			"   }\n"
807 			"   if (r != 0) { fail.x[bufferCoord] = 1; }\n"
808 			"}\n";
809 	}
810 
811 	// Draw a fullscreen triangle strip based on gl_VertexIndex
812 	std::stringstream vss;
813 	vss <<
814 		"#version 450 core\n"
815 		"vec2 coords[4] = {vec2(-1,-1), vec2(-1, 1), vec2(1, -1), vec2(1, 1)};\n"
816 		"void main() { gl_Position = vec4(coords[gl_VertexIndex], 0, 1); }\n";
817 
818 	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
819 
820 	switch (m_data.stage)
821 	{
822 	default: DE_ASSERT(0); // fall through
823 	case STAGE_COMPUTE:
824 		programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
825 		break;
826 	case STAGE_VERTEX:
827 		programCollection.glslSources.add("test") << glu::VertexSource(css.str()) << buildOptions;
828 		break;
829 	case STAGE_FRAGMENT:
830 		programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());
831 		programCollection.glslSources.add("test") << glu::FragmentSource(css.str()) << buildOptions;
832 		break;
833 	}
834 }
835 
836 
837 void MemoryModelTestCase::initProgramsTransitive (SourceCollections& programCollection) const
838 {
839 	Scope invocationMapping = m_data.scope;
840 
841 	const char* typeStr = (m_data.dataType == DATA_TYPE_UINT64) ? "uint64_t" : (m_data.dataType == DATA_TYPE_FLOAT32) ? "float" :
842 		(m_data.dataType == DATA_TYPE_FLOAT64) ? "double" : "uint";
843 	const bool intType = (m_data.dataType == DATA_TYPE_UINT || m_data.dataType == DATA_TYPE_UINT64);
844 
845 	// Construct storageSemantics strings for the payload storage class,
846 	// the guard storage class, and their union. These are used by the
847 	// device-scope fences and atomics emitted below, depending on the
848 	// sync type.
849 	std::stringstream storageSemanticsPayload;
850 	switch (m_data.payloadSC)
851 	{
852 	default: DE_ASSERT(0); // fall through
853 	case SC_PHYSBUFFER: // fall through
854 	case SC_BUFFER:		storageSemanticsPayload << "gl_StorageSemanticsBuffer"; break;
855 	case SC_IMAGE:		storageSemanticsPayload << "gl_StorageSemanticsImage"; break;
856 	}
857 	std::stringstream storageSemanticsGuard;
858 	switch (m_data.guardSC)
859 	{
860 	default: DE_ASSERT(0); // fall through
861 	case SC_PHYSBUFFER: // fall through
862 	case SC_BUFFER:		storageSemanticsGuard << "gl_StorageSemanticsBuffer"; break;
863 	case SC_IMAGE:		storageSemanticsGuard << "gl_StorageSemanticsImage"; break;
864 	}
865 	std::stringstream storageSemanticsAll;
866 	storageSemanticsAll << storageSemanticsPayload.str() << " | " << storageSemanticsGuard.str();
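	// e.g. an SC_BUFFER payload with an SC_IMAGE guard yields
	// "gl_StorageSemanticsBuffer | gl_StorageSemanticsImage" for storageSemanticsAll.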
867 
868 	std::stringstream css;
869 	css << "#version 450 core\n";
870 	css << "#pragma use_vulkan_memory_model\n";
871 	if (!intType)
872 	{
873 		css <<
874 			"#extension GL_EXT_shader_atomic_float : enable\n"
875 			"#extension GL_KHR_memory_scope_semantics : enable\n";
876 	}
877 	css <<
878 		"#extension GL_KHR_shader_subgroup_basic : enable\n"
879 		"#extension GL_KHR_shader_subgroup_shuffle : enable\n"
880 		"#extension GL_KHR_shader_subgroup_ballot : enable\n"
881 		"#extension GL_KHR_memory_scope_semantics : enable\n"
882 		"#extension GL_ARB_gpu_shader_int64 : enable\n"
883 		"#extension GL_EXT_buffer_reference : enable\n"
884 		"// DIM/NUM_WORKGROUP_EACH_DIM overridden by spec constants\n"
885 		"layout(constant_id = 0) const int DIM = 1;\n"
886 		"layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
887 		"shared bool sharedSkip;\n";
888 
889 	css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
890 
891 	const char *memqual = "";
892 	const char *semAvail = "";
893 	const char *semVis = "";
894 	if (m_data.coherent)
895 	{
896 		memqual = "workgroupcoherent";
897 	}
898 	else
899 	{
900 		memqual = "nonprivate";
901 		semAvail = " | gl_SemanticsMakeAvailable";
902 		semVis = " | gl_SemanticsMakeVisible";
903 	}
904 
905 	stringstream pushConstMembers;
906 
907 	// Declare payload, guard, and fail resources
908 	switch (m_data.payloadSC)
909 	{
910 	default: DE_ASSERT(0); // fall through
911 	case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer PayloadRef { " << typeStr << " x[]; };\n";
912 						pushConstMembers << "   layout(offset = 0) PayloadRef payloadref;\n"; break;
913 	case SC_BUFFER:		css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n"; break;
914 	case SC_IMAGE:
915 		if (intType)
916 			css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n";
917 		else
918 			css << "layout(set=0, binding=0, r32f) uniform " << memqual << " image2D payload;\n";
919 		break;
920 	}
921 	// The guard variable is only accessed with atomics and need not be declared coherent.
922 	switch (m_data.guardSC)
923 	{
924 	default: DE_ASSERT(0); // fall through
925 	case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer GuardRef { " << typeStr << " x[]; };\n";
926 						pushConstMembers << "layout(offset = 8) GuardRef guard;\n"; break;
927 	case SC_BUFFER:		css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n"; break;
928 	case SC_IMAGE:
929 		if (intType)
930 			css << "layout(set=0, binding=1, r32ui) uniform " << memqual << " uimage2D guard;\n";
931 		else
932 			css << "layout(set=0, binding=1, r32f) uniform " << memqual << " image2D guard;\n";
933 		break;
934 	}
935 
936 	css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";
937 
938 	if (pushConstMembers.str().size() != 0) {
939 		css << "layout (push_constant, std430) uniform PC {\n" << pushConstMembers.str() << "};\n";
940 	}
941 
942 	css <<
943 		"void main()\n"
944 		"{\n"
945 		"   bool pass = true;\n"
946 		"   bool skip = false;\n"
947 		"   sharedSkip = false;\n";
948 
949 	if (m_data.payloadSC == SC_PHYSBUFFER)
950 		css << "   " << memqual << " PayloadRef payload = payloadref;\n";
951 
952 	// Compute coordinates based on the storage class and scope.
953 	switch (invocationMapping)
954 	{
955 	default: DE_ASSERT(0); // fall through
956 	case SCOPE_DEVICE:
957 		css <<
958 		"   ivec2 globalId          = ivec2(gl_GlobalInvocationID.xy);\n"
959 		"   ivec2 partnerGlobalId   = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n"
960 		"   uint bufferCoord        = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
961 		"   uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
962 		"   ivec2 imageCoord        = globalId;\n"
963 		"   ivec2 partnerImageCoord = partnerGlobalId;\n"
964 		"   ivec2 globalId00          = ivec2(DIM) * ivec2(gl_WorkGroupID.xy);\n"
965 		"   ivec2 partnerGlobalId00   = ivec2(DIM) * (ivec2(NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_WorkGroupID.xy));\n"
966 		"   uint bufferCoord00        = globalId00.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId00.x;\n"
967 		"   uint partnerBufferCoord00 = partnerGlobalId00.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId00.x;\n"
968 		"   ivec2 imageCoord00        = globalId00;\n"
969 		"   ivec2 partnerImageCoord00 = partnerGlobalId00;\n";
970 		break;
971 	}
972 
973 	// Store payload
974 	if (intType)
975 	{
976 		switch (m_data.payloadSC)
977 		{
978 		default: DE_ASSERT(0); // fall through
979 		case SC_PHYSBUFFER: // fall through
980 		case SC_BUFFER:		css << "   payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n"; break;
981 		case SC_IMAGE:		css << "   imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, partnerImageCoord).x>>31), 0, 0, 0));\n"; break;
982 		}
983 	}
984 	else
985 	{
986 		switch (m_data.payloadSC)
987 		{
988 		default: DE_ASSERT(0); // fall through
989 		case SC_PHYSBUFFER: // fall through
990 		case SC_BUFFER:	css << "   payload.x[bufferCoord] = " << typeStr << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerBufferCoord])))>>31);\n"; break;
991 		case SC_IMAGE:	css << "   imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord + (floatBitsToInt(float(imageLoad(payload, partnerImageCoord).x)>>31))), 0, 0, 0)); \n"; break;
992 		}
993 	}
994 
995 	// Sync to other threads in the workgroup
996 	css << "   controlBarrier(gl_ScopeWorkgroup, "
997 							 "gl_ScopeWorkgroup, " <<
998 							  storageSemanticsPayload.str() << " | gl_StorageSemanticsShared, "
999 							 "gl_SemanticsAcquireRelease" << semAvail << ");\n";
1000 
1001 	// Device-scope release/availability in invocation(0,0)
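	// For example, ST_ATOMIC_ATOMIC with an SC_BUFFER payload/guard and DATA_TYPE_UINT emits:
	//   atomicStore(guard.x[bufferCoord], uint(1u), gl_ScopeDevice,
	//               gl_StorageSemanticsBuffer, gl_SemanticsRelease | gl_SemanticsMakeAvailable);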
1002 	css << "   if (all(equal(gl_LocalInvocationID.xy, ivec2(0,0)))) {\n";
1003 	const char* typeCastStr = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ? "" : typeStr;
1004 	if (m_data.syncType == ST_ATOMIC_ATOMIC || m_data.syncType == ST_ATOMIC_FENCE) {
1005 		switch (m_data.guardSC)
1006 		{
1007 		default: DE_ASSERT(0); // fall through
1008 		case SC_PHYSBUFFER: // fall through
1009 		case SC_BUFFER:		css << "       atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n"; break;
1010 		case SC_IMAGE:		css << "       imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n"; break;
1011 		}
1012 	} else {
1013 		css << "       memoryBarrier(gl_ScopeDevice, " << storageSemanticsAll.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n";
1014 		switch (m_data.guardSC)
1015 		{
1016 		default: DE_ASSERT(0); // fall through
1017 		case SC_PHYSBUFFER: // fall through
1018 		case SC_BUFFER:		css << "       atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), gl_ScopeDevice, 0, 0);\n"; break;
1019 		case SC_IMAGE:		css << "       imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), gl_ScopeDevice, 0, 0);\n"; break;
1020 		}
1021 	}
1022 
1023 	// Device-scope acquire/visibility either in invocation(0,0) or in every invocation
1024 	if (!m_data.transitiveVis) {
1025 		css << "   }\n";
1026 	}
1027 	if (m_data.syncType == ST_ATOMIC_ATOMIC || m_data.syncType == ST_FENCE_ATOMIC) {
1028 		switch (m_data.guardSC)
1029 		{
1030 		default: DE_ASSERT(0); // fall through
1031 		case SC_PHYSBUFFER: // fall through
1032 		case SC_BUFFER:		css << "       skip = atomicLoad(guard.x[partnerBufferCoord00], gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible) == 0;\n"; break;
1033 		case SC_IMAGE:		css << "       skip = imageAtomicLoad(guard, partnerImageCoord00, gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible) == 0;\n"; break;
1034 		}
1035 	} else {
1036 		switch (m_data.guardSC)
1037 		{
1038 		default: DE_ASSERT(0); // fall through
1039 		case SC_PHYSBUFFER: // fall through
1040 		case SC_BUFFER:		css << "       skip = atomicLoad(guard.x[partnerBufferCoord00], gl_ScopeDevice, 0, 0) == 0;\n"; break;
1041 		case SC_IMAGE:		css << "       skip = imageAtomicLoad(guard, partnerImageCoord00, gl_ScopeDevice, 0, 0) == 0;\n"; break;
1042 		}
1043 		css << "       memoryBarrier(gl_ScopeDevice, " << storageSemanticsAll.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible);\n";
1044 	}
1045 
1046 	// If invocation(0,0) did the acquire then store "skip" to shared memory and
1047 	// synchronize with the workgroup
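	// (The workgroup control barrier below includes gl_StorageSemanticsShared, so the
	// sharedSkip value written by invocation (0,0) is visible to the whole workgroup.)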
1048 	if (m_data.transitiveVis) {
1049 		css << "       sharedSkip = skip;\n";
1050 		css << "   }\n";
1051 
1052 		css << "   controlBarrier(gl_ScopeWorkgroup, "
1053 								 "gl_ScopeWorkgroup, " <<
1054 								  storageSemanticsPayload.str() << " | gl_StorageSemanticsShared, "
1055 								 "gl_SemanticsAcquireRelease" << semVis << ");\n";
1056 		css << "   skip = sharedSkip;\n";
1057 	}
1058 
1059 	// Load payload
1060 	switch (m_data.payloadSC)
1061 	{
1062 	default: DE_ASSERT(0); // fall through
1063 	case SC_PHYSBUFFER: // fall through
1064 	case SC_BUFFER:		css << "   " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
1065 	case SC_IMAGE:		css << "   " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
1066 	}
1067 	css <<
1068 		"   if (!skip && r != " << typeStr << "(partnerBufferCoord)) { fail.x[bufferCoord] = 1; }\n"
1069 		"}\n";
1070 
1071 	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
1072 
1073 	programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
1074 }
1075 
1076 TestInstance* MemoryModelTestCase::createInstance (Context& context) const
1077 {
1078 	return new MemoryModelTestInstance(context, m_data);
1079 }
1080 
1081 tcu::TestStatus MemoryModelTestInstance::iterate (void)
1082 {
1083 	const DeviceInterface&	vk						= m_context.getDeviceInterface();
1084 	const VkDevice			device					= m_context.getDevice();
1085 	Allocator&				allocator				= m_context.getDefaultAllocator();
1086 
1087 	VkPhysicalDeviceProperties2 properties;
1088 	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1089 	properties.pNext = NULL;
1090 
1091 	m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties);
1092 
1093 	deUint32 DIM = 31;
1094 	deUint32 NUM_WORKGROUP_EACH_DIM = 8;
1095 	// If necessary, shrink workgroup size to fit HW limits
1096 	if (DIM*DIM > properties.properties.limits.maxComputeWorkGroupInvocations)
1097 	{
1098 		DIM = (deUint32)deFloatSqrt((float)properties.properties.limits.maxComputeWorkGroupInvocations);
1099 	}
1100 	deUint32 NUM_INVOCATIONS = (DIM * DIM * NUM_WORKGROUP_EACH_DIM * NUM_WORKGROUP_EACH_DIM);
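	// With the defaults above (DIM=31, NUM_WORKGROUP_EACH_DIM=8) this is
	// 31*31*8*8 = 61504 invocations, one payload/guard/fail element per invocation.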
1101 
1102 	VkDeviceSize bufferSizes[3];
1103 	de::MovePtr<BufferWithMemory> buffers[3];
1104 	vk::VkDescriptorBufferInfo bufferDescriptors[3];
1105 	de::MovePtr<BufferWithMemory> copyBuffer;
1106 
1107 	for (deUint32 i = 0; i < 3; ++i)
1108 	{
1109 		size_t elementSize = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64)? sizeof(deUint64) : sizeof(deUint32);
1110 		// buffer2 is the "fail" buffer, and is always uint
1111 		if (i == 2)
1112 			elementSize = sizeof(deUint32);
1113 		bufferSizes[i] = NUM_INVOCATIONS * elementSize;
1114 
1115 		vk::VkFlags usageFlags = vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1116 
1117 		bool memoryDeviceAddress = false;
1118 
1119 		bool local;
1120 		switch (i)
1121 		{
1122 		default: DE_ASSERT(0); // fall through
1123 		case 0:
1124 			if (m_data.payloadSC != SC_BUFFER && m_data.payloadSC != SC_PHYSBUFFER)
1125 				continue;
1126 			local = m_data.payloadMemLocal;
1127 			if (m_data.payloadSC == SC_PHYSBUFFER)
1128 			{
1129 				usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
1130 				if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
1131 					memoryDeviceAddress = true;
1132 			}
1133 			break;
1134 		case 1:
1135 			if (m_data.guardSC != SC_BUFFER && m_data.guardSC != SC_PHYSBUFFER)
1136 				continue;
1137 			local = m_data.guardMemLocal;
1138 			if (m_data.guardSC == SC_PHYSBUFFER)
1139 			{
1140 				usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
1141 				if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
1142 					memoryDeviceAddress = true;
1143 			}
1144 			break;
1145 		case 2: local = true; break;
1146 		}
1147 
1148 		try
1149 		{
1150 			buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1151 				vk, device, allocator, makeBufferCreateInfo(bufferSizes[i], usageFlags),
1152 				(memoryDeviceAddress ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any) |
1153 				(local ? MemoryRequirement::Local : MemoryRequirement::NonLocal)));
1154 		}
1155 		catch (const tcu::NotSupportedError&)
1156 		{
1157 			if (!local)
1158 			{
1159 				TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
1160 			}
1161 			throw;
1162 		}
1163 		bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, bufferSizes[i]);
1164 	}
1165 
1166 	// Try to use cached host memory for the buffer the CPU will read from, else fallback to host visible.
1167 	try
1168 	{
1169 		copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1170 			vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible | MemoryRequirement::Cached));
1171 	}
1172 	catch (const tcu::NotSupportedError&)
1173 	{
1174 		copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1175 			vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));
1176 	}
1177 
1178 	VkFormat imageFormat;
1179 	switch (m_data.dataType)
1180 	{
1181 	case DATA_TYPE_UINT:
1182 	case DATA_TYPE_UINT64:
1183 		imageFormat = VK_FORMAT_R32_UINT;
1184 		break;
1185 	case DATA_TYPE_FLOAT32:
1186 	case DATA_TYPE_FLOAT64:
1187 		imageFormat = VK_FORMAT_R32_SFLOAT;
1188 		break;
1189 	default:
1190 		TCU_FAIL("Invalid data type.");
1191 	}
1192 
1193 	const VkImageCreateInfo			imageCreateInfo			=
1194 	{
1195 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	// VkStructureType		sType;
1196 		DE_NULL,								// const void*			pNext;
1197 		(VkImageCreateFlags)0u,					// VkImageCreateFlags	flags;
1198 		VK_IMAGE_TYPE_2D,						// VkImageType			imageType;
1199 		imageFormat,							// VkFormat				format;
1200 		{
1201 			DIM*NUM_WORKGROUP_EACH_DIM,	// deUint32	width;
1202 			DIM*NUM_WORKGROUP_EACH_DIM,	// deUint32	height;
1203 			1u		// deUint32	depth;
1204 		},										// VkExtent3D			   extent;
1205 		1u,										// deUint32				 mipLevels;
1206 		1u,										// deUint32				 arrayLayers;
1207 		VK_SAMPLE_COUNT_1_BIT,					// VkSampleCountFlagBits	samples;
1208 		VK_IMAGE_TILING_OPTIMAL,				// VkImageTiling			tiling;
1209 		VK_IMAGE_USAGE_STORAGE_BIT
1210 		| VK_IMAGE_USAGE_TRANSFER_SRC_BIT
1211 		| VK_IMAGE_USAGE_TRANSFER_DST_BIT,		// VkImageUsageFlags		usage;
1212 		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode			sharingMode;
1213 		0u,										// deUint32				 queueFamilyIndexCount;
1214 		DE_NULL,								// const deUint32*		  pQueueFamilyIndices;
1215 		VK_IMAGE_LAYOUT_UNDEFINED				// VkImageLayout			initialLayout;
1216 	};
1217 	VkImageViewCreateInfo		imageViewCreateInfo		=
1218 	{
1219 		VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,	// VkStructureType			sType;
1220 		DE_NULL,									// const void*				pNext;
1221 		(VkImageViewCreateFlags)0u,					// VkImageViewCreateFlags	 flags;
1222 		DE_NULL,									// VkImage					image;
1223 		VK_IMAGE_VIEW_TYPE_2D,						// VkImageViewType			viewType;
1224 		imageFormat,								// VkFormat					format;
1225 		{
1226 			VK_COMPONENT_SWIZZLE_R,	// VkComponentSwizzle	r;
1227 			VK_COMPONENT_SWIZZLE_G,	// VkComponentSwizzle	g;
1228 			VK_COMPONENT_SWIZZLE_B,	// VkComponentSwizzle	b;
1229 			VK_COMPONENT_SWIZZLE_A	// VkComponentSwizzle	a;
1230 		},											// VkComponentMapping		 components;
1231 		{
1232 			VK_IMAGE_ASPECT_COLOR_BIT,	// VkImageAspectFlags	aspectMask;
1233 			0u,							// deUint32			  baseMipLevel;
1234 			1u,							// deUint32			  levelCount;
1235 			0u,							// deUint32			  baseArrayLayer;
1236 			1u							// deUint32			  layerCount;
1237 		}											// VkImageSubresourceRange	subresourceRange;
1238 	};
1239 
1240 
1241 	de::MovePtr<ImageWithMemory> images[2];
1242 	Move<VkImageView> imageViews[2];
1243 	vk::VkDescriptorImageInfo imageDescriptors[2];
1244 
1245 	for (deUint32 i = 0; i < 2; ++i)
1246 	{
1247 
1248 		bool local;
1249 		switch (i)
1250 		{
1251 		default: DE_ASSERT(0); // fall through
1252 		case 0:
1253 			if (m_data.payloadSC != SC_IMAGE)
1254 				continue;
1255 			local = m_data.payloadMemLocal;
1256 			break;
1257 		case 1:
1258 			if (m_data.guardSC != SC_IMAGE)
1259 				continue;
1260 			local = m_data.guardMemLocal;
1261 			break;
1262 		}
1263 
1264 		try
1265 		{
1266 			images[i] = de::MovePtr<ImageWithMemory>(new ImageWithMemory(
1267 				vk, device, allocator, imageCreateInfo, local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
1268 		}
1269 		catch (const tcu::NotSupportedError&)
1270 		{
1271 			if (!local)
1272 			{
1273 				TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
1274 			}
1275 			throw;
1276 		}
1277 		imageViewCreateInfo.image = **images[i];
1278 		imageViews[i] = createImageView(vk, device, &imageViewCreateInfo, NULL);
1279 
1280 		imageDescriptors[i] = makeDescriptorImageInfo(DE_NULL, *imageViews[i], VK_IMAGE_LAYOUT_GENERAL);
1281 	}
1282 
1283 	vk::DescriptorSetLayoutBuilder layoutBuilder;
1284 
1285 	switch (m_data.payloadSC)
1286 	{
1287 	default:
1288 	case SC_BUFFER:	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
1289 	case SC_IMAGE:	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
1290 	}
1291 	switch (m_data.guardSC)
1292 	{
1293 	default:
1294 	case SC_BUFFER:	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
1295 	case SC_IMAGE:	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
1296 	}
1297 	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages);
1298 
1299 	vk::Unique<vk::VkDescriptorSetLayout>	descriptorSetLayout(layoutBuilder.build(vk, device));
1300 
1301 	vk::Unique<vk::VkDescriptorPool>		descriptorPool(vk::DescriptorPoolBuilder()
1302 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3u)
1303 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 3u)
1304 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1305 	vk::Unique<vk::VkDescriptorSet>			descriptorSet		(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1306 
1307 	vk::DescriptorSetUpdateBuilder setUpdateBuilder;
1308 	switch (m_data.payloadSC)
1309 	{
1310 	default: DE_ASSERT(0); // fall through
1311 	case SC_PHYSBUFFER:
1312 	case SC_WORKGROUP:
1313 		break;
1314 	case SC_BUFFER:
1315 		setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
1316 			VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
1317 		break;
1318 	case SC_IMAGE:
1319 		setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
1320 			VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[0]);
1321 		break;
1322 	}
1323 	switch (m_data.guardSC)
1324 	{
1325 	default: DE_ASSERT(0); // fall through
1326 	case SC_PHYSBUFFER:
1327 	case SC_WORKGROUP:
1328 		break;
1329 	case SC_BUFFER:
1330 		setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
1331 			VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[1]);
1332 		break;
1333 	case SC_IMAGE:
1334 		setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
1335 			VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[1]);
1336 		break;
1337 	}
1338 	setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(2),
1339 		VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[2]);
1340 
1341 	setUpdateBuilder.update(vk, device);
1342 
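	// The push constant range reserves room for two 8-byte buffer device addresses:
	// the payload address at offset 0 and the guard address at offset 8, pushed below
	// for the physical-storage-buffer variants.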
1343 	const VkPushConstantRange pushConstRange =
1344 	{
1345 		allShaderStages,		// VkShaderStageFlags	stageFlags
1346 		0,						// deUint32				offset
1347 		16						// deUint32				size
1348 	};
1349 
1350 	const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
1351 	{
1352 		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,				// sType
1353 		DE_NULL,													// pNext
1354 		(VkPipelineLayoutCreateFlags)0,
1355 		1,															// setLayoutCount
1356 		&descriptorSetLayout.get(),									// pSetLayouts
1357 		1u,															// pushConstantRangeCount
1358 		&pushConstRange,											// pPushConstantRanges
1359 	};
1360 
1361 	Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
1362 
1363 	Move<VkPipeline> pipeline;
1364 	Move<VkRenderPass> renderPass;
1365 	Move<VkFramebuffer> framebuffer;
1366 
1367 	VkPipelineBindPoint bindPoint = m_data.stage == STAGE_COMPUTE ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
1368 
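	// Specialization constants 0 and 1 pass DIM and NUM_WORKGROUP_EACH_DIM to the test
	// shaders; the same values size the dispatches, draws and framebuffer below.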
1369 	const deUint32 specData[2] = {DIM, NUM_WORKGROUP_EACH_DIM};
1370 
1371 	const vk::VkSpecializationMapEntry entries[2] =
1372 	{
1373 		{0, sizeof(deUint32) * 0, sizeof(deUint32)},
1374 		{1, sizeof(deUint32) * 1, sizeof(deUint32)},
1375 	};
1376 
1377 	const vk::VkSpecializationInfo specInfo =
1378 	{
1379 		2,						// mapEntryCount
1380 		entries,				// pMapEntries
1381 		sizeof(specData),		// dataSize
1382 		specData				// pData
1383 	};
1384 
1385 	if (m_data.stage == STAGE_COMPUTE)
1386 	{
1387 		const Unique<VkShaderModule>	shader						(createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));
1388 
1389 		const VkPipelineShaderStageCreateInfo	shaderCreateInfo =
1390 		{
1391 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1392 			DE_NULL,
1393 			(VkPipelineShaderStageCreateFlags)0,
1394 			VK_SHADER_STAGE_COMPUTE_BIT,								// stage
1395 			*shader,													// shader
1396 			"main",
1397 			&specInfo,													// pSpecializationInfo
1398 		};
1399 
1400 		const VkComputePipelineCreateInfo		pipelineCreateInfo =
1401 		{
1402 			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1403 			DE_NULL,
1404 			0u,															// flags
1405 			shaderCreateInfo,											// stage
1406 			*pipelineLayout,											// layout
1407 			(vk::VkPipeline)0,											// basePipelineHandle
1408 			0u,															// basePipelineIndex
1409 		};
1410 		pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo, NULL);
1411 	}
1412 	else
1413 	{
1414 
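		// Graphics path: an attachment-less render pass and framebuffer sized so that each
		// pixel (or point) corresponds to one tested invocation; results are written through
		// the storage descriptors rather than color attachments.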
1415 		const vk::VkSubpassDescription		subpassDesc			=
1416 		{
1417 			(vk::VkSubpassDescriptionFlags)0,
1418 			vk::VK_PIPELINE_BIND_POINT_GRAPHICS,					// pipelineBindPoint
1419 			0u,														// inputCount
1420 			DE_NULL,												// pInputAttachments
1421 			0u,														// colorCount
1422 			DE_NULL,												// pColorAttachments
1423 			DE_NULL,												// pResolveAttachments
1424 			DE_NULL,												// depthStencilAttachment
1425 			0u,														// preserveCount
1426 			DE_NULL,												// pPreserveAttachments
1427 
1428 		};
1429 		const vk::VkRenderPassCreateInfo	renderPassParams	=
1430 		{
1431 			vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,			// sType
1432 			DE_NULL,												// pNext
1433 			(vk::VkRenderPassCreateFlags)0,
1434 			0u,														// attachmentCount
1435 			DE_NULL,												// pAttachments
1436 			1u,														// subpassCount
1437 			&subpassDesc,											// pSubpasses
1438 			0u,														// dependencyCount
1439 			DE_NULL,												// pDependencies
1440 		};
1441 
1442 		renderPass = createRenderPass(vk, device, &renderPassParams);
1443 
1444 		const vk::VkFramebufferCreateInfo	framebufferParams	=
1445 		{
1446 			vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,	// sType
1447 			DE_NULL,										// pNext
1448 			(vk::VkFramebufferCreateFlags)0,
1449 			*renderPass,									// renderPass
1450 			0u,												// attachmentCount
1451 			DE_NULL,										// pAttachments
1452 			DIM*NUM_WORKGROUP_EACH_DIM,						// width
1453 			DIM*NUM_WORKGROUP_EACH_DIM,						// height
1454 			1u,												// layers
1455 		};
1456 
1457 		framebuffer = createFramebuffer(vk, device, &framebufferParams);
1458 
1459 		const VkPipelineVertexInputStateCreateInfo		vertexInputStateCreateInfo		=
1460 		{
1461 			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType							sType;
1462 			DE_NULL,													// const void*								pNext;
1463 			(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags	flags;
1464 			0u,															// deUint32									vertexBindingDescriptionCount;
1465 			DE_NULL,													// const VkVertexInputBindingDescription*	pVertexBindingDescriptions;
1466 			0u,															// deUint32									vertexAttributeDescriptionCount;
1467 			DE_NULL														// const VkVertexInputAttributeDescription*	pVertexAttributeDescriptions;
1468 		};
1469 
1470 		const VkPipelineInputAssemblyStateCreateInfo	inputAssemblyStateCreateInfo	=
1471 		{
1472 			VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType							sType;
1473 			DE_NULL,														// const void*								pNext;
1474 			(VkPipelineInputAssemblyStateCreateFlags)0,						// VkPipelineInputAssemblyStateCreateFlags	flags;
1475 			(m_data.stage == STAGE_VERTEX) ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology						topology;
1476 			VK_FALSE														// VkBool32									primitiveRestartEnable;
1477 		};
1478 
1479 		const VkPipelineRasterizationStateCreateInfo	rasterizationStateCreateInfo	=
1480 		{
1481 			VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,		// VkStructureType							sType;
1482 			DE_NULL,														// const void*								pNext;
1483 			(VkPipelineRasterizationStateCreateFlags)0,						// VkPipelineRasterizationStateCreateFlags	flags;
1484 			VK_FALSE,														// VkBool32									depthClampEnable;
1485 			(m_data.stage == STAGE_VERTEX) ? VK_TRUE : VK_FALSE,			// VkBool32									rasterizerDiscardEnable;
1486 			VK_POLYGON_MODE_FILL,											// VkPolygonMode							polygonMode;
1487 			VK_CULL_MODE_NONE,												// VkCullModeFlags							cullMode;
1488 			VK_FRONT_FACE_CLOCKWISE,										// VkFrontFace								frontFace;
1489 			VK_FALSE,														// VkBool32									depthBiasEnable;
1490 			0.0f,															// float									depthBiasConstantFactor;
1491 			0.0f,															// float									depthBiasClamp;
1492 			0.0f,															// float									depthBiasSlopeFactor;
1493 			1.0f															// float									lineWidth;
1494 		};
1495 
1496 		const VkPipelineMultisampleStateCreateInfo		multisampleStateCreateInfo =
1497 		{
1498 			VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType						  sType
1499 			DE_NULL,													// const void*							  pNext
1500 			0u,															// VkPipelineMultisampleStateCreateFlags	flags
1501 			VK_SAMPLE_COUNT_1_BIT,										// VkSampleCountFlagBits					rasterizationSamples
1502 			VK_FALSE,													// VkBool32								 sampleShadingEnable
1503 			1.0f,														// float									minSampleShading
1504 			DE_NULL,													// const VkSampleMask*					  pSampleMask
1505 			VK_FALSE,													// VkBool32								 alphaToCoverageEnable
1506 			VK_FALSE													// VkBool32								 alphaToOneEnable
1507 		};
1508 
1509 		VkViewport viewport = makeViewport(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
1510 		VkRect2D scissor = makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
1511 
1512 		const VkPipelineViewportStateCreateInfo			viewportStateCreateInfo				=
1513 		{
1514 			VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,	// VkStructureType							 sType
1515 			DE_NULL,												// const void*								 pNext
1516 			(VkPipelineViewportStateCreateFlags)0,					// VkPipelineViewportStateCreateFlags		  flags
1517 			1u,														// deUint32									viewportCount
1518 			&viewport,												// const VkViewport*						   pViewports
1519 			1u,														// deUint32									scissorCount
1520 			&scissor												// const VkRect2D*							 pScissors
1521 		};
1522 
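		// Vertex-stage tests bind only the "test" module as the vertex shader (rasterization
		// is discarded above); fragment-stage tests pair the separately provided "vert" vertex
		// shader with the "test" fragment shader.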
1523 		Move<VkShaderModule> fs;
1524 		Move<VkShaderModule> vs;
1525 
1526 		deUint32 numStages;
1527 		if (m_data.stage == STAGE_VERTEX)
1528 		{
1529 			vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1530 			fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); // placeholder; the fragment stage is not bound for vertex-stage tests
1531 			numStages = 1u;
1532 		}
1533 		else
1534 		{
1535 			vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
1536 			fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1537 			numStages = 2u;
1538 		}
1539 
1540 		const VkPipelineShaderStageCreateInfo	shaderCreateInfo[2] = {
1541 			{
1542 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1543 				DE_NULL,
1544 				(VkPipelineShaderStageCreateFlags)0,
1545 				VK_SHADER_STAGE_VERTEX_BIT,									// stage
1546 				*vs,														// shader
1547 				"main",
1548 				&specInfo,													// pSpecializationInfo
1549 			},
1550 			{
1551 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1552 				DE_NULL,
1553 				(VkPipelineShaderStageCreateFlags)0,
1554 				VK_SHADER_STAGE_FRAGMENT_BIT,								// stage
1555 				*fs,														// shader
1556 				"main",
1557 				&specInfo,													// pSpecializationInfo
1558 			}
1559 		};
1560 
1561 		const VkGraphicsPipelineCreateInfo				graphicsPipelineCreateInfo		=
1562 		{
1563 			VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,	// VkStructureType									sType;
1564 			DE_NULL,											// const void*										pNext;
1565 			(VkPipelineCreateFlags)0,							// VkPipelineCreateFlags							flags;
1566 			numStages,											// deUint32											stageCount;
1567 			&shaderCreateInfo[0],								// const VkPipelineShaderStageCreateInfo*			pStages;
1568 			&vertexInputStateCreateInfo,						// const VkPipelineVertexInputStateCreateInfo*		pVertexInputState;
1569 			&inputAssemblyStateCreateInfo,						// const VkPipelineInputAssemblyStateCreateInfo*	pInputAssemblyState;
1570 			DE_NULL,											// const VkPipelineTessellationStateCreateInfo*		pTessellationState;
1571 			&viewportStateCreateInfo,							// const VkPipelineViewportStateCreateInfo*			pViewportState;
1572 			&rasterizationStateCreateInfo,						// const VkPipelineRasterizationStateCreateInfo*	pRasterizationState;
1573 			&multisampleStateCreateInfo,						// const VkPipelineMultisampleStateCreateInfo*		pMultisampleState;
1574 			DE_NULL,											// const VkPipelineDepthStencilStateCreateInfo*		pDepthStencilState;
1575 			DE_NULL,											// const VkPipelineColorBlendStateCreateInfo*		pColorBlendState;
1576 			DE_NULL,											// const VkPipelineDynamicStateCreateInfo*			pDynamicState;
1577 			pipelineLayout.get(),								// VkPipelineLayout									layout;
1578 			renderPass.get(),									// VkRenderPass										renderPass;
1579 			0u,													// deUint32											subpass;
1580 			DE_NULL,											// VkPipeline										basePipelineHandle;
1581 			0													// int												basePipelineIndex;
1582 		};
1583 
1584 		pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
1585 	}
1586 
1587 	const VkQueue					queue					= m_context.getUniversalQueue();
1588 	Move<VkCommandPool>				cmdPool					= createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, m_context.getUniversalQueueFamilyIndex());
1589 	Move<VkCommandBuffer>			cmdBuffer				= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1590 
1591 	VkBufferDeviceAddressInfo addrInfo =
1592 		{
1593 			VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,	// VkStructureType	sType;
1594 			DE_NULL,										// const void*		 pNext;
1595 			0,												// VkBuffer			buffer
1596 		};
1597 
1598 	VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
1599 	VkClearValue clearColor = makeClearValueColorU32(0,0,0,0);
1600 
1601 	VkMemoryBarrier					memBarrier =
1602 		{
1603 			VK_STRUCTURE_TYPE_MEMORY_BARRIER,	// sType
1604 			DE_NULL,							// pNext
1605 			0u,									// srcAccessMask
1606 			0u,									// dstAccessMask
1607 		};
1608 
1609 	const VkBufferCopy	copyParams =
1610 		{
1611 			(VkDeviceSize)0u,						// srcOffset
1612 			(VkDeviceSize)0u,						// dstOffset
1613 			bufferSizes[2]							// size
1614 		};
1615 
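	// The workload is recorded and submitted several times, and each submission runs many
	// clear + dispatch/draw iterations, to give any memory model violation repeated chances
	// to manifest.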
1616 	const deUint32 NUM_SUBMITS = 4;
1617 
1618 	for (deUint32 x = 0; x < NUM_SUBMITS; ++x)
1619 	{
1620 		beginCommandBuffer(vk, *cmdBuffer, 0u);
1621 
1622 		if (x == 0)
1623 			vk.cmdFillBuffer(*cmdBuffer, **buffers[2], 0, bufferSizes[2], 0);
1624 
1625 		for (deUint32 i = 0; i < 2; ++i)
1626 		{
1627 			if (!images[i])
1628 				continue;
1629 
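			// Transition each storage image to the GENERAL layout used by its descriptor
			// before it is cleared below.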
1630 			const VkImageMemoryBarrier imageBarrier =
1631 			{
1632 				VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,				// VkStructureType		sType
1633 				DE_NULL,											// const void*			pNext
1634 				0u,													// VkAccessFlags		srcAccessMask
1635 				VK_ACCESS_TRANSFER_WRITE_BIT,						// VkAccessFlags		dstAccessMask
1636 				VK_IMAGE_LAYOUT_UNDEFINED,							// VkImageLayout		oldLayout
1637 				VK_IMAGE_LAYOUT_GENERAL,							// VkImageLayout		newLayout
1638 				VK_QUEUE_FAMILY_IGNORED,							// uint32_t				srcQueueFamilyIndex
1639 				VK_QUEUE_FAMILY_IGNORED,							// uint32_t				dstQueueFamilyIndex
1640 				**images[i],										// VkImage				image
1641 				{
1642 					VK_IMAGE_ASPECT_COLOR_BIT,				// VkImageAspectFlags	aspectMask
1643 					0u,										// uint32_t				baseMipLevel
1644 					1u,										// uint32_t				mipLevels,
1645 					0u,										// uint32_t				baseArray
1646 					1u,										// uint32_t				arraySize
1647 				}
1648 			};
1649 
1650 			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
1651 								 (VkDependencyFlags)0,
1652 								  0, (const VkMemoryBarrier*)DE_NULL,
1653 								  0, (const VkBufferMemoryBarrier*)DE_NULL,
1654 								  1, &imageBarrier);
1655 		}
1656 
1657 		vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
1658 		vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
1659 
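		// For physical-storage-buffer variants, query the buffer device address (through the
		// KHR entry point when the extension is supported, otherwise the EXT one) and hand it
		// to the shaders via push constants.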
1660 		if (m_data.payloadSC == SC_PHYSBUFFER)
1661 		{
1662 			const bool useKHR = m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address");
1663 			addrInfo.buffer = **buffers[0];
1664 			VkDeviceAddress addr;
1665 			if (useKHR)
1666 				addr = vk.getBufferDeviceAddress(device, &addrInfo);
1667 			else
1668 				addr = vk.getBufferDeviceAddressEXT(device, &addrInfo);
1669 			vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages,
1670 								0, sizeof(VkDeviceSize), &addr);
1671 		}
1672 		if (m_data.guardSC == SC_PHYSBUFFER)
1673 		{
1674 			const bool useKHR = m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address");
1675 			addrInfo.buffer = **buffers[1];
1676 			VkDeviceAddress addr;
1677 			if (useKHR)
1678 				addr = vk.getBufferDeviceAddress(device, &addrInfo);
1679 			else
1680 				addr = vk.getBufferDeviceAddressEXT(device, &addrInfo);
1681 			vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages,
1682 								8, sizeof(VkDeviceSize), &addr);
1683 		}
1684 
1685 		for (deUint32 iters = 0; iters < 50; ++iters)
1686 		{
1687 			for (deUint32 i = 0; i < 2; ++i)
1688 			{
1689 				if (buffers[i])
1690 					vk.cmdFillBuffer(*cmdBuffer, **buffers[i], 0, bufferSizes[i], 0);
1691 				if (images[i])
1692 					vk.cmdClearColorImage(*cmdBuffer, **images[i], VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);
1693 			}
1694 
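			// Make the transfer writes (fills and clears) visible to all shader stages before
			// the dispatch/draw.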
1695 			memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1696 			memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1697 			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages,
1698 				0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1699 
1700 			if (m_data.stage == STAGE_COMPUTE)
1701 			{
1702 				vk.cmdDispatch(*cmdBuffer, NUM_WORKGROUP_EACH_DIM, NUM_WORKGROUP_EACH_DIM, 1);
1703 			}
1704 			else
1705 			{
1706 				beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
1707 								makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM),
1708 								0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE);
1709 				// Draw a point cloud for vertex shader testing, and a single quad for fragment shader testing
1710 				if (m_data.stage == STAGE_VERTEX)
1711 				{
1712 					vk.cmdDraw(*cmdBuffer, DIM*DIM*NUM_WORKGROUP_EACH_DIM*NUM_WORKGROUP_EACH_DIM, 1u, 0u, 0u);
1713 				}
1714 				else
1715 				{
1716 					vk.cmdDraw(*cmdBuffer, 4u, 1u, 0u, 0u);
1717 				}
1718 				endRenderPass(vk, *cmdBuffer);
1719 			}
1720 
1721 			memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1722 			memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
1723 			vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT,
1724 				0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1725 		}
1726 
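		// On the final submission, copy the fail buffer into the host-visible copy buffer and
		// make the result visible to host reads.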
1727 		if (x == NUM_SUBMITS - 1)
1728 		{
1729 			vk.cmdCopyBuffer(*cmdBuffer, **buffers[2], **copyBuffer, 1, &copyParams);
1730 			memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1731 			memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
1732 			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1733 				0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1734 		}
1735 
1736 		endCommandBuffer(vk, *cmdBuffer);
1737 
1738 		submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
1739 
1740 		vk.resetCommandBuffer(*cmdBuffer, 0x00000000);
1741 	}
1742 
1743 	tcu::TestLog& log = m_context.getTestContext().getLog();
1744 
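	// Check the fail buffer: every nonzero element marks an invocation that observed a
	// violation of the expected ordering.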
1745 	deUint32 *ptr = (deUint32 *)copyBuffer->getAllocation().getHostPtr();
1746 	invalidateAlloc(vk, device, copyBuffer->getAllocation());
1747 	qpTestResult res = QP_TEST_RESULT_PASS;
1748 
1749 	deUint32 numErrors = 0;
1750 	for (deUint32 i = 0; i < NUM_INVOCATIONS; ++i)
1751 	{
1752 		if (ptr[i] != 0)
1753 		{
1754 			if (numErrors < 256)
1755 			{
1756 				log << tcu::TestLog::Message << "Failed invocation: " << i << tcu::TestLog::EndMessage;
1757 			}
1758 			numErrors++;
1759 			res = QP_TEST_RESULT_FAIL;
1760 		}
1761 	}
1762 
1763 	if (numErrors)
1764 	{
1765 		log << tcu::TestLog::Message << "Total Errors: " << numErrors << tcu::TestLog::EndMessage;
1766 	}
1767 
1768 	return tcu::TestStatus(res, qpGetTestResultName(res));
1769 }
1770 
1771 }	// anonymous
1772 
1773 tcu::TestCaseGroup*	createTests (tcu::TestContext& testCtx)
1774 {
1775 	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
1776 			testCtx, "memory_model", "Memory model tests"));
1777 
1778 	typedef struct
1779 	{
1780 		deUint32				value;
1781 		const char*				name;
1782 		const char*				description;
1783 	} TestGroupCase;
1784 
1785 	TestGroupCase ttCases[] =
1786 	{
1787 		{ TT_MP,	"message_passing",	"message passing"		},
1788 		{ TT_WAR,	"write_after_read",	"write after read"		},
1789 	};
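	// Illustrative sketch only (the actual shader source is generated elsewhere in this file):
	// a message-passing case is built around a producer/consumer pattern along the lines of
	//     payload[dst] = expected;        // producer writes the payload...
	//     <release guard[dst]>            // ...then releases via fence, atomic, or barrier
	//     <acquire guard[src]>            // consumer acquires the matching guard
	//     if (<guard observed> && payload[src] != expected) fail[invocation] = 1;
	// while a write-after-read case checks that a read still observes the old value when
	// another invocation writes the same location only after the synchronization point.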
1790 
1791 	TestGroupCase core11Cases[] =
1792 	{
1793 		{ 1,	"core11",	"Supported by Vulkan 1.1"							},
1794 		{ 0,	"ext",		"Requires VK_KHR_vulkan_memory_model extension"		},
1795 	};
1796 
1797 	TestGroupCase dtCases[] =
1798 	{
1799 		{ DATA_TYPE_UINT,		"u32",	"uint32_t atomics"		},
1800 		{ DATA_TYPE_UINT64,		"u64",	"uint64_t atomics"		},
1801 		{ DATA_TYPE_FLOAT32,	"f32",	"float32 atomics"		},
1802 		{ DATA_TYPE_FLOAT64,	"f64",	"float64 atomics"		},
1803 	};
1804 
1805 	TestGroupCase cohCases[] =
1806 	{
1807 		{ 1,	"coherent",		"coherent payload variable"			},
1808 		{ 0,	"noncoherent",	"noncoherent payload variable"		},
1809 	};
1810 
1811 	TestGroupCase stCases[] =
1812 	{
1813 		{ ST_FENCE_FENCE,					"fence_fence",					"release fence, acquire fence"			},
1814 		{ ST_FENCE_ATOMIC,					"fence_atomic",					"release fence, atomic acquire"			},
1815 		{ ST_ATOMIC_FENCE,					"atomic_fence",					"atomic release, acquire fence"			},
1816 		{ ST_ATOMIC_ATOMIC,					"atomic_atomic",				"atomic release, atomic acquire"		},
1817 		{ ST_CONTROL_BARRIER,				"control_barrier",				"control barrier"						},
1818 		{ ST_CONTROL_AND_MEMORY_BARRIER,	"control_and_memory_barrier",	"control barrier with release/acquire"	},
1819 	};
1820 
1821 	TestGroupCase rmwCases[] =
1822 	{
1823 		{ 0,	"atomicwrite",		"atomic write"		},
1824 		{ 1,	"atomicrmw",		"atomic rmw"		},
1825 	};
1826 
1827 	TestGroupCase scopeCases[] =
1828 	{
1829 		{ SCOPE_DEVICE,			"device",		"device scope"			},
1830 		{ SCOPE_QUEUEFAMILY,	"queuefamily",	"queuefamily scope"		},
1831 		{ SCOPE_WORKGROUP,		"workgroup",	"workgroup scope"		},
1832 		{ SCOPE_SUBGROUP,		"subgroup",		"subgroup scope"		},
1833 	};
1834 
1835 	TestGroupCase plCases[] =
1836 	{
1837 		{ 0,	"payload_nonlocal",		"payload variable in non-local memory"		},
1838 		{ 1,	"payload_local",		"payload variable in local memory"			},
1839 	};
1840 
1841 	TestGroupCase pscCases[] =
1842 	{
1843 		{ SC_BUFFER,	"buffer",		"payload variable in buffer memory"			},
1844 		{ SC_IMAGE,		"image",		"payload variable in image memory"			},
1845 		{ SC_WORKGROUP,	"workgroup",	"payload variable in workgroup memory"		},
1846 		{ SC_PHYSBUFFER,"physbuffer",	"payload variable in physical storage buffer memory"	},
1847 	};
1848 
1849 	TestGroupCase glCases[] =
1850 	{
1851 		{ 0,	"guard_nonlocal",		"guard variable in non-local memory"		},
1852 		{ 1,	"guard_local",			"guard variable in local memory"			},
1853 	};
1854 
1855 	TestGroupCase gscCases[] =
1856 	{
1857 		{ SC_BUFFER,	"buffer",		"guard variable in buffer memory"			},
1858 		{ SC_IMAGE,		"image",		"guard variable in image memory"			},
1859 		{ SC_WORKGROUP,	"workgroup",	"guard variable in workgroup memory"		},
1860 		{ SC_PHYSBUFFER,"physbuffer",	"guard variable in physical storage buffer memory"	},
1861 	};
1862 
1863 	TestGroupCase stageCases[] =
1864 	{
1865 		{ STAGE_COMPUTE,	"comp",		"compute shader"			},
1866 		{ STAGE_VERTEX,		"vert",		"vertex shader"				},
1867 		{ STAGE_FRAGMENT,	"frag",		"fragment shader"			},
1868 	};
1869 
1870 
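	// Build the main test tree: one group level per test axis (test type, core 1.1 vs. extension,
	// data type, coherence, sync type, atomic write vs. RMW, scope, payload locality and storage
	// class, guard locality and storage class), with one case per shader stage at the leaves.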
1871 	for (int ttNdx = 0; ttNdx < DE_LENGTH_OF_ARRAY(ttCases); ttNdx++)
1872 	{
1873 		de::MovePtr<tcu::TestCaseGroup> ttGroup(new tcu::TestCaseGroup(testCtx, ttCases[ttNdx].name, ttCases[ttNdx].description));
1874 		for (int core11Ndx = 0; core11Ndx < DE_LENGTH_OF_ARRAY(core11Cases); core11Ndx++)
1875 		{
1876 			de::MovePtr<tcu::TestCaseGroup> core11Group(new tcu::TestCaseGroup(testCtx, core11Cases[core11Ndx].name, core11Cases[core11Ndx].description));
1877 			for (int dtNdx = 0; dtNdx < DE_LENGTH_OF_ARRAY(dtCases); dtNdx++)
1878 			{
1879 				de::MovePtr<tcu::TestCaseGroup> dtGroup(new tcu::TestCaseGroup(testCtx, dtCases[dtNdx].name, dtCases[dtNdx].description));
1880 				for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
1881 				{
1882 					de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name, cohCases[cohNdx].description));
1883 					for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
1884 					{
1885 						de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name, stCases[stNdx].description));
1886 						for (int rmwNdx = 0; rmwNdx < DE_LENGTH_OF_ARRAY(rmwCases); rmwNdx++)
1887 						{
1888 							de::MovePtr<tcu::TestCaseGroup> rmwGroup(new tcu::TestCaseGroup(testCtx, rmwCases[rmwNdx].name, rmwCases[rmwNdx].description));
1889 							for (int scopeNdx = 0; scopeNdx < DE_LENGTH_OF_ARRAY(scopeCases); scopeNdx++)
1890 							{
1891 								de::MovePtr<tcu::TestCaseGroup> scopeGroup(new tcu::TestCaseGroup(testCtx, scopeCases[scopeNdx].name, scopeCases[scopeNdx].description));
1892 								for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
1893 								{
1894 									de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name, plCases[plNdx].description));
1895 									for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
1896 									{
1897 										de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name, pscCases[pscNdx].description));
1898 										for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
1899 										{
1900 											de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name, glCases[glNdx].description));
1901 											for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
1902 											{
1903 												de::MovePtr<tcu::TestCaseGroup> gscGroup(new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name, gscCases[gscNdx].description));
1904 												for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stageCases); stageNdx++)
1905 												{
1906 													CaseDef c =
1907 													{
1908 														!!plCases[plNdx].value,					// bool payloadMemLocal;
1909 														!!glCases[glNdx].value,					// bool guardMemLocal;
1910 														!!cohCases[cohNdx].value,				// bool coherent;
1911 														!!core11Cases[core11Ndx].value,			// bool core11;
1912 														!!rmwCases[rmwNdx].value,				// bool atomicRMW;
1913 														(TestType)ttCases[ttNdx].value,			// TestType testType;
1914 														(StorageClass)pscCases[pscNdx].value,	// StorageClass payloadSC;
1915 														(StorageClass)gscCases[gscNdx].value,	// StorageClass guardSC;
1916 														(Scope)scopeCases[scopeNdx].value,		// Scope scope;
1917 														(SyncType)stCases[stNdx].value,			// SyncType syncType;
1918 														(Stage)stageCases[stageNdx].value,		// Stage stage;
1919 														(DataType)dtCases[dtNdx].value,			// DataType dataType;
1920 														false,									// bool transitive;
1921 														false,									// bool transitiveVis;
1922 													};
1923 
1924 													// Core 1.1 ("mustpass11") tests should only exercise things we expect to work
1925 													// on existing implementations. Exclude noncoherent tests, which require the
1926 													// memory model extension, and assume atomic synchronization wouldn't work
1927 													// without it (i.e. atomics may be implemented as relaxed atomics). Exclude
1928 													// 64-bit data types and queuefamily scope, which don't exist in Vulkan 1.1,
1929 													// and physical storage buffer, which doesn't support the legacy decorations.
1930 													if (c.core11 &&
1931 														(c.coherent == 0 ||
1932 														c.syncType == ST_FENCE_ATOMIC ||
1933 														c.syncType == ST_ATOMIC_FENCE ||
1934 														c.syncType == ST_ATOMIC_ATOMIC ||
1935 														c.dataType == DATA_TYPE_UINT64 ||
1936 														c.dataType == DATA_TYPE_FLOAT64 ||
1937 														c.scope == SCOPE_QUEUEFAMILY ||
1938 														c.payloadSC == SC_PHYSBUFFER ||
1939 														c.guardSC == SC_PHYSBUFFER))
1940 													{
1941 														continue;
1942 													}
1943 
1944 													if (c.stage != STAGE_COMPUTE &&
1945 														c.scope == SCOPE_WORKGROUP)
1946 													{
1947 														continue;
1948 													}
1949 
1950 													// Memory locality doesn't apply to workgroup storage, so only generate one locality variant for it.
1951 													// Workgroup storage also only exists in compute shaders, so skip it for other stages.
1952 													if (c.payloadSC == SC_WORKGROUP && (c.payloadMemLocal != 0 || c.stage != STAGE_COMPUTE))
1953 													{
1954 														continue;
1955 													}
1956 													if (c.guardSC == SC_WORKGROUP && (c.guardMemLocal != 0 || c.stage != STAGE_COMPUTE))
1957 													{
1958 														continue;
1959 													}
1960 													// Can't do control barrier with larger than workgroup scope, or non-compute stages
1961 													if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
1962 														(c.scope == SCOPE_DEVICE || c.scope == SCOPE_QUEUEFAMILY || c.stage != STAGE_COMPUTE))
1963 													{
1964 														continue;
1965 													}
1966 
1967 													// Limit RMW atomics to ST_ATOMIC_ATOMIC, just to reduce # of test cases
1968 													if (c.atomicRMW && c.syncType != ST_ATOMIC_ATOMIC)
1969 													{
1970 														continue;
1971 													}
1972 
1973 													// uint64/float32/float64 testing is primarily for atomics, so only test them with ST_ATOMIC_ATOMIC
1974 													const bool atomicTesting = (c.dataType == DATA_TYPE_UINT64 || c.dataType == DATA_TYPE_FLOAT32 || c.dataType == DATA_TYPE_FLOAT64);
1975 													if (atomicTesting && c.syncType != ST_ATOMIC_ATOMIC)
1976 													{
1977 														continue;
1978 													}
1979 
1980 													// No 64-bit image types, so skip tests with both payload and guard in image memory
1981 													if (c.dataType == DATA_TYPE_UINT64 && c.payloadSC == SC_IMAGE && c.guardSC == SC_IMAGE)
1982 													{
1983 														continue;
1984 													}
1985 
1986 													// No support for atomic operations on 64-bit floating point images
1987 													if (c.dataType == DATA_TYPE_FLOAT64 && (c.payloadSC == SC_IMAGE || c.guardSC == SC_IMAGE))
1988 													{
1989 														continue;
1990 													}
1991 													// Control barrier tests don't use a guard variable, so only generate them for the first guard storage class/locality combination (gscNdx == 0, glNdx == 0)
1992 													if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
1993 														(c.guardSC != 0 || c.guardMemLocal != 0))
1994 													{
1995 														continue;
1996 													}
1997 
1998 													gscGroup->addChild(new MemoryModelTestCase(testCtx, stageCases[stageNdx].name, stageCases[stageNdx].description, c));
1999 												}
2000 												glGroup->addChild(gscGroup.release());
2001 											}
2002 											pscGroup->addChild(glGroup.release());
2003 										}
2004 										plGroup->addChild(pscGroup.release());
2005 									}
2006 									scopeGroup->addChild(plGroup.release());
2007 								}
2008 								rmwGroup->addChild(scopeGroup.release());
2009 							}
2010 							stGroup->addChild(rmwGroup.release());
2011 						}
2012 						cohGroup->addChild(stGroup.release());
2013 					}
2014 					dtGroup->addChild(cohGroup.release());
2015 				}
2016 				core11Group->addChild(dtGroup.release());
2017 			}
2018 			ttGroup->addChild(core11Group.release());
2019 		}
2020 		group->addChild(ttGroup.release());
2021 	}
2022 
2023 	TestGroupCase transVisCases[] =
2024 	{
2025 		{ 0,	"nontransvis",		"destination invocation acquires"		},
2026 		{ 1,	"transvis",			"invocation 0,0 acquires"				},
2027 	};
2028 
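	// Transitive visibility tests: compute-only, device-scope message-passing variants with
	// transitive == true, where either the destination invocation or invocation (0,0) performs
	// the acquire (transitiveVis). Workgroup storage and control-barrier sync types are skipped.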
2029 	de::MovePtr<tcu::TestCaseGroup> transGroup(new tcu::TestCaseGroup(testCtx, "transitive", "transitive"));
2030 	for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
2031 	{
2032 		de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name, cohCases[cohNdx].description));
2033 		for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
2034 		{
2035 			de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name, stCases[stNdx].description));
2036 			for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
2037 			{
2038 				de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name, plCases[plNdx].description));
2039 				for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
2040 				{
2041 					de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name, pscCases[pscNdx].description));
2042 					for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
2043 					{
2044 						de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name, glCases[glNdx].description));
2045 						for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
2046 						{
2047 							de::MovePtr<tcu::TestCaseGroup> gscGroup(new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name, gscCases[gscNdx].description));
2048 							for (int visNdx = 0; visNdx < DE_LENGTH_OF_ARRAY(transVisCases); visNdx++)
2049 							{
2050 								CaseDef c =
2051 								{
2052 									!!plCases[plNdx].value,					// bool payloadMemLocal;
2053 									!!glCases[glNdx].value,					// bool guardMemLocal;
2054 									!!cohCases[cohNdx].value,				// bool coherent;
2055 									false,									// bool core11;
2056 									false,									// bool atomicRMW;
2057 									TT_MP,									// TestType testType;
2058 									(StorageClass)pscCases[pscNdx].value,	// StorageClass payloadSC;
2059 									(StorageClass)gscCases[gscNdx].value,	// StorageClass guardSC;
2060 									SCOPE_DEVICE,							// Scope scope;
2061 									(SyncType)stCases[stNdx].value,			// SyncType syncType;
2062 									STAGE_COMPUTE,							// Stage stage;
2063 									DATA_TYPE_UINT,							// DataType dataType;
2064 									true,									// bool transitive;
2065 									!!transVisCases[visNdx].value,			// bool transitiveVis;
2066 								};
2067 								if (c.payloadSC == SC_WORKGROUP || c.guardSC == SC_WORKGROUP)
2068 								{
2069 									continue;
2070 								}
2071 								if (c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
2072 								{
2073 									continue;
2074 								}
2075 								gscGroup->addChild(new MemoryModelTestCase(testCtx, transVisCases[visNdx].name, transVisCases[visNdx].description, c));
2076 							}
2077 							glGroup->addChild(gscGroup.release());
2078 						}
2079 						pscGroup->addChild(glGroup.release());
2080 					}
2081 					plGroup->addChild(pscGroup.release());
2082 				}
2083 				stGroup->addChild(plGroup.release());
2084 			}
2085 			cohGroup->addChild(stGroup.release());
2086 		}
2087 		transGroup->addChild(cohGroup.release());
2088 	}
2089 	group->addChild(transGroup.release());
2090 
2091 	// Padding tests.
2092 	group->addChild(createPaddingTests(testCtx));
2093 
2094 	return group.release();
2095 }
2096 
2097 }	// MemoryModel
2098 }	// vkt
2099