1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017 The Khronos Group Inc.
6  * Copyright (c) 2018 NVIDIA Corporation
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *	  http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Vulkan Memory Model tests
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktMemoryModelTests.hpp"
26 
27 #include "vkBufferWithMemory.hpp"
28 #include "vkImageWithMemory.hpp"
29 #include "vkQueryUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkCmdUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vktTestGroupUtil.hpp"
34 #include "vktTestCase.hpp"
35 
36 #include "deDefs.h"
37 #include "deMath.h"
38 #include "deSharedPtr.hpp"
39 #include "deString.h"
40 
41 #include "tcuTestCase.hpp"
42 #include "tcuTestLog.hpp"
43 
44 #include <string>
45 #include <sstream>
46 
47 namespace vkt
48 {
49 namespace MemoryModel
50 {
51 namespace
52 {
53 using namespace vk;
54 using namespace std;
55 
// Hazard pattern the generated shader exercises.
enum TestType
{
	TT_MP	= 0,	// message passing: store payload, synchronize, load partner's payload
	TT_WAR	= 1,	// write-after-read: load partner's payload, synchronize, store payload
};
61 
// How the release (first word) and acquire (second word) sides of the
// synchronization are expressed in the generated shader.
enum SyncType
{
	ST_FENCE_FENCE					= 0,	// memoryBarrier on both sides, guard atomics carry no semantics
	ST_FENCE_ATOMIC					= 1,	// release via memoryBarrier, acquire on the guard atomic
	ST_ATOMIC_FENCE					= 2,	// release on the guard atomic, acquire via memoryBarrier
	ST_ATOMIC_ATOMIC				= 3,	// release and acquire both on the guard atomics
	ST_CONTROL_BARRIER				= 4,	// a single controlBarrier with acquire+release semantics
	ST_CONTROL_AND_MEMORY_BARRIER	= 5,	// memoryBarrier / controlBarrier / memoryBarrier sequence
};
71 
// Where the payload or guard variable lives in the generated shader.
enum StorageClass
{
	SC_BUFFER		= 0,	// storage buffer block
	SC_IMAGE		= 1,	// r32ui storage image
	SC_WORKGROUP	= 2,	// "shared" variable
};
78 
// Memory scope used for the barriers/atomics; mapped to the matching
// gl_Scope* constant when the shader is generated.
enum Scope
{
	SCOPE_DEVICE		= 0,
	SCOPE_QUEUEFAMILY	= 1,
	SCOPE_WORKGROUP		= 2,
	SCOPE_SUBGROUP		= 3,
};
86 
// Shader stage in which the test shader runs.
enum Stage
{
	STAGE_COMPUTE	= 0,
	STAGE_VERTEX	= 1,
	STAGE_FRAGMENT	= 2,
};
93 
// Element type of the payload/guard variables ("uint" or "uint64_t" in GLSL).
enum DataType
{
	DATA_TYPE_UINT		= 0,
	DATA_TYPE_UINT64	= 1,
};
99 
100 const VkFlags allShaderStages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
101 const VkFlags allPipelineStages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
102 
// Description of a single test variant. Copied by value into both the
// test case and the test instance.
// NOTE(review): presumably filled with a positional aggregate initializer
// elsewhere in the file, so the member order should be kept - confirm.
struct CaseDef
{
	// Payload buffer is allocated with MemoryRequirement::Local (true) or NonLocal (false).
	bool payloadMemLocal;
	// Guard buffer is allocated with MemoryRequirement::Local (true) or NonLocal (false).
	bool guardMemLocal;
	// true: payload is declared with a coherent qualifier; false: it is declared
	// "nonprivate" and (for non-WAR tests) MakeAvailable/MakeVisible semantics are added.
	bool coherent;
	// true: the shader omits "#pragma use_vulkan_memory_model", uses plain "coherent",
	// and the vulkanMemoryModel feature checks are skipped.
	bool core11;
	// true: the guard is accessed with atomicExchange instead of atomicStore/atomicLoad.
	bool atomicRMW;
	// Message passing or write-after-read.
	TestType testType;
	// Storage class of the payload variable.
	StorageClass payloadSC;
	// Storage class of the guard variable.
	StorageClass guardSC;
	// Memory scope used for barriers and atomics.
	Scope scope;
	// How release/acquire are expressed (fences, atomics, control barrier).
	SyncType syncType;
	// Shader stage the test shader runs in.
	Stage stage;
	// Element type of payload/guard (32-bit or 64-bit unsigned).
	DataType dataType;
};
118 
119 class MemoryModelTestInstance : public TestInstance
120 {
121 public:
122 						MemoryModelTestInstance	(Context& context, const CaseDef& data);
123 						~MemoryModelTestInstance	(void);
124 	tcu::TestStatus		iterate				(void);
125 private:
126 	CaseDef			m_data;
127 
128 	enum
129 	{
130 		WIDTH = 256,
131 		HEIGHT = 256
132 	};
133 };
134 
MemoryModelTestInstance(Context & context,const CaseDef & data)135 MemoryModelTestInstance::MemoryModelTestInstance (Context& context, const CaseDef& data)
136 	: vkt::TestInstance		(context)
137 	, m_data				(data)
138 {
139 }
140 
~MemoryModelTestInstance(void)141 MemoryModelTestInstance::~MemoryModelTestInstance (void)
142 {
143 }
144 
145 class MemoryModelTestCase : public TestCase
146 {
147 	public:
148 								MemoryModelTestCase		(tcu::TestContext& context, const char* name, const char* desc, const CaseDef data);
149 								~MemoryModelTestCase	(void);
150 	virtual	void				initPrograms		(SourceCollections& programCollection) const;
151 	virtual TestInstance*		createInstance		(Context& context) const;
152 	virtual void				checkSupport		(Context& context) const;
153 
154 private:
155 	CaseDef					m_data;
156 };
157 
MemoryModelTestCase(tcu::TestContext & context,const char * name,const char * desc,const CaseDef data)158 MemoryModelTestCase::MemoryModelTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data)
159 	: vkt::TestCase	(context, name, desc)
160 	, m_data		(data)
161 {
162 }
163 
~MemoryModelTestCase(void)164 MemoryModelTestCase::~MemoryModelTestCase	(void)
165 {
166 }
167 
checkSupport(Context & context) const168 void MemoryModelTestCase::checkSupport(Context& context) const
169 {
170 	if (!context.contextSupports(vk::ApiVersion(1, 1, 0)))
171 	{
172 		TCU_THROW(NotSupportedError, "Vulkan 1.1 not supported");
173 	}
174 
175 	if (!m_data.core11)
176 	{
177 		if (!context.getVulkanMemoryModelFeatures().vulkanMemoryModel)
178 		{
179 			TCU_THROW(NotSupportedError, "vulkanMemoryModel not supported");
180 		}
181 
182 		if (m_data.scope == SCOPE_DEVICE && !context.getVulkanMemoryModelFeatures().vulkanMemoryModelDeviceScope)
183 		{
184 			TCU_THROW(NotSupportedError, "vulkanMemoryModelDeviceScope not supported");
185 		}
186 	}
187 
188 	if (m_data.scope == SCOPE_SUBGROUP)
189 	{
190 		// Check for subgroup support for scope_subgroup tests.
191 		VkPhysicalDeviceSubgroupProperties subgroupProperties;
192 		subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
193 		subgroupProperties.pNext = DE_NULL;
194 		subgroupProperties.supportedOperations = 0;
195 
196 		VkPhysicalDeviceProperties2 properties;
197 		properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
198 		properties.pNext = &subgroupProperties;
199 
200 		context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
201 
202 		if (!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) ||
203 			!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) ||
204 			!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT))
205 		{
206 			TCU_THROW(NotSupportedError, "Subgroup features not supported");
207 		}
208 	}
209 	if (m_data.dataType == DATA_TYPE_UINT64)
210 	{
211 		if (!context.getDeviceFeatures().shaderInt64)
212 		{
213 			TCU_THROW(NotSupportedError, "64-bit integer in shaders not supported");
214 		}
215 		if (!context.getShaderAtomicInt64Features().shaderBufferInt64Atomics &&
216 			m_data.guardSC == SC_BUFFER)
217 		{
218 			TCU_THROW(NotSupportedError, "64-bit integer buffer atomics not supported");
219 		}
220 		if (!context.getShaderAtomicInt64Features().shaderSharedInt64Atomics &&
221 			m_data.guardSC == SC_WORKGROUP)
222 		{
223 			TCU_THROW(NotSupportedError, "64-bit integer shared atomics not supported");
224 		}
225 	}
226 	if (m_data.stage == STAGE_VERTEX)
227 	{
228 		if (!context.getDeviceFeatures().vertexPipelineStoresAndAtomics)
229 		{
230 			TCU_THROW(NotSupportedError, "vertexPipelineStoresAndAtomics not supported");
231 		}
232 	}
233 	if (m_data.stage == STAGE_FRAGMENT)
234 	{
235 		if (!context.getDeviceFeatures().fragmentStoresAndAtomics)
236 		{
237 			TCU_THROW(NotSupportedError, "fragmentStoresAndAtomics not supported");
238 		}
239 	}
240 }
241 
242 
// Generates the GLSL for this test variant and adds it to programCollection.
//
// One shader named "test" is produced for the stage selected by m_data.stage
// and built with SPIR-V 1.3 build options. For the fragment stage an
// additional pass-through vertex shader named "vert" is added.
//
// Every invocation is paired with a partner invocation. The shader body is:
//  - TT_MP:  store own payload, synchronize, load the partner's payload and
//            set fail.x[bufferCoord] if the guard was observed as set and the
//            loaded value differs from partnerBufferCoord.
//  - TT_WAR: load the partner's payload, synchronize, store own payload only
//            if the guard was observed as set, and set fail.x[bufferCoord] if
//            the value loaded earlier was non-zero.
// "skip" is true when the guard read returned 0, i.e. the partner's release
// was not observed.
void MemoryModelTestCase::initPrograms (SourceCollections& programCollection) const
{
	// When a workgroup-storage variable is involved, partners are chosen inside
	// the same workgroup even if the memory scope is device or queue family.
	Scope invocationMapping = m_data.scope;
	if ((m_data.scope == SCOPE_DEVICE || m_data.scope == SCOPE_QUEUEFAMILY) &&
		(m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP))
	{
		invocationMapping = SCOPE_WORKGROUP;
	}

	// GLSL scope constant passed to barriers and atomics.
	const char *scopeStr;
	switch (m_data.scope)
	{
	default: DE_ASSERT(0); // fall through
	case SCOPE_DEVICE:		scopeStr = "gl_ScopeDevice"; break;
	case SCOPE_QUEUEFAMILY:	scopeStr = "gl_ScopeQueueFamily"; break;
	case SCOPE_WORKGROUP:	scopeStr = "gl_ScopeWorkgroup"; break;
	case SCOPE_SUBGROUP:	scopeStr = "gl_ScopeSubgroup"; break;
	}

	// GLSL element type of payload/guard.
	const char *typeStr = m_data.dataType == DATA_TYPE_UINT64 ? "uint64_t" : "uint";

	// Construct storageSemantics strings. Both release and acquire
	// always have the payload storage class. They only include the
	// guard storage class if they're using FENCE for that side of the
	// sync.
	std::stringstream storageSemanticsRelease;
	switch (m_data.payloadSC)
	{
	default: DE_ASSERT(0); // fall through
	case SC_BUFFER:		storageSemanticsRelease << "gl_StorageSemanticsBuffer"; break;
	case SC_IMAGE:		storageSemanticsRelease << "gl_StorageSemanticsImage"; break;
	case SC_WORKGROUP:	storageSemanticsRelease << "gl_StorageSemanticsShared"; break;
	}
	std::stringstream storageSemanticsAcquire;
	storageSemanticsAcquire << storageSemanticsRelease.str();
	if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
	{
		switch (m_data.guardSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_BUFFER:		storageSemanticsRelease << " | gl_StorageSemanticsBuffer"; break;
		case SC_IMAGE:		storageSemanticsRelease << " | gl_StorageSemanticsImage"; break;
		case SC_WORKGROUP:	storageSemanticsRelease << " | gl_StorageSemanticsShared"; break;
		}
	}
	if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
	{
		switch (m_data.guardSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_BUFFER:		storageSemanticsAcquire << " | gl_StorageSemanticsBuffer"; break;
		case SC_IMAGE:		storageSemanticsAcquire << " | gl_StorageSemanticsImage"; break;
		case SC_WORKGROUP:	storageSemanticsAcquire << " | gl_StorageSemanticsShared"; break;
		}
	}

	// Memory semantics strings. Non-coherent message-passing variants add
	// explicit MakeAvailable/MakeVisible; this combination is asserted to be
	// used only with the Vulkan memory model (not core 1.1).
	std::stringstream semanticsRelease, semanticsAcquire, semanticsAcquireRelease;

	semanticsRelease << "gl_SemanticsRelease";
	semanticsAcquire << "gl_SemanticsAcquire";
	semanticsAcquireRelease << "gl_SemanticsAcquireRelease";
	if (!m_data.coherent && m_data.testType != TT_WAR)
	{
		DE_ASSERT(!m_data.core11);
		semanticsRelease << " | gl_SemanticsMakeAvailable";
		semanticsAcquire << " | gl_SemanticsMakeVisible";
		semanticsAcquireRelease << " | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible";
	}

	// css accumulates the source of the "test" shader.
	std::stringstream css;
	css << "#version 450 core\n";
	if (!m_data.core11)
	{
		css << "#pragma use_vulkan_memory_model\n";
	}
	css <<
		"#extension GL_KHR_shader_subgroup_basic : enable\n"
		"#extension GL_KHR_shader_subgroup_shuffle : enable\n"
		"#extension GL_KHR_shader_subgroup_ballot : enable\n"
		"#extension GL_KHR_memory_scope_semantics : enable\n"
		"#extension GL_ARB_gpu_shader_int64 : enable\n"
		"// DIM/NUM_WORKGROUP_EACH_DIM overriden by spec constants\n"
		"layout(constant_id = 0) const int DIM = 1;\n"
		"layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
		"struct S { " << typeStr << " x[DIM*DIM]; };\n";

	if (m_data.stage == STAGE_COMPUTE)
	{
		// Workgroup is DIM x DIM x 1; x and y both come from spec constant 0.
		css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
	}

	// Memory qualifier applied to the payload declaration.
	const char *memqual = "";
	if (m_data.coherent)
	{
		if (m_data.core11)
		{
			// Vulkan 1.1 only has "coherent", use it regardless of scope
			memqual = "coherent";
		}
		else
		{
			switch (m_data.scope)
			{
			default: DE_ASSERT(0); // fall through
			case SCOPE_DEVICE:		memqual = "devicecoherent"; break;
			case SCOPE_QUEUEFAMILY:	memqual = "queuefamilycoherent"; break;
			case SCOPE_WORKGROUP:	memqual = "workgroupcoherent"; break;
			case SCOPE_SUBGROUP:	memqual = "subgroupcoherent"; break;
			}
		}
	}
	else
	{
		DE_ASSERT(!m_data.core11);
		memqual = "nonprivate";
	}

	// Declare payload, guard, and fail resources
	switch (m_data.payloadSC)
	{
	default: DE_ASSERT(0); // fall through
	case SC_BUFFER:		css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n"; break;
	case SC_IMAGE:		css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n"; break;
	case SC_WORKGROUP:	css << "shared S payload;\n"; break;
	}
	// The control-barrier variants do not use a guard variable at all.
	if (m_data.syncType != ST_CONTROL_AND_MEMORY_BARRIER && m_data.syncType != ST_CONTROL_BARRIER)
	{
		// The guard variable is only accessed with atomics and need not be declared coherent.
		switch (m_data.guardSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_BUFFER:		css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n"; break;
		case SC_IMAGE:		css << "layout(set=0, binding=1, r32ui) uniform uimage2D guard;\n"; break;
		case SC_WORKGROUP:	css << "shared S guard;\n"; break;
		}
	}

	// The fail buffer is always an array of uint, independent of dataType.
	css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";

	css <<
		"void main()\n"
		"{\n"
		"   bool pass = true;\n"
		"   bool skip = false;\n";

	if (m_data.stage == STAGE_FRAGMENT)
	{
		// Kill helper invocations so they don't load outside the bounds of the SSBO.
		// Helper pixels are also initially "active" and if a thread gets one as its
		// partner in SCOPE_SUBGROUP mode, it can't run the test.
		css << "   if (gl_HelperInvocation) { return; }\n";
	}

	// Compute coordinates based on the storage class and scope.
	// For workgroup scope, we pair up LocalInvocationID and DIM-1-LocalInvocationID.
	// For device scope, we pair up GlobalInvocationID and DIM*NUMWORKGROUPS-1-GlobalInvocationID.
	// For subgroup scope, we pair up LocalInvocationID and LocalInvocationID from subgroupId^(subgroupSize-1)
	switch (invocationMapping)
	{
	default: DE_ASSERT(0); // fall through
	case SCOPE_SUBGROUP:
		// If the partner invocation isn't active, the shuffle below will be undefined. Bail.
		css << "   uvec4 ballot = subgroupBallot(true);\n"
			   "   if (!subgroupBallotBitExtract(ballot, gl_SubgroupInvocationID^(gl_SubgroupSize-1))) { return; }\n";

		switch (m_data.stage)
		{
		default: DE_ASSERT(0); // fall through
		case STAGE_COMPUTE:
			css <<
			"   ivec2 localId           = ivec2(gl_LocalInvocationID.xy);\n"
			"   ivec2 partnerLocalId    = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
			"   uint sharedCoord        = localId.y * DIM + localId.x;\n"
			"   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
			"   uint bufferCoord        = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
			"   uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
			"   ivec2 imageCoord        = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
			"   ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
			break;
		case STAGE_VERTEX:
			css <<
			"   uint bufferCoord        = gl_VertexIndex;\n"
			"   uint partnerBufferCoord = subgroupShuffleXor(gl_VertexIndex, gl_SubgroupSize-1);\n"
			"   ivec2 imageCoord        = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
			"   ivec2 partnerImageCoord = subgroupShuffleXor(imageCoord, gl_SubgroupSize-1);\n"
			"   gl_PointSize            = 1.0f;\n"
			"   gl_Position             = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
			break;
		case STAGE_FRAGMENT:
			css <<
			"   ivec2 localId        = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
			"   ivec2 groupId        = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
			"   ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
			"   ivec2 partnerGroupId = subgroupShuffleXor(groupId, gl_SubgroupSize-1);\n"
			"   uint sharedCoord     = localId.y * DIM + localId.x;\n"
			"   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
			"   uint bufferCoord     = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
			"   uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
			"   ivec2 imageCoord     = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
			"   ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
			break;
		}
		break;
	case SCOPE_WORKGROUP:
		css <<
		"   ivec2 localId           = ivec2(gl_LocalInvocationID.xy);\n"
		"   ivec2 partnerLocalId    = ivec2(DIM-1)-ivec2(gl_LocalInvocationID.xy);\n"
		"   uint sharedCoord        = localId.y * DIM + localId.x;\n"
		"   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
		"   uint bufferCoord        = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
		"   uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
		"   ivec2 imageCoord        = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
		"   ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
		break;
	case SCOPE_QUEUEFAMILY:
	case SCOPE_DEVICE:
		switch (m_data.stage)
		{
		default: DE_ASSERT(0); // fall through
		case STAGE_COMPUTE:
			css <<
			"   ivec2 globalId          = ivec2(gl_GlobalInvocationID.xy);\n"
			"   ivec2 partnerGlobalId   = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n"
			"   uint bufferCoord        = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
			"   uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
			"   ivec2 imageCoord        = globalId;\n"
			"   ivec2 partnerImageCoord = partnerGlobalId;\n";
			break;
		case STAGE_VERTEX:
			css <<
			"   ivec2 globalId          = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
			"   ivec2 partnerGlobalId   = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - globalId;\n"
			"   uint bufferCoord        = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
			"   uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
			"   ivec2 imageCoord        = globalId;\n"
			"   ivec2 partnerImageCoord = partnerGlobalId;\n"
			"   gl_PointSize            = 1.0f;\n"
			"   gl_Position             = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
			break;
		case STAGE_FRAGMENT:
			// Partners are mirrored inside a DIM x DIM tile of the framebuffer;
			// the partner's tile (groupId) is the same as the invocation's own.
			css <<
			"   ivec2 localId       = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
			"   ivec2 groupId       = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
			"   ivec2 partnerLocalId = ivec2(DIM-1)-localId;\n"
			"   ivec2 partnerGroupId = groupId;\n"
			"   uint sharedCoord    = localId.y * DIM + localId.x;\n"
			"   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
			"   uint bufferCoord    = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
			"   uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
			"   ivec2 imageCoord    = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
			"   ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
			break;
		}
		break;
	}

	// Initialize shared memory, followed by a barrier
	if (m_data.payloadSC == SC_WORKGROUP)
	{
		css << "   payload.x[sharedCoord] = 0;\n";
	}
	if (m_data.guardSC == SC_WORKGROUP)
	{
		css << "   guard.x[sharedCoord] = 0;\n";
	}
	if (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP)
	{
		switch (invocationMapping)
		{
		default: DE_ASSERT(0); // fall through
		case SCOPE_SUBGROUP:	css << "   subgroupBarrier();\n"; break;
		case SCOPE_WORKGROUP:	css << "   barrier();\n"; break;
		}
	}

	if (m_data.testType == TT_MP)
	{
		// Store payload
		// NOTE(review): the ">>31" term presumably evaluates to 0 for every value
		// written here and only serves to make the store depend on a read of the
		// partner's slot - confirm.
		switch (m_data.payloadSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_BUFFER:		css << "   payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n"; break;
		case SC_IMAGE:		css << "   imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, partnerImageCoord).x>>31), 0, 0, 0));\n"; break;
		case SC_WORKGROUP:	css << "   payload.x[sharedCoord] = bufferCoord + (payload.x[partnerSharedCoord]>>31);\n"; break;
		}
	}
	else
	{
		DE_ASSERT(m_data.testType == TT_WAR);
		// Load payload
		switch (m_data.payloadSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_BUFFER:		css << "   " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
		case SC_IMAGE:		css << "   " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
		case SC_WORKGROUP:	css << "   " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
		}
	}
	if (m_data.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
	{
		// Acquire and release separate from control barrier
		css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n"
			   "   controlBarrier(" << scopeStr << ", gl_ScopeInvocation, 0, 0);\n"
			   "   memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
	}
	else if (m_data.syncType == ST_CONTROL_BARRIER)
	{
		// Control barrier performs both acquire and release
		css << "   controlBarrier(" << scopeStr << ", " << scopeStr << ", "
									<< storageSemanticsRelease.str() << " | " << storageSemanticsAcquire.str() << ", "
									<< semanticsAcquireRelease.str() << ");\n";
	}
	else
	{
		// Release barrier
		// When a fence provides the release, the guard atomic itself gets
		// "0, 0" (no storage semantics, no memory semantics).
		std::stringstream atomicReleaseSemantics;
		if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
		{
			css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n";
			atomicReleaseSemantics << ", 0, 0";
		}
		else
		{
			atomicReleaseSemantics << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str();
		}
		// Atomic store guard
		if (m_data.atomicRMW)
		{
			switch (m_data.guardSC)
			{
			default: DE_ASSERT(0); // fall through
			case SC_BUFFER:		css << "   atomicExchange(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
			case SC_IMAGE:		css << "   imageAtomicExchange(guard, imageCoord, (1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
			case SC_WORKGROUP:	css << "   atomicExchange(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
			}
		}
		else
		{
			switch (m_data.guardSC)
			{
			default: DE_ASSERT(0); // fall through
			case SC_BUFFER:		css << "   atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
			case SC_IMAGE:		css << "   imageAtomicStore(guard, imageCoord, (1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
			case SC_WORKGROUP:	css << "   atomicStore(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
			}
		}

		// When a fence provides the acquire, the guard atomic itself gets "0, 0".
		std::stringstream atomicAcquireSemantics;
		if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
		{
			atomicAcquireSemantics << ", 0, 0";
		}
		else
		{
			atomicAcquireSemantics << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str();
		}
		// Atomic load guard
		// A result of 0 means the partner's guard write was not observed, so the
		// check (TT_MP) or the store (TT_WAR) is skipped.
		if (m_data.atomicRMW)
		{
			switch (m_data.guardSC)
			{
			default: DE_ASSERT(0); // fall through
			case SC_BUFFER:		css << "   skip = atomicExchange(guard.x[partnerBufferCoord], 2u, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
			case SC_IMAGE:		css << "   skip = imageAtomicExchange(guard, partnerImageCoord, 2u, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
			case SC_WORKGROUP:	css << "   skip = atomicExchange(guard.x[partnerSharedCoord], 2u, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
			}
		} else
		{
			switch (m_data.guardSC)
			{
			default: DE_ASSERT(0); // fall through
			case SC_BUFFER:		css << "   skip = atomicLoad(guard.x[partnerBufferCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
			case SC_IMAGE:		css << "   skip = imageAtomicLoad(guard, partnerImageCoord, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
			case SC_WORKGROUP:	css << "   skip = atomicLoad(guard.x[partnerSharedCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
			}
		}
		// Acquire barrier
		if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
		{
			css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
		}
	}
	if (m_data.testType == TT_MP)
	{
		// Load payload
		switch (m_data.payloadSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_BUFFER:		css << "   " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
		case SC_IMAGE:		css << "   " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
		case SC_WORKGROUP:	css << "   " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
		}
		// The partner stores its own bufferCoord, so that is the expected value.
		css <<
			"   if (!skip && r != partnerBufferCoord) { fail.x[bufferCoord] = 1; }\n"
			"}\n";
	}
	else
	{
		DE_ASSERT(m_data.testType == TT_WAR);
		// Store payload, only if the partner invocation has already done its read
		css << "   if (!skip) {\n   ";
		switch (m_data.payloadSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_BUFFER:		css << "   payload.x[bufferCoord] = bufferCoord;\n"; break;
		case SC_IMAGE:		css << "   imageStore(payload, imageCoord, uvec4(bufferCoord, 0, 0, 0));\n"; break;
		case SC_WORKGROUP:	css << "   payload.x[sharedCoord] = bufferCoord;\n"; break;
		}
		// The early load must not have seen the partner's later store.
		css <<
			"   }\n"
			"   if (r != 0) { fail.x[bufferCoord] = 1; }\n"
			"}\n";
	}

	// Draw a fullscreen triangle strip based on gl_VertexIndex
	std::stringstream vss;
	vss <<
		"#version 450 core\n"
		"vec2 coords[4] = {ivec2(-1,-1), ivec2(-1, 1), ivec2(1, -1), ivec2(1, 1)};\n"
		"void main() { gl_Position = vec4(coords[gl_VertexIndex], 0, 1); }\n";

	// The test shader is built targeting SPIR-V 1.3.
	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);

	switch (m_data.stage)
	{
	default: DE_ASSERT(0); // fall through
	case STAGE_COMPUTE:
		programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
		break;
	case STAGE_VERTEX:
		programCollection.glslSources.add("test") << glu::VertexSource(css.str()) << buildOptions;
		break;
	case STAGE_FRAGMENT:
		// The pass-through vertex shader is added without the SPIR-V 1.3 build options.
		programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());
		programCollection.glslSources.add("test") << glu::FragmentSource(css.str()) << buildOptions;
		break;
	}
}
681 
createInstance(Context & context) const682 TestInstance* MemoryModelTestCase::createInstance (Context& context) const
683 {
684 	return new MemoryModelTestInstance(context, m_data);
685 }
686 
makeBufferCreateInfo(const VkDeviceSize bufferSize,const VkBufferUsageFlags usage)687 VkBufferCreateInfo makeBufferCreateInfo (const VkDeviceSize			bufferSize,
688 										 const VkBufferUsageFlags	usage)
689 {
690 	const VkBufferCreateInfo bufferCreateInfo =
691 	{
692 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// VkStructureType		sType;
693 		DE_NULL,								// const void*			pNext;
694 		(VkBufferCreateFlags)0,					// VkBufferCreateFlags	flags;
695 		bufferSize,								// VkDeviceSize			size;
696 		usage,									// VkBufferUsageFlags	usage;
697 		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode		sharingMode;
698 		0u,										// deUint32				queueFamilyIndexCount;
699 		DE_NULL,								// const deUint32*		pQueueFamilyIndices;
700 	};
701 	return bufferCreateInfo;
702 }
703 
makeDescriptorSet(const DeviceInterface & vk,const VkDevice device,const VkDescriptorPool descriptorPool,const VkDescriptorSetLayout setLayout)704 Move<VkDescriptorSet> makeDescriptorSet (const DeviceInterface&			vk,
705 										 const VkDevice					device,
706 										 const VkDescriptorPool			descriptorPool,
707 										 const VkDescriptorSetLayout	setLayout)
708 {
709 	const VkDescriptorSetAllocateInfo allocateParams =
710 	{
711 		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,		// VkStructureType				sType;
712 		DE_NULL,											// const void*					pNext;
713 		descriptorPool,										// VkDescriptorPool				descriptorPool;
714 		1u,													// deUint32						setLayoutCount;
715 		&setLayout,											// const VkDescriptorSetLayout*	pSetLayouts;
716 	};
717 	return allocateDescriptorSet(vk, device, &allocateParams);
718 }
719 
iterate(void)720 tcu::TestStatus MemoryModelTestInstance::iterate (void)
721 {
722 	const DeviceInterface&	vk						= m_context.getDeviceInterface();
723 	const VkDevice			device					= m_context.getDevice();
724 	Allocator&				allocator				= m_context.getDefaultAllocator();
725 
726 	VkPhysicalDeviceProperties2 properties;
727 	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
728 	properties.pNext = NULL;
729 
730 	m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties);
731 
732 	deUint32 DIM = 31;
733 	deUint32 NUM_WORKGROUP_EACH_DIM = 8;
734 	// If necessary, shrink workgroup size to fit HW limits
735 	if (DIM*DIM > properties.properties.limits.maxComputeWorkGroupInvocations)
736 	{
737 		DIM = (deUint32)deFloatSqrt((float)properties.properties.limits.maxComputeWorkGroupInvocations);
738 	}
739 	deUint32 NUM_INVOCATIONS = (DIM * DIM * NUM_WORKGROUP_EACH_DIM * NUM_WORKGROUP_EACH_DIM);
740 
741 	VkDeviceSize bufferSizes[3];
742 	de::MovePtr<BufferWithMemory> buffers[3];
743 	vk::VkDescriptorBufferInfo bufferDescriptors[3];
744 	de::MovePtr<BufferWithMemory> copyBuffer;
745 
746 	for (deUint32 i = 0; i < 3; ++i)
747 	{
748 		size_t elementSize = m_data.dataType == DATA_TYPE_UINT64 ? sizeof(deUint64) : sizeof(deUint32);
749 		// buffer2 is the "fail" buffer, and is always uint
750 		if (i == 2)
751 			elementSize = sizeof(deUint32);
752 		bufferSizes[i] = NUM_INVOCATIONS * elementSize;
753 
754 		bool local;
755 		switch (i)
756 		{
757 		default: DE_ASSERT(0); // fall through
758 		case 0:
759 			if (m_data.payloadSC != SC_BUFFER)
760 				continue;
761 			local = m_data.payloadMemLocal;
762 			break;
763 		case 1:
764 			if (m_data.guardSC != SC_BUFFER)
765 				continue;
766 			local = m_data.guardMemLocal;
767 			break;
768 		case 2: local = true; break;
769 		}
770 
771 		try
772 		{
773 			buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
774 				vk, device, allocator, makeBufferCreateInfo(bufferSizes[i], VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
775 				local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
776 		}
777 		catch (const tcu::NotSupportedError&)
778 		{
779 			if (!local)
780 			{
781 				TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
782 			}
783 			throw;
784 		}
785 		bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, bufferSizes[i]);
786 	}
787 
788 	// Try to use cached host memory for the buffer the CPU will read from, else fallback to host visible.
789 	try
790 	{
791 		copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
792 			vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible | MemoryRequirement::Cached));
793 	}
794 	catch (const tcu::NotSupportedError&)
795 	{
796 		copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
797 			vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));
798 	}
799 
800 	const VkImageCreateInfo			imageCreateInfo			=
801 	{
802 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	// VkStructureType		  sType;
803 		DE_NULL,								// const void*			  pNext;
804 		(VkImageCreateFlags)0u,					// VkImageCreateFlags	   flags;
805 		VK_IMAGE_TYPE_2D,						// VkImageType			  imageType;
806 		VK_FORMAT_R32_UINT,						// VkFormat				 format;
807 		{
808 			DIM*NUM_WORKGROUP_EACH_DIM,	// deUint32	width;
809 			DIM*NUM_WORKGROUP_EACH_DIM,	// deUint32	height;
810 			1u		// deUint32	depth;
811 		},										// VkExtent3D			   extent;
812 		1u,										// deUint32				 mipLevels;
813 		1u,										// deUint32				 arrayLayers;
814 		VK_SAMPLE_COUNT_1_BIT,					// VkSampleCountFlagBits	samples;
815 		VK_IMAGE_TILING_OPTIMAL,				// VkImageTiling			tiling;
816 		VK_IMAGE_USAGE_STORAGE_BIT
817 		| VK_IMAGE_USAGE_TRANSFER_SRC_BIT
818 		| VK_IMAGE_USAGE_TRANSFER_DST_BIT,		// VkImageUsageFlags		usage;
819 		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode			sharingMode;
820 		0u,										// deUint32				 queueFamilyIndexCount;
821 		DE_NULL,								// const deUint32*		  pQueueFamilyIndices;
822 		VK_IMAGE_LAYOUT_UNDEFINED				// VkImageLayout			initialLayout;
823 	};
824 	VkImageViewCreateInfo		imageViewCreateInfo		=
825 	{
826 		VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,	// VkStructureType			sType;
827 		DE_NULL,									// const void*				pNext;
828 		(VkImageViewCreateFlags)0u,					// VkImageViewCreateFlags	 flags;
829 		DE_NULL,									// VkImage					image;
830 		VK_IMAGE_VIEW_TYPE_2D,						// VkImageViewType			viewType;
831 		VK_FORMAT_R32_UINT,										// VkFormat				   format;
832 		{
833 			VK_COMPONENT_SWIZZLE_R,	// VkComponentSwizzle	r;
834 			VK_COMPONENT_SWIZZLE_G,	// VkComponentSwizzle	g;
835 			VK_COMPONENT_SWIZZLE_B,	// VkComponentSwizzle	b;
836 			VK_COMPONENT_SWIZZLE_A	// VkComponentSwizzle	a;
837 		},											// VkComponentMapping		 components;
838 		{
839 			VK_IMAGE_ASPECT_COLOR_BIT,	// VkImageAspectFlags	aspectMask;
840 			0u,							// deUint32			  baseMipLevel;
841 			1u,							// deUint32			  levelCount;
842 			0u,							// deUint32			  baseArrayLayer;
843 			1u							// deUint32			  layerCount;
844 		}											// VkImageSubresourceRange	subresourceRange;
845 	};
846 
847 
848 	de::MovePtr<ImageWithMemory> images[2];
849 	Move<VkImageView> imageViews[2];
850 	vk::VkDescriptorImageInfo imageDescriptors[2];
851 
852 	for (deUint32 i = 0; i < 2; ++i)
853 	{
854 
855 		bool local;
856 		switch (i)
857 		{
858 		default: DE_ASSERT(0); // fall through
859 		case 0:
860 			if (m_data.payloadSC != SC_IMAGE)
861 				continue;
862 			local = m_data.payloadMemLocal;
863 			break;
864 		case 1:
865 			if (m_data.guardSC != SC_IMAGE)
866 				continue;
867 			local = m_data.guardMemLocal;
868 			break;
869 		}
870 
871 		try
872 		{
873 			images[i] = de::MovePtr<ImageWithMemory>(new ImageWithMemory(
874 				vk, device, allocator, imageCreateInfo, local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
875 		}
876 		catch (const tcu::NotSupportedError&)
877 		{
878 			if (!local)
879 			{
880 				TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
881 			}
882 			throw;
883 		}
884 		imageViewCreateInfo.image = **images[i];
885 		imageViews[i] = createImageView(vk, device, &imageViewCreateInfo, NULL);
886 
887 		imageDescriptors[i] = makeDescriptorImageInfo(DE_NULL, *imageViews[i], VK_IMAGE_LAYOUT_GENERAL);
888 	}
889 
890 	vk::DescriptorSetLayoutBuilder layoutBuilder;
891 
892 	switch (m_data.payloadSC)
893 	{
894 	default:
895 	case SC_BUFFER:	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
896 	case SC_IMAGE:	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
897 	}
898 	switch (m_data.guardSC)
899 	{
900 	default:
901 	case SC_BUFFER:	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
902 	case SC_IMAGE:	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
903 	}
904 	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages);
905 
906 	vk::Unique<vk::VkDescriptorSetLayout>	descriptorSetLayout(layoutBuilder.build(vk, device));
907 
908 	vk::Unique<vk::VkDescriptorPool>		descriptorPool(vk::DescriptorPoolBuilder()
909 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3u)
910 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 3u)
911 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
912 	vk::Unique<vk::VkDescriptorSet>			descriptorSet		(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
913 
914 	vk::DescriptorSetUpdateBuilder setUpdateBuilder;
915 	switch (m_data.payloadSC)
916 	{
917 	default: DE_ASSERT(0); // fall through
918 	case SC_WORKGROUP:
919 		break;
920 	case SC_BUFFER:
921 		setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
922 			VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
923 		break;
924 	case SC_IMAGE:
925 		setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
926 			VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[0]);
927 		break;
928 	}
929 	switch (m_data.guardSC)
930 	{
931 	default: DE_ASSERT(0); // fall through
932 	case SC_WORKGROUP:
933 		break;
934 	case SC_BUFFER:
935 		setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
936 			VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[1]);
937 		break;
938 	case SC_IMAGE:
939 		setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
940 			VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[1]);
941 		break;
942 	}
943 	setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(2),
944 		VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[2]);
945 
946 	setUpdateBuilder.update(vk, device);
947 
948 
949 	const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
950 	{
951 		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,				// sType
952 		DE_NULL,													// pNext
953 		(VkPipelineLayoutCreateFlags)0,
954 		1,															// setLayoutCount
955 		&descriptorSetLayout.get(),									// pSetLayouts
956 		0u,															// pushConstantRangeCount
957 		DE_NULL,													// pPushConstantRanges
958 	};
959 
960 	Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
961 
962 	Move<VkPipeline> pipeline;
963 	Move<VkRenderPass> renderPass;
964 	Move<VkFramebuffer> framebuffer;
965 
966 	VkPipelineBindPoint bindPoint = m_data.stage == STAGE_COMPUTE ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
967 
968 	const deUint32 specData[2] = {DIM, NUM_WORKGROUP_EACH_DIM};
969 
970 	const vk::VkSpecializationMapEntry entries[3] =
971 	{
972 		{0, sizeof(deUint32) * 0, sizeof(deUint32)},
973 		{1, sizeof(deUint32) * 1, sizeof(deUint32)},
974 	};
975 
976 	const vk::VkSpecializationInfo specInfo =
977 	{
978 		2,						// mapEntryCount
979 		entries,				// pMapEntries
980 		sizeof(specData),		// dataSize
981 		specData				// pData
982 	};
983 
984 	if (m_data.stage == STAGE_COMPUTE)
985 	{
986 		const Unique<VkShaderModule>	shader						(createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));
987 
988 		const VkPipelineShaderStageCreateInfo	shaderCreateInfo =
989 		{
990 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
991 			DE_NULL,
992 			(VkPipelineShaderStageCreateFlags)0,
993 			VK_SHADER_STAGE_COMPUTE_BIT,								// stage
994 			*shader,													// shader
995 			"main",
996 			&specInfo,													// pSpecializationInfo
997 		};
998 
999 		const VkComputePipelineCreateInfo		pipelineCreateInfo =
1000 		{
1001 			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1002 			DE_NULL,
1003 			0u,															// flags
1004 			shaderCreateInfo,											// cs
1005 			*pipelineLayout,											// layout
1006 			(vk::VkPipeline)0,											// basePipelineHandle
1007 			0u,															// basePipelineIndex
1008 		};
1009 		pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo, NULL);
1010 	}
1011 	else
1012 	{
1013 
1014 		const vk::VkSubpassDescription		subpassDesc			=
1015 		{
1016 			(vk::VkSubpassDescriptionFlags)0,
1017 			vk::VK_PIPELINE_BIND_POINT_GRAPHICS,					// pipelineBindPoint
1018 			0u,														// inputCount
1019 			DE_NULL,												// pInputAttachments
1020 			0u,														// colorCount
1021 			DE_NULL,												// pColorAttachments
1022 			DE_NULL,												// pResolveAttachments
1023 			DE_NULL,												// depthStencilAttachment
1024 			0u,														// preserveCount
1025 			DE_NULL,												// pPreserveAttachments
1026 
1027 		};
1028 		const vk::VkRenderPassCreateInfo	renderPassParams	=
1029 		{
1030 			vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,			// sType
1031 			DE_NULL,												// pNext
1032 			(vk::VkRenderPassCreateFlags)0,
1033 			0u,														// attachmentCount
1034 			DE_NULL,												// pAttachments
1035 			1u,														// subpassCount
1036 			&subpassDesc,											// pSubpasses
1037 			0u,														// dependencyCount
1038 			DE_NULL,												// pDependencies
1039 		};
1040 
1041 		renderPass = createRenderPass(vk, device, &renderPassParams);
1042 
1043 		const vk::VkFramebufferCreateInfo	framebufferParams	=
1044 		{
1045 			vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,	// sType
1046 			DE_NULL,										// pNext
1047 			(vk::VkFramebufferCreateFlags)0,
1048 			*renderPass,									// renderPass
1049 			0u,												// attachmentCount
1050 			DE_NULL,										// pAttachments
1051 			DIM*NUM_WORKGROUP_EACH_DIM,						// width
1052 			DIM*NUM_WORKGROUP_EACH_DIM,						// height
1053 			1u,												// layers
1054 		};
1055 
1056 		framebuffer = createFramebuffer(vk, device, &framebufferParams);
1057 
1058 		const VkPipelineVertexInputStateCreateInfo		vertexInputStateCreateInfo		=
1059 		{
1060 			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType							sType;
1061 			DE_NULL,													// const void*								pNext;
1062 			(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags	flags;
1063 			0u,															// deUint32									vertexBindingDescriptionCount;
1064 			DE_NULL,													// const VkVertexInputBindingDescription*	pVertexBindingDescriptions;
1065 			0u,															// deUint32									vertexAttributeDescriptionCount;
1066 			DE_NULL														// const VkVertexInputAttributeDescription*	pVertexAttributeDescriptions;
1067 		};
1068 
1069 		const VkPipelineInputAssemblyStateCreateInfo	inputAssemblyStateCreateInfo	=
1070 		{
1071 			VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType							sType;
1072 			DE_NULL,														// const void*								pNext;
1073 			(VkPipelineInputAssemblyStateCreateFlags)0,						// VkPipelineInputAssemblyStateCreateFlags	flags;
1074 			(m_data.stage == STAGE_VERTEX) ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology						topology;
1075 			VK_FALSE														// VkBool32									primitiveRestartEnable;
1076 		};
1077 
1078 		const VkPipelineRasterizationStateCreateInfo	rasterizationStateCreateInfo	=
1079 		{
1080 			VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,		// VkStructureType							sType;
1081 			DE_NULL,														// const void*								pNext;
1082 			(VkPipelineRasterizationStateCreateFlags)0,						// VkPipelineRasterizationStateCreateFlags	flags;
1083 			VK_FALSE,														// VkBool32									depthClampEnable;
1084 			(m_data.stage == STAGE_VERTEX) ? VK_TRUE : VK_FALSE,			// VkBool32									rasterizerDiscardEnable;
1085 			VK_POLYGON_MODE_FILL,											// VkPolygonMode							polygonMode;
1086 			VK_CULL_MODE_NONE,												// VkCullModeFlags							cullMode;
1087 			VK_FRONT_FACE_CLOCKWISE,										// VkFrontFace								frontFace;
1088 			VK_FALSE,														// VkBool32									depthBiasEnable;
1089 			0.0f,															// float									depthBiasConstantFactor;
1090 			0.0f,															// float									depthBiasClamp;
1091 			0.0f,															// float									depthBiasSlopeFactor;
1092 			1.0f															// float									lineWidth;
1093 		};
1094 
1095 		const VkPipelineMultisampleStateCreateInfo		multisampleStateCreateInfo =
1096 		{
1097 			VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType						  sType
1098 			DE_NULL,													// const void*							  pNext
1099 			0u,															// VkPipelineMultisampleStateCreateFlags	flags
1100 			VK_SAMPLE_COUNT_1_BIT,										// VkSampleCountFlagBits					rasterizationSamples
1101 			VK_FALSE,													// VkBool32								 sampleShadingEnable
1102 			1.0f,														// float									minSampleShading
1103 			DE_NULL,													// const VkSampleMask*					  pSampleMask
1104 			VK_FALSE,													// VkBool32								 alphaToCoverageEnable
1105 			VK_FALSE													// VkBool32								 alphaToOneEnable
1106 		};
1107 
1108 		VkViewport viewport = makeViewport(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
1109 		VkRect2D scissor = makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
1110 
1111 		const VkPipelineViewportStateCreateInfo			viewportStateCreateInfo				=
1112 		{
1113 			VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,	// VkStructureType							 sType
1114 			DE_NULL,												// const void*								 pNext
1115 			(VkPipelineViewportStateCreateFlags)0,					// VkPipelineViewportStateCreateFlags		  flags
1116 			1u,														// deUint32									viewportCount
1117 			&viewport,												// const VkViewport*						   pViewports
1118 			1u,														// deUint32									scissorCount
1119 			&scissor												// const VkRect2D*							 pScissors
1120 		};
1121 
1122 		Move<VkShaderModule> fs;
1123 		Move<VkShaderModule> vs;
1124 
1125 		deUint32 numStages;
1126 		if (m_data.stage == STAGE_VERTEX)
1127 		{
1128 			vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1129 			fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); // bogus
1130 			numStages = 1u;
1131 		}
1132 		else
1133 		{
1134 			vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
1135 			fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1136 			numStages = 2u;
1137 		}
1138 
1139 		const VkPipelineShaderStageCreateInfo	shaderCreateInfo[2] = {
1140 			{
1141 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1142 				DE_NULL,
1143 				(VkPipelineShaderStageCreateFlags)0,
1144 				VK_SHADER_STAGE_VERTEX_BIT,									// stage
1145 				*vs,														// shader
1146 				"main",
1147 				&specInfo,													// pSpecializationInfo
1148 			},
1149 			{
1150 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1151 				DE_NULL,
1152 				(VkPipelineShaderStageCreateFlags)0,
1153 				VK_SHADER_STAGE_FRAGMENT_BIT,								// stage
1154 				*fs,														// shader
1155 				"main",
1156 				&specInfo,													// pSpecializationInfo
1157 			}
1158 		};
1159 
1160 		const VkGraphicsPipelineCreateInfo				graphicsPipelineCreateInfo		=
1161 		{
1162 			VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,	// VkStructureType									sType;
1163 			DE_NULL,											// const void*										pNext;
1164 			(VkPipelineCreateFlags)0,							// VkPipelineCreateFlags							flags;
1165 			numStages,											// deUint32											stageCount;
1166 			&shaderCreateInfo[0],								// const VkPipelineShaderStageCreateInfo*			pStages;
1167 			&vertexInputStateCreateInfo,						// const VkPipelineVertexInputStateCreateInfo*		pVertexInputState;
1168 			&inputAssemblyStateCreateInfo,						// const VkPipelineInputAssemblyStateCreateInfo*	pInputAssemblyState;
1169 			DE_NULL,											// const VkPipelineTessellationStateCreateInfo*		pTessellationState;
1170 			&viewportStateCreateInfo,							// const VkPipelineViewportStateCreateInfo*			pViewportState;
1171 			&rasterizationStateCreateInfo,						// const VkPipelineRasterizationStateCreateInfo*	pRasterizationState;
1172 			&multisampleStateCreateInfo,						// const VkPipelineMultisampleStateCreateInfo*		pMultisampleState;
1173 			DE_NULL,											// const VkPipelineDepthStencilStateCreateInfo*		pDepthStencilState;
1174 			DE_NULL,											// const VkPipelineColorBlendStateCreateInfo*		pColorBlendState;
1175 			DE_NULL,											// const VkPipelineDynamicStateCreateInfo*			pDynamicState;
1176 			pipelineLayout.get(),								// VkPipelineLayout									layout;
1177 			renderPass.get(),									// VkRenderPass										renderPass;
1178 			0u,													// deUint32											subpass;
1179 			DE_NULL,											// VkPipeline										basePipelineHandle;
1180 			0													// int												basePipelineIndex;
1181 		};
1182 
1183 		pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
1184 	}
1185 
1186 	const VkQueue				queue				= m_context.getUniversalQueue();
1187 	Move<VkCommandPool>				cmdPool					= createCommandPool(vk, device, 0, m_context.getUniversalQueueFamilyIndex());
1188 	Move<VkCommandBuffer>			cmdBuffer				= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1189 
1190 	beginCommandBuffer(vk, *cmdBuffer, 0u);
1191 
1192 	vk.cmdFillBuffer(*cmdBuffer, **buffers[2], 0, bufferSizes[2], 0);
1193 
1194 	for (deUint32 i = 0; i < 2; ++i)
1195 	{
1196 		if (!images[i])
1197 			continue;
1198 
1199 		const VkImageMemoryBarrier imageBarrier =
1200 		{
1201 			VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,				// VkStructureType		sType
1202 			DE_NULL,											// const void*			pNext
1203 			0u,													// VkAccessFlags		srcAccessMask
1204 			VK_ACCESS_TRANSFER_WRITE_BIT,						// VkAccessFlags		dstAccessMask
1205 			VK_IMAGE_LAYOUT_UNDEFINED,							// VkImageLayout		oldLayout
1206 			VK_IMAGE_LAYOUT_GENERAL,							// VkImageLayout		newLayout
1207 			VK_QUEUE_FAMILY_IGNORED,							// uint32_t				srcQueueFamilyIndex
1208 			VK_QUEUE_FAMILY_IGNORED,							// uint32_t				dstQueueFamilyIndex
1209 			**images[i],										// VkImage				image
1210 			{
1211 				VK_IMAGE_ASPECT_COLOR_BIT,				// VkImageAspectFlags	aspectMask
1212 				0u,										// uint32_t				baseMipLevel
1213 				1u,										// uint32_t				mipLevels,
1214 				0u,										// uint32_t				baseArray
1215 				1u,										// uint32_t				arraySize
1216 			}
1217 		};
1218 
1219 		vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
1220 							 (VkDependencyFlags)0,
1221 							  0, (const VkMemoryBarrier*)DE_NULL,
1222 							  0, (const VkBufferMemoryBarrier*)DE_NULL,
1223 							  1, &imageBarrier);
1224 	}
1225 
1226 	vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
1227 	vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
1228 
1229 	VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
1230 	VkClearValue clearColor = makeClearValueColorU32(0,0,0,0);
1231 
1232 	VkMemoryBarrier					memBarrier =
1233 	{
1234 		VK_STRUCTURE_TYPE_MEMORY_BARRIER,	// sType
1235 		DE_NULL,							// pNext
1236 		0u,									// srcAccessMask
1237 		0u,									// dstAccessMask
1238 	};
1239 
1240 	for (deUint32 iters = 0; iters < 200; ++iters)
1241 	{
1242 		for (deUint32 i = 0; i < 2; ++i)
1243 		{
1244 			if (buffers[i])
1245 				vk.cmdFillBuffer(*cmdBuffer, **buffers[i], 0, bufferSizes[i], 0);
1246 			if (images[i])
1247 				vk.cmdClearColorImage(*cmdBuffer, **images[i], VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);
1248 		}
1249 
1250 		memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1251 		memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1252 		vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages,
1253 			0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1254 
1255 		if (m_data.stage == STAGE_COMPUTE)
1256 		{
1257 			vk.cmdDispatch(*cmdBuffer, NUM_WORKGROUP_EACH_DIM, NUM_WORKGROUP_EACH_DIM, 1);
1258 		}
1259 		else
1260 		{
1261 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
1262 							makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM),
1263 							0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE);
1264 			// Draw a point cloud for vertex shader testing, and a single quad for fragment shader testing
1265 			if (m_data.stage == STAGE_VERTEX)
1266 			{
1267 				vk.cmdDraw(*cmdBuffer, DIM*DIM*NUM_WORKGROUP_EACH_DIM*NUM_WORKGROUP_EACH_DIM, 1u, 0u, 0u);
1268 			}
1269 			else
1270 			{
1271 				vk.cmdDraw(*cmdBuffer, 4u, 1u, 0u, 0u);
1272 			}
1273 			endRenderPass(vk, *cmdBuffer);
1274 		}
1275 
1276 		memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1277 		memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
1278 		vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT,
1279 			0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1280 	}
1281 
1282 	const VkBufferCopy	copyParams =
1283 	{
1284 		(VkDeviceSize)0u,						// srcOffset
1285 		(VkDeviceSize)0u,						// dstOffset
1286 		bufferSizes[2]							// size
1287 	};
1288 
1289 	vk.cmdCopyBuffer(*cmdBuffer, **buffers[2], **copyBuffer, 1, &copyParams);
1290 
1291 	endCommandBuffer(vk, *cmdBuffer);
1292 
1293 	submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
1294 
1295 	tcu::TestLog& log = m_context.getTestContext().getLog();
1296 
1297 	deUint32 *ptr = (deUint32 *)copyBuffer->getAllocation().getHostPtr();
1298 	invalidateMappedMemoryRange(vk, device, copyBuffer->getAllocation().getMemory(), copyBuffer->getAllocation().getOffset(), bufferSizes[2]);
1299 	qpTestResult res = QP_TEST_RESULT_PASS;
1300 
1301 	deUint32 numErrors = 0;
1302 	for (deUint32 i = 0; i < NUM_INVOCATIONS; ++i)
1303 	{
1304 		if (ptr[i] != 0)
1305 		{
1306 			if (numErrors < 256)
1307 			{
1308 				log << tcu::TestLog::Message << "Failed invocation: " << i << tcu::TestLog::EndMessage;
1309 			}
1310 			numErrors++;
1311 			res = QP_TEST_RESULT_FAIL;
1312 		}
1313 	}
1314 
1315 	if (numErrors)
1316 	{
1317 		log << tcu::TestLog::Message << "Total Errors: " << numErrors << tcu::TestLog::EndMessage;
1318 	}
1319 
1320 	return tcu::TestStatus(res, qpGetTestResultName(res));
1321 }
1322 
1323 }	// anonymous
1324 
createTests(tcu::TestContext & testCtx)1325 tcu::TestCaseGroup*	createTests (tcu::TestContext& testCtx)
1326 {
1327 	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
1328 			testCtx, "memory_model", "Memory model tests"));
1329 
1330 	typedef struct
1331 	{
1332 		deUint32				value;
1333 		const char*				name;
1334 		const char*				description;
1335 	} TestGroupCase;
1336 
1337 	TestGroupCase ttCases[] =
1338 	{
1339 		{ TT_MP,	"message_passing",	"message passing"		},
1340 		{ TT_WAR,	"write_after_read",	"write after read"		},
1341 	};
1342 
1343 	TestGroupCase core11Cases[] =
1344 	{
1345 		{ 1,	"core11",	"Supported by Vulkan1.1"							},
1346 		{ 0,	"ext",		"Requires VK_KHR_vulkan_memory_model extension"		},
1347 	};
1348 
1349 	TestGroupCase dtCases[] =
1350 	{
1351 		{ DATA_TYPE_UINT,	"u32",	"uint32_t atomics"		},
1352 		{ DATA_TYPE_UINT64,	"u64",	"uint64_t atomics"		},
1353 	};
1354 
1355 	TestGroupCase cohCases[] =
1356 	{
1357 		{ 1,	"coherent",		"coherent payload variable"			},
1358 		{ 0,	"noncoherent",	"noncoherent payload variable"		},
1359 	};
1360 
1361 	TestGroupCase stCases[] =
1362 	{
1363 		{ ST_FENCE_FENCE,					"fence_fence",					"release fence, acquire fence"			},
1364 		{ ST_FENCE_ATOMIC,					"fence_atomic",					"release fence, atomic acquire"			},
1365 		{ ST_ATOMIC_FENCE,					"atomic_fence",					"atomic release, acquire fence"			},
1366 		{ ST_ATOMIC_ATOMIC,					"atomic_atomic",				"atomic release, atomic acquire"		},
1367 		{ ST_CONTROL_BARRIER,				"control_barrier",				"control barrier"						},
1368 		{ ST_CONTROL_AND_MEMORY_BARRIER,	"control_and_memory_barrier",	"control barrier with release/acquire"	},
1369 	};
1370 
1371 	TestGroupCase rmwCases[] =
1372 	{
1373 		{ 0,	"atomicwrite",		"atomic write"		},
1374 		{ 1,	"atomicrmw",		"atomic rmw"		},
1375 	};
1376 
1377 	TestGroupCase scopeCases[] =
1378 	{
1379 		{ SCOPE_DEVICE,			"device",		"device scope"			},
1380 		{ SCOPE_QUEUEFAMILY,	"queuefamily",	"queuefamily scope"		},
1381 		{ SCOPE_WORKGROUP,		"workgroup",	"workgroup scope"		},
1382 		{ SCOPE_SUBGROUP,		"subgroup",		"subgroup scope"		},
1383 	};
1384 
1385 	TestGroupCase plCases[] =
1386 	{
1387 		{ 0,	"payload_nonlocal",		"payload variable in non-local memory"		},
1388 		{ 1,	"payload_local",		"payload variable in local memory"			},
1389 	};
1390 
1391 	TestGroupCase pscCases[] =
1392 	{
1393 		{ SC_BUFFER,	"buffer",		"payload variable in buffer memory"			},
1394 		{ SC_IMAGE,		"image",		"payload variable in image memory"			},
1395 		{ SC_WORKGROUP,	"workgroup",	"payload variable in workgroup memory"		},
1396 	};
1397 
1398 	TestGroupCase glCases[] =
1399 	{
1400 		{ 0,	"guard_nonlocal",		"guard variable in non-local memory"		},
1401 		{ 1,	"guard_local",			"guard variable in local memory"			},
1402 	};
1403 
1404 	TestGroupCase gscCases[] =
1405 	{
1406 		{ SC_BUFFER,	"buffer",		"guard variable in buffer memory"			},
1407 		{ SC_IMAGE,		"image",		"guard variable in image memory"			},
1408 		{ SC_WORKGROUP,	"workgroup",	"guard variable in workgroup memory"		},
1409 	};
1410 
1411 	TestGroupCase stageCases[] =
1412 	{
1413 		{ STAGE_COMPUTE,	"comp",		"compute shader"			},
1414 		{ STAGE_VERTEX,		"vert",		"vertex shader"				},
1415 		{ STAGE_FRAGMENT,	"frag",		"fragment shader"			},
1416 	};
1417 
1418 
1419 	for (int ttNdx = 0; ttNdx < DE_LENGTH_OF_ARRAY(ttCases); ttNdx++)
1420 	{
1421 		de::MovePtr<tcu::TestCaseGroup> ttGroup(new tcu::TestCaseGroup(testCtx, ttCases[ttNdx].name, ttCases[ttNdx].description));
1422 		for (int core11Ndx = 0; core11Ndx < DE_LENGTH_OF_ARRAY(core11Cases); core11Ndx++)
1423 		{
1424 			de::MovePtr<tcu::TestCaseGroup> core11Group(new tcu::TestCaseGroup(testCtx, core11Cases[core11Ndx].name, core11Cases[core11Ndx].description));
1425 			for (int dtNdx = 0; dtNdx < DE_LENGTH_OF_ARRAY(dtCases); dtNdx++)
1426 			{
1427 				de::MovePtr<tcu::TestCaseGroup> dtGroup(new tcu::TestCaseGroup(testCtx, dtCases[dtNdx].name, dtCases[dtNdx].description));
1428 				for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
1429 				{
1430 					de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name, cohCases[cohNdx].description));
1431 					for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
1432 					{
1433 						de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name, stCases[stNdx].description));
1434 						for (int rmwNdx = 0; rmwNdx < DE_LENGTH_OF_ARRAY(rmwCases); rmwNdx++)
1435 						{
1436 							de::MovePtr<tcu::TestCaseGroup> rmwGroup(new tcu::TestCaseGroup(testCtx, rmwCases[rmwNdx].name, rmwCases[rmwNdx].description));
1437 							for (int scopeNdx = 0; scopeNdx < DE_LENGTH_OF_ARRAY(scopeCases); scopeNdx++)
1438 							{
1439 								de::MovePtr<tcu::TestCaseGroup> scopeGroup(new tcu::TestCaseGroup(testCtx, scopeCases[scopeNdx].name, scopeCases[scopeNdx].description));
1440 								for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
1441 								{
1442 									de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name, plCases[plNdx].description));
1443 									for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
1444 									{
1445 										de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name, pscCases[pscNdx].description));
1446 										for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
1447 										{
1448 											de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name, glCases[glNdx].description));
1449 											for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
1450 											{
1451 												de::MovePtr<tcu::TestCaseGroup> gscGroup(new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name, gscCases[gscNdx].description));
1452 												for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stageCases); stageNdx++)
1453 												{
1454 													CaseDef c =
1455 													{
1456 														!!plCases[plNdx].value,					// bool payloadMemLocal;
1457 														!!glCases[glNdx].value,					// bool guardMemLocal;
1458 														!!cohCases[cohNdx].value,				// bool coherent;
1459 														!!core11Cases[core11Ndx].value,			// bool core11;
1460 														!!rmwCases[rmwNdx].value,				// bool atomicRMW;
1461 														(TestType)ttCases[ttNdx].value,			// TestType testType;
1462 														(StorageClass)pscCases[pscNdx].value,	// StorageClass payloadSC;
1463 														(StorageClass)gscCases[gscNdx].value,	// StorageClass guardSC;
1464 														(Scope)scopeCases[scopeNdx].value,		// Scope scope;
1465 														(SyncType)stCases[stNdx].value,			// SyncType syncType;
1466 														(Stage)stageCases[stageNdx].value,		// Stage stage;
1467 														(DataType)dtCases[dtNdx].value,			// DataType dataType;
1468 													};
1469 
1470 													// Mustpass11 tests should only exercise things we expect to work on
1471 													// existing implementations. Exclude noncoherent tests which require
1472 													// new extensions, and assume atomic synchronization wouldn't work
1473 													// (i.e. atomics may be implemented as relaxed atomics). Exclude
1474 													// queuefamily scope which doesn't exist in Vulkan 1.1.
1475 													if (c.core11 &&
1476 														(c.coherent == 0 ||
1477 														c.syncType == ST_FENCE_ATOMIC ||
1478 														c.syncType == ST_ATOMIC_FENCE ||
1479 														c.syncType == ST_ATOMIC_ATOMIC ||
1480 														c.dataType == DATA_TYPE_UINT64 ||
1481 														c.scope == SCOPE_QUEUEFAMILY))
1482 													{
1483 														continue;
1484 													}
1485 
1486 													if (c.stage != STAGE_COMPUTE &&
1487 														c.scope == SCOPE_WORKGROUP)
1488 													{
1489 														continue;
1490 													}
1491 
1492 													// Don't exercise local and non-local for workgroup memory
1493 													// Also don't exercise workgroup memory for non-compute stages
1494 													if (c.payloadSC == SC_WORKGROUP && (c.payloadMemLocal != 0 || c.stage != STAGE_COMPUTE))
1495 													{
1496 														continue;
1497 													}
1498 													if (c.guardSC == SC_WORKGROUP && (c.guardMemLocal != 0 || c.stage != STAGE_COMPUTE))
1499 													{
1500 														continue;
1501 													}
1502 													// Can't do control barrier with larger than workgroup scope, or non-compute stages
1503 													if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
1504 														(c.scope == SCOPE_DEVICE || c.scope == SCOPE_QUEUEFAMILY || c.stage != STAGE_COMPUTE))
1505 													{
1506 														continue;
1507 													}
1508 
1509 													// Limit RMW atomics to ST_ATOMIC_ATOMIC, just to reduce # of test cases
1510 													if (c.atomicRMW && c.syncType != ST_ATOMIC_ATOMIC)
1511 													{
1512 														continue;
1513 													}
1514 
1515 													// uint64 testing is primarily for atomics, so only test it for ST_ATOMIC_ATOMIC
1516 													if (c.dataType == DATA_TYPE_UINT64 && c.syncType != ST_ATOMIC_ATOMIC)
1517 													{
1518 														continue;
1519 													}
1520 
1521 													// No 64-bit image types, so skip tests with both payload and guard in image memory
1522 													if (c.dataType == DATA_TYPE_UINT64 && c.payloadSC == SC_IMAGE && c.guardSC == SC_IMAGE)
1523 													{
1524 														continue;
1525 													}
1526 
													// Control barrier tests don't use a guard variable, so only run them
													// with guardSC == 0 and guardMemLocal == 0
1528 													if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
1529 														(c.guardSC != 0 || c.guardMemLocal != 0))
1530 													{
1531 														continue;
1532 													}
1533 
1534 													gscGroup->addChild(new MemoryModelTestCase(testCtx, stageCases[stageNdx].name, stageCases[stageNdx].description, c));
1535 												}
1536 												glGroup->addChild(gscGroup.release());
1537 											}
1538 											pscGroup->addChild(glGroup.release());
1539 										}
1540 										plGroup->addChild(pscGroup.release());
1541 									}
1542 									scopeGroup->addChild(plGroup.release());
1543 								}
1544 								rmwGroup->addChild(scopeGroup.release());
1545 							}
1546 							stGroup->addChild(rmwGroup.release());
1547 						}
1548 						cohGroup->addChild(stGroup.release());
1549 					}
1550 					dtGroup->addChild(cohGroup.release());
1551 				}
1552 				core11Group->addChild(dtGroup.release());
1553 			}
1554 			ttGroup->addChild(core11Group.release());
1555 		}
1556 		group->addChild(ttGroup.release());
1557 	}
1558 	return group.release();
1559 }
1560 
1561 }	// MemoryModel
1562 }	// vkt
1563