1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 The Khronos Group Inc.
6  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7  * Copyright (c) 2016 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief SSBO layout case.
24  *//*--------------------------------------------------------------------*/
25 
26 #include "vktSSBOLayoutCase.hpp"
27 #include "gluShaderProgram.hpp"
28 #include "gluContextInfo.hpp"
29 #include "gluShaderUtil.hpp"
30 #include "gluVarType.hpp"
31 #include "gluVarTypeUtil.hpp"
32 #include "tcuTestLog.hpp"
33 #include "deRandom.hpp"
34 #include "deStringUtil.hpp"
35 #include "deMemory.h"
36 #include "deString.h"
37 #include "deMath.h"
38 #include "deSharedPtr.hpp"
39 #include "deFloat16.h"
40 
41 #include "vkBuilderUtil.hpp"
42 #include "vkMemUtil.hpp"
43 #include "vkPrograms.hpp"
44 #include "vkQueryUtil.hpp"
45 #include "vkRef.hpp"
46 #include "vkRefUtil.hpp"
47 #include "vkTypeUtil.hpp"
48 #include "vkCmdUtil.hpp"
49 
50 namespace vkt
51 {
52 namespace ssbo
53 {
54 
55 using tcu::TestLog;
56 using std::string;
57 using std::vector;
58 using glu::VarType;
59 using glu::StructType;
60 using glu::StructMember;
61 
//! Helper for pretty-printing a layout flag bitmask (see operator<< below).
struct LayoutFlagsFmt
{
	deUint32 flags;	// Combination of LAYOUT_* bits.
	LayoutFlagsFmt (deUint32 flags_) : flags(flags_) {}
};
67 
operator <<(std::ostream & str,const LayoutFlagsFmt & fmt)68 std::ostream& operator<< (std::ostream& str, const LayoutFlagsFmt& fmt)
69 {
70 	static const struct
71 	{
72 		deUint32	bit;
73 		const char*	token;
74 	} bitDesc[] =
75 	{
76 		{ LAYOUT_STD140,		"std140"		},
77 		{ LAYOUT_STD430,		"std430"		},
78 		{ LAYOUT_SCALAR,		"scalar"		},
79 		{ LAYOUT_ROW_MAJOR,		"row_major"		},
80 		{ LAYOUT_COLUMN_MAJOR,	"column_major"	}
81 	};
82 
83 	deUint32 remBits = fmt.flags;
84 	for (int descNdx = 0; descNdx < DE_LENGTH_OF_ARRAY(bitDesc); descNdx++)
85 	{
86 		if (remBits & bitDesc[descNdx].bit)
87 		{
88 			if (remBits != fmt.flags)
89 				str << ", ";
90 			str << bitDesc[descNdx].token;
91 			remBits &= ~bitDesc[descNdx].bit;
92 		}
93 	}
94 	DE_ASSERT(remBits == 0);
95 	return str;
96 }
97 
98 // BufferVar implementation.
99 
//! Buffer variable: name, type and per-variable layout flags.
//! The offset starts as "not set" (~0u) and is assigned later when the
//! reference layout is computed (see computeReferenceLayout / setOffset).
BufferVar::BufferVar (const char* name, const VarType& type, deUint32 flags)
	: m_name	(name)
	, m_type	(type)
	, m_flags	(flags)
	, m_offset	(~0u)	// Sentinel: offset not computed yet.
{
}
107 
108 // BufferBlock implementation.
109 
//! Buffer block with the given interface block name; starts as a non-array
//! block with no layout flags.
BufferBlock::BufferBlock (const char* blockName)
	: m_blockName		(blockName)
	, m_arraySize		(-1)	// Placeholder; overwritten by setArraySize() below.
	, m_flags			(0)
{
	// Initialize as non-array block; this also sizes the
	// last-unsized-array-size bookkeeping vector.
	setArraySize(0);
}
117 
setArraySize(int arraySize)118 void BufferBlock::setArraySize (int arraySize)
119 {
120 	DE_ASSERT(arraySize >= 0);
121 	m_lastUnsizedArraySizes.resize(arraySize == 0 ? 1 : arraySize, 0);
122 	m_arraySize = arraySize;
123 }
124 
operator <<(std::ostream & stream,const BlockLayoutEntry & entry)125 std::ostream& operator<< (std::ostream& stream, const BlockLayoutEntry& entry)
126 {
127 	stream << entry.name << " { name = " << entry.name
128 		   << ", size = " << entry.size
129 		   << ", activeVarIndices = [";
130 
131 	for (vector<int>::const_iterator i = entry.activeVarIndices.begin(); i != entry.activeVarIndices.end(); i++)
132 	{
133 		if (i != entry.activeVarIndices.begin())
134 			stream << ", ";
135 		stream << *i;
136 	}
137 
138 	stream << "] }";
139 	return stream;
140 }
141 
isUnsizedArray(const BufferVarLayoutEntry & entry)142 static bool isUnsizedArray (const BufferVarLayoutEntry& entry)
143 {
144 	DE_ASSERT(entry.arraySize != 0 || entry.topLevelArraySize != 0);
145 	return entry.arraySize == 0 || entry.topLevelArraySize == 0;
146 }
147 
operator <<(std::ostream & stream,const BufferVarLayoutEntry & entry)148 std::ostream& operator<< (std::ostream& stream, const BufferVarLayoutEntry& entry)
149 {
150 	stream << entry.name << " { type = " << glu::getDataTypeName(entry.type)
151 		   << ", blockNdx = " << entry.blockNdx
152 		   << ", offset = " << entry.offset
153 		   << ", arraySize = " << entry.arraySize
154 		   << ", arrayStride = " << entry.arrayStride
155 		   << ", matrixStride = " << entry.matrixStride
156 		   << ", topLevelArraySize = " << entry.topLevelArraySize
157 		   << ", topLevelArrayStride = " << entry.topLevelArrayStride
158 		   << ", isRowMajor = " << (entry.isRowMajor ? "true" : "false")
159 		   << " }";
160 	return stream;
161 }
162 
163 // \todo [2012-01-24 pyry] Speed up lookups using hash.
164 
getVariableIndex(const string & name) const165 int BufferLayout::getVariableIndex (const string& name) const
166 {
167 	for (int ndx = 0; ndx < (int)bufferVars.size(); ndx++)
168 	{
169 		if (bufferVars[ndx].name == name)
170 			return ndx;
171 	}
172 	return -1;
173 }
174 
getBlockIndex(const string & name) const175 int BufferLayout::getBlockIndex (const string& name) const
176 {
177 	for (int ndx = 0; ndx < (int)blocks.size(); ndx++)
178 	{
179 		if (blocks[ndx].name == name)
180 			return ndx;
181 	}
182 	return -1;
183 }
184 
185 // ShaderInterface implementation.
186 
ShaderInterface::ShaderInterface (void)
{
	// Nothing to initialize; structs and blocks are added later via
	// allocStruct() / allocBlock().
}
190 
~ShaderInterface(void)191 ShaderInterface::~ShaderInterface (void)
192 {
193 	for (std::vector<StructType*>::iterator i = m_structs.begin(); i != m_structs.end(); i++)
194 		delete *i;
195 
196 	for (std::vector<BufferBlock*>::iterator i = m_bufferBlocks.begin(); i != m_bufferBlocks.end(); i++)
197 		delete *i;
198 }
199 
//! Allocates a new named struct type owned by this interface and returns a
//! reference to it.
StructType& ShaderInterface::allocStruct (const char* name)
{
	// Reserve first so push_back() cannot throw after the allocation below,
	// which would leak the new StructType.
	m_structs.reserve(m_structs.size()+1);
	m_structs.push_back(new StructType(name));
	return *m_structs.back();
}
206 
207 struct StructNameEquals
208 {
209 	std::string name;
210 
StructNameEqualsvkt::ssbo::StructNameEquals211 	StructNameEquals (const char* name_) : name(name_) {}
212 
operator ()vkt::ssbo::StructNameEquals213 	bool operator() (const StructType* type) const
214 	{
215 		return type->getTypeName() && name == type->getTypeName();
216 	}
217 };
218 
findStruct(const char * name) const219 const StructType* ShaderInterface::findStruct (const char* name) const
220 {
221 	std::vector<StructType*>::const_iterator pos = std::find_if(m_structs.begin(), m_structs.end(), StructNameEquals(name));
222 	return pos != m_structs.end() ? *pos : DE_NULL;
223 }
224 
getNamedStructs(std::vector<const StructType * > & structs) const225 void ShaderInterface::getNamedStructs (std::vector<const StructType*>& structs) const
226 {
227 	for (std::vector<StructType*>::const_iterator i = m_structs.begin(); i != m_structs.end(); i++)
228 	{
229 		if ((*i)->getTypeName() != DE_NULL)
230 			structs.push_back(*i);
231 	}
232 }
233 
//! Allocates a new buffer block owned by this interface and returns a
//! reference to it.
BufferBlock& ShaderInterface::allocBlock (const char* name)
{
	// Reserve first so push_back() cannot throw after the allocation below,
	// which would leak the new BufferBlock.
	m_bufferBlocks.reserve(m_bufferBlocks.size()+1);
	m_bufferBlocks.push_back(new BufferBlock(name));
	return *m_bufferBlocks.back();
}
240 
241 namespace // Utilities
242 {
243 // Layout computation.
244 
getDataTypeByteSize(glu::DataType type)245 int getDataTypeByteSize (glu::DataType type)
246 {
247 	if (deInRange32(type, glu::TYPE_UINT8, glu::TYPE_UINT8_VEC4) || deInRange32(type, glu::TYPE_INT8, glu::TYPE_INT8_VEC4))
248 	{
249 		return glu::getDataTypeScalarSize(type)*(int)sizeof(deUint8);
250 	}
251 	else if (deInRange32(type, glu::TYPE_UINT16, glu::TYPE_UINT16_VEC4) || deInRange32(type, glu::TYPE_INT16, glu::TYPE_INT16_VEC4) || deInRange32(type, glu::TYPE_FLOAT16, glu::TYPE_FLOAT16_VEC4))
252 	{
253 		return glu::getDataTypeScalarSize(type)*(int)sizeof(deUint16);
254 	}
255 	else
256 	{
257 		return glu::getDataTypeScalarSize(type)*(int)sizeof(deUint32);
258 	}
259 }
260 
//! Returns base alignment of the data type in bytes: component size times the
//! component count, except that 3-component vectors align like 4-component
//! ones. Asserts on types not handled here (e.g. matrices).
int getDataTypeByteAlignment (glu::DataType type)
{
	switch (type)
	{
		case glu::TYPE_FLOAT:
		case glu::TYPE_INT:
		case glu::TYPE_UINT:
		case glu::TYPE_BOOL:		return 1*(int)sizeof(deUint32);

		case glu::TYPE_FLOAT_VEC2:
		case glu::TYPE_INT_VEC2:
		case glu::TYPE_UINT_VEC2:
		case glu::TYPE_BOOL_VEC2:	return 2*(int)sizeof(deUint32);

		case glu::TYPE_FLOAT_VEC3:
		case glu::TYPE_INT_VEC3:
		case glu::TYPE_UINT_VEC3:
		case glu::TYPE_BOOL_VEC3:	// Fall-through to vec4

		case glu::TYPE_FLOAT_VEC4:
		case glu::TYPE_INT_VEC4:
		case glu::TYPE_UINT_VEC4:
		case glu::TYPE_BOOL_VEC4:	return 4*(int)sizeof(deUint32);

		case glu::TYPE_UINT8:
		case glu::TYPE_INT8	:			return 1*(int)sizeof(deUint8);

		case glu::TYPE_UINT8_VEC2:
		case glu::TYPE_INT8_VEC2:		return 2*(int)sizeof(deUint8);

		case glu::TYPE_UINT8_VEC3:
		case glu::TYPE_INT8_VEC3:		// Fall-through to vec4

		case glu::TYPE_UINT8_VEC4:
		case glu::TYPE_INT8_VEC4:		return 4*(int)sizeof(deUint8);

		case glu::TYPE_UINT16:
		case glu::TYPE_INT16:
		case glu::TYPE_FLOAT16:			return 1*(int)sizeof(deUint16);

		case glu::TYPE_UINT16_VEC2:
		case glu::TYPE_INT16_VEC2:
		case glu::TYPE_FLOAT16_VEC2:	return 2*(int)sizeof(deUint16);

		case glu::TYPE_UINT16_VEC3:
		case glu::TYPE_INT16_VEC3:
		case glu::TYPE_FLOAT16_VEC3:	// Fall-through to vec4

		case glu::TYPE_UINT16_VEC4:
		case glu::TYPE_INT16_VEC4:
		case glu::TYPE_FLOAT16_VEC4:	return 4*(int)sizeof(deUint16);

		default:
			// Unsupported type for this helper.
			DE_ASSERT(false);
			return 0;
	}
}
318 
//! Rounds a up to the nearest multiple of b; returns a unchanged if it is
//! already a multiple of b.
static inline int deRoundUp32 (int a, int b)
{
	const int quotient = a/b;

	if (quotient*b == a)
		return a;	// Already a multiple of b.

	return (quotient+1)*b;
}
324 
computeStd140BaseAlignment(const VarType & type,deUint32 layoutFlags)325 int computeStd140BaseAlignment (const VarType& type, deUint32 layoutFlags)
326 {
327 	const int vec4Alignment = (int)sizeof(deUint32)*4;
328 
329 	if (type.isBasicType())
330 	{
331 		glu::DataType basicType = type.getBasicType();
332 
333 		if (glu::isDataTypeMatrix(basicType))
334 		{
335 			const bool	isRowMajor	= !!(layoutFlags & LAYOUT_ROW_MAJOR);
336 			const int	vecSize		= isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
337 												 : glu::getDataTypeMatrixNumRows(basicType);
338 			const int	vecAlign	= deAlign32(getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize)), vec4Alignment);
339 
340 			return vecAlign;
341 		}
342 		else
343 			return getDataTypeByteAlignment(basicType);
344 	}
345 	else if (type.isArrayType())
346 	{
347 		int elemAlignment = computeStd140BaseAlignment(type.getElementType(), layoutFlags);
348 
349 		// Round up to alignment of vec4
350 		return deAlign32(elemAlignment, vec4Alignment);
351 	}
352 	else
353 	{
354 		DE_ASSERT(type.isStructType());
355 
356 		int maxBaseAlignment = 0;
357 
358 		for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
359 			maxBaseAlignment = de::max(maxBaseAlignment, computeStd140BaseAlignment(memberIter->getType(), layoutFlags));
360 
361 		return deAlign32(maxBaseAlignment, vec4Alignment);
362 	}
363 }
364 
computeStd430BaseAlignment(const VarType & type,deUint32 layoutFlags)365 int computeStd430BaseAlignment (const VarType& type, deUint32 layoutFlags)
366 {
367 	// Otherwise identical to std140 except that alignment of structures and arrays
368 	// are not rounded up to alignment of vec4.
369 
370 	if (type.isBasicType())
371 	{
372 		glu::DataType basicType = type.getBasicType();
373 
374 		if (glu::isDataTypeMatrix(basicType))
375 		{
376 			const bool	isRowMajor	= !!(layoutFlags & LAYOUT_ROW_MAJOR);
377 			const int	vecSize		= isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
378 												 : glu::getDataTypeMatrixNumRows(basicType);
379 			const int	vecAlign	= getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize));
380 			return vecAlign;
381 		}
382 		else
383 			return getDataTypeByteAlignment(basicType);
384 	}
385 	else if (type.isArrayType())
386 	{
387 		return computeStd430BaseAlignment(type.getElementType(), layoutFlags);
388 	}
389 	else
390 	{
391 		DE_ASSERT(type.isStructType());
392 
393 		int maxBaseAlignment = 0;
394 
395 		for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
396 			maxBaseAlignment = de::max(maxBaseAlignment, computeStd430BaseAlignment(memberIter->getType(), layoutFlags));
397 
398 		return maxBaseAlignment;
399 	}
400 }
401 
computeRelaxedBlockBaseAlignment(const VarType & type,deUint32 layoutFlags)402 int computeRelaxedBlockBaseAlignment (const VarType& type, deUint32 layoutFlags)
403 {
404 	if (type.isBasicType())
405 	{
406 		glu::DataType basicType = type.getBasicType();
407 
408 		if (glu::isDataTypeVector(basicType))
409 			return getDataTypeByteAlignment(glu::getDataTypeScalarType(basicType));
410 
411 		if (glu::isDataTypeMatrix(basicType))
412 		{
413 			const bool	isRowMajor	= !!(layoutFlags & LAYOUT_ROW_MAJOR);
414 			const int	vecSize		= isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
415 												 : glu::getDataTypeMatrixNumRows(basicType);
416 			const int	vecAlign	= getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize));
417 			return vecAlign;
418 		}
419 		else
420 			return getDataTypeByteAlignment(basicType);
421 	}
422 	else if (type.isArrayType())
423 		return computeStd430BaseAlignment(type.getElementType(), layoutFlags);
424 	else
425 	{
426 		DE_ASSERT(type.isStructType());
427 
428 		int maxBaseAlignment = 0;
429 		for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
430 			maxBaseAlignment = de::max(maxBaseAlignment, computeRelaxedBlockBaseAlignment(memberIter->getType(), layoutFlags));
431 
432 		return maxBaseAlignment;
433 	}
434 }
435 
computeScalarBlockAlignment(const VarType & type,deUint32 layoutFlags)436 int computeScalarBlockAlignment (const VarType& type, deUint32 layoutFlags)
437 {
438 	if (type.isBasicType())
439 	{
440 		return getDataTypeByteAlignment(glu::getDataTypeScalarType(type.getBasicType()));
441 	}
442 	else if (type.isArrayType())
443 		return computeScalarBlockAlignment(type.getElementType(), layoutFlags);
444 	else
445 	{
446 		DE_ASSERT(type.isStructType());
447 
448 		int maxBaseAlignment = 0;
449 		for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
450 			maxBaseAlignment = de::max(maxBaseAlignment, computeScalarBlockAlignment(memberIter->getType(), layoutFlags));
451 
452 		return maxBaseAlignment;
453 	}
454 }
455 
mergeLayoutFlags(deUint32 prevFlags,deUint32 newFlags)456 inline deUint32 mergeLayoutFlags (deUint32 prevFlags, deUint32 newFlags)
457 {
458 	const deUint32	packingMask		= LAYOUT_STD430|LAYOUT_STD140|LAYOUT_RELAXED|LAYOUT_SCALAR;
459 	const deUint32	matrixMask		= LAYOUT_ROW_MAJOR|LAYOUT_COLUMN_MAJOR;
460 
461 	deUint32 mergedFlags = 0;
462 
463 	mergedFlags |= ((newFlags & packingMask)	? newFlags : prevFlags) & packingMask;
464 	mergedFlags |= ((newFlags & matrixMask)		? newFlags : prevFlags) & matrixMask;
465 
466 	return mergedFlags;
467 }
468 
469 //! Appends all child elements to layout, returns value that should be appended to offset.
//! Appends layout entries for 'type' (and all of its children) to 'layout',
//! starting from 'baseOffset'. Returns the number of bytes consumed from
//! baseOffset, including any leading alignment padding.
int computeReferenceLayout (
	BufferLayout&		layout,
	int					curBlockNdx,
	int					baseOffset,
	const std::string&	curPrefix,
	const VarType&		type,
	deUint32			layoutFlags)
{
	// Reference layout uses std430 rules by default. std140 rules are
	// chosen only for blocks that have std140 layout.
	const int	baseAlignment		= (layoutFlags & LAYOUT_SCALAR)  != 0 ? computeScalarBlockAlignment(type, layoutFlags)			:
									  (layoutFlags & LAYOUT_STD140)  != 0 ? computeStd140BaseAlignment(type, layoutFlags)		:
									  (layoutFlags & LAYOUT_RELAXED) != 0 ? computeRelaxedBlockBaseAlignment(type, layoutFlags)	:
									  computeStd430BaseAlignment(type, layoutFlags);
	int			curOffset			= deAlign32(baseOffset, baseAlignment);
	const int	topLevelArraySize	= 1; // Default values
	const int	topLevelArrayStride	= 0;

	if (type.isBasicType())
	{
		const glu::DataType		basicType	= type.getBasicType();
		BufferVarLayoutEntry	entry;

		entry.name					= curPrefix;
		entry.type					= basicType;
		entry.arraySize				= 1;
		entry.arrayStride			= 0;
		entry.matrixStride			= 0;
		entry.topLevelArraySize		= topLevelArraySize;
		entry.topLevelArrayStride	= topLevelArrayStride;
		entry.blockNdx				= curBlockNdx;

		if (glu::isDataTypeMatrix(basicType))
		{
			// Array of vectors as specified in rules 5 & 7.
			const bool	isRowMajor			= !!(layoutFlags & LAYOUT_ROW_MAJOR);
			const int	vecSize				= isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
														 : glu::getDataTypeMatrixNumRows(basicType);
			const glu::DataType	vecType		= glu::getDataTypeFloatVec(vecSize);
			const int	numVecs				= isRowMajor ? glu::getDataTypeMatrixNumRows(basicType)
														 : glu::getDataTypeMatrixNumColumns(basicType);
			// In scalar layout vectors are packed tightly; otherwise each
			// column/row vector starts at the matrix base alignment.
			const int	vecStride			= (layoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(vecType) : baseAlignment;

			entry.offset		= curOffset;
			entry.matrixStride	= vecStride;
			entry.isRowMajor	= isRowMajor;

			curOffset += numVecs*entry.matrixStride;
		}
		else
		{
			// Relaxed layout: bump the offset to the next 16-byte boundary when a
			// vector would otherwise cross one (or, for vectors larger than 16
			// bytes, when it would not start on one).
			if (!(layoutFlags & LAYOUT_SCALAR) && (layoutFlags & LAYOUT_RELAXED) &&
				glu::isDataTypeVector(basicType) && (getDataTypeByteSize(basicType) <= 16 ? curOffset / 16 != (curOffset +  getDataTypeByteSize(basicType) - 1) / 16 : curOffset % 16 != 0))
				curOffset = deIntRoundToPow2(curOffset, 16);

			// Scalar or vector.
			entry.offset = curOffset;

			curOffset += getDataTypeByteSize(basicType);
		}

		layout.bufferVars.push_back(entry);
	}
	else if (type.isArrayType())
	{
		const VarType&	elemType	= type.getElementType();

		if (elemType.isBasicType() && !glu::isDataTypeMatrix(elemType.getBasicType()))
		{
			// Array of scalars or vectors.
			const glu::DataType		elemBasicType	= elemType.getBasicType();
			const int				stride			= (layoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(elemBasicType) : baseAlignment;
			BufferVarLayoutEntry	entry;

			entry.name					= curPrefix + "[0]"; // Array variables are always postfixed with [0]
			entry.type					= elemBasicType;
			entry.blockNdx				= curBlockNdx;
			entry.offset				= curOffset;
			entry.arraySize				= type.getArraySize();
			entry.arrayStride			= stride;
			entry.matrixStride			= 0;
			entry.topLevelArraySize		= topLevelArraySize;
			entry.topLevelArrayStride	= topLevelArrayStride;

			curOffset += stride*type.getArraySize();

			layout.bufferVars.push_back(entry);
		}
		else if (elemType.isBasicType() && glu::isDataTypeMatrix(elemType.getBasicType()))
		{
			// Array of matrices.
			const glu::DataType			elemBasicType	= elemType.getBasicType();
			const bool					isRowMajor		= !!(layoutFlags & LAYOUT_ROW_MAJOR);
			const int					vecSize			= isRowMajor ? glu::getDataTypeMatrixNumColumns(elemBasicType)
																	 : glu::getDataTypeMatrixNumRows(elemBasicType);
			const glu::DataType			vecType			= glu::getDataTypeFloatVec(vecSize);
			const int					numVecs			= isRowMajor ? glu::getDataTypeMatrixNumRows(elemBasicType)
																	 : glu::getDataTypeMatrixNumColumns(elemBasicType);
			const int					vecStride		= (layoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(vecType) : baseAlignment;
			BufferVarLayoutEntry		entry;

			entry.name					= curPrefix + "[0]"; // Array variables are always postfixed with [0]
			entry.type					= elemBasicType;
			entry.blockNdx				= curBlockNdx;
			entry.offset				= curOffset;
			entry.arraySize				= type.getArraySize();
			entry.arrayStride			= vecStride*numVecs;
			entry.matrixStride			= vecStride;
			entry.isRowMajor			= isRowMajor;
			entry.topLevelArraySize		= topLevelArraySize;
			entry.topLevelArrayStride	= topLevelArrayStride;

			curOffset += entry.arrayStride*type.getArraySize();

			layout.bufferVars.push_back(entry);
		}
		else
		{
			DE_ASSERT(elemType.isStructType() || elemType.isArrayType());

			// Arrays of aggregates are flattened: each element gets its own entries.
			for (int elemNdx = 0; elemNdx < type.getArraySize(); elemNdx++)
				curOffset += computeReferenceLayout(layout, curBlockNdx, curOffset, curPrefix + "[" + de::toString(elemNdx) + "]", type.getElementType(), layoutFlags);
		}
	}
	else
	{
		DE_ASSERT(type.isStructType());

		for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
			curOffset += computeReferenceLayout(layout, curBlockNdx, curOffset, curPrefix + "." + memberIter->getName(), memberIter->getType(), layoutFlags);

		// Struct size is padded up to its base alignment (except in scalar layout).
		if (!(layoutFlags & LAYOUT_SCALAR))
			curOffset = deAlign32(curOffset, baseAlignment);
	}

	return curOffset-baseOffset;
}
607 
608 //! Appends all child elements to layout, returns offset increment.
//! Appends layout entries for a single block member to 'layout'; top-level
//! arrays get special handling (topLevelArraySize/Stride). Returns the offset
//! increment consumed from baseOffset.
int computeReferenceLayout (BufferLayout& layout, int curBlockNdx, const std::string& blockPrefix, int baseOffset, const BufferVar& bufVar, deUint32 blockLayoutFlags)
{
	const VarType&	varType			= bufVar.getType();
	// Per-variable flags override block flags within each flag category.
	const deUint32	combinedFlags	= mergeLayoutFlags(blockLayoutFlags, bufVar.getFlags());

	if (varType.isArrayType())
	{
		// Top-level arrays need special care.
		const int		topLevelArraySize	= varType.getArraySize() == VarType::UNSIZED_ARRAY ? 0 : varType.getArraySize();
		const string	prefix				= blockPrefix + bufVar.getName() + "[0]";
		const bool		isStd140			= (blockLayoutFlags & LAYOUT_STD140) != 0;
		const int		vec4Align			= (int)sizeof(deUint32)*4;
		const int		baseAlignment		= (blockLayoutFlags & LAYOUT_SCALAR)  != 0 ? computeScalarBlockAlignment(varType, combinedFlags)			:
											isStd140									? computeStd140BaseAlignment(varType, combinedFlags)		:
											(blockLayoutFlags & LAYOUT_RELAXED) != 0	? computeRelaxedBlockBaseAlignment(varType, combinedFlags)	:
											computeStd430BaseAlignment(varType, combinedFlags);
		int				curOffset			= deAlign32(baseOffset, baseAlignment);
		const VarType&	elemType			= varType.getElementType();

		if (elemType.isBasicType() && !glu::isDataTypeMatrix(elemType.getBasicType()))
		{
			// Array of scalars or vectors.
			const glu::DataType		elemBasicType	= elemType.getBasicType();
			const int				elemBaseAlign	= getDataTypeByteAlignment(elemBasicType);
			// Scalar layout packs tightly; std140 rounds element alignment up to vec4.
			const int				stride			= (blockLayoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(elemBasicType) :
													  isStd140 ? deAlign32(elemBaseAlign, vec4Align) :
													  elemBaseAlign;

			BufferVarLayoutEntry	entry;

			entry.name					= prefix;
			entry.topLevelArraySize		= 1;
			entry.topLevelArrayStride	= 0;
			entry.type					= elemBasicType;
			entry.blockNdx				= curBlockNdx;
			entry.offset				= curOffset;
			entry.arraySize				= topLevelArraySize;	// 0 for unsized arrays.
			entry.arrayStride			= stride;
			entry.matrixStride			= 0;

			layout.bufferVars.push_back(entry);

			curOffset += stride*topLevelArraySize;
		}
		else if (elemType.isBasicType() && glu::isDataTypeMatrix(elemType.getBasicType()))
		{
			// Array of matrices.
			const glu::DataType		elemBasicType	= elemType.getBasicType();
			const bool				isRowMajor		= !!(combinedFlags & LAYOUT_ROW_MAJOR);
			const int				vecSize			= isRowMajor ? glu::getDataTypeMatrixNumColumns(elemBasicType)
																 : glu::getDataTypeMatrixNumRows(elemBasicType);
			const int				numVecs			= isRowMajor ? glu::getDataTypeMatrixNumRows(elemBasicType)
																 : glu::getDataTypeMatrixNumColumns(elemBasicType);
			const glu::DataType		vecType			= glu::getDataTypeFloatVec(vecSize);
			const int				vecBaseAlign	= getDataTypeByteAlignment(vecType);
			// Stride between the column/row vectors of one matrix element.
			const int				stride			= (blockLayoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(vecType) :
													  isStd140 ? deAlign32(vecBaseAlign, vec4Align) :
													  vecBaseAlign;

			BufferVarLayoutEntry	entry;

			entry.name					= prefix;
			entry.topLevelArraySize		= 1;
			entry.topLevelArrayStride	= 0;
			entry.type					= elemBasicType;
			entry.blockNdx				= curBlockNdx;
			entry.offset				= curOffset;
			entry.arraySize				= topLevelArraySize;	// 0 for unsized arrays.
			entry.arrayStride			= stride*numVecs;
			entry.matrixStride			= stride;
			entry.isRowMajor			= isRowMajor;

			layout.bufferVars.push_back(entry);

			curOffset += entry.arrayStride*topLevelArraySize;
		}
		else
		{
			DE_ASSERT(elemType.isStructType() || elemType.isArrayType());

			// Struct base alignment is not added multiple times as curOffset supplied to computeReferenceLayout
			// was already aligned correctly. Thus computeReferenceLayout should not add any extra padding
			// before struct. Padding after struct will be added as it should.
			//
			// Stride could be computed prior to creating child elements, but it would essentially require running
			// the layout computation twice. Instead we fix stride to child elements afterwards.

			const int	firstChildNdx	= (int)layout.bufferVars.size();

			const int size = computeReferenceLayout(layout, curBlockNdx, deAlign32(curOffset, baseAlignment), prefix, varType.getElementType(), combinedFlags);
			const int stride = deAlign32(size, baseAlignment);

			// Patch the top-level array properties into all entries created above.
			for (int childNdx = firstChildNdx; childNdx < (int)layout.bufferVars.size(); childNdx++)
			{
				layout.bufferVars[childNdx].topLevelArraySize	= topLevelArraySize;
				layout.bufferVars[childNdx].topLevelArrayStride	= stride;
			}

			// Unsized arrays (topLevelArraySize == 0) consume no space here.
			if (topLevelArraySize != 0)
				curOffset += stride*(topLevelArraySize - 1) + size;
		}

		return curOffset-baseOffset;
	}
	else
		// Non-array members use the generic per-type layout computation.
		return computeReferenceLayout(layout, curBlockNdx, baseOffset, blockPrefix + bufVar.getName(), varType, combinedFlags);
}
716 
//! Computes the full reference layout for all blocks in the interface,
//! creating one BlockLayoutEntry per block instance.
void computeReferenceLayout (BufferLayout& layout, ShaderInterface& interface)
{
	int numBlocks = interface.getNumBlocks();

	for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
	{
		BufferBlock&		block			= interface.getBlock(blockNdx);
		bool				hasInstanceName	= block.getInstanceName() != DE_NULL;
		// Members of named-instance blocks are prefixed with the block name.
		std::string			blockPrefix		= hasInstanceName ? (std::string(block.getBlockName()) + ".") : std::string("");
		int					curOffset		= 0;
		int					activeBlockNdx	= (int)layout.blocks.size();
		int					firstVarNdx		= (int)layout.bufferVars.size();

		// oldSize tracks where each member's first layout entry lands so the
		// computed offset can be copied back to the BufferVar (relaxed layout).
		size_t oldSize	= layout.bufferVars.size();
		for (BufferBlock::iterator varIter = block.begin(); varIter != block.end(); varIter++)
		{
			BufferVar& bufVar = *varIter;
			curOffset += computeReferenceLayout(layout, activeBlockNdx,  blockPrefix, curOffset, bufVar, block.getFlags());
			if (block.getFlags() & LAYOUT_RELAXED)
			{
				// Each member must have produced at least one entry.
				DE_ASSERT(!(layout.bufferVars.size() <= oldSize));
				bufVar.setOffset(layout.bufferVars[oldSize].offset);
			}
			oldSize	= layout.bufferVars.size();
		}

		int	varIndicesEnd	= (int)layout.bufferVars.size();
		int	blockSize		= curOffset;
		int	numInstances	= block.isArray() ? block.getArraySize() : 1;

		// Create block layout entries for each instance.
		for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
		{
			// Allocate entry for instance.
			layout.blocks.push_back(BlockLayoutEntry());
			BlockLayoutEntry& blockEntry = layout.blocks.back();

			blockEntry.name = block.getBlockName();
			blockEntry.size = blockSize;

			// Compute active variable set for block.
			for (int varNdx = firstVarNdx; varNdx < varIndicesEnd; varNdx++)
				blockEntry.activeVarIndices.push_back(varNdx);

			// Array instances are named "block[0]", "block[1]", ...
			if (block.isArray())
				blockEntry.name += "[" + de::toString(instanceNdx) + "]";
		}
	}
}
766 
767 // Value generator.
768 
//! Fills the memory described by 'entry' (relative to basePtr) with small
//! deterministic random values. 'unsizedArraySize' supplies the element count
//! for unsized arrays (arraySize or topLevelArraySize of 0).
void generateValue (const BufferVarLayoutEntry& entry, int unsizedArraySize, void* basePtr, de::Random& rnd)
{
	const glu::DataType	scalarType		= glu::getDataTypeScalarType(entry.type);
	const int			scalarSize		= glu::getDataTypeScalarSize(entry.type);
	const int			arraySize		= entry.arraySize == 0 ? unsizedArraySize : entry.arraySize;
	const int			arrayStride		= entry.arrayStride;
	const int			topLevelSize	= entry.topLevelArraySize == 0 ? unsizedArraySize : entry.topLevelArraySize;
	const int			topLevelStride	= entry.topLevelArrayStride;
	const bool			isMatrix		= glu::isDataTypeMatrix(entry.type);
	// For matrices iterate over column/row vectors using matrixStride; other
	// types are treated as a single vector.
	const int			numVecs			= isMatrix ? (entry.isRowMajor ? glu::getDataTypeMatrixNumRows(entry.type) : glu::getDataTypeMatrixNumColumns(entry.type)) : 1;
	const int			vecSize			= scalarSize / numVecs;
	const size_t		compSize		= getDataTypeByteSize(scalarType);

	DE_ASSERT(scalarSize%numVecs == 0);
	DE_ASSERT(topLevelSize >= 0);
	DE_ASSERT(arraySize >= 0);

	// Nested loops walk: top-level array -> inner array -> vector -> component.
	for (int topElemNdx = 0; topElemNdx < topLevelSize; topElemNdx++)
	{
		deUint8* const topElemPtr = (deUint8*)basePtr + entry.offset + topElemNdx*topLevelStride;

		for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
		{
			deUint8* const elemPtr = topElemPtr + elemNdx*arrayStride;

			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
			{
				deUint8* const vecPtr = elemPtr + (isMatrix ? vecNdx*entry.matrixStride : 0);

				for (int compNdx = 0; compNdx < vecSize; compNdx++)
				{
					deUint8* const compPtr = vecPtr + compSize*compNdx;

					// Small values keep float comparisons in the shaders exact enough.
					switch (scalarType)
					{
						case glu::TYPE_FLOAT:	*((float*)compPtr)		= (float)rnd.getInt(-9, 9);						break;
						case glu::TYPE_INT:		*((int*)compPtr)		= rnd.getInt(-9, 9);							break;
						case glu::TYPE_UINT:	*((deUint32*)compPtr)	= (deUint32)rnd.getInt(0, 9);					break;
						case glu::TYPE_INT8:	*((deInt8*)compPtr)		= (deInt8)rnd.getInt(-9, 9);					break;
						case glu::TYPE_UINT8:	*((deUint8*)compPtr)	= (deUint8)rnd.getInt(0, 9);					break;
						case glu::TYPE_INT16:	*((deInt16*)compPtr)	= (deInt16)rnd.getInt(-9, 9);					break;
						case glu::TYPE_UINT16:	*((deUint16*)compPtr)	= (deUint16)rnd.getInt(0, 9);					break;
						case glu::TYPE_FLOAT16:	*((deFloat16*)compPtr)	= deFloat32To16((float)rnd.getInt(-9, 9));		break;
						// \note Random bit pattern is used for true values. Spec states that all non-zero values are
						//       interpreted as true but some implementations fail this.
						case glu::TYPE_BOOL:	*((deUint32*)compPtr)	= rnd.getBool() ? rnd.getUint32()|1u : 0u;		break;
						default:
							DE_ASSERT(false);
					}
				}
			}
		}
	}
}
823 
generateValues(const BufferLayout & layout,const vector<BlockDataPtr> & blockPointers,deUint32 seed)824 void generateValues (const BufferLayout& layout, const vector<BlockDataPtr>& blockPointers, deUint32 seed)
825 {
826 	de::Random	rnd			(seed);
827 	const int	numBlocks	= (int)layout.blocks.size();
828 
829 	DE_ASSERT(numBlocks == (int)blockPointers.size());
830 
831 	for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
832 	{
833 		const BlockLayoutEntry&	blockLayout	= layout.blocks[blockNdx];
834 		const BlockDataPtr&		blockPtr	= blockPointers[blockNdx];
835 		const int				numEntries	= (int)layout.blocks[blockNdx].activeVarIndices.size();
836 
837 		for (int entryNdx = 0; entryNdx < numEntries; entryNdx++)
838 		{
839 			const int					varNdx		= blockLayout.activeVarIndices[entryNdx];
840 			const BufferVarLayoutEntry&	varEntry	= layout.bufferVars[varNdx];
841 
842 			generateValue(varEntry, blockPtr.lastUnsizedArraySize, blockPtr.ptr, rnd);
843 		}
844 	}
845 }
846 
847 // Shader generator.
848 
// Returns GLSL source for the comparison helper function of the given basic type.
// Float compares use an absolute epsilon of 0.05 (generated values are small
// integers, see generateValue, so this tolerance is safe); integer/uint/bool
// compares are exact. Vector/matrix helpers call into the scalar/vector helpers,
// so callers must emit dependencies first (see getCompareDependencies).
// The 8/16-bit variants take promoted 32-bit parameters (see getPromoteType).
const char* getCompareFuncForType (glu::DataType type)
{
	switch (type)
	{
		case glu::TYPE_FLOAT:			return "bool compare_float    (highp float a, highp float b)  { return abs(a - b) < 0.05; }\n";
		case glu::TYPE_FLOAT_VEC2:		return "bool compare_vec2     (highp vec2 a, highp vec2 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); }\n";
		case glu::TYPE_FLOAT_VEC3:		return "bool compare_vec3     (highp vec3 a, highp vec3 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z); }\n";
		case glu::TYPE_FLOAT_VEC4:		return "bool compare_vec4     (highp vec4 a, highp vec4 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z)&&compare_float(a.w, b.w); }\n";
		case glu::TYPE_FLOAT_MAT2:		return "bool compare_mat2     (highp mat2 a, highp mat2 b)    { return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1]); }\n";
		case glu::TYPE_FLOAT_MAT2X3:	return "bool compare_mat2x3   (highp mat2x3 a, highp mat2x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1]); }\n";
		case glu::TYPE_FLOAT_MAT2X4:	return "bool compare_mat2x4   (highp mat2x4 a, highp mat2x4 b){ return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1]); }\n";
		case glu::TYPE_FLOAT_MAT3X2:	return "bool compare_mat3x2   (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); }\n";
		case glu::TYPE_FLOAT_MAT3:		return "bool compare_mat3     (highp mat3 a, highp mat3 b)    { return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2]); }\n";
		case glu::TYPE_FLOAT_MAT3X4:	return "bool compare_mat3x4   (highp mat3x4 a, highp mat3x4 b){ return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1])&&compare_vec4(a[2], b[2]); }\n";
		case glu::TYPE_FLOAT_MAT4X2:	return "bool compare_mat4x2   (highp mat4x2 a, highp mat4x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2])&&compare_vec2(a[3], b[3]); }\n";
		case glu::TYPE_FLOAT_MAT4X3:	return "bool compare_mat4x3   (highp mat4x3 a, highp mat4x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2])&&compare_vec3(a[3], b[3]); }\n";
		case glu::TYPE_FLOAT_MAT4:		return "bool compare_mat4     (highp mat4 a, highp mat4 b)    { return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1])&&compare_vec4(a[2], b[2])&&compare_vec4(a[3], b[3]); }\n";
		case glu::TYPE_INT:				return "bool compare_int      (highp int a, highp int b)      { return a == b; }\n";
		case glu::TYPE_INT_VEC2:		return "bool compare_ivec2    (highp ivec2 a, highp ivec2 b)  { return a == b; }\n";
		case glu::TYPE_INT_VEC3:		return "bool compare_ivec3    (highp ivec3 a, highp ivec3 b)  { return a == b; }\n";
		case glu::TYPE_INT_VEC4:		return "bool compare_ivec4    (highp ivec4 a, highp ivec4 b)  { return a == b; }\n";
		case glu::TYPE_UINT:			return "bool compare_uint     (highp uint a, highp uint b)    { return a == b; }\n";
		case glu::TYPE_UINT_VEC2:		return "bool compare_uvec2    (highp uvec2 a, highp uvec2 b)  { return a == b; }\n";
		case glu::TYPE_UINT_VEC3:		return "bool compare_uvec3    (highp uvec3 a, highp uvec3 b)  { return a == b; }\n";
		case glu::TYPE_UINT_VEC4:		return "bool compare_uvec4    (highp uvec4 a, highp uvec4 b)  { return a == b; }\n";
		case glu::TYPE_BOOL:			return "bool compare_bool     (bool a, bool b)                { return a == b; }\n";
		case glu::TYPE_BOOL_VEC2:		return "bool compare_bvec2    (bvec2 a, bvec2 b)              { return a == b; }\n";
		case glu::TYPE_BOOL_VEC3:		return "bool compare_bvec3    (bvec3 a, bvec3 b)              { return a == b; }\n";
		case glu::TYPE_BOOL_VEC4:		return "bool compare_bvec4    (bvec4 a, bvec4 b)              { return a == b; }\n";
		case glu::TYPE_FLOAT16:			return "bool compare_float16_t(highp float a, highp float b)  { return abs(a - b) < 0.05; }\n";
		case glu::TYPE_FLOAT16_VEC2:	return "bool compare_f16vec2  (highp vec2 a, highp vec2 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); }\n";
		case glu::TYPE_FLOAT16_VEC3:	return "bool compare_f16vec3  (highp vec3 a, highp vec3 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z); }\n";
		case glu::TYPE_FLOAT16_VEC4:	return "bool compare_f16vec4  (highp vec4 a, highp vec4 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z)&&compare_float(a.w, b.w); }\n";
		case glu::TYPE_INT8:			return "bool compare_int8_t   (highp int a, highp int b)      { return a == b; }\n";
		case glu::TYPE_INT8_VEC2:		return "bool compare_i8vec2   (highp ivec2 a, highp ivec2 b)  { return a == b; }\n";
		case glu::TYPE_INT8_VEC3:		return "bool compare_i8vec3   (highp ivec3 a, highp ivec3 b)  { return a == b; }\n";
		case glu::TYPE_INT8_VEC4:		return "bool compare_i8vec4   (highp ivec4 a, highp ivec4 b)  { return a == b; }\n";
		case glu::TYPE_UINT8:			return "bool compare_uint8_t  (highp uint a, highp uint b)    { return a == b; }\n";
		case glu::TYPE_UINT8_VEC2:		return "bool compare_u8vec2   (highp uvec2 a, highp uvec2 b)  { return a == b; }\n";
		case glu::TYPE_UINT8_VEC3:		return "bool compare_u8vec3   (highp uvec3 a, highp uvec3 b)  { return a == b; }\n";
		case glu::TYPE_UINT8_VEC4:		return "bool compare_u8vec4   (highp uvec4 a, highp uvec4 b)  { return a == b; }\n";
		case glu::TYPE_INT16:			return "bool compare_int16_t  (highp int a, highp int b)      { return a == b; }\n";
		case glu::TYPE_INT16_VEC2:		return "bool compare_i16vec2  (highp ivec2 a, highp ivec2 b)  { return a == b; }\n";
		case glu::TYPE_INT16_VEC3:		return "bool compare_i16vec3  (highp ivec3 a, highp ivec3 b)  { return a == b; }\n";
		case glu::TYPE_INT16_VEC4:		return "bool compare_i16vec4  (highp ivec4 a, highp ivec4 b)  { return a == b; }\n";
		case glu::TYPE_UINT16:			return "bool compare_uint16_t (highp uint a, highp uint b)    { return a == b; }\n";
		case glu::TYPE_UINT16_VEC2:		return "bool compare_u16vec2  (highp uvec2 a, highp uvec2 b)  { return a == b; }\n";
		case glu::TYPE_UINT16_VEC3:		return "bool compare_u16vec3  (highp uvec3 a, highp uvec3 b)  { return a == b; }\n";
		case glu::TYPE_UINT16_VEC4:		return "bool compare_u16vec4  (highp uvec4 a, highp uvec4 b)  { return a == b; }\n";
		default:
			DE_ASSERT(false);	// no comparison helper defined for this type
			return DE_NULL;
	}
}
903 
getCompareDependencies(std::set<glu::DataType> & compareFuncs,glu::DataType basicType)904 void getCompareDependencies (std::set<glu::DataType>& compareFuncs, glu::DataType basicType)
905 {
906 	switch (basicType)
907 	{
908 		case glu::TYPE_FLOAT_VEC2:
909 		case glu::TYPE_FLOAT_VEC3:
910 		case glu::TYPE_FLOAT_VEC4:
911 		case glu::TYPE_FLOAT16_VEC2:
912 		case glu::TYPE_FLOAT16_VEC3:
913 		case glu::TYPE_FLOAT16_VEC4:
914 			compareFuncs.insert(glu::TYPE_FLOAT);
915 			compareFuncs.insert(basicType);
916 			break;
917 
918 		case glu::TYPE_FLOAT_MAT2:
919 		case glu::TYPE_FLOAT_MAT2X3:
920 		case glu::TYPE_FLOAT_MAT2X4:
921 		case glu::TYPE_FLOAT_MAT3X2:
922 		case glu::TYPE_FLOAT_MAT3:
923 		case glu::TYPE_FLOAT_MAT3X4:
924 		case glu::TYPE_FLOAT_MAT4X2:
925 		case glu::TYPE_FLOAT_MAT4X3:
926 		case glu::TYPE_FLOAT_MAT4:
927 			compareFuncs.insert(glu::TYPE_FLOAT);
928 			compareFuncs.insert(glu::getDataTypeFloatVec(glu::getDataTypeMatrixNumRows(basicType)));
929 			compareFuncs.insert(basicType);
930 			break;
931 
932 		default:
933 			compareFuncs.insert(basicType);
934 			break;
935 	}
936 }
937 
collectUniqueBasicTypes(std::set<glu::DataType> & basicTypes,const VarType & type)938 void collectUniqueBasicTypes (std::set<glu::DataType>& basicTypes, const VarType& type)
939 {
940 	if (type.isStructType())
941 	{
942 		for (StructType::ConstIterator iter = type.getStructPtr()->begin(); iter != type.getStructPtr()->end(); ++iter)
943 			collectUniqueBasicTypes(basicTypes, iter->getType());
944 	}
945 	else if (type.isArrayType())
946 		collectUniqueBasicTypes(basicTypes, type.getElementType());
947 	else
948 	{
949 		DE_ASSERT(type.isBasicType());
950 		basicTypes.insert(type.getBasicType());
951 	}
952 }
953 
collectUniqueBasicTypes(std::set<glu::DataType> & basicTypes,const BufferBlock & bufferBlock)954 void collectUniqueBasicTypes (std::set<glu::DataType>& basicTypes, const BufferBlock& bufferBlock)
955 {
956 	for (BufferBlock::const_iterator iter = bufferBlock.begin(); iter != bufferBlock.end(); ++iter)
957 		collectUniqueBasicTypes(basicTypes, iter->getType());
958 }
959 
collectUniqueBasicTypes(std::set<glu::DataType> & basicTypes,const ShaderInterface & interface)960 void collectUniqueBasicTypes (std::set<glu::DataType>& basicTypes, const ShaderInterface& interface)
961 {
962 	for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
963 		collectUniqueBasicTypes(basicTypes, interface.getBlock(ndx));
964 }
965 
generateCompareFuncs(std::ostream & str,const ShaderInterface & interface)966 void generateCompareFuncs (std::ostream& str, const ShaderInterface& interface)
967 {
968 	std::set<glu::DataType> types;
969 	std::set<glu::DataType> compareFuncs;
970 
971 	// Collect unique basic types
972 	collectUniqueBasicTypes(types, interface);
973 
974 	// Set of compare functions required
975 	for (std::set<glu::DataType>::const_iterator iter = types.begin(); iter != types.end(); ++iter)
976 	{
977 		getCompareDependencies(compareFuncs, *iter);
978 	}
979 
980 	for (int type = 0; type < glu::TYPE_LAST; ++type)
981 	{
982 		if (compareFuncs.find(glu::DataType(type)) != compareFuncs.end())
983 			str << getCompareFuncForType(glu::DataType(type));
984 	}
985 }
986 
usesRelaxedLayout(const ShaderInterface & interface)987 bool usesRelaxedLayout (const ShaderInterface& interface)
988 {
989 	//If any of blocks has LAYOUT_RELAXED flag
990 	for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
991 	{
992 		if (interface.getBlock(ndx).getFlags() & LAYOUT_RELAXED)
993 			return true;
994 	}
995 	return false;
996 }
997 
uses16BitStorage(const ShaderInterface & interface)998 bool uses16BitStorage (const ShaderInterface& interface)
999 {
1000 	// If any of blocks has LAYOUT_16BIT_STORAGE flag
1001 	for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
1002 	{
1003 		if (interface.getBlock(ndx).getFlags() & LAYOUT_16BIT_STORAGE)
1004 			return true;
1005 	}
1006 	return false;
1007 }
1008 
uses8BitStorage(const ShaderInterface & interface)1009 bool uses8BitStorage (const ShaderInterface& interface)
1010 {
1011 	// If any of blocks has LAYOUT_8BIT_STORAGE flag
1012 	for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
1013 	{
1014 		if (interface.getBlock(ndx).getFlags() & LAYOUT_8BIT_STORAGE)
1015 			return true;
1016 	}
1017 	return false;
1018 }
1019 
usesScalarLayout(const ShaderInterface & interface)1020 bool usesScalarLayout (const ShaderInterface& interface)
1021 {
1022 	// If any of blocks has LAYOUT_SCALAR flag
1023 	for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
1024 	{
1025 		if (interface.getBlock(ndx).getFlags() & LAYOUT_SCALAR)
1026 			return true;
1027 	}
1028 	return false;
1029 }
1030 
// Stream helper: writing an Indent emits 'level' tab characters
// (see the operator<< overload below) when generating shader source.
struct Indent
{
	int level;	// number of tab stops to emit
	Indent (int level_) : level(level_) {}
};
1036 
operator <<(std::ostream & str,const Indent & indent)1037 std::ostream& operator<< (std::ostream& str, const Indent& indent)
1038 {
1039 	for (int i = 0; i < indent.level; i++)
1040 		str << "\t";
1041 	return str;
1042 }
1043 
// Maps 8-bit and 16-bit storage types to the 32-bit types they are promoted to
// when used in GLSL expressions (the generated compare helpers take promoted
// parameters). Types with no explicit promotion are returned unchanged.
glu::DataType getPromoteType(glu::DataType type)
{
	switch (type)
	{
	case glu::TYPE_UINT8:			return glu::TYPE_UINT;
	case glu::TYPE_UINT8_VEC2:		return glu::TYPE_UINT_VEC2;
	case glu::TYPE_UINT8_VEC3:		return glu::TYPE_UINT_VEC3;
	case glu::TYPE_UINT8_VEC4:		return glu::TYPE_UINT_VEC4;
	case glu::TYPE_INT8:			return glu::TYPE_INT;
	case glu::TYPE_INT8_VEC2:		return glu::TYPE_INT_VEC2;
	case glu::TYPE_INT8_VEC3:		return glu::TYPE_INT_VEC3;
	case glu::TYPE_INT8_VEC4:		return glu::TYPE_INT_VEC4;
	case glu::TYPE_UINT16:			return glu::TYPE_UINT;
	case glu::TYPE_UINT16_VEC2:		return glu::TYPE_UINT_VEC2;
	case glu::TYPE_UINT16_VEC3:		return glu::TYPE_UINT_VEC3;
	case glu::TYPE_UINT16_VEC4:		return glu::TYPE_UINT_VEC4;
	case glu::TYPE_INT16:			return glu::TYPE_INT;
	case glu::TYPE_INT16_VEC2:		return glu::TYPE_INT_VEC2;
	case glu::TYPE_INT16_VEC3:		return glu::TYPE_INT_VEC3;
	case glu::TYPE_INT16_VEC4:		return glu::TYPE_INT_VEC4;
	case glu::TYPE_FLOAT16:			return glu::TYPE_FLOAT;
	case glu::TYPE_FLOAT16_VEC2:	return glu::TYPE_FLOAT_VEC2;
	case glu::TYPE_FLOAT16_VEC3:	return glu::TYPE_FLOAT_VEC3;
	case glu::TYPE_FLOAT16_VEC4:	return glu::TYPE_FLOAT_VEC4;
	default: return type;
	}
}
1071 
generateDeclaration(std::ostream & src,const BufferVar & bufferVar,int indentLevel)1072 void generateDeclaration (std::ostream& src, const BufferVar& bufferVar, int indentLevel)
1073 {
1074 	// \todo [pyry] Qualifiers
1075 	if ((bufferVar.getFlags() & LAYOUT_MASK) != 0)
1076 		src << "layout(" << LayoutFlagsFmt(bufferVar.getFlags() & LAYOUT_MASK) << ") ";
1077 	else if (bufferVar.getOffset()!= ~0u)
1078 		src << "layout(offset = "<<bufferVar.getOffset()<<") ";
1079 
1080 	src << glu::declare(bufferVar.getType(), bufferVar.getName(), indentLevel);
1081 }
1082 
generateDeclaration(std::ostream & src,const BufferBlock & block,int bindingPoint)1083 void generateDeclaration (std::ostream& src, const BufferBlock& block, int bindingPoint)
1084 {
1085 	src << "layout(";
1086 	if ((block.getFlags() & LAYOUT_MASK) != 0)
1087 		src << LayoutFlagsFmt(block.getFlags() & LAYOUT_MASK) << ", ";
1088 
1089 	src << "binding = " << bindingPoint;
1090 
1091 	src << ") ";
1092 
1093 	src << "buffer " << block.getBlockName();
1094 	src << "\n{\n";
1095 
1096 	for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1097 	{
1098 		src << Indent(1);
1099 
1100 		generateDeclaration(src, *varIter, 1 /* indent level */);
1101 		src << ";\n";
1102 	}
1103 
1104 	src << "}";
1105 
1106 	if (block.getInstanceName() != DE_NULL)
1107 	{
1108 		src << " " << block.getInstanceName();
1109 		if (block.isArray())
1110 			src << "[" << block.getArraySize() << "]";
1111 	}
1112 	else
1113 		DE_ASSERT(!block.isArray());
1114 
1115 	src << ";\n";
1116 }
1117 
generateImmMatrixSrc(std::ostream & src,glu::DataType basicType,int matrixStride,bool isRowMajor,const void * valuePtr)1118 void generateImmMatrixSrc (std::ostream& src, glu::DataType basicType, int matrixStride, bool isRowMajor, const void* valuePtr)
1119 {
1120 	DE_ASSERT(glu::isDataTypeMatrix(basicType));
1121 
1122 	const int		compSize		= sizeof(deUint32);
1123 	const int		numRows			= glu::getDataTypeMatrixNumRows(basicType);
1124 	const int		numCols			= glu::getDataTypeMatrixNumColumns(basicType);
1125 
1126 	src << glu::getDataTypeName(basicType) << "(";
1127 
1128 	// Constructed in column-wise order.
1129 	for (int colNdx = 0; colNdx < numCols; colNdx++)
1130 	{
1131 		for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
1132 		{
1133 			const deUint8*	compPtr	= (const deUint8*)valuePtr + (isRowMajor ? rowNdx*matrixStride + colNdx*compSize
1134 																				: colNdx*matrixStride + rowNdx*compSize);
1135 
1136 			if (colNdx > 0 || rowNdx > 0)
1137 				src << ", ";
1138 
1139 			src << de::floatToString(*((const float*)compPtr), 1);
1140 		}
1141 	}
1142 
1143 	src << ")";
1144 }
1145 
generateImmMatrixSrc(std::ostream & src,glu::DataType basicType,int matrixStride,bool isRowMajor,const void * valuePtr,const char * resultVar,const char * typeName,const string shaderName)1146 void generateImmMatrixSrc (std::ostream& src,
1147 						   glu::DataType basicType,
1148 						   int matrixStride,
1149 						   bool isRowMajor,
1150 						   const void* valuePtr,
1151 						   const char* resultVar,
1152 						   const char* typeName,
1153 						   const string shaderName)
1154 {
1155 	const int		compSize		= sizeof(deUint32);
1156 	const int		numRows			= glu::getDataTypeMatrixNumRows(basicType);
1157 	const int		numCols			= glu::getDataTypeMatrixNumColumns(basicType);
1158 
1159 	typeName = "float";
1160 	for (int colNdex = 0; colNdex < numCols; colNdex++)
1161 	{
1162 		for (int rowNdex = 0; rowNdex < numRows; rowNdex++)
1163 		{
1164 			src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << "(" << shaderName << "[" << colNdex << "][" << rowNdex << "], ";
1165 			const deUint8*	compPtr	= (const deUint8*)valuePtr + (isRowMajor ? rowNdex*matrixStride + colNdex*compSize
1166 																						: colNdex*matrixStride + rowNdex*compSize);
1167 
1168 			src << de::floatToString(*((const float*)compPtr), 1);
1169 			src << ");\n";
1170 		}
1171 	}
1172 
1173 	typeName = "vec";
1174 	for (int colNdex = 0; colNdex < numCols; colNdex++)
1175 	{
1176 		src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << numRows << "(" << shaderName << "[" << colNdex << "], " << typeName << numRows << "(";
1177 		for (int rowNdex = 0; rowNdex < numRows; rowNdex++)
1178 		{
1179 			const deUint8*	compPtr	= (const deUint8*)valuePtr + (isRowMajor ? (rowNdex * matrixStride + colNdex * compSize)
1180 																  : (colNdex * matrixStride + rowNdex * compSize));
1181 			src << de::floatToString(*((const float*)compPtr), 1);
1182 
1183 			if (rowNdex < numRows-1)
1184 				src << ", ";
1185 		}
1186 		src << "));\n";
1187 	}
1188 }
1189 
// Emits an immediate scalar or vector literal for 'basicType' read from
// 'valuePtr', e.g. "1.0", "3u" or "uvec2(1u, 2u)". Vectors are written using
// the promoted constructor name (see getPromoteType) since 8/16-bit values are
// compared in their promoted 32-bit form.
void generateImmScalarVectorSrc (std::ostream& src, glu::DataType basicType, const void* valuePtr)
{
	DE_ASSERT(glu::isDataTypeFloatOrVec(basicType)	||
			  glu::isDataTypeIntOrIVec(basicType)	||
			  glu::isDataTypeUintOrUVec(basicType)	||
			  glu::isDataTypeBoolOrBVec(basicType)  ||
			  glu::isDataTypeExplicitPrecision(basicType));

	const glu::DataType		scalarType		= glu::getDataTypeScalarType(basicType);
	const int				scalarSize		= glu::getDataTypeScalarSize(basicType);
	const size_t			compSize		= getDataTypeByteSize(scalarType);	// bytes per component in buffer storage

	// Vectors need an explicit constructor around the component list.
	if (scalarSize > 1)
		src << glu::getDataTypeName(getPromoteType(basicType)) << "(";

	for (int scalarNdx = 0; scalarNdx < scalarSize; scalarNdx++)
	{
		const deUint8* compPtr = (const deUint8*)valuePtr + scalarNdx*compSize;

		if (scalarNdx > 0)
			src << ", ";

		// Reinterpret the raw bytes according to the stored scalar type.
		switch (scalarType)
		{
			case glu::TYPE_FLOAT16:	src << de::floatToString(deFloat16To32(*((const deFloat16*)compPtr)), 1);	break;
			case glu::TYPE_FLOAT:	src << de::floatToString(*((const float*)compPtr), 1);			break;
			case glu::TYPE_INT8:	src << (deUint32)*((const deInt8*)compPtr);						break;
			case glu::TYPE_INT16:	src << *((const deInt16*)compPtr);								break;
			case glu::TYPE_INT:		src << *((const int*)compPtr);									break;
			case glu::TYPE_UINT8:	src << (deUint32)*((const deUint8*)compPtr) << "u";				break;
			case glu::TYPE_UINT16:	src << *((const deUint16*)compPtr) << "u";						break;
			case glu::TYPE_UINT:	src << *((const deUint32*)compPtr) << "u";						break;
			case glu::TYPE_BOOL:	src << (*((const deUint32*)compPtr) != 0u ? "true" : "false");	break;
			default:
				DE_ASSERT(false);
		}
	}

	if (scalarSize > 1)
		src << ")";
}
1231 
getAPIName(const BufferBlock & block,const BufferVar & var,const glu::TypeComponentVector & accessPath)1232 string getAPIName (const BufferBlock& block, const BufferVar& var, const glu::TypeComponentVector& accessPath)
1233 {
1234 	std::ostringstream name;
1235 
1236 	if (block.getInstanceName())
1237 		name << block.getBlockName() << ".";
1238 
1239 	name << var.getName();
1240 
1241 	for (glu::TypeComponentVector::const_iterator pathComp = accessPath.begin(); pathComp != accessPath.end(); pathComp++)
1242 	{
1243 		if (pathComp->type == glu::VarTypeComponent::STRUCT_MEMBER)
1244 		{
1245 			const VarType		curType		= glu::getVarType(var.getType(), accessPath.begin(), pathComp);
1246 			const StructType*	structPtr	= curType.getStructPtr();
1247 
1248 			name << "." << structPtr->getMember(pathComp->index).getName();
1249 		}
1250 		else if (pathComp->type == glu::VarTypeComponent::ARRAY_ELEMENT)
1251 		{
1252 			if (pathComp == accessPath.begin() || (pathComp+1) == accessPath.end())
1253 				name << "[0]"; // Top- / bottom-level array
1254 			else
1255 				name << "[" << pathComp->index << "]";
1256 		}
1257 		else
1258 			DE_ASSERT(false);
1259 	}
1260 
1261 	return name.str();
1262 }
1263 
getShaderName(const BufferBlock & block,int instanceNdx,const BufferVar & var,const glu::TypeComponentVector & accessPath)1264 string getShaderName (const BufferBlock& block, int instanceNdx, const BufferVar& var, const glu::TypeComponentVector& accessPath)
1265 {
1266 	std::ostringstream name;
1267 
1268 	if (block.getInstanceName())
1269 	{
1270 		name << block.getInstanceName();
1271 
1272 		if (block.isArray())
1273 			name << "[" << instanceNdx << "]";
1274 
1275 		name << ".";
1276 	}
1277 	else
1278 		DE_ASSERT(instanceNdx == 0);
1279 
1280 	name << var.getName();
1281 
1282 	for (glu::TypeComponentVector::const_iterator pathComp = accessPath.begin(); pathComp != accessPath.end(); pathComp++)
1283 	{
1284 		if (pathComp->type == glu::VarTypeComponent::STRUCT_MEMBER)
1285 		{
1286 			const VarType		curType		= glu::getVarType(var.getType(), accessPath.begin(), pathComp);
1287 			const StructType*	structPtr	= curType.getStructPtr();
1288 
1289 			name << "." << structPtr->getMember(pathComp->index).getName();
1290 		}
1291 		else if (pathComp->type == glu::VarTypeComponent::ARRAY_ELEMENT)
1292 			name << "[" << pathComp->index << "]";
1293 		else
1294 			DE_ASSERT(false);
1295 	}
1296 
1297 	return name.str();
1298 }
1299 
computeOffset(const BufferVarLayoutEntry & varLayout,const glu::TypeComponentVector & accessPath)1300 int computeOffset (const BufferVarLayoutEntry& varLayout, const glu::TypeComponentVector& accessPath)
1301 {
1302 	const int	topLevelNdx		= (accessPath.size() > 1 && accessPath.front().type == glu::VarTypeComponent::ARRAY_ELEMENT) ? accessPath.front().index : 0;
1303 	const int	bottomLevelNdx	= (!accessPath.empty() && accessPath.back().type == glu::VarTypeComponent::ARRAY_ELEMENT) ? accessPath.back().index : 0;
1304 
1305 	return varLayout.offset + varLayout.topLevelArrayStride*topLevelNdx + varLayout.arrayStride*bottomLevelNdx;
1306 }
1307 
// Recursively emits GLSL statements that compare the shader-visible value at
// 'accessPath' within 'bufVar' against the reference bytes in 'blockPtr',
// AND-ing each result into 'resultVar'. Arrays and structs are expanded
// element-by-element / member-by-member; basic types produce a single
// "resultVar = resultVar && compare_T(...)" statement (or per-component
// compares for matrices when LOAD_MATRIX_COMPONENTS is requested).
void generateCompareSrc (
	std::ostream&				src,
	const char*					resultVar,
	const BufferLayout&			bufferLayout,
	const BufferBlock&			block,
	int							instanceNdx,
	const BlockDataPtr&			blockPtr,
	const BufferVar&			bufVar,
	const glu::SubTypeAccess&	accessPath,
	MatrixLoadFlags				matrixLoadFlag)
{
	const VarType curType = accessPath.getType();

	if (curType.isArrayType())
	{
		// An unsized (runtime) array takes its size from the block instance.
		const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ? block.getLastUnsizedArraySize(instanceNdx) : curType.getArraySize();

		// The matrix-load flag only applies at the top level; nested elements use full loads.
		for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
			generateCompareSrc(src, resultVar, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.element(elemNdx), LOAD_FULL_MATRIX);
	}
	else if (curType.isStructType())
	{
		const int numMembers = curType.getStructPtr()->getNumMembers();

		for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
			generateCompareSrc(src, resultVar, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.member(memberNdx), LOAD_FULL_MATRIX);
	}
	else
	{
		DE_ASSERT(curType.isBasicType());

		// Look up the layout entry via the API-side name of this access path.
		const string	apiName	= getAPIName(block, bufVar, accessPath.getPath());
		const int		varNdx	= bufferLayout.getVariableIndex(apiName);

		DE_ASSERT(varNdx >= 0);
		{
			const BufferVarLayoutEntry&	varLayout		= bufferLayout.bufferVars[varNdx];
			const string				shaderName		= getShaderName(block, instanceNdx, bufVar, accessPath.getPath());
			const glu::DataType			basicType		= curType.getBasicType();
			const bool					isMatrix		= glu::isDataTypeMatrix(basicType);
			const char*					typeName		= glu::getDataTypeName(basicType);
			// Reference value bytes for this element within the mapped block data.
			const void*					valuePtr		= (const deUint8*)blockPtr.ptr + computeOffset(varLayout, accessPath.getPath());


			if (isMatrix)
			{
				if (matrixLoadFlag == LOAD_MATRIX_COMPONENTS)
					generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, valuePtr, resultVar, typeName, shaderName);
				else
				{
					src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << "(" << shaderName << ", ";
					generateImmMatrixSrc (src, basicType, varLayout.matrixStride, varLayout.isRowMajor, valuePtr);
					src << ");\n";
				}
			}
			else
			{
				// 8/16-bit values are cast to their promoted 32-bit type before comparing.
				const char* castName = "";
				glu::DataType promoteType = getPromoteType(basicType);
				if (basicType != promoteType)
					castName = glu::getDataTypeName(promoteType);

				src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << "(" << castName << "(" << shaderName << "), ";
				generateImmScalarVectorSrc(src, basicType, valuePtr);
				src << ");\n";
			}
		}
	}
}
1377 
generateCompareSrc(std::ostream & src,const char * resultVar,const ShaderInterface & interface,const BufferLayout & layout,const vector<BlockDataPtr> & blockPointers,MatrixLoadFlags matrixLoadFlag)1378 void generateCompareSrc (std::ostream& src, const char* resultVar, const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& blockPointers, MatrixLoadFlags matrixLoadFlag)
1379 {
1380 	for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1381 	{
1382 		const BufferBlock&	block			= interface.getBlock(declNdx);
1383 		const bool			isArray			= block.isArray();
1384 		const int			numInstances	= isArray ? block.getArraySize() : 1;
1385 
1386 		DE_ASSERT(!isArray || block.getInstanceName());
1387 
1388 		for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
1389 		{
1390 			const string		instanceName	= block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
1391 			const int			blockNdx		= layout.getBlockIndex(instanceName);
1392 			const BlockDataPtr&	blockPtr		= blockPointers[blockNdx];
1393 
1394 			for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1395 			{
1396 				const BufferVar& bufVar = *varIter;
1397 
1398 				if ((bufVar.getFlags() & ACCESS_READ) == 0)
1399 					continue; // Don't read from that variable.
1400 
1401 				generateCompareSrc(src, resultVar, layout, block, instanceNdx, blockPtr, bufVar, glu::SubTypeAccess(bufVar.getType()), matrixLoadFlag);
1402 			}
1403 		}
1404 	}
1405 }
1406 
1407 // \todo [2013-10-14 pyry] Almost identical to generateCompareSrc - unify?
1408 
// Recursively emits GLSL statements that write the reference bytes in
// 'blockPtr' into the shader-visible variable at 'accessPath' within 'bufVar'.
// Arrays and structs are expanded element-by-element / member-by-member; basic
// types produce a single "shaderName = T(immediate...);" assignment.
void generateWriteSrc (
	std::ostream&				src,
	const BufferLayout&			bufferLayout,
	const BufferBlock&			block,
	int							instanceNdx,
	const BlockDataPtr&			blockPtr,
	const BufferVar&			bufVar,
	const glu::SubTypeAccess&	accessPath)
{
	const VarType curType = accessPath.getType();

	if (curType.isArrayType())
	{
		// An unsized (runtime) array takes its size from the block instance.
		const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ? block.getLastUnsizedArraySize(instanceNdx) : curType.getArraySize();

		for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
			generateWriteSrc(src, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.element(elemNdx));
	}
	else if (curType.isStructType())
	{
		const int numMembers = curType.getStructPtr()->getNumMembers();

		for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
			generateWriteSrc(src, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.member(memberNdx));
	}
	else
	{
		DE_ASSERT(curType.isBasicType());

		// Look up the layout entry via the API-side name of this access path.
		const string	apiName	= getAPIName(block, bufVar, accessPath.getPath());
		const int		varNdx	= bufferLayout.getVariableIndex(apiName);

		DE_ASSERT(varNdx >= 0);
		{
			const BufferVarLayoutEntry&	varLayout		= bufferLayout.bufferVars[varNdx];
			const string				shaderName		= getShaderName(block, instanceNdx, bufVar, accessPath.getPath());
			const glu::DataType			basicType		= curType.getBasicType();
			const bool					isMatrix		= glu::isDataTypeMatrix(basicType);
			// Reference value bytes for this element within the mapped block data.
			const void*					valuePtr		= (const deUint8*)blockPtr.ptr + computeOffset(varLayout, accessPath.getPath());

			// 8/16-bit destinations need an explicit narrowing cast from the
			// promoted immediate value; others get an empty cast name.
			const char* castName = "";
			glu::DataType promoteType = getPromoteType(basicType);
			if (basicType != promoteType)
				castName = glu::getDataTypeName(basicType);

			src << "\t" << shaderName << " = " << castName << "(";

			if (isMatrix)
				generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, valuePtr);
			else
				generateImmScalarVectorSrc(src, basicType, valuePtr);

			src << ");\n";
		}
	}
}
1465 
generateWriteSrc(std::ostream & src,const ShaderInterface & interface,const BufferLayout & layout,const vector<BlockDataPtr> & blockPointers)1466 void generateWriteSrc (std::ostream& src, const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& blockPointers)
1467 {
1468 	for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1469 	{
1470 		const BufferBlock&	block			= interface.getBlock(declNdx);
1471 		const bool			isArray			= block.isArray();
1472 		const int			numInstances	= isArray ? block.getArraySize() : 1;
1473 
1474 		DE_ASSERT(!isArray || block.getInstanceName());
1475 
1476 		for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
1477 		{
1478 			const string		instanceName	= block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
1479 			const int			blockNdx		= layout.getBlockIndex(instanceName);
1480 			const BlockDataPtr&	blockPtr		= blockPointers[blockNdx];
1481 
1482 			for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1483 			{
1484 				const BufferVar& bufVar = *varIter;
1485 
1486 				if ((bufVar.getFlags() & ACCESS_WRITE) == 0)
1487 					continue; // Don't write to that variable.
1488 
1489 				generateWriteSrc(src, layout, block, instanceNdx, blockPtr, bufVar, glu::SubTypeAccess(bufVar.getType()));
1490 			}
1491 		}
1492 	}
1493 }
1494 
generateComputeShader(const ShaderInterface & interface,const BufferLayout & layout,const vector<BlockDataPtr> & comparePtrs,const vector<BlockDataPtr> & writePtrs,MatrixLoadFlags matrixLoadFlag)1495 string generateComputeShader (const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& comparePtrs, const vector<BlockDataPtr>& writePtrs, MatrixLoadFlags matrixLoadFlag)
1496 {
1497 	std::ostringstream src;
1498 
1499 	if (uses16BitStorage(interface) || uses8BitStorage(interface) || usesRelaxedLayout(interface) || usesScalarLayout(interface))
1500 		src << "#version 450\n";
1501 	else
1502 		src << "#version 310 es\n";
1503 
1504 	src << "#extension GL_EXT_shader_16bit_storage : enable\n";
1505 	src << "#extension GL_EXT_shader_8bit_storage : enable\n";
1506 	src << "#extension GL_EXT_scalar_block_layout : enable\n";
1507 	src << "layout(local_size_x = 1) in;\n";
1508 	src << "\n";
1509 
1510 	// Atomic counter for counting passed invocations.
1511 	src << "layout(std140, binding = 0) buffer AcBlock { highp uint ac_numPassed; };\n\n";
1512 
1513 	std::vector<const StructType*> namedStructs;
1514 	interface.getNamedStructs(namedStructs);
1515 	for (std::vector<const StructType*>::const_iterator structIter = namedStructs.begin(); structIter != namedStructs.end(); structIter++)
1516 		src << glu::declare(*structIter) << ";\n";
1517 
1518 	{
1519 		for (int blockNdx = 0; blockNdx < interface.getNumBlocks(); blockNdx++)
1520 		{
1521 			const BufferBlock& block = interface.getBlock(blockNdx);
1522 			generateDeclaration(src, block, 1 + blockNdx);
1523 		}
1524 	}
1525 
1526 	// Comparison utilities.
1527 	src << "\n";
1528 	generateCompareFuncs(src, interface);
1529 
1530 	src << "\n"
1531 		   "void main (void)\n"
1532 		   "{\n"
1533 		   "	bool allOk = true;\n";
1534 
1535 	// Value compare.
1536 	generateCompareSrc(src, "allOk", interface, layout, comparePtrs, matrixLoadFlag);
1537 
1538 	src << "	if (allOk)\n"
1539 		<< "		ac_numPassed++;\n"
1540 		<< "\n";
1541 
1542 	// Value write.
1543 	generateWriteSrc(src, interface, layout, writePtrs);
1544 
1545 	src << "}\n";
1546 
1547 	return src.str();
1548 }
1549 
// Copies the raw data of a single buffer variable from a source block to a
// destination block, honoring each side's own array/matrix strides so the
// layouts need not match. dst must describe the same type as src and must not
// be larger in any dimension (asserted below). Unsized arrays (size 0 in the
// layout entry) take their actual length from the block's lastUnsizedArraySize.
void copyBufferVarData (const BufferVarLayoutEntry& dstEntry, const BlockDataPtr& dstBlockPtr, const BufferVarLayoutEntry& srcEntry, const BlockDataPtr& srcBlockPtr)
{
	DE_ASSERT(dstEntry.arraySize <= srcEntry.arraySize);
	DE_ASSERT(dstEntry.topLevelArraySize <= srcEntry.topLevelArraySize);
	DE_ASSERT(dstBlockPtr.lastUnsizedArraySize <= srcBlockPtr.lastUnsizedArraySize);
	DE_ASSERT(dstEntry.type == srcEntry.type);

	deUint8* const			dstBasePtr			= (deUint8*)dstBlockPtr.ptr + dstEntry.offset;
	const deUint8* const	srcBasePtr			= (const deUint8*)srcBlockPtr.ptr + srcEntry.offset;
	const int				scalarSize			= glu::getDataTypeScalarSize(dstEntry.type);
	const bool				isMatrix			= glu::isDataTypeMatrix(dstEntry.type);
	glu::DataType			scalarType			= glu::getDataTypeScalarType(dstEntry.type);
	const size_t			compSize			= getDataTypeByteSize(scalarType);
	// arraySize/topLevelArraySize of 0 marks a runtime-sized array; the actual
	// element count comes from the block pointer.
	const int				dstArraySize		= dstEntry.arraySize == 0 ? dstBlockPtr.lastUnsizedArraySize : dstEntry.arraySize;
	const int				dstArrayStride		= dstEntry.arrayStride;
	const int				dstTopLevelSize		= dstEntry.topLevelArraySize == 0 ? dstBlockPtr.lastUnsizedArraySize : dstEntry.topLevelArraySize;
	const int				dstTopLevelStride	= dstEntry.topLevelArrayStride;
	const int				srcArraySize		= srcEntry.arraySize == 0 ? srcBlockPtr.lastUnsizedArraySize : srcEntry.arraySize;
	const int				srcArrayStride		= srcEntry.arrayStride;
	const int				srcTopLevelSize		= srcEntry.topLevelArraySize == 0 ? srcBlockPtr.lastUnsizedArraySize : srcEntry.topLevelArraySize;
	const int				srcTopLevelStride	= srcEntry.topLevelArrayStride;

	DE_ASSERT(dstArraySize <= srcArraySize && dstTopLevelSize <= srcTopLevelSize);
	DE_UNREF(srcArraySize && srcTopLevelSize);

	// Iteration is bounded by the destination sizes; extra source elements are ignored.
	for (int topElemNdx = 0; topElemNdx < dstTopLevelSize; topElemNdx++)
	{
		deUint8* const			dstTopPtr	= dstBasePtr + topElemNdx*dstTopLevelStride;
		const deUint8* const	srcTopPtr	= srcBasePtr + topElemNdx*srcTopLevelStride;

		for (int elementNdx = 0; elementNdx < dstArraySize; elementNdx++)
		{
			deUint8* const			dstElemPtr	= dstTopPtr + elementNdx*dstArrayStride;
			const deUint8* const	srcElemPtr	= srcTopPtr + elementNdx*srcArrayStride;

			if (isMatrix)
			{
				// Matrices are copied component-by-component since src and dst
				// may differ in majorness and matrix stride.
				const int	numRows	= glu::getDataTypeMatrixNumRows(dstEntry.type);
				const int	numCols	= glu::getDataTypeMatrixNumColumns(dstEntry.type);

				for (int colNdx = 0; colNdx < numCols; colNdx++)
				{
					for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
					{
						deUint8*		dstCompPtr	= dstElemPtr + (dstEntry.isRowMajor ? rowNdx*dstEntry.matrixStride + colNdx*compSize
																						: colNdx*dstEntry.matrixStride + rowNdx*compSize);
						const deUint8*	srcCompPtr	= srcElemPtr + (srcEntry.isRowMajor ? rowNdx*srcEntry.matrixStride + colNdx*compSize
																						: colNdx*srcEntry.matrixStride + rowNdx*compSize);

						// Bounds checks: copy must stay inside both blocks.
						DE_ASSERT((deIntptr)(srcCompPtr + compSize) - (deIntptr)srcBlockPtr.ptr <= (deIntptr)srcBlockPtr.size);
						DE_ASSERT((deIntptr)(dstCompPtr + compSize) - (deIntptr)dstBlockPtr.ptr <= (deIntptr)dstBlockPtr.size);
						deMemcpy(dstCompPtr, srcCompPtr, compSize);
					}
				}
			}
			else
			{
				// Scalars/vectors are tightly packed within an element; copy in one go.
				DE_ASSERT((deIntptr)(srcElemPtr + scalarSize*compSize) - (deIntptr)srcBlockPtr.ptr <= (deIntptr)srcBlockPtr.size);
				DE_ASSERT((deIntptr)(dstElemPtr + scalarSize*compSize) - (deIntptr)dstBlockPtr.ptr <= (deIntptr)dstBlockPtr.size);
				deMemcpy(dstElemPtr, srcElemPtr, scalarSize*compSize);
			}
		}
	}
}
1614 
copyData(const BufferLayout & dstLayout,const vector<BlockDataPtr> & dstBlockPointers,const BufferLayout & srcLayout,const vector<BlockDataPtr> & srcBlockPointers)1615 void copyData (const BufferLayout& dstLayout, const vector<BlockDataPtr>& dstBlockPointers, const BufferLayout& srcLayout, const vector<BlockDataPtr>& srcBlockPointers)
1616 {
1617 	// \note Src layout is used as reference in case of activeVarIndices happens to be incorrect in dstLayout blocks.
1618 	int numBlocks = (int)srcLayout.blocks.size();
1619 
1620 	for (int srcBlockNdx = 0; srcBlockNdx < numBlocks; srcBlockNdx++)
1621 	{
1622 		const BlockLayoutEntry&		srcBlock	= srcLayout.blocks[srcBlockNdx];
1623 		const BlockDataPtr&			srcBlockPtr	= srcBlockPointers[srcBlockNdx];
1624 		int							dstBlockNdx	= dstLayout.getBlockIndex(srcBlock.name.c_str());
1625 
1626 		if (dstBlockNdx >= 0)
1627 		{
1628 			DE_ASSERT(de::inBounds(dstBlockNdx, 0, (int)dstBlockPointers.size()));
1629 
1630 			const BlockDataPtr& dstBlockPtr = dstBlockPointers[dstBlockNdx];
1631 
1632 			for (vector<int>::const_iterator srcVarNdxIter = srcBlock.activeVarIndices.begin(); srcVarNdxIter != srcBlock.activeVarIndices.end(); srcVarNdxIter++)
1633 			{
1634 				const BufferVarLayoutEntry&	srcEntry	= srcLayout.bufferVars[*srcVarNdxIter];
1635 				int							dstVarNdx	= dstLayout.getVariableIndex(srcEntry.name.c_str());
1636 
1637 				if (dstVarNdx >= 0)
1638 					copyBufferVarData(dstLayout.bufferVars[dstVarNdx], dstBlockPtr, srcEntry, srcBlockPtr);
1639 			}
1640 		}
1641 	}
1642 }
1643 
copyNonWrittenData(const BufferLayout & layout,const BufferBlock & block,int instanceNdx,const BlockDataPtr & srcBlockPtr,const BlockDataPtr & dstBlockPtr,const BufferVar & bufVar,const glu::SubTypeAccess & accessPath)1644 void copyNonWrittenData (
1645 	const BufferLayout&			layout,
1646 	const BufferBlock&			block,
1647 	int							instanceNdx,
1648 	const BlockDataPtr&			srcBlockPtr,
1649 	const BlockDataPtr&			dstBlockPtr,
1650 	const BufferVar&			bufVar,
1651 	const glu::SubTypeAccess&	accessPath)
1652 {
1653 	const VarType curType = accessPath.getType();
1654 
1655 	if (curType.isArrayType())
1656 	{
1657 		const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ? block.getLastUnsizedArraySize(instanceNdx) : curType.getArraySize();
1658 
1659 		for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
1660 			copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar, accessPath.element(elemNdx));
1661 	}
1662 	else if (curType.isStructType())
1663 	{
1664 		const int numMembers = curType.getStructPtr()->getNumMembers();
1665 
1666 		for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
1667 			copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar, accessPath.member(memberNdx));
1668 	}
1669 	else
1670 	{
1671 		DE_ASSERT(curType.isBasicType());
1672 
1673 		const string	apiName	= getAPIName(block, bufVar, accessPath.getPath());
1674 		const int		varNdx	= layout.getVariableIndex(apiName);
1675 
1676 		DE_ASSERT(varNdx >= 0);
1677 		{
1678 			const BufferVarLayoutEntry& varLayout = layout.bufferVars[varNdx];
1679 			copyBufferVarData(varLayout, dstBlockPtr, varLayout, srcBlockPtr);
1680 		}
1681 	}
1682 }
1683 
copyNonWrittenData(const ShaderInterface & interface,const BufferLayout & layout,const vector<BlockDataPtr> & srcPtrs,const vector<BlockDataPtr> & dstPtrs)1684 void copyNonWrittenData (const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& srcPtrs, const vector<BlockDataPtr>& dstPtrs)
1685 {
1686 	for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1687 	{
1688 		const BufferBlock&	block			= interface.getBlock(declNdx);
1689 		const bool			isArray			= block.isArray();
1690 		const int			numInstances	= isArray ? block.getArraySize() : 1;
1691 
1692 		DE_ASSERT(!isArray || block.getInstanceName());
1693 
1694 		for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
1695 		{
1696 			const string		instanceName	= block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
1697 			const int			blockNdx		= layout.getBlockIndex(instanceName);
1698 			const BlockDataPtr&	srcBlockPtr		= srcPtrs[blockNdx];
1699 			const BlockDataPtr&	dstBlockPtr		= dstPtrs[blockNdx];
1700 
1701 			for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1702 			{
1703 				const BufferVar& bufVar = *varIter;
1704 
1705 				if (bufVar.getFlags() & ACCESS_WRITE)
1706 					continue;
1707 
1708 				copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar, glu::SubTypeAccess(bufVar.getType()));
1709 			}
1710 		}
1711 	}
1712 }
1713 
compareComponents(glu::DataType scalarType,const void * ref,const void * res,int numComps)1714 bool compareComponents (glu::DataType scalarType, const void* ref, const void* res, int numComps)
1715 {
1716 	if (scalarType == glu::TYPE_FLOAT)
1717 	{
1718 		const float threshold = 0.05f; // Same as used in shaders - should be fine for values being used.
1719 
1720 		for (int ndx = 0; ndx < numComps; ndx++)
1721 		{
1722 			const float		refVal		= *((const float*)ref + ndx);
1723 			const float		resVal		= *((const float*)res + ndx);
1724 
1725 			if (deFloatAbs(resVal - refVal) >= threshold)
1726 				return false;
1727 		}
1728 	}
1729 	else if (scalarType == glu::TYPE_BOOL)
1730 	{
1731 		for (int ndx = 0; ndx < numComps; ndx++)
1732 		{
1733 			const deUint32	refVal		= *((const deUint32*)ref + ndx);
1734 			const deUint32	resVal		= *((const deUint32*)res + ndx);
1735 
1736 			if ((refVal != 0) != (resVal != 0))
1737 				return false;
1738 		}
1739 	}
1740 	else if (scalarType == glu::TYPE_INT8 || scalarType == glu::TYPE_UINT8)
1741 	{
1742 		return deMemCmp(ref, res, numComps*sizeof(deUint8)) == 0;
1743 	}
1744 	else if (scalarType == glu::TYPE_INT16 || scalarType == glu::TYPE_UINT16 || scalarType == glu::TYPE_FLOAT16)
1745 	{
1746 		return deMemCmp(ref, res, numComps*sizeof(deUint16)) == 0;
1747 	}
1748 	else
1749 	{
1750 		DE_ASSERT(scalarType == glu::TYPE_INT || scalarType == glu::TYPE_UINT);
1751 
1752 		return deMemCmp(ref, res, numComps*sizeof(deUint32)) == 0;
1753 	}
1754 
1755 	return true;
1756 }
1757 
// Compares the data of a single buffer variable between a reference block and
// a result block, honoring each side's own strides/majorness. Logs up to
// maxPrints-1 individual mismatches plus a summary line; returns true if all
// compared elements matched. Result sizes must not exceed reference sizes.
bool compareBufferVarData (tcu::TestLog& log, const BufferVarLayoutEntry& refEntry, const BlockDataPtr& refBlockPtr, const BufferVarLayoutEntry& resEntry, const BlockDataPtr& resBlockPtr)
{
	DE_ASSERT(resEntry.arraySize <= refEntry.arraySize);
	DE_ASSERT(resEntry.topLevelArraySize <= refEntry.topLevelArraySize);
	DE_ASSERT(resBlockPtr.lastUnsizedArraySize <= refBlockPtr.lastUnsizedArraySize);
	DE_ASSERT(resEntry.type == refEntry.type);

	deUint8* const			resBasePtr			= (deUint8*)resBlockPtr.ptr + resEntry.offset;
	const deUint8* const	refBasePtr			= (const deUint8*)refBlockPtr.ptr + refEntry.offset;
	const glu::DataType		scalarType			= glu::getDataTypeScalarType(refEntry.type);
	const int				scalarSize			= glu::getDataTypeScalarSize(resEntry.type);
	const bool				isMatrix			= glu::isDataTypeMatrix(resEntry.type);
	const size_t			compSize			= getDataTypeByteSize(scalarType);
	const int				maxPrints			= 3;	// Cap on per-element mismatch log entries.
	int						numFailed			= 0;

	// Size 0 marks a runtime-sized array; actual length comes from the block pointer.
	const int				resArraySize		= resEntry.arraySize == 0 ? resBlockPtr.lastUnsizedArraySize : resEntry.arraySize;
	const int				resArrayStride		= resEntry.arrayStride;
	const int				resTopLevelSize		= resEntry.topLevelArraySize == 0 ? resBlockPtr.lastUnsizedArraySize : resEntry.topLevelArraySize;
	const int				resTopLevelStride	= resEntry.topLevelArrayStride;
	const int				refArraySize		= refEntry.arraySize == 0 ? refBlockPtr.lastUnsizedArraySize : refEntry.arraySize;
	const int				refArrayStride		= refEntry.arrayStride;
	const int				refTopLevelSize		= refEntry.topLevelArraySize == 0 ? refBlockPtr.lastUnsizedArraySize : refEntry.topLevelArraySize;
	const int				refTopLevelStride	= refEntry.topLevelArrayStride;

	DE_ASSERT(resArraySize <= refArraySize && resTopLevelSize <= refTopLevelSize);
	DE_UNREF(refArraySize && refTopLevelSize);

	// Iteration is bounded by the result sizes; extra reference elements are ignored.
	for (int topElemNdx = 0; topElemNdx < resTopLevelSize; topElemNdx++)
	{
		deUint8* const			resTopPtr	= resBasePtr + topElemNdx*resTopLevelStride;
		const deUint8* const	refTopPtr	= refBasePtr + topElemNdx*refTopLevelStride;

		for (int elementNdx = 0; elementNdx < resArraySize; elementNdx++)
		{
			deUint8* const			resElemPtr	= resTopPtr + elementNdx*resArrayStride;
			const deUint8* const	refElemPtr	= refTopPtr + elementNdx*refArrayStride;

			if (isMatrix)
			{
				// Matrices are compared one component at a time since the two
				// sides may differ in majorness and matrix stride.
				const int	numRows	= glu::getDataTypeMatrixNumRows(resEntry.type);
				const int	numCols	= glu::getDataTypeMatrixNumColumns(resEntry.type);
				bool		isOk	= true;

				for (int colNdx = 0; colNdx < numCols; colNdx++)
				{
					for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
					{
						deUint8*		resCompPtr	= resElemPtr + (resEntry.isRowMajor ? rowNdx*resEntry.matrixStride + colNdx*compSize
																						: colNdx*resEntry.matrixStride + rowNdx*compSize);
						const deUint8*	refCompPtr	= refElemPtr + (refEntry.isRowMajor ? rowNdx*refEntry.matrixStride + colNdx*compSize
																						: colNdx*refEntry.matrixStride + rowNdx*compSize);

						// Bounds checks: comparison must stay inside both blocks.
						DE_ASSERT((deIntptr)(refCompPtr + compSize) - (deIntptr)refBlockPtr.ptr <= (deIntptr)refBlockPtr.size);
						DE_ASSERT((deIntptr)(resCompPtr + compSize) - (deIntptr)resBlockPtr.ptr <= (deIntptr)resBlockPtr.size);

						isOk = isOk && compareComponents(scalarType, resCompPtr, refCompPtr, 1);
					}
				}

				if (!isOk)
				{
					numFailed += 1;
					if (numFailed < maxPrints)
					{
						// Render both matrices in GLSL-literal form for the log.
						std::ostringstream expected, got;
						generateImmMatrixSrc(expected, refEntry.type, refEntry.matrixStride, refEntry.isRowMajor, refElemPtr);
						generateImmMatrixSrc(got, resEntry.type, resEntry.matrixStride, resEntry.isRowMajor, resElemPtr);
						log << TestLog::Message << "ERROR: mismatch in " << refEntry.name << ", top-level ndx " << topElemNdx << ", bottom-level ndx " << elementNdx << ":\n"
												<< "  expected " << expected.str() << "\n"
												<< "  got " << got.str()
							<< TestLog::EndMessage;
					}
				}
			}
			else
			{
				// Scalars/vectors are compared as one contiguous run of components.
				DE_ASSERT((deIntptr)(refElemPtr + scalarSize*compSize) - (deIntptr)refBlockPtr.ptr <= (deIntptr)refBlockPtr.size);
				DE_ASSERT((deIntptr)(resElemPtr + scalarSize*compSize) - (deIntptr)resBlockPtr.ptr <= (deIntptr)resBlockPtr.size);

				const bool isOk = compareComponents(scalarType, resElemPtr, refElemPtr, scalarSize);

				if (!isOk)
				{
					numFailed += 1;
					if (numFailed < maxPrints)
					{
						std::ostringstream expected, got;
						generateImmScalarVectorSrc(expected, refEntry.type, refElemPtr);
						generateImmScalarVectorSrc(got, resEntry.type, resElemPtr);
						log << TestLog::Message << "ERROR: mismatch in " << refEntry.name << ", top-level ndx " << topElemNdx << ", bottom-level ndx " << elementNdx << ":\n"
												<< "  expected " << expected.str() << "\n"
												<< "  got " << got.str()
							<< TestLog::EndMessage;
					}
				}
			}
		}
	}

	// Summarize when mismatches exceeded the per-element print budget.
	if (numFailed >= maxPrints)
		log << TestLog::Message << "... (" << numFailed << " failures for " << refEntry.name << " in total)" << TestLog::EndMessage;

	return numFailed == 0;
}
1863 
compareData(tcu::TestLog & log,const BufferLayout & refLayout,const vector<BlockDataPtr> & refBlockPointers,const BufferLayout & resLayout,const vector<BlockDataPtr> & resBlockPointers)1864 bool compareData (tcu::TestLog& log, const BufferLayout& refLayout, const vector<BlockDataPtr>& refBlockPointers, const BufferLayout& resLayout, const vector<BlockDataPtr>& resBlockPointers)
1865 {
1866 	const int	numBlocks	= (int)refLayout.blocks.size();
1867 	bool		allOk		= true;
1868 
1869 	for (int refBlockNdx = 0; refBlockNdx < numBlocks; refBlockNdx++)
1870 	{
1871 		const BlockLayoutEntry&		refBlock	= refLayout.blocks[refBlockNdx];
1872 		const BlockDataPtr&			refBlockPtr	= refBlockPointers[refBlockNdx];
1873 		int							resBlockNdx	= resLayout.getBlockIndex(refBlock.name.c_str());
1874 
1875 		if (resBlockNdx >= 0)
1876 		{
1877 			DE_ASSERT(de::inBounds(resBlockNdx, 0, (int)resBlockPointers.size()));
1878 
1879 			const BlockDataPtr& resBlockPtr = resBlockPointers[resBlockNdx];
1880 
1881 			for (vector<int>::const_iterator refVarNdxIter = refBlock.activeVarIndices.begin(); refVarNdxIter != refBlock.activeVarIndices.end(); refVarNdxIter++)
1882 			{
1883 				const BufferVarLayoutEntry&	refEntry	= refLayout.bufferVars[*refVarNdxIter];
1884 				int							resVarNdx	= resLayout.getVariableIndex(refEntry.name.c_str());
1885 
1886 				if (resVarNdx >= 0)
1887 				{
1888 					const BufferVarLayoutEntry& resEntry = resLayout.bufferVars[resVarNdx];
1889 					allOk = compareBufferVarData(log, refEntry, refBlockPtr, resEntry, resBlockPtr) && allOk;
1890 				}
1891 			}
1892 		}
1893 	}
1894 
1895 	return allOk;
1896 }
1897 
getBlockAPIName(const BufferBlock & block,int instanceNdx)1898 string getBlockAPIName (const BufferBlock& block, int instanceNdx)
1899 {
1900 	DE_ASSERT(block.isArray() || instanceNdx == 0);
1901 	return block.getBlockName() + (block.isArray() ? ("[" + de::toString(instanceNdx) + "]") : string());
1902 }
1903 
1904 // \note Some implementations don't report block members in the order they are declared.
1905 //		 For checking whether size has to be adjusted by some top-level array actual size,
1906 //		 we only need to know a) whether there is a unsized top-level array, and b)
1907 //		 what is stride of that array.
1908 
hasUnsizedArray(const BufferLayout & layout,const BlockLayoutEntry & entry)1909 static bool hasUnsizedArray (const BufferLayout& layout, const BlockLayoutEntry& entry)
1910 {
1911 	for (vector<int>::const_iterator varNdx = entry.activeVarIndices.begin(); varNdx != entry.activeVarIndices.end(); ++varNdx)
1912 	{
1913 		if (isUnsizedArray(layout.bufferVars[*varNdx]))
1914 			return true;
1915 	}
1916 
1917 	return false;
1918 }
1919 
getUnsizedArrayStride(const BufferLayout & layout,const BlockLayoutEntry & entry)1920 static int getUnsizedArrayStride (const BufferLayout& layout, const BlockLayoutEntry& entry)
1921 {
1922 	for (vector<int>::const_iterator varNdx = entry.activeVarIndices.begin(); varNdx != entry.activeVarIndices.end(); ++varNdx)
1923 	{
1924 		const BufferVarLayoutEntry& varEntry = layout.bufferVars[*varNdx];
1925 
1926 		if (varEntry.arraySize == 0)
1927 			return varEntry.arrayStride;
1928 		else if (varEntry.topLevelArraySize == 0)
1929 			return varEntry.topLevelArrayStride;
1930 	}
1931 
1932 	return 0;
1933 }
1934 
computeBufferSizes(const ShaderInterface & interface,const BufferLayout & layout)1935 vector<int> computeBufferSizes (const ShaderInterface& interface, const BufferLayout& layout)
1936 {
1937 	vector<int> sizes(layout.blocks.size());
1938 
1939 	for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1940 	{
1941 		const BufferBlock&	block			= interface.getBlock(declNdx);
1942 		const bool			isArray			= block.isArray();
1943 		const int			numInstances	= isArray ? block.getArraySize() : 1;
1944 
1945 		for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
1946 		{
1947 			const string	apiName		= getBlockAPIName(block, instanceNdx);
1948 			const int		blockNdx	= layout.getBlockIndex(apiName);
1949 
1950 			if (blockNdx >= 0)
1951 			{
1952 				const BlockLayoutEntry&		blockLayout		= layout.blocks[blockNdx];
1953 				const int					baseSize		= blockLayout.size;
1954 				const bool					isLastUnsized	= hasUnsizedArray(layout, blockLayout);
1955 				const int					lastArraySize	= isLastUnsized ? block.getLastUnsizedArraySize(instanceNdx) : 0;
1956 				const int					stride			= isLastUnsized ? getUnsizedArrayStride(layout, blockLayout) : 0;
1957 
1958 				sizes[blockNdx] = baseSize + lastArraySize*stride;
1959 			}
1960 		}
1961 	}
1962 
1963 	return sizes;
1964 }
1965 
getBlockDataPtr(const BufferLayout & layout,const BlockLayoutEntry & blockLayout,void * ptr,int bufferSize)1966 BlockDataPtr getBlockDataPtr (const BufferLayout& layout, const BlockLayoutEntry& blockLayout, void* ptr, int bufferSize)
1967 {
1968 	const bool	isLastUnsized	= hasUnsizedArray(layout, blockLayout);
1969 	const int	baseSize		= blockLayout.size;
1970 
1971 	if (isLastUnsized)
1972 	{
1973 		const int		lastArrayStride	= getUnsizedArrayStride(layout, blockLayout);
1974 		const int		lastArraySize	= (bufferSize-baseSize) / (lastArrayStride ? lastArrayStride : 1);
1975 
1976 		DE_ASSERT(baseSize + lastArraySize*lastArrayStride == bufferSize);
1977 
1978 		return BlockDataPtr(ptr, bufferSize, lastArraySize);
1979 	}
1980 	else
1981 		return BlockDataPtr(ptr, bufferSize, 0);
1982 }
1983 
// Simple handle + size pair for a buffer object.
// NOTE(review): the deUint32 handle looks like a leftover from the GLES
// version of this code; the Vulkan path uses vk::VkBuffer - confirm this
// struct is still referenced.
struct Buffer
{
	deUint32				buffer;	// Buffer object handle.
	int						size;	// Buffer size in bytes.

	Buffer (deUint32 buffer_, int size_) : buffer(buffer_), size(size_) {}
	Buffer (void) : buffer(0), size(0) {}
};
1992 
// Describes where one block's data lives when several blocks share buffers.
struct BlockLocation
{
	int						index;	// Index of the backing buffer.
	int						offset;	// Byte offset of the block within that buffer.
	int						size;	// Size of the block's data in bytes.

	BlockLocation (int index_, int offset_, int size_) : index(index_), offset(offset_), size(size_) {}
	BlockLocation (void) : index(0), offset(0), size(0) {}
};
2002 
initRefDataStorage(const ShaderInterface & interface,const BufferLayout & layout,RefDataStorage & storage)2003 void initRefDataStorage (const ShaderInterface& interface, const BufferLayout& layout, RefDataStorage& storage)
2004 {
2005 	DE_ASSERT(storage.data.empty() && storage.pointers.empty());
2006 
2007 	const vector<int>	bufferSizes		= computeBufferSizes(interface, layout);
2008 	int					totalSize		= 0;
2009 	const int			vec4Alignment	= (int)sizeof(deUint32)*4;
2010 
2011 	for (vector<int>::const_iterator sizeIter = bufferSizes.begin(); sizeIter != bufferSizes.end(); ++sizeIter)
2012 	{
2013 		// Include enough space for alignment of individual blocks
2014 		totalSize += deRoundUp32(*sizeIter, vec4Alignment);
2015 	}
2016 
2017 	storage.data.resize(totalSize);
2018 
2019 	// Pointers for each block.
2020 	{
2021 		deUint8*	basePtr		= storage.data.empty() ? DE_NULL : &storage.data[0];
2022 		int			curOffset	= 0;
2023 
2024 		DE_ASSERT(bufferSizes.size() == layout.blocks.size());
2025 		DE_ASSERT(totalSize == 0 || basePtr);
2026 
2027 		storage.pointers.resize(layout.blocks.size());
2028 
2029 		for (int blockNdx = 0; blockNdx < (int)layout.blocks.size(); blockNdx++)
2030 		{
2031 			const BlockLayoutEntry&	blockLayout		= layout.blocks[blockNdx];
2032 			const int				bufferSize		= bufferSizes[blockNdx];
2033 
2034 			storage.pointers[blockNdx] = getBlockDataPtr(layout, blockLayout, basePtr + curOffset, bufferSize);
2035 
2036 			// Ensure each new block starts fully aligned to avoid unaligned host accesses
2037 			curOffset += deRoundUp32(bufferSize, vec4Alignment);
2038 		}
2039 	}
2040 }
2041 
2042 
// Translates per-block locations (buffer index, offset, size) into concrete
// BlockDataPtrs over the mapped buffer pointers.
vector<BlockDataPtr> blockLocationsToPtrs (const BufferLayout& layout, const vector<BlockLocation>& blockLocations, const vector<void*>& bufPtrs)
{
	DE_ASSERT(layout.blocks.size() == blockLocations.size());

	vector<BlockDataPtr> blockPtrs(blockLocations.size());

	for (size_t blockNdx = 0; blockNdx < layout.blocks.size(); blockNdx++)
	{
		const BlockLocation&	location	= blockLocations[blockNdx];
		void* const				dataPtr		= (deUint8*)bufPtrs[location.index] + location.offset;

		blockPtrs[blockNdx] = getBlockDataPtr(layout, layout.blocks[blockNdx], dataPtr, location.size);
	}

	return blockPtrs;
}
2059 
2060 } // anonymous (utilities)
2061 
allocateAndBindMemory(Context & context,vk::VkBuffer buffer,vk::MemoryRequirement memReqs)2062 de::MovePtr<vk::Allocation> allocateAndBindMemory (Context& context, vk::VkBuffer buffer, vk::MemoryRequirement memReqs)
2063 {
2064 	const vk::DeviceInterface&		vkd		= context.getDeviceInterface();
2065 	const vk::VkMemoryRequirements	bufReqs	= vk::getBufferMemoryRequirements(vkd, context.getDevice(), buffer);
2066 	de::MovePtr<vk::Allocation>		memory	= context.getDefaultAllocator().allocate(bufReqs, memReqs);
2067 
2068 	vkd.bindBufferMemory(context.getDevice(), buffer, memory->getMemory(), memory->getOffset());
2069 
2070 	return memory;
2071 }
2072 
createBuffer(Context & context,vk::VkDeviceSize bufferSize,vk::VkBufferUsageFlags usageFlags)2073 vk::Move<vk::VkBuffer> createBuffer (Context& context, vk::VkDeviceSize bufferSize, vk::VkBufferUsageFlags usageFlags)
2074 {
2075 	const vk::VkDevice			vkDevice			= context.getDevice();
2076 	const vk::DeviceInterface&	vk					= context.getDeviceInterface();
2077 	const deUint32			queueFamilyIndex	= context.getUniversalQueueFamilyIndex();
2078 
2079 	const vk::VkBufferCreateInfo	bufferInfo		=
2080 	{
2081 		vk::VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// VkStructureType		sType;
2082 		DE_NULL,									// const void*			pNext;
2083 		0u,											// VkBufferCreateFlags	flags;
2084 		bufferSize,									// VkDeviceSize			size;
2085 		usageFlags,									// VkBufferUsageFlags	usage;
2086 		vk::VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode		sharingMode;
2087 		1u,											// deUint32				queueFamilyCount;
2088 		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
2089 	};
2090 
2091 	return vk::createBuffer(vk, vkDevice, &bufferInfo);
2092 }
2093 
2094 // SSBOLayoutCaseInstance
2095 
// Test instance that runs one SSBO layout case: uploads initial data, runs the
// generated compute shader, and verifies the results against reference data.
class SSBOLayoutCaseInstance : public TestInstance
{
public:
								SSBOLayoutCaseInstance	(Context&					context,
														SSBOLayoutCase::BufferMode	bufferMode,
														const ShaderInterface&		interface,
														const BufferLayout&			refLayout,
														const RefDataStorage&		initialData,
														const RefDataStorage&		writeData);
	virtual						~SSBOLayoutCaseInstance	(void);
	virtual tcu::TestStatus		iterate						(void);

private:
	// \note References below are owned by the parent SSBOLayoutCase, which
	//       outlives this instance.
	SSBOLayoutCase::BufferMode	m_bufferMode;	// Single buffer vs. buffer-per-block.
	const ShaderInterface&		m_interface;
	const BufferLayout&			m_refLayout;
	const RefDataStorage&		m_initialData;	// Initial data stored in buffer.
	const RefDataStorage&		m_writeData;	// Data written by compute shader.


	typedef de::SharedPtr<vk::Unique<vk::VkBuffer> >	VkBufferSp;
	typedef de::SharedPtr<vk::Allocation>				AllocationSp;

	// Buffers and their backing allocations created during iterate().
	std::vector<VkBufferSp>		m_uniformBuffers;
	std::vector<AllocationSp>	m_uniformAllocs;
};
2122 
// Stores references to case data owned by the parent SSBOLayoutCase; no
// Vulkan resources are created until iterate().
SSBOLayoutCaseInstance::SSBOLayoutCaseInstance (Context&					context,
												SSBOLayoutCase::BufferMode	bufferMode,
												const ShaderInterface&		interface,
												const BufferLayout&			refLayout,
												const RefDataStorage&		initialData,
												const RefDataStorage&		writeData)
	: TestInstance	(context)
	, m_bufferMode	(bufferMode)
	, m_interface	(interface)
	, m_refLayout	(refLayout)
	, m_initialData	(initialData)
	, m_writeData	(writeData)
{
}
2137 
// Buffers/allocations are released automatically by the smart-pointer members.
SSBOLayoutCaseInstance::~SSBOLayoutCaseInstance (void)
{
}
2141 
iterate(void)2142 tcu::TestStatus SSBOLayoutCaseInstance::iterate (void)
2143 {
2144 	// todo: add compute stage availability check
2145 	const vk::DeviceInterface&	vk					= m_context.getDeviceInterface();
2146 	const vk::VkDevice			device				= m_context.getDevice();
2147 	const vk::VkQueue			queue				= m_context.getUniversalQueue();
2148 	const deUint32				queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
2149 
2150 	// Create descriptor set
2151 	const deUint32 acBufferSize = 1024;
2152 	vk::Move<vk::VkBuffer> acBuffer (createBuffer(m_context, acBufferSize, vk:: VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
2153 	de::UniquePtr<vk::Allocation> acBufferAlloc (allocateAndBindMemory(m_context, *acBuffer, vk::MemoryRequirement::HostVisible));
2154 
2155 	deMemset(acBufferAlloc->getHostPtr(), 0, acBufferSize);
2156 	flushMappedMemoryRange(vk, device, acBufferAlloc->getMemory(), acBufferAlloc->getOffset(), acBufferSize);
2157 
2158 	vk::DescriptorSetLayoutBuilder setLayoutBuilder;
2159 	vk::DescriptorPoolBuilder poolBuilder;
2160 
2161 	setLayoutBuilder
2162 		.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
2163 
2164 	int numBlocks = 0;
2165 	const int numBindings = m_interface.getNumBlocks();
2166 	for (int bindingNdx = 0; bindingNdx < numBindings; bindingNdx++)
2167 	{
2168 		const BufferBlock& block = m_interface.getBlock(bindingNdx);
2169 		if (block.isArray())
2170 		{
2171 			setLayoutBuilder
2172 				.addArrayBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, block.getArraySize(), vk::VK_SHADER_STAGE_COMPUTE_BIT);
2173 			numBlocks += block.getArraySize();
2174 		}
2175 		else
2176 		{
2177 			setLayoutBuilder
2178 				.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
2179 			numBlocks += 1;
2180 		}
2181 	}
2182 
2183 	poolBuilder
2184 		.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)(1 + numBlocks));
2185 
2186 	const vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(setLayoutBuilder.build(vk, device));
2187 	const vk::Unique<vk::VkDescriptorPool> descriptorPool(poolBuilder.build(vk, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2188 
2189 	const vk::VkDescriptorSetAllocateInfo allocInfo =
2190 	{
2191 		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2192 		DE_NULL,
2193 		*descriptorPool,
2194 		1u,
2195 		&descriptorSetLayout.get(),
2196 	};
2197 
2198 	const vk::Unique<vk::VkDescriptorSet> descriptorSet(allocateDescriptorSet(vk, device, &allocInfo));
2199 	const vk::VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*acBuffer, 0ull, acBufferSize);
2200 
2201 	vk::DescriptorSetUpdateBuilder setUpdateBuilder;
2202 	std::vector<vk::VkDescriptorBufferInfo>	descriptors(numBlocks);
2203 
2204 	setUpdateBuilder
2205 		.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo);
2206 
2207 	vector<BlockDataPtr>  mappedBlockPtrs;
2208 
2209 	// Upload base buffers
2210 	const std::vector<int> bufferSizes	= computeBufferSizes(m_interface, m_refLayout);
2211 	{
2212 		std::vector<void*>				mapPtrs;
2213 		std::vector<BlockLocation>		blockLocations	(numBlocks);
2214 
2215 		DE_ASSERT(bufferSizes.size() == m_refLayout.blocks.size());
2216 
2217 		if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
2218 		{
2219 			mapPtrs.resize(numBlocks);
2220 			for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2221 			{
2222 				const deUint32 bufferSize = bufferSizes[blockNdx];
2223 				DE_ASSERT(bufferSize > 0);
2224 
2225 				blockLocations[blockNdx] = BlockLocation(blockNdx, 0, bufferSize);
2226 
2227 				vk::Move<vk::VkBuffer>				buffer		= createBuffer(m_context, bufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
2228 				de::MovePtr<vk::Allocation>			alloc		= allocateAndBindMemory(m_context, *buffer, vk::MemoryRequirement::HostVisible);
2229 
2230 				descriptors[blockNdx] = makeDescriptorBufferInfo(*buffer, 0ull, bufferSize);
2231 
2232 				mapPtrs[blockNdx] = alloc->getHostPtr();
2233 
2234 				m_uniformBuffers.push_back(VkBufferSp(new vk::Unique<vk::VkBuffer>(buffer)));
2235 				m_uniformAllocs.push_back(AllocationSp(alloc.release()));
2236 			}
2237 		}
2238 		else
2239 		{
2240 			DE_ASSERT(m_bufferMode == SSBOLayoutCase::BUFFERMODE_SINGLE);
2241 
2242 			vk::VkPhysicalDeviceProperties properties;
2243 			m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
2244 			const int	bindingAlignment	= (int)properties.limits.minStorageBufferOffsetAlignment;
2245 			int			curOffset			= 0;
2246 			for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2247 			{
2248 				const int bufferSize = bufferSizes[blockNdx];
2249 				DE_ASSERT(bufferSize > 0);
2250 
2251 				if (bindingAlignment > 0)
2252 					curOffset = deRoundUp32(curOffset, bindingAlignment);
2253 
2254 				blockLocations[blockNdx] = BlockLocation(0, curOffset, bufferSize);
2255 				curOffset += bufferSize;
2256 			}
2257 
2258 			const int						totalBufferSize = curOffset;
2259 			vk::Move<vk::VkBuffer>			buffer			= createBuffer(m_context, totalBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
2260 			de::MovePtr<vk::Allocation>		alloc			= allocateAndBindMemory(m_context, *buffer, vk::MemoryRequirement::HostVisible);
2261 
2262 			mapPtrs.push_back(alloc->getHostPtr());
2263 
2264 			for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2265 			{
2266 				const deUint32						bufferSize	= bufferSizes[blockNdx];
2267 				const deUint32						offset		= blockLocations[blockNdx].offset;
2268 
2269 				descriptors[blockNdx] = makeDescriptorBufferInfo(*buffer, offset, bufferSize);
2270 			}
2271 
2272 			m_uniformBuffers.push_back(VkBufferSp(new vk::Unique<vk::VkBuffer>(buffer)));
2273 			m_uniformAllocs.push_back(AllocationSp(alloc.release()));
2274 		}
2275 
2276 		// Update remaining bindings
2277 		{
2278 			int blockNdx = 0;
2279 			for (int bindingNdx = 0; bindingNdx < numBindings; ++bindingNdx)
2280 			{
2281 				const BufferBlock&	block				= m_interface.getBlock(bindingNdx);
2282 				const int			numBlocksInBinding	= (block.isArray() ? block.getArraySize() : 1);
2283 
2284 				setUpdateBuilder.writeArray(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(bindingNdx + 1),
2285 					vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBlocksInBinding, &descriptors[blockNdx]);
2286 
2287 				blockNdx += numBlocksInBinding;
2288 			}
2289 		}
2290 
2291 		// Copy the initial data to the storage buffers
2292 		{
2293 			mappedBlockPtrs = blockLocationsToPtrs(m_refLayout, blockLocations, mapPtrs);
2294 			copyData(m_refLayout, mappedBlockPtrs, m_refLayout, m_initialData.pointers);
2295 
2296 			for (size_t allocNdx = 0; allocNdx < m_uniformAllocs.size(); allocNdx++)
2297 			{
2298 				vk::Allocation* alloc = m_uniformAllocs[allocNdx].get();
2299 				flushMappedMemoryRange(vk, device, alloc->getMemory(), alloc->getOffset(), VK_WHOLE_SIZE);
2300 			}
2301 		}
2302 	}
2303 
2304 	setUpdateBuilder.update(vk, device);
2305 
2306 	const vk::VkPipelineLayoutCreateInfo pipelineLayoutParams =
2307 	{
2308 		vk::VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,	// VkStructureType				sType;
2309 		DE_NULL,											// const void*					pNext;
2310 		(vk::VkPipelineLayoutCreateFlags)0,
2311 		1u,													// deUint32						descriptorSetCount;
2312 		&*descriptorSetLayout,								// const VkDescriptorSetLayout*	pSetLayouts;
2313 		0u,													// deUint32						pushConstantRangeCount;
2314 		DE_NULL,											// const VkPushConstantRange*	pPushConstantRanges;
2315 	};
2316 	vk::Move<vk::VkPipelineLayout> pipelineLayout(createPipelineLayout(vk, device, &pipelineLayoutParams));
2317 
2318 	vk::Move<vk::VkShaderModule> shaderModule (createShaderModule(vk, device, m_context.getBinaryCollection().get("compute"), 0));
2319 	const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
2320 	{
2321 		vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,// VkStructureType				sType;
2322 		DE_NULL,												// const void*					pNext;
2323 		(vk::VkPipelineShaderStageCreateFlags)0,
2324 		vk::VK_SHADER_STAGE_COMPUTE_BIT,						// VkShaderStage				stage;
2325 		*shaderModule,											// VkShader						shader;
2326 		"main",													//
2327 		DE_NULL,												// const VkSpecializationInfo*	pSpecializationInfo;
2328 	};
2329 	const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
2330 	{
2331 		vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType					sType;
2332 		DE_NULL,											// const void*						pNext;
2333 		0,													// VkPipelineCreateFlags			flags;
2334 		pipelineShaderStageParams,							// VkPipelineShaderStageCreateInfo	stage;
2335 		*pipelineLayout,									// VkPipelineLayout					layout;
2336 		DE_NULL,											// VkPipeline						basePipelineHandle;
2337 		0,													// deInt32							basePipelineIndex;
2338 	};
2339 	vk::Move<vk::VkPipeline> pipeline(createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo));
2340 
2341 	vk::Move<vk::VkCommandPool> cmdPool (createCommandPool(vk, device, vk::VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
2342 	vk::Move<vk::VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vk, device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2343 
2344 	beginCommandBuffer(vk, *cmdBuffer, 0u);
2345 
2346 	vk.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2347 	vk.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2348 
2349 	vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
2350 
2351 	// Add barriers for shader writes to storage buffers before host access
2352 	std::vector<vk::VkBufferMemoryBarrier> barriers;
2353 	if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
2354 	{
2355 		for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2356 		{
2357 			const vk::VkBuffer uniformBuffer = m_uniformBuffers[blockNdx].get()->get();
2358 
2359 			const vk::VkBufferMemoryBarrier barrier	=
2360 			{
2361 				vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
2362 				DE_NULL,
2363 				vk::VK_ACCESS_SHADER_WRITE_BIT,
2364 				vk::VK_ACCESS_HOST_READ_BIT,
2365 				VK_QUEUE_FAMILY_IGNORED,
2366 				VK_QUEUE_FAMILY_IGNORED,
2367 				uniformBuffer,
2368 				0u,
2369 				static_cast<vk::VkDeviceSize>(bufferSizes[blockNdx])
2370 			};
2371 			barriers.push_back(barrier);
2372 		}
2373 	}
2374 	else
2375 	{
2376 		const vk::VkBuffer uniformBuffer = m_uniformBuffers[0].get()->get();
2377 
2378 		vk::VkDeviceSize totalSize	= 0;
2379 		for (size_t bufferNdx = 0; bufferNdx < bufferSizes.size(); bufferNdx++)
2380 			totalSize += bufferSizes[bufferNdx];
2381 
2382 		const vk::VkBufferMemoryBarrier barrier	=
2383 		{
2384 			vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
2385 			DE_NULL,
2386 			vk::VK_ACCESS_SHADER_WRITE_BIT,
2387 			vk::VK_ACCESS_HOST_READ_BIT,
2388 			VK_QUEUE_FAMILY_IGNORED,
2389 			VK_QUEUE_FAMILY_IGNORED,
2390 			uniformBuffer,
2391 			0u,
2392 			totalSize
2393 		};
2394 		barriers.push_back(barrier);
2395 	}
2396 	vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
2397 						  0u, DE_NULL, static_cast<deUint32>(barriers.size()), &barriers[0], 0u, DE_NULL);
2398 
2399 	endCommandBuffer(vk, *cmdBuffer);
2400 
2401 	submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
2402 
2403 	// Read back ac_numPassed data
2404 	bool counterOk;
2405 	{
2406 		const int refCount = 1;
2407 		int resCount = 0;
2408 
2409 		invalidateMappedMemoryRange(vk, device, acBufferAlloc->getMemory(), acBufferAlloc->getOffset(), acBufferSize);
2410 
2411 		resCount = *((const int*)acBufferAlloc->getHostPtr());
2412 
2413 		counterOk = (refCount == resCount);
2414 		if (!counterOk)
2415 		{
2416 			m_context.getTestContext().getLog() << TestLog::Message << "Error: ac_numPassed = " << resCount << ", expected " << refCount << TestLog::EndMessage;
2417 		}
2418 	}
2419 
2420 	for (size_t allocNdx = 0; allocNdx < m_uniformAllocs.size(); allocNdx++)
2421 	{
2422 		vk::Allocation *alloc = m_uniformAllocs[allocNdx].get();
2423 		invalidateMappedMemoryRange(vk, device, alloc->getMemory(), alloc->getOffset(), VK_WHOLE_SIZE);
2424 	}
2425 
2426 	// Validate result
2427 	const bool compareOk = compareData(m_context.getTestContext().getLog(), m_refLayout, m_writeData.pointers, m_refLayout, mappedBlockPtrs);
2428 
2429 	if (compareOk && counterOk)
2430 		return tcu::TestStatus::pass("Result comparison and counter values are OK");
2431 	else if (!compareOk && counterOk)
2432 		return tcu::TestStatus::fail("Result comparison failed");
2433 	else if (compareOk && !counterOk)
2434 		return tcu::TestStatus::fail("Counter value incorrect");
2435 	else
2436 		return tcu::TestStatus::fail("Result comparison and counter values are incorrect");
2437 }
2438 
2439 // SSBOLayoutCase.
2440 
// Stores the case configuration only; the reference layout, reference data and
// shader source are generated later in init().
SSBOLayoutCase::SSBOLayoutCase (tcu::TestContext& testCtx, const char* name, const char* description, BufferMode bufferMode, MatrixLoadFlags matrixLoadFlag)
	: TestCase			(testCtx, name, description)
	, m_bufferMode		(bufferMode)		// one buffer per block vs. a single shared buffer
	, m_matrixLoadFlag	(matrixLoadFlag)	// matrix load style used when generating the shader
{
}
2447 
~SSBOLayoutCase(void)2448 SSBOLayoutCase::~SSBOLayoutCase (void)
2449 {
2450 }
2451 
initPrograms(vk::SourceCollections & programCollection) const2452 void SSBOLayoutCase::initPrograms (vk::SourceCollections& programCollection) const
2453 {
2454 	DE_ASSERT(!m_computeShaderSrc.empty());
2455 
2456 	// Valid scalar layouts are a superset of valid relaxed layouts.  So check scalar layout first.
2457 	if (usesScalarLayout(m_interface))
2458 	{
2459 		programCollection.glslSources.add("compute") << glu::ComputeSource(m_computeShaderSrc)
2460 			<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_0, vk::ShaderBuildOptions::FLAG_ALLOW_SCALAR_OFFSETS);
2461 	}
2462 	else if (usesRelaxedLayout(m_interface))
2463 	{
2464 		programCollection.glslSources.add("compute") << glu::ComputeSource(m_computeShaderSrc)
2465 			<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_0, vk::ShaderBuildOptions::FLAG_ALLOW_RELAXED_OFFSETS);
2466 	}
2467 	else
2468 		programCollection.glslSources.add("compute") << glu::ComputeSource(m_computeShaderSrc);
2469 }
2470 
createInstance(Context & context) const2471 TestInstance* SSBOLayoutCase::createInstance (Context& context) const
2472 {
2473 	if (!vk::isDeviceExtensionSupported(context.getUsedApiVersion(), context.getDeviceExtensions(), "VK_KHR_relaxed_block_layout") && usesRelaxedLayout(m_interface))
2474 		TCU_THROW(NotSupportedError, "VK_KHR_relaxed_block_layout not supported");
2475 	if (!context.get16BitStorageFeatures().storageBuffer16BitAccess && uses16BitStorage(m_interface))
2476 		TCU_THROW(NotSupportedError, "storageBuffer16BitAccess not supported");
2477 	if (!context.get8BitStorageFeatures().storageBuffer8BitAccess && uses8BitStorage(m_interface))
2478 		TCU_THROW(NotSupportedError, "storageBuffer8BitAccess not supported");
2479 	if (!context.getScalarBlockLayoutFeatures().scalarBlockLayout && usesScalarLayout(m_interface))
2480 		TCU_THROW(NotSupportedError, "scalarBlockLayout not supported");
2481 
2482 	return new SSBOLayoutCaseInstance(context, m_bufferMode, m_interface, m_refLayout, m_initialData, m_writeData);
2483 }
2484 
init()2485 void SSBOLayoutCase::init ()
2486 {
2487 	computeReferenceLayout	(m_refLayout, m_interface);
2488 	initRefDataStorage		(m_interface, m_refLayout, m_initialData);
2489 	initRefDataStorage		(m_interface, m_refLayout, m_writeData);
2490 	generateValues			(m_refLayout, m_initialData.pointers, deStringHash(getName()) ^ 0xad2f7214);
2491 	generateValues			(m_refLayout, m_writeData.pointers, deStringHash(getName()) ^ 0x25ca4e7);
2492 	copyNonWrittenData		(m_interface, m_refLayout, m_initialData.pointers, m_writeData.pointers);
2493 
2494 	m_computeShaderSrc = generateComputeShader(m_interface, m_refLayout, m_initialData.pointers, m_writeData.pointers, m_matrixLoadFlag);
2495 }
2496 
2497 } // ssbo
2498 } // vkt
2499