1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017 Google Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief SPIR-V Assembly Tests for the VK_KHR_16bit_storage
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktSpvAsm16bitStorageTests.hpp"
25 
26 #include "tcuFloat.hpp"
27 #include "tcuRGBA.hpp"
28 #include "tcuStringTemplate.hpp"
29 #include "tcuTestLog.hpp"
30 #include "tcuVectorUtil.hpp"
31 
32 #include "vkDefs.hpp"
33 #include "vkDeviceUtil.hpp"
34 #include "vkMemUtil.hpp"
35 #include "vkPlatform.hpp"
36 #include "vkPrograms.hpp"
37 #include "vkQueryUtil.hpp"
38 #include "vkRef.hpp"
39 #include "vkRefUtil.hpp"
40 #include "vkStrUtil.hpp"
41 #include "vkTypeUtil.hpp"
42 
43 #include "deRandom.hpp"
44 #include "deStringUtil.hpp"
45 #include "deUniquePtr.hpp"
46 #include "deMath.h"
47 
48 #include "vktSpvAsmComputeShaderCase.hpp"
49 #include "vktSpvAsmComputeShaderTestUtil.hpp"
50 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
51 #include "vktSpvAsmUtils.hpp"
52 #include "vktTestCaseUtil.hpp"
53 #include "vktTestGroupUtil.hpp"
54 
55 #include <limits>
56 #include <map>
57 #include <string>
58 #include <sstream>
59 #include <utility>
60 
61 namespace vkt
62 {
63 namespace SpirVAssembly
64 {
65 
66 using namespace vk;
67 using std::map;
68 using std::string;
69 using std::vector;
70 using tcu::Float16;
71 using tcu::IVec3;
72 using tcu::IVec4;
73 using tcu::RGBA;
74 using tcu::TestLog;
75 using tcu::TestStatus;
76 using tcu::Vec4;
77 using de::UniquePtr;
78 using tcu::StringTemplate;
79 using tcu::Vec4;
80 
81 namespace
82 {
83 
// Selects a piece of SPIR-V text or a struct buffer layout used by the struct tests.
// SHADERTEMPLATE_TYPES names the shared type declarations; the remaining values name
// struct layouts (element width x std140/std430 packing). The numeric values are
// spelled out explicitly and match the implicit numbering 0..6.
enum ShaderTemplate
{
	SHADERTEMPLATE_TYPES				= 0,
	SHADERTEMPLATE_STRIDE32BIT_STD140	= 1,
	SHADERTEMPLATE_STRIDE32BIT_STD430	= 2,
	SHADERTEMPLATE_STRIDE16BIT_STD140	= 3,
	SHADERTEMPLATE_STRIDE16BIT_STD430	= 4,
	SHADERTEMPLATE_STRIDEMIX_STD140		= 5,
	SHADERTEMPLATE_STRIDEMIX_STD430		= 6
};
94 
compare16Bit(float original,deUint16 returned,RoundingModeFlags flags,tcu::TestLog & log)95 bool compare16Bit (float original, deUint16 returned, RoundingModeFlags flags, tcu::TestLog& log)
96 {
97 	return compare16BitFloat (original, returned, flags, log);
98 }
99 
compare16Bit(deUint16 original,float returned,RoundingModeFlags flags,tcu::TestLog & log)100 bool compare16Bit (deUint16 original, float returned, RoundingModeFlags flags, tcu::TestLog& log)
101 {
102 	DE_UNREF(flags);
103 	return compare16BitFloat (original, returned, log);
104 }
105 
compare16Bit(deInt16 original,deInt16 returned,RoundingModeFlags flags,tcu::TestLog & log)106 bool compare16Bit (deInt16 original, deInt16 returned, RoundingModeFlags flags, tcu::TestLog& log)
107 {
108 	DE_UNREF(flags);
109 	DE_UNREF(log);
110 	return (returned == original);
111 }
112 
// Array dimensions shared by all struct layout tests.
struct StructTestData
{
	// Number of elements in the outer array of structs.
	const int	structArraySize;

	// Largest length used by any array nested inside the struct.
	const int	nestedArraySize;
};
118 
119 struct Capability
120 {
121 	const char*				name;
122 	const char*				cap;
123 	const char*				decor;
124 	vk::VkDescriptorType	dtype;
125 };
126 
127 static const Capability	CAPABILITIES[]	=
128 {
129 	{"uniform_buffer_block",	"StorageUniformBufferBlock16",	"BufferBlock",	VK_DESCRIPTOR_TYPE_STORAGE_BUFFER},
130 	{"uniform",					"StorageUniform16",				"Block",		VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER},
131 };
132 
133 static const StructTestData structData = {7, 11};
134 
// Data type tag stored in a TestDefinition. The numeric values are spelled out
// explicitly and match the implicit numbering 0..5.
enum TestDefDataType
{
	DATATYPE_FLOAT	= 0,
	DATATYPE_VEC2	= 1,
	DATATYPE_INT	= 2,
	DATATYPE_UINT	= 3,
	DATATYPE_IVEC2	= 4,
	DATATYPE_UVEC2	= 5
};
144 
145 struct TestDefinition
146 {
147 	InstanceContext	instanceContext;
148 	TestDefDataType	dataType;
149 };
150 
get16BitStorageFeatures(const char * cap)151 VulkanFeatures	get16BitStorageFeatures	(const char* cap)
152 {
153 	VulkanFeatures features;
154 	if (string(cap) == "uniform_buffer_block")
155 		features.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
156 	else if (string(cap) == "uniform")
157 		features.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM;
158 	else
159 		DE_ASSERT(false && "not supported");
160 
161 	return features;
162 }
163 
getStructSize(const ShaderTemplate shaderTemplate)164 int getStructSize(const ShaderTemplate  shaderTemplate)
165 {
166 	switch (shaderTemplate)
167 	{
168 	case SHADERTEMPLATE_STRIDE16BIT_STD140:
169 		return 600 * structData.structArraySize;		//size of struct in f16 with offsets
170 	case SHADERTEMPLATE_STRIDE16BIT_STD430:
171 		return 184 * structData.structArraySize;		//size of struct in f16 with offsets
172 	case SHADERTEMPLATE_STRIDE32BIT_STD140:
173 		return 304 * structData.structArraySize;		//size of struct in f32 with offsets
174 	case SHADERTEMPLATE_STRIDE32BIT_STD430:
175 		return 184 * structData.structArraySize;		//size of struct in f32 with offset
176 	case SHADERTEMPLATE_STRIDEMIX_STD140:
177 		return 4480 * structData.structArraySize / 2;	//size of struct in 16b with offset
178 	case SHADERTEMPLATE_STRIDEMIX_STD430:
179 		return 1216 * structData.structArraySize / 2;	//size of struct in 16b with offset
180 	default:
181 		DE_ASSERT(0);
182 	}
183 	return 0;
184 }
185 
186 // Batch function to check arrays of 16-bit floats.
187 //
188 // For comparing 16-bit floats, we need to consider both RTZ and RTE. So we can only recalculate
189 // the expected values here instead of get the expected values directly from the test case.
190 // Thus we need original floats here but not expected outputs.
191 template<RoundingModeFlags RoundingMode>
graphicsCheck16BitFloats(const std::vector<Resource> & originalFloats,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,tcu::TestLog & log)192 bool graphicsCheck16BitFloats (const std::vector<Resource>&	originalFloats,
193 							   const vector<AllocationSp>&	outputAllocs,
194 							   const std::vector<Resource>&	expectedOutputs,
195 							   tcu::TestLog&				log)
196 {
197 	if (outputAllocs.size() != originalFloats.size())
198 		return false;
199 
200 	for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
201 	{
202 		vector<deUint8>	originalBytes;
203 		originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
204 
205 		const deUint16*	returned	= static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
206 		const float*	original	= reinterpret_cast<const float*>(&originalBytes.front());
207 		const deUint32	count		= static_cast<deUint32>(expectedOutputs[outputNdx].getByteSize() / sizeof(deUint16));
208 		const deUint32	inputStride	= static_cast<deUint32>(originalBytes.size() / sizeof(float)) / count;
209 
210 		for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
211 			if (!compare16BitFloat(original[numNdx * inputStride], returned[numNdx], RoundingMode, log))
212 				return false;
213 	}
214 
215 	return true;
216 }
217 
218 template<RoundingModeFlags RoundingMode>
graphicsCheck16BitFloats64(const std::vector<Resource> & originalFloats,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,tcu::TestLog & log)219 bool graphicsCheck16BitFloats64 (const std::vector<Resource>&	originalFloats,
220 								 const vector<AllocationSp>&	outputAllocs,
221 								 const std::vector<Resource>&	/* expectedOutputs */,
222 								 tcu::TestLog&				log)
223 {
224 	if (outputAllocs.size() != originalFloats.size())
225 		return false;
226 
227 	for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
228 	{
229 		vector<deUint8>	originalBytes;
230 		originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
231 
232 		const deUint16*	returned	= static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
233 		const double*	original	= reinterpret_cast<const double*>(&originalBytes.front());
234 		const deUint32	count		= static_cast<deUint32>(originalBytes.size() / sizeof(double));
235 
236 		for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
237 			if (!compare16BitFloat64(original[numNdx], returned[numNdx], RoundingMode, log))
238 				return false;
239 	}
240 
241 	return true;
242 }
243 
computeCheckBuffersFloats(const std::vector<Resource> & originalFloats,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,tcu::TestLog &)244 bool computeCheckBuffersFloats (const std::vector<Resource>&	originalFloats,
245 								const vector<AllocationSp>&		outputAllocs,
246 								const std::vector<Resource>&	/*expectedOutputs*/,
247 								tcu::TestLog&					/*log*/)
248 {
249 	std::vector<deUint8> result;
250 	originalFloats.front().getBuffer()->getPackedBytes(result);
251 
252 	const deUint16 * results = reinterpret_cast<const deUint16 *>(&result[0]);
253 	const deUint16 * expected = reinterpret_cast<const deUint16 *>(outputAllocs.front()->getHostPtr());
254 
255 	for (size_t i = 0; i < result.size() / sizeof (deUint16); ++i)
256 	{
257 		if (results[i] == expected[i])
258 			continue;
259 
260 		if (Float16(results[i]).isNaN() && Float16(expected[i]).isNaN())
261 			continue;
262 
263 		return false;
264 	}
265 
266 	return true;
267 }
268 
269 template<RoundingModeFlags RoundingMode>
computeCheck16BitFloats(const std::vector<Resource> & originalFloats,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,tcu::TestLog & log)270 bool computeCheck16BitFloats (const std::vector<Resource>&	originalFloats,
271 							  const vector<AllocationSp>&	outputAllocs,
272 							  const std::vector<Resource>&	expectedOutputs,
273 							  tcu::TestLog&					log)
274 {
275 	if (outputAllocs.size() != originalFloats.size())
276 		return false;
277 
278 	for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
279 	{
280 		vector<deUint8>	originalBytes;
281 		originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
282 
283 		const deUint16*	returned	= static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
284 		const float*	original	= reinterpret_cast<const float*>(&originalBytes.front());
285 		const deUint32	count		= static_cast<deUint32>(expectedOutputs[outputNdx].getByteSize() / sizeof(deUint16));
286 		const deUint32	inputStride	= static_cast<deUint32>(originalBytes.size() / sizeof(float)) / count;
287 
288 		for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
289 			if (!compare16BitFloat(original[numNdx * inputStride], returned[numNdx], RoundingMode, log))
290 				return false;
291 	}
292 
293 	return true;
294 }
295 
296 template<RoundingModeFlags RoundingMode>
computeCheck16BitFloats64(const std::vector<Resource> & originalFloats,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,tcu::TestLog & log)297 bool computeCheck16BitFloats64 (const std::vector<Resource>&	originalFloats,
298 								const vector<AllocationSp>&		outputAllocs,
299 								const std::vector<Resource>&	/* expectedOutputs */,
300 								tcu::TestLog&					log)
301 {
302 	if (outputAllocs.size() != originalFloats.size())
303 		return false;
304 
305 	for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
306 	{
307 		vector<deUint8>	originalBytes;
308 		originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
309 
310 		const deUint16*	returned	= static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
311 		const double*	original	= reinterpret_cast<const double*>(&originalBytes.front());
312 		const deUint32	count		= static_cast<deUint32>(originalBytes.size() / sizeof(double));
313 
314 		for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
315 			if (!compare16BitFloat64(original[numNdx], returned[numNdx], RoundingMode, log))
316 				return false;
317 	}
318 
319 	return true;
320 }
321 
322 // Batch function to check arrays of 64-bit floats.
323 //
324 // For comparing 64-bit floats, we just need the expected value precomputed in the test case.
325 // So we need expected outputs here but not original floats.
check64BitFloats(const std::vector<Resource> &,const std::vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,tcu::TestLog & log)326 bool check64BitFloats (const std::vector<Resource>&		/* originalFloats */,
327 					   const std::vector<AllocationSp>& outputAllocs,
328 					   const std::vector<Resource>&		expectedOutputs,
329 					   tcu::TestLog&					log)
330 {
331 	if (outputAllocs.size() != expectedOutputs.size())
332 		return false;
333 
334 	for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
335 	{
336 		vector<deUint8>	expectedBytes;
337 		expectedOutputs[outputNdx].getBuffer()->getPackedBytes(expectedBytes);
338 
339 		const double*	returnedAsDouble	= static_cast<const double*>(outputAllocs[outputNdx]->getHostPtr());
340 		const double*	expectedAsDouble	= reinterpret_cast<const double*>(&expectedBytes.front());
341 		const deUint32	count				= static_cast<deUint32>(expectedBytes.size() / sizeof(double));
342 
343 		for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
344 			if (!compare64BitFloat(expectedAsDouble[numNdx], returnedAsDouble[numNdx], log))
345 				return false;
346 	}
347 
348 	return true;
349 }
350 
351 // Batch function to check arrays of 32-bit floats.
352 //
353 // For comparing 32-bit floats, we just need the expected value precomputed in the test case.
354 // So we need expected outputs here but not original floats.
check32BitFloats(const std::vector<Resource> &,const std::vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,tcu::TestLog & log)355 bool check32BitFloats (const std::vector<Resource>&		/* originalFloats */,
356 					   const std::vector<AllocationSp>& outputAllocs,
357 					   const std::vector<Resource>&		expectedOutputs,
358 					   tcu::TestLog&					log)
359 {
360 	if (outputAllocs.size() != expectedOutputs.size())
361 		return false;
362 
363 	for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
364 	{
365 		vector<deUint8>	expectedBytes;
366 		expectedOutputs[outputNdx].getBuffer()->getPackedBytes(expectedBytes);
367 
368 		const float*	returnedAsFloat	= static_cast<const float*>(outputAllocs[outputNdx]->getHostPtr());
369 		const float*	expectedAsFloat	= reinterpret_cast<const float*>(&expectedBytes.front());
370 		const deUint32	count			= static_cast<deUint32>(expectedBytes.size() / sizeof(float));
371 
372 		for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
373 			if (!compare32BitFloat(expectedAsFloat[numNdx], returnedAsFloat[numNdx], log))
374 				return false;
375 	}
376 
377 	return true;
378 }
379 
// Marks the next `count` entries of `info`, starting at `ndx`, as data (true) or
// padding (false) and advances `ndx` past them. A non-positive count does nothing.
// No bounds checking is performed; the caller sizes `info`.
void addInfo(vector<bool>& info, int& ndx, const int count, bool isData)
{
	int remaining = count;

	while (remaining > 0)
	{
		info[ndx] = isData;
		++ndx;
		--remaining;
	}
}
385 
data16bitStd140(de::Random & rnd)386 vector<deFloat16> data16bitStd140 (de::Random& rnd)
387 {
388 	return getFloat16s(rnd, getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD140));
389 }
390 
info16bitStd140(void)391 vector<bool> info16bitStd140 (void)
392 {
393 	int				ndx			= 0u;
394 	vector<bool>	infoData	(getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD140));
395 
396 	for(int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
397 	{
398 		infoData[ndx++] = true;						//f16
399 		infoData[ndx++] = false;					//offset
400 
401 		infoData[ndx++] = true;						//v2f16
402 		infoData[ndx++] = true;						//v2f16
403 
404 		addInfo(infoData, ndx, 3, true);			//v3f16
405 		infoData[ndx++] = false;					//offset
406 
407 		addInfo(infoData, ndx, 4, true);			//v4f16
408 		addInfo(infoData, ndx, 4, false);			//offset
409 
410 		//f16[3];
411 		for (int i = 0; i < 3; ++i)
412 		{
413 			infoData[ndx++] = true;					//f16[0];
414 			addInfo(infoData, ndx, 7, false);		//offset
415 		}
416 
417 		//struct {f16, v2f16[3]} [11]
418 		for (int i = 0; i < structData.nestedArraySize; ++i)
419 		{
420 			//struct.f16
421 			infoData[ndx++] = true;					//f16
422 			addInfo(infoData, ndx, 7, false);		//offset
423 			//struct.f16.v2f16[3]
424 			for (int j = 0; j < 3; ++j)
425 			{
426 				infoData[ndx++] = true;				//v2f16
427 				infoData[ndx++] = true;				//v2f16
428 				addInfo(infoData, ndx, 6, false);	//offset
429 			}
430 		}
431 
432 		//vec2[11];
433 		for (int i = 0; i < structData.nestedArraySize; ++i)
434 		{
435 			infoData[ndx++] = true;					//v2f16
436 			infoData[ndx++] = true;					//v2f16
437 			addInfo(infoData, ndx, 6, false);		//offset
438 		}
439 
440 		//f16
441 		infoData[ndx++] = true;						//f16
442 		addInfo(infoData, ndx, 7, false);			//offset
443 
444 		//vec3[11]
445 		for (int i = 0; i < structData.nestedArraySize; ++i)
446 		{
447 			addInfo(infoData, ndx, 3, true);		//vec3
448 			addInfo(infoData, ndx, 5, false);		//offset
449 		}
450 
451 		//vec4[3]
452 		for (int i = 0; i < 3; ++i)
453 		{
454 			addInfo(infoData, ndx, 4, true);		//vec4
455 			addInfo(infoData, ndx, 4, false);		//offset
456 		}
457 	}
458 
459 	//Please check the data and offset
460 	DE_ASSERT(ndx == static_cast<int>(infoData.size()));
461 
462 	return infoData;
463 }
464 
data16bitStd430(de::Random & rnd)465 vector<deFloat16> data16bitStd430 (de::Random& rnd)
466 {
467 	return getFloat16s(rnd, getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD430));
468 }
469 
info16bitStd430(void)470 vector<bool> info16bitStd430 (void)
471 {
472 	int				ndx			= 0u;
473 	vector<bool>	infoData	(getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD430));
474 
475 	for(int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
476 	{
477 		infoData[ndx++] = true;					//f16
478 		infoData[ndx++] = false;				//offset
479 
480 		infoData[ndx++] = true;					//v2f16
481 		infoData[ndx++] = true;					//v2f16
482 
483 		addInfo(infoData, ndx, 3, true);		//v3f16
484 		infoData[ndx++] = false;				//offset
485 
486 		addInfo(infoData, ndx, 4, true);		//v4f16
487 
488 		//f16[3];
489 		for (int i = 0; i < 3; ++i)
490 		{
491 			infoData[ndx++] = true;				//f16;
492 		}
493 		addInfo(infoData, ndx, 1, false);		//offset
494 
495 		//struct {f16, v2f16[3]} [11]
496 		for (int i = 0; i < structData.nestedArraySize; ++i)
497 		{
498 			//struct.f16
499 			infoData[ndx++] = true;				//f16
500 			infoData[ndx++] = false;			//offset
501 			//struct.f16.v2f16[3]
502 			for (int j = 0; j < 3; ++j)
503 			{
504 				infoData[ndx++] = true;			//v2f16
505 				infoData[ndx++] = true;			//v2f16
506 			}
507 		}
508 
509 		//vec2[11];
510 		for (int i = 0; i < structData.nestedArraySize; ++i)
511 		{
512 			infoData[ndx++] = true;				//v2f16
513 			infoData[ndx++] = true;				//v2f16
514 		}
515 
516 		//f16
517 		infoData[ndx++] = true;					//f16
518 		infoData[ndx++] = false;				//offset
519 
520 		//vec3[11]
521 		for (int i = 0; i < structData.nestedArraySize; ++i)
522 		{
523 			addInfo(infoData, ndx, 3, true);	//vec3
524 			infoData[ndx++] = false;			//offset
525 		}
526 
527 		//vec4[3]
528 		for (int i = 0; i < 3; ++i)
529 		{
530 			addInfo(infoData, ndx, 4, true);	//vec4
531 		}
532 	}
533 
534 	//Please check the data and offset
535 	DE_ASSERT(ndx == static_cast<int>(infoData.size()));
536 	return infoData;
537 }
538 
data32bitStd140(de::Random & rnd)539 vector<float> data32bitStd140 (de::Random& rnd)
540 {
541 	return getFloat32s(rnd, getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD140));
542 }
543 
info32bitStd140(void)544 vector<bool> info32bitStd140 (void)
545 {
546 	int				ndx			= 0u;
547 	vector<bool>	infoData	(getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD140));
548 
549 	for(int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
550 	{
551 		infoData[ndx++] = true;					//f32
552 		infoData[ndx++] = false;				//offset
553 
554 		infoData[ndx++] = true;					//v2f32
555 		infoData[ndx++] = true;					//v2f32
556 
557 		addInfo(infoData, ndx, 3, true);		//v3f32
558 		infoData[ndx++] = false;				//offset
559 
560 		addInfo(infoData, ndx, 4, true);		//v4f16
561 
562 		//f32[3];
563 		for (int i = 0; i < 3; ++i)
564 		{
565 			infoData[ndx++] = true;				//f32;
566 			addInfo(infoData, ndx, 3, false);	//offset
567 		}
568 
569 		//struct {f32, v2f32[3]} [11]
570 		for (int i = 0; i < structData.nestedArraySize; ++i)
571 		{
572 			//struct.f32
573 			infoData[ndx++] = true;				//f32
574 			addInfo(infoData, ndx, 3, false);	//offset
575 			//struct.f32.v2f16[3]
576 			for (int j = 0; j < 3; ++j)
577 			{
578 				infoData[ndx++] = true;			//v2f32
579 				infoData[ndx++] = true;			//v2f32
580 				infoData[ndx++] = false;		//offset
581 				infoData[ndx++] = false;		//offset
582 			}
583 		}
584 
585 		//v2f32[11];
586 		for (int i = 0; i < structData.nestedArraySize; ++i)
587 		{
588 			infoData[ndx++] = true;				//v2f32
589 			infoData[ndx++] = true;				//v2f32
590 			infoData[ndx++] = false;			//offset
591 			infoData[ndx++] = false;			//offset
592 		}
593 
594 		//f16
595 		infoData[ndx++] = true;					//f16
596 		addInfo(infoData, ndx, 3, false);		//offset
597 
598 		//vec3[11]
599 		for (int i = 0; i < structData.nestedArraySize; ++i)
600 		{
601 			addInfo(infoData, ndx, 3, true);	//v3f32
602 			infoData[ndx++] = false;			//offset
603 		}
604 
605 		//vec4[3]
606 		for (int i = 0; i < 3; ++i)
607 		{
608 			addInfo(infoData, ndx, 4, true);	//vec4
609 		}
610 	}
611 
612 	//Please check the data and offset
613 	DE_ASSERT(ndx == static_cast<int>(infoData.size()));
614 	return infoData;
615 }
616 
data32bitStd430(de::Random & rnd)617 vector<float> data32bitStd430 (de::Random& rnd)
618 {
619 	return getFloat32s(rnd, getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD430));
620 }
621 
info32bitStd430(void)622 vector<bool> info32bitStd430 (void)
623 {
624 	int				ndx			= 0u;
625 	vector<bool>	infoData	(getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD430));
626 
627 	for(int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
628 	{
629 		infoData[ndx++] = true;					//f32
630 		infoData[ndx++] = false;				//offset
631 
632 		infoData[ndx++] = true;					//v2f32
633 		infoData[ndx++] = true;					//v2f32
634 
635 		addInfo(infoData, ndx, 3, true);		//v3f32
636 		infoData[ndx++] = false;				//offset
637 
638 		addInfo(infoData, ndx, 4, true);		//v4f16
639 
640 		//f32[3];
641 		for (int i = 0; i < 3; ++i)
642 		{
643 			infoData[ndx++] = true;				//f32;
644 		}
645 		infoData[ndx++] = false;				//offset
646 
647 		//struct {f32, v2f32[3]} [11]
648 		for (int i = 0; i < structData.nestedArraySize; ++i)
649 		{
650 			//struct.f32
651 			infoData[ndx++] = true;				//f32
652 			infoData[ndx++] = false;			//offset
653 			//struct.f32.v2f16[3]
654 			for (int j = 0; j < 3; ++j)
655 			{
656 				infoData[ndx++] = true;			//v2f32
657 				infoData[ndx++] = true;			//v2f32
658 			}
659 		}
660 
661 		//v2f32[11];
662 		for (int i = 0; i < structData.nestedArraySize; ++i)
663 		{
664 			infoData[ndx++] = true;				//v2f32
665 			infoData[ndx++] = true;				//v2f32
666 		}
667 
668 		//f32
669 		infoData[ndx++] = true;					//f32
670 		infoData[ndx++] = false;				//offset
671 
672 		//vec3[11]
673 		for (int i = 0; i < structData.nestedArraySize; ++i)
674 		{
675 			addInfo(infoData, ndx, 3, true);	//v3f32
676 			infoData[ndx++] = false;			//offset
677 		}
678 
679 		//vec4[3]
680 		for (int i = 0; i < 3; ++i)
681 		{
682 			addInfo(infoData, ndx, 4, true);	//vec4
683 		}
684 	}
685 
686 	//Please check the data and offset
687 	DE_ASSERT(ndx == static_cast<int>(infoData.size()));
688 	return infoData;
689 }
690 
dataMixStd140(de::Random & rnd)691 vector<deInt16> dataMixStd140 (de::Random& rnd)
692 {
693 	return getInt16s(rnd, getStructSize(SHADERTEMPLATE_STRIDEMIX_STD140));
694 }
695 
infoMixStd140(void)696 vector<bool> infoMixStd140 (void)
697 {
698 	int				ndx			= 0u;
699 	vector<bool>	infoData	(getStructSize(SHADERTEMPLATE_STRIDEMIX_STD140));
700 	for(int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
701 	{
702 		infoData[ndx++] = true;				//16b
703 		addInfo(infoData, ndx, 1, false);		//offset
704 
705 		addInfo(infoData, ndx, 2, true);		//32b
706 
707 		addInfo(infoData, ndx, 2, true);		//v2b16
708 		addInfo(infoData, ndx, 2, false);		//offset
709 
710 		addInfo(infoData, ndx, 4, true);		//v2b32
711 
712 		addInfo(infoData, ndx, 3, true);		//v3b16
713 		addInfo(infoData, ndx, 1, false);		//offset
714 
715 		addInfo(infoData, ndx, 6, true);		//v3b32
716 		addInfo(infoData, ndx, 2, false);		//offset
717 
718 		addInfo(infoData, ndx, 4, true);		//v4b16
719 		addInfo(infoData, ndx, 4, false);		//offset
720 
721 		addInfo(infoData, ndx, 8, true);		//v4b32
722 
723 		//strut {b16, b32, v2b16[11], b32[11]}
724 		for (int i = 0; i < structData.nestedArraySize; ++i)
725 		{
726 			infoData[ndx++] = true;				//16b
727 			addInfo(infoData, ndx, 1, false);	//offset
728 
729 			addInfo(infoData, ndx, 2, true);	//32b
730 			addInfo(infoData, ndx, 4, false);	//offset
731 
732 			for (int j = 0; j < structData.nestedArraySize; ++j)
733 			{
734 				addInfo(infoData, ndx, 2, true);	//v2b16[11]
735 				addInfo(infoData, ndx, 6, false);	//offset
736 			}
737 
738 			for (int j = 0; j < structData.nestedArraySize; ++j)
739 			{
740 				addInfo(infoData, ndx, 2, true);	//b32[11]
741 				addInfo(infoData, ndx, 6, false);	//offset
742 			}
743 		}
744 
745 		for (int i = 0; i < structData.nestedArraySize; ++i)
746 		{
747 			infoData[ndx++] = true;				//16b[11]
748 			addInfo(infoData, ndx, 7, false);		//offset
749 		}
750 
751 		for (int i = 0; i < structData.nestedArraySize; ++i)
752 		{
753 			addInfo(infoData, ndx, 2, true);	//b32bIn[11]
754 			addInfo(infoData, ndx, 6, false);	//offset
755 		}
756 	}
757 
758 	//Please check the data and offset
759 	DE_ASSERT(ndx == static_cast<int>(infoData.size()));
760 	return infoData;
761 }
762 
dataMixStd430(de::Random & rnd)763 vector<deInt16> dataMixStd430 (de::Random& rnd)
764 {
765 	return getInt16s(rnd, getStructSize(SHADERTEMPLATE_STRIDEMIX_STD430));
766 }
767 
infoMixStd430(void)768 vector<bool> infoMixStd430 (void)
769 {
770 	int				ndx			= 0u;
771 	vector<bool>	infoData	(getStructSize(SHADERTEMPLATE_STRIDEMIX_STD430));
772 	for(int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
773 	{
774 		infoData[ndx++] = true;				//16b
775 		addInfo(infoData, ndx, 1, false);		//offset
776 
777 		addInfo(infoData, ndx, 2, true);		//32b
778 
779 		addInfo(infoData, ndx, 2, true);		//v2b16
780 		addInfo(infoData, ndx, 2, false);		//offset
781 
782 		addInfo(infoData, ndx, 4, true);		//v2b32
783 
784 		addInfo(infoData, ndx, 3, true);		//v3b16
785 		addInfo(infoData, ndx, 1, false);		//offset
786 
787 		addInfo(infoData, ndx, 6, true);		//v3b32
788 		addInfo(infoData, ndx, 2, false);		//offset
789 
790 		addInfo(infoData, ndx, 4, true);		//v4b16
791 		addInfo(infoData, ndx, 4, false);		//offset
792 
793 		addInfo(infoData, ndx, 8, true);		//v4b32
794 
795 		//strut {b16, b32, v2b16[11], b32[11]}
796 		for (int i = 0; i < structData.nestedArraySize; ++i)
797 		{
798 			infoData[ndx++] = true;			//16b
799 			addInfo(infoData, ndx, 1, false);	//offset
800 
801 			addInfo(infoData, ndx, 2, true);	//32b
802 
803 			addInfo(infoData, ndx, 22, true);	//v2b16[11]
804 
805 			addInfo(infoData, ndx, 22, true);	//b32[11]
806 		}
807 
808 		addInfo(infoData, ndx, 11, true);		//16b[11]
809 		infoData[ndx++] = false;				//offset
810 
811 		addInfo(infoData, ndx, 22, true);		//32b[11]
812 		addInfo(infoData, ndx, 6, false);		//offset
813 	}
814 
815 	//Please check the data and offset
816 	DE_ASSERT(ndx == static_cast<int>(infoData.size()));
817 	return infoData;
818 }
819 
820 template<typename originType, typename resultType, ShaderTemplate funcOrigin, ShaderTemplate funcResult>
compareStruct(const resultType * returned,const originType * original,tcu::TestLog & log)821 bool compareStruct(const resultType* returned, const originType* original, tcu::TestLog& log)
822 {
823 		vector<bool>		resultInfo;
824 		vector<bool>		originInfo;
825 		vector<resultType>	resultToCompare;
826 		vector<originType>	originToCompare;
827 
828 		switch(funcOrigin)
829 		{
830 		case SHADERTEMPLATE_STRIDE16BIT_STD140:
831 			originInfo = info16bitStd140();
832 			break;
833 		case SHADERTEMPLATE_STRIDE16BIT_STD430:
834 			originInfo = info16bitStd430();
835 			break;
836 		case SHADERTEMPLATE_STRIDE32BIT_STD140:
837 			originInfo = info32bitStd140();
838 			break;
839 		case SHADERTEMPLATE_STRIDE32BIT_STD430:
840 			originInfo = info32bitStd430();
841 			break;
842 		case SHADERTEMPLATE_STRIDEMIX_STD140:
843 			originInfo = infoMixStd140();
844 			break;
845 		case SHADERTEMPLATE_STRIDEMIX_STD430:
846 			originInfo = infoMixStd430();
847 			break;
848 		default:
849 			DE_ASSERT(0);
850 		}
851 
852 		switch(funcResult)
853 		{
854 		case SHADERTEMPLATE_STRIDE16BIT_STD140:
855 			resultInfo = info16bitStd140();
856 			break;
857 		case SHADERTEMPLATE_STRIDE16BIT_STD430:
858 			resultInfo = info16bitStd430();
859 			break;
860 		case SHADERTEMPLATE_STRIDE32BIT_STD140:
861 			resultInfo = info32bitStd140();
862 			break;
863 		case SHADERTEMPLATE_STRIDE32BIT_STD430:
864 			resultInfo = info32bitStd430();
865 			break;
866 		case SHADERTEMPLATE_STRIDEMIX_STD140:
867 			resultInfo = infoMixStd140();
868 			break;
869 		case SHADERTEMPLATE_STRIDEMIX_STD430:
870 			resultInfo = infoMixStd430();
871 			break;
872 		default:
873 			DE_ASSERT(0);
874 		}
875 
876 		for (unsigned int ndx = 0; ndx < static_cast<unsigned int>(resultInfo.size()); ++ndx)
877 		{
878 			if (resultInfo[ndx])
879 				resultToCompare.push_back(returned[ndx]);
880 		}
881 
882 		for (unsigned int ndx = 0; ndx < static_cast<unsigned int>(originInfo.size()); ++ndx)
883 		{
884 			if (originInfo[ndx])
885 				originToCompare.push_back(original[ndx]);
886 		}
887 
888 		//Different offset but that same amount of data
889 		DE_ASSERT(originToCompare.size() == resultToCompare.size());
890 		for (unsigned int ndx = 0; ndx < static_cast<unsigned int>(originToCompare.size()); ++ndx)
891 		{
892 			if (!compare16Bit(originToCompare[ndx], resultToCompare[ndx], RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ), log))
893 				return false;
894 		}
895 		return true;
896 }
897 
898 template<typename originType, typename resultType, ShaderTemplate funcOrigin, ShaderTemplate funcResult>
computeCheckStruct(const std::vector<Resource> & originalFloats,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,tcu::TestLog & log)899 bool computeCheckStruct (const std::vector<Resource>&	originalFloats,
900 						 const vector<AllocationSp>&	outputAllocs,
901 						 const std::vector<Resource>&	/* expectedOutputs */,
902 						 tcu::TestLog&					log)
903 {
904 	for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
905 	{
906 		vector<deUint8>	originalBytes;
907 		originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
908 
909 		const resultType*	returned	= static_cast<const resultType*>(outputAllocs[outputNdx]->getHostPtr());
910 		const originType*	original	= reinterpret_cast<const originType*>(&originalBytes.front());
911 
912 		if(!compareStruct<originType, resultType, funcOrigin, funcResult>(returned, original, log))
913 			return false;
914 	}
915 	return true;
916 }
917 
918 template<typename originType, typename resultType, ShaderTemplate funcOrigin, ShaderTemplate funcResult>
graphicsCheckStruct(const std::vector<Resource> & originalFloats,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,tcu::TestLog & log)919 bool graphicsCheckStruct (const std::vector<Resource>&	originalFloats,
920 							   const vector<AllocationSp>&	outputAllocs,
921 							   const std::vector<Resource>&	/* expectedOutputs */,
922 							   tcu::TestLog&				log)
923 {
924 	for (deUint32 outputNdx = 0; outputNdx < static_cast<deUint32>(outputAllocs.size()); ++outputNdx)
925 	{
926 		vector<deUint8>	originalBytes;
927 		originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
928 
929 		const resultType*	returned	= static_cast<const resultType*>(outputAllocs[outputNdx]->getHostPtr());
930 		const originType*	original	= reinterpret_cast<const originType*>(&originalBytes.front());
931 
932 		if(!compareStruct<originType, resultType, funcOrigin, funcResult>(returned, original, log))
933 			return false;
934 	}
935 	return true;
936 }
937 
getStructShaderComponet(const ShaderTemplate component)938 string getStructShaderComponet (const ShaderTemplate component)
939 {
940 	switch(component)
941 	{
942 	case SHADERTEMPLATE_TYPES:
943 		return string(
944 		"%f16       = OpTypeFloat 16\n"
945 		"%v2f16     = OpTypeVector %f16 2\n"
946 		"%v3f16     = OpTypeVector %f16 3\n"
947 		"%v4f16     = OpTypeVector %f16 4\n"
948 		"%f16ptr    = OpTypePointer Uniform %f16\n"
949 		"%v2f16ptr  = OpTypePointer Uniform %v2f16\n"
950 		"%v3f16ptr  = OpTypePointer Uniform %v3f16\n"
951 		"%v4f16ptr  = OpTypePointer Uniform %v4f16\n"
952 		"\n"
953 		"%f32ptr   = OpTypePointer Uniform %f32\n"
954 		"%v2f32ptr = OpTypePointer Uniform %v2f32\n"
955 		"%v3f32ptr = OpTypePointer Uniform %v3f32\n"
956 		"%v4f32ptr = OpTypePointer Uniform %v4f32\n");
957 	case SHADERTEMPLATE_STRIDE16BIT_STD140:
958 		return string(
959 		//struct {f16, v2f16[3]} [11]
960 		"OpDecorate %v2f16arr3 ArrayStride 16\n"
961 		"OpMemberDecorate %struct16 0 Offset 0\n"
962 		"OpMemberDecorate %struct16 1 Offset 16\n"
963 		"OpDecorate %struct16arr11 ArrayStride 64\n"
964 
965 		"OpDecorate %f16arr3       ArrayStride 16\n"
966 		"OpDecorate %v2f16arr11    ArrayStride 16\n"
967 		"OpDecorate %v3f16arr11    ArrayStride 16\n"
968 		"OpDecorate %v4f16arr3     ArrayStride 16\n"
969 		"OpDecorate %f16StructArr7 ArrayStride 1200\n"
970 		"\n"
971 		"OpMemberDecorate %f16Struct 0 Offset 0\n"		//f16
972 		"OpMemberDecorate %f16Struct 1 Offset 4\n"		//v2f16
973 		"OpMemberDecorate %f16Struct 2 Offset 8\n"		//v3f16
974 		"OpMemberDecorate %f16Struct 3 Offset 16\n"		//v4f16
975 		"OpMemberDecorate %f16Struct 4 Offset 32\n"		//f16[3]
976 		"OpMemberDecorate %f16Struct 5 Offset 80\n"		//struct {f16, v2f16[3]} [11]
977 		"OpMemberDecorate %f16Struct 6 Offset 784\n"	//v2f16[11]
978 		"OpMemberDecorate %f16Struct 7 Offset 960\n"	//f16
979 		"OpMemberDecorate %f16Struct 8 Offset 976\n"	//v3f16[11]
980 		"OpMemberDecorate %f16Struct 9 Offset 1152\n");	//v4f16[3]
981 
982 	case SHADERTEMPLATE_STRIDE16BIT_STD430:
983 		return string(
984 		//struct {f16, v2f16[3]} [11]
985 		"OpDecorate %v2f16arr3 ArrayStride 4\n"
986 		"OpMemberDecorate %struct16 0 Offset 0\n"
987 		"OpMemberDecorate %struct16 1 Offset 4\n"
988 		"OpDecorate %struct16arr11 ArrayStride 16\n"
989 
990 		"OpDecorate %f16arr3    ArrayStride 2\n"
991 		"OpDecorate %v2f16arr11 ArrayStride 4\n"
992 		"OpDecorate %v3f16arr11 ArrayStride 8\n"
993 		"OpDecorate %v4f16arr3  ArrayStride 8\n"
994 		"OpDecorate %f16StructArr7 ArrayStride 368\n"
995 		"\n"
996 		"OpMemberDecorate %f16Struct 0 Offset 0\n"		//f16
997 		"OpMemberDecorate %f16Struct 1 Offset 4\n"		//v2f16
998 		"OpMemberDecorate %f16Struct 2 Offset 8\n"		//v3f16
999 		"OpMemberDecorate %f16Struct 3 Offset 16\n"		//v4f16
1000 		"OpMemberDecorate %f16Struct 4 Offset 24\n"		//f16[3]
1001 		"OpMemberDecorate %f16Struct 5 Offset 32\n"		//struct {f16, v2f16[3]} [11]
1002 		"OpMemberDecorate %f16Struct 6 Offset 208\n"	//v2f16[11]
1003 		"OpMemberDecorate %f16Struct 7 Offset 252\n"	//f16
1004 		"OpMemberDecorate %f16Struct 8 Offset 256\n"	//v3f16[11]
1005 		"OpMemberDecorate %f16Struct 9 Offset 344\n");	//v4f16[3]
1006 	case SHADERTEMPLATE_STRIDE32BIT_STD140:
1007 		return string (
1008 		//struct {f32, v2f32[3]} [11]
1009 		"OpDecorate %v2f32arr3 ArrayStride 16\n"
1010 		"OpMemberDecorate %struct32 0 Offset 0\n"
1011 		"OpMemberDecorate %struct32 1 Offset 16\n"
1012 		"OpDecorate %struct32arr11 ArrayStride 64\n"
1013 
1014 		"OpDecorate %f32arr3   ArrayStride 16\n"
1015 		"OpDecorate %v2f32arr11 ArrayStride 16\n"
1016 		"OpDecorate %v3f32arr11 ArrayStride 16\n"
1017 		"OpDecorate %v4f32arr3 ArrayStride 16\n"
1018 		"OpDecorate %f32StructArr7 ArrayStride 1216\n"
1019 		"\n"
1020 
1021 		"OpMemberDecorate %f32Struct 0 Offset 0\n"		//f32
1022 		"OpMemberDecorate %f32Struct 1 Offset 8\n"		//v2f32
1023 		"OpMemberDecorate %f32Struct 2 Offset 16\n"		//v3f32
1024 		"OpMemberDecorate %f32Struct 3 Offset 32\n"		//v4f32
1025 		"OpMemberDecorate %f32Struct 4 Offset 48\n"		//f32[3]
1026 		"OpMemberDecorate %f32Struct 5 Offset 96\n"		//struct {f32, v2f32[3]} [11]
1027 		"OpMemberDecorate %f32Struct 6 Offset 800\n"	//v2f32[11]
1028 		"OpMemberDecorate %f32Struct 7 Offset 976\n"	//f32
1029 		"OpMemberDecorate %f32Struct 8 Offset 992\n"	//v3f32[11]
1030 		"OpMemberDecorate %f32Struct 9 Offset 1168\n");	//v4f32[3]
1031 
1032 	case SHADERTEMPLATE_STRIDE32BIT_STD430:
1033 		return string(
1034 		//struct {f32, v2f32[3]} [11]
1035 		"OpDecorate %v2f32arr3 ArrayStride 8\n"
1036 		"OpMemberDecorate %struct32 0 Offset 0\n"
1037 		"OpMemberDecorate %struct32 1 Offset 8\n"
1038 		"OpDecorate %struct32arr11 ArrayStride 32\n"
1039 
1040 		"OpDecorate %f32arr3    ArrayStride 4\n"
1041 		"OpDecorate %v2f32arr11 ArrayStride 8\n"
1042 		"OpDecorate %v3f32arr11 ArrayStride 16\n"
1043 		"OpDecorate %v4f32arr3  ArrayStride 16\n"
1044 		"OpDecorate %f32StructArr7 ArrayStride 736\n"
1045 		"\n"
1046 
1047 		"OpMemberDecorate %f32Struct 0 Offset 0\n"		//f32
1048 		"OpMemberDecorate %f32Struct 1 Offset 8\n"		//v2f32
1049 		"OpMemberDecorate %f32Struct 2 Offset 16\n"		//v3f32
1050 		"OpMemberDecorate %f32Struct 3 Offset 32\n"		//v4f32
1051 		"OpMemberDecorate %f32Struct 4 Offset 48\n"		//f32[3]
1052 		"OpMemberDecorate %f32Struct 5 Offset 64\n"		//struct {f32, v2f32[3]}[11]
1053 		"OpMemberDecorate %f32Struct 6 Offset 416\n"	//v2f32[11]
1054 		"OpMemberDecorate %f32Struct 7 Offset 504\n"	//f32
1055 		"OpMemberDecorate %f32Struct 8 Offset 512\n"	//v3f32[11]
1056 		"OpMemberDecorate %f32Struct 9 Offset 688\n");	//v4f32[3]
1057 	case SHADERTEMPLATE_STRIDEMIX_STD140:
1058 		return string(
1059 		"\n"//strutNestedIn {b16, b32, v2b16[11], b32[11]}
1060 		"OpDecorate %v2b16NestedArr11${InOut} ArrayStride 16\n"	//v2b16[11]
1061 		"OpDecorate %b32NestedArr11${InOut} ArrayStride 16\n"	//b32[11]
1062 		"OpMemberDecorate %sNested${InOut} 0 Offset 0\n"		//b16
1063 		"OpMemberDecorate %sNested${InOut} 1 Offset 4\n"		//b32
1064 		"OpMemberDecorate %sNested${InOut} 2 Offset 16\n"		//v2b16[11]
1065 		"OpMemberDecorate %sNested${InOut} 3 Offset 192\n"		//b32[11]
1066 		"OpDecorate %sNestedArr11${InOut} ArrayStride 368\n"	//strutNestedIn[11]
1067 		"\n"//strutIn {b16, b32, v2b16, v2b32, v3b16, v3b32, v4b16, v4b32, strutNestedIn[11], b16In[11], b32bIn[11]}
1068 		"OpDecorate %sb16Arr11${InOut} ArrayStride 16\n"		//b16In[11]
1069 		"OpDecorate %sb32Arr11${InOut} ArrayStride 16\n"		//b32bIn[11]
1070 		"OpMemberDecorate %struct${InOut} 0 Offset 0\n"			//b16
1071 		"OpMemberDecorate %struct${InOut} 1 Offset 4\n"			//b32
1072 		"OpMemberDecorate %struct${InOut} 2 Offset 8\n"			//v2b16
1073 		"OpMemberDecorate %struct${InOut} 3 Offset 16\n"		//v2b32
1074 		"OpMemberDecorate %struct${InOut} 4 Offset 24\n"		//v3b16
1075 		"OpMemberDecorate %struct${InOut} 5 Offset 32\n"		//v3b32
1076 		"OpMemberDecorate %struct${InOut} 6 Offset 48\n"		//v4b16
1077 		"OpMemberDecorate %struct${InOut} 7 Offset 64\n"		//v4b32
1078 		"OpMemberDecorate %struct${InOut} 8 Offset 80\n"		//strutNestedIn[11]
1079 		"OpMemberDecorate %struct${InOut} 9 Offset 4128\n"		//b16In[11]
1080 		"OpMemberDecorate %struct${InOut} 10 Offset 4304\n"		//b32bIn[11]
1081 		"OpDecorate %structArr7${InOut} ArrayStride 4480\n");	//strutIn[7]
1082 	case SHADERTEMPLATE_STRIDEMIX_STD430:
1083 		return string(
1084 		"\n"//strutNestedOut {b16, b32, v2b16[11], b32[11]}
1085 		"OpDecorate %v2b16NestedArr11${InOut} ArrayStride 4\n"	//v2b16[11]
1086 		"OpDecorate %b32NestedArr11${InOut}  ArrayStride 4\n"	//b32[11]
1087 		"OpMemberDecorate %sNested${InOut} 0 Offset 0\n"		//b16
1088 		"OpMemberDecorate %sNested${InOut} 1 Offset 4\n"		//b32
1089 		"OpMemberDecorate %sNested${InOut} 2 Offset 8\n"		//v2b16[11]
1090 		"OpMemberDecorate %sNested${InOut} 3 Offset 52\n"		//b32[11]
1091 		"OpDecorate %sNestedArr11${InOut} ArrayStride 96\n"		//strutNestedOut[11]
1092 		"\n"//strutOut {b16, b32, v2b16, v2b32, v3b16, v3b32, v4b16, v4b32, strutNestedOut[11], b16Out[11], b32bOut[11]}
1093 		"OpDecorate %sb16Arr11${InOut} ArrayStride 2\n"			//b16Out[11]
1094 		"OpDecorate %sb32Arr11${InOut} ArrayStride 4\n"			//b32bOut[11]
1095 		"OpMemberDecorate %struct${InOut} 0 Offset 0\n"			//b16
1096 		"OpMemberDecorate %struct${InOut} 1 Offset 4\n"			//b32
1097 		"OpMemberDecorate %struct${InOut} 2 Offset 8\n"			//v2b16
1098 		"OpMemberDecorate %struct${InOut} 3 Offset 16\n"		//v2b32
1099 		"OpMemberDecorate %struct${InOut} 4 Offset 24\n"		//v3b16
1100 		"OpMemberDecorate %struct${InOut} 5 Offset 32\n"		//v3b32
1101 		"OpMemberDecorate %struct${InOut} 6 Offset 48\n"		//v4b16
1102 		"OpMemberDecorate %struct${InOut} 7 Offset 64\n"		//v4b32
1103 		"OpMemberDecorate %struct${InOut} 8 Offset 80\n"		//strutNestedOut[11]
1104 		"OpMemberDecorate %struct${InOut} 9 Offset 1136\n"		//b16Out[11]
1105 		"OpMemberDecorate %struct${InOut} 10 Offset 1160\n"		//b32bOut[11]
1106 		"OpDecorate %structArr7${InOut} ArrayStride 1216\n");	//strutOut[7]
1107 
1108 	default:
1109 		return string("");
1110 	}
1111 }
1112 
1113 /*Return string contains spirv loop begin.
1114  the spec should contains "exeCount" - with name of const i32, it is number of executions
1115  the spec should contains "loopName" - suffix for all local names
1116  %Val${loopName} - index which can be used inside loop
1117  "%ndxArr${loopName}   = OpVariable %fp_i32  Function\n" - has to be defined outside
1118  The function should be always use with endLoop function*/
beginLoop(const std::map<std::string,std::string> & spec)1119 std::string beginLoop(const std::map<std::string, std::string>& spec)
1120 {
1121 	const tcu::StringTemplate	loopBegin	(
1122 	"OpStore %ndxArr${loopName} %zero\n"
1123 	"OpBranch %Loop${loopName}\n"
1124 	"%Loop${loopName} = OpLabel\n"
1125 	"OpLoopMerge %MergeLabel1${loopName} %MergeLabel2${loopName} None\n"
1126 	"OpBranch %Label1${loopName}\n"
1127 	"%Label1${loopName} = OpLabel\n"
1128 	"%Val${loopName} = OpLoad %i32 %ndxArr${loopName}\n"
1129 	"%LessThan${loopName} = OpSLessThan %bool %Val${loopName} %${exeCount}\n"
1130 	"OpBranchConditional %LessThan${loopName} %ifLabel${loopName} %MergeLabel1${loopName}\n"
1131 	"%ifLabel${loopName} = OpLabel\n");
1132 	return loopBegin.specialize(spec);
1133 }
1134 /*Return string contains spirv loop end.
1135  the spec should contains "loopName" - suffix for all local names, suffix should be the same in beginLoop
1136 The function should be always use with beginLoop function*/
endLoop(const std::map<std::string,std::string> & spec)1137 std::string endLoop(const std::map<std::string, std::string>& spec)
1138 {
1139 	const tcu::StringTemplate	loopEnd	(
1140 	"OpBranch %MergeLabel2${loopName}\n"
1141 	"%MergeLabel2${loopName} = OpLabel\n"
1142 	"%plusOne${loopName} = OpIAdd %i32 %Val${loopName} %c_i32_1\n"
1143 	"OpStore %ndxArr${loopName} %plusOne${loopName}\n"
1144 	"OpBranch %Loop${loopName}\n"
1145 	"%MergeLabel1${loopName} = OpLabel\n");
1146 	return loopEnd.specialize(spec);
1147 }
1148 
addCompute16bitStorageUniform16To32Group(tcu::TestCaseGroup * group)1149 void addCompute16bitStorageUniform16To32Group (tcu::TestCaseGroup* group)
1150 {
1151 	tcu::TestContext&				testCtx			= group->getTestContext();
1152 	de::Random						rnd				(deStringHash(group->getName()));
1153 	const int						numElements		= 128;
1154 
1155 	const StringTemplate			shaderTemplate	(
1156 		"OpCapability Shader\n"
1157 		"OpCapability ${capability}\n"
1158 		"OpExtension \"SPV_KHR_16bit_storage\"\n"
1159 		"OpMemoryModel Logical GLSL450\n"
1160 		"OpEntryPoint GLCompute %main \"main\" %id\n"
1161 		"OpExecutionMode %main LocalSize 1 1 1\n"
1162 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1163 
1164 		"${stride}\n"
1165 
1166 		"OpMemberDecorate %SSBO32 0 Offset 0\n"
1167 		"OpMemberDecorate %SSBO16 0 Offset 0\n"
1168 		"OpDecorate %SSBO32 BufferBlock\n"
1169 		"OpDecorate %SSBO16 ${storage}\n"
1170 		"OpDecorate %ssbo32 DescriptorSet 0\n"
1171 		"OpDecorate %ssbo16 DescriptorSet 0\n"
1172 		"OpDecorate %ssbo32 Binding 1\n"
1173 		"OpDecorate %ssbo16 Binding 0\n"
1174 
1175 		"${matrix_decor:opt}\n"
1176 
1177 		"%bool      = OpTypeBool\n"
1178 		"%void      = OpTypeVoid\n"
1179 		"%voidf     = OpTypeFunction %void\n"
1180 		"%u32       = OpTypeInt 32 0\n"
1181 		"%i32       = OpTypeInt 32 1\n"
1182 		"%f32       = OpTypeFloat 32\n"
1183 		"%v3u32     = OpTypeVector %u32 3\n"
1184 		"%uvec3ptr  = OpTypePointer Input %v3u32\n"
1185 		"%i32ptr    = OpTypePointer Uniform %i32\n"
1186 		"%f32ptr    = OpTypePointer Uniform %f32\n"
1187 
1188 		"%zero      = OpConstant %i32 0\n"
1189 		"%c_i32_1   = OpConstant %i32 1\n"
1190 		"%c_i32_2   = OpConstant %i32 2\n"
1191 		"%c_i32_3   = OpConstant %i32 3\n"
1192 		"%c_i32_16  = OpConstant %i32 16\n"
1193 		"%c_i32_32  = OpConstant %i32 32\n"
1194 		"%c_i32_64  = OpConstant %i32 64\n"
1195 		"%c_i32_128 = OpConstant %i32 128\n"
1196 		"%c_i32_ci  = OpConstant %i32 ${constarrayidx}\n"
1197 
1198 		"%i32arr    = OpTypeArray %i32 %c_i32_128\n"
1199 		"%f32arr    = OpTypeArray %f32 %c_i32_128\n"
1200 
1201 		"${types}\n"
1202 		"${matrix_types:opt}\n"
1203 
1204 		"%SSBO32    = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
1205 		"%SSBO16    = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
1206 		"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1207 		"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1208 		"%ssbo32    = OpVariable %up_SSBO32 Uniform\n"
1209 		"%ssbo16    = OpVariable %up_SSBO16 Uniform\n"
1210 
1211 		"%id        = OpVariable %uvec3ptr Input\n"
1212 
1213 		"%main      = OpFunction %void None %voidf\n"
1214 		"%label     = OpLabel\n"
1215 		"%idval     = OpLoad %v3u32 %id\n"
1216 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1217 		"%inloc     = OpAccessChain %${base16}ptr %ssbo16 %zero %${arrayindex} ${index0:opt}\n"
1218 		"%val16     = OpLoad %${base16} %inloc\n"
1219 		"%val32     = ${convert} %${base32} %val16\n"
1220 		"%outloc    = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
1221 		"             OpStore %outloc %val32\n"
1222 		"${matrix_store:opt}\n"
1223 		"             OpReturn\n"
1224 		"             OpFunctionEnd\n");
1225 
1226 	{  // floats
1227 		const char										floatTypes[]	=
1228 			"%f16       = OpTypeFloat 16\n"
1229 			"%f16ptr    = OpTypePointer Uniform %f16\n"
1230 			"%f16arr    = OpTypeArray %f16 %c_i32_128\n"
1231 			"%v2f16     = OpTypeVector %f16 2\n"
1232 			"%v2f32     = OpTypeVector %f32 2\n"
1233 			"%v2f16ptr  = OpTypePointer Uniform %v2f16\n"
1234 			"%v2f32ptr  = OpTypePointer Uniform %v2f32\n"
1235 			"%v2f16arr  = OpTypeArray %v2f16 %c_i32_64\n"
1236 			"%v2f32arr  = OpTypeArray %v2f32 %c_i32_64\n";
1237 
1238 		struct CompositeType
1239 		{
1240 			const char*	name;
1241 			const char*	base32;
1242 			const char*	base16;
1243 			const char*	stride;
1244 			bool		useConstantIndex;
1245 			unsigned	constantIndex;
1246 			unsigned	count;
1247 			unsigned	inputStride;
1248 		};
1249 
1250 		const CompositeType	cTypes[2][5]		=
1251 		{
1252 			{
1253 				{"scalar",				"f32",		"f16",		"OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",					false,	0,	numElements,		1},
1254 				{"scalar_const_idx_5",	"f32",		"f16",		"OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",					true,	5,	numElements,		1},
1255 				{"scalar_const_idx_8",	"f32",		"f16",		"OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",					true,	8,	numElements,		1},
1256 				{"vector",				"v2f32",	"v2f16",	"OpDecorate %v2f32arr ArrayStride 8\nOpDecorate %v2f16arr ArrayStride 4\n",				false,	0,	numElements / 2,	2},
1257 				{"matrix",				"v2f32",	"v2f16",	"OpDecorate %m4v2f32arr ArrayStride 32\nOpDecorate %m4v2f16arr ArrayStride 16\n",		false,	0,	numElements / 8,	8}
1258 			},
1259 			{
1260 				{"scalar",				"f32",		"f16",		"OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 16\n",				false,	0,	numElements,		8},
1261 				{"scalar_const_idx_5",	"f32",		"f16",		"OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 16\n",				true,	5,	numElements,		8},
1262 				{"scalar_const_idx_8",	"f32",		"f16",		"OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 16\n",				true,	8,	numElements,		8},
1263 				{"vector",				"v2f32",	"v2f16",	"OpDecorate %v2f32arr ArrayStride 8\nOpDecorate %v2f16arr ArrayStride 16\n",			false,	0,	numElements / 2,	8},
1264 				{"matrix",				"v2f32",	"v2f16",	"OpDecorate %m4v2f32arr ArrayStride 32\nOpDecorate %m4v2f16arr ArrayStride 16\n",		false,	0,	numElements / 8,	8}
1265 			}
1266 		};
1267 
1268 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1269 			for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx)
1270 			{
1271 				ComputeShaderSpec		spec;
1272 				map<string, string>		specs;
1273 				string					testName		= string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name + "_float";
1274 
1275 				specs["capability"]		= CAPABILITIES[capIdx].cap;
1276 				specs["storage"]		= CAPABILITIES[capIdx].decor;
1277 				specs["stride"]			= cTypes[capIdx][tyIdx].stride;
1278 				specs["base32"]			= cTypes[capIdx][tyIdx].base32;
1279 				specs["base16"]			= cTypes[capIdx][tyIdx].base16;
1280 				specs["types"]			= floatTypes;
1281 				specs["convert"]		= "OpFConvert";
1282 				specs["constarrayidx"]	= de::toString(cTypes[capIdx][tyIdx].constantIndex);
1283 				if (cTypes[capIdx][tyIdx].useConstantIndex)
1284 					specs["arrayindex"] = "c_i32_ci";
1285 				else
1286 					specs["arrayindex"] = "x";
1287 
1288 				const deUint32			inputStride		= cTypes[capIdx][tyIdx].inputStride;
1289 				const deUint32			count			= cTypes[capIdx][tyIdx].count;
1290 				const deUint32			scalarsPerItem	= numElements / count;
1291 				vector<deFloat16>		float16Data		= getFloat16s(rnd, numElements * inputStride);
1292 				vector<float>			float32Data;
1293 
1294 				float32Data.reserve(numElements);
1295 				for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
1296 					for (deUint32 scalarIdx = 0; scalarIdx < scalarsPerItem; scalarIdx++)
1297 						float32Data.push_back(deFloat16To32(float16Data[numIdx * inputStride + scalarIdx]));
1298 
1299 				vector<float>			float32DataConstIdx;
1300 				if (cTypes[capIdx][tyIdx].useConstantIndex)
1301 				{
1302 					const deUint32 numFloats = numElements / cTypes[capIdx][tyIdx].count;
1303 					for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
1304 						float32DataConstIdx.push_back(float32Data[cTypes[capIdx][tyIdx].constantIndex * numFloats + numIdx % numFloats]);
1305 				}
1306 
1307 				if (strcmp(cTypes[capIdx][tyIdx].name, "matrix") == 0)
1308 				{
1309 					specs["index0"]			= "%zero";
1310 					specs["matrix_prefix"]	= "m4";
1311 					specs["matrix_types"]	=
1312 						"%m4v2f16 = OpTypeMatrix %v2f16 4\n"
1313 						"%m4v2f32 = OpTypeMatrix %v2f32 4\n"
1314 						"%m4v2f16arr = OpTypeArray %m4v2f16 %c_i32_16\n"
1315 						"%m4v2f32arr = OpTypeArray %m4v2f32 %c_i32_16\n";
1316 					specs["matrix_decor"]	=
1317 						"OpMemberDecorate %SSBO32 0 ColMajor\n"
1318 						"OpMemberDecorate %SSBO32 0 MatrixStride 8\n"
1319 						"OpMemberDecorate %SSBO16 0 ColMajor\n"
1320 						"OpMemberDecorate %SSBO16 0 MatrixStride 4\n";
1321 					specs["matrix_store"]	=
1322 						"%inloc_1  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_1\n"
1323 						"%val16_1  = OpLoad %v2f16 %inloc_1\n"
1324 						"%val32_1  = OpFConvert %v2f32 %val16_1\n"
1325 						"%outloc_1 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_1\n"
1326 						"            OpStore %outloc_1 %val32_1\n"
1327 
1328 						"%inloc_2  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_2\n"
1329 						"%val16_2  = OpLoad %v2f16 %inloc_2\n"
1330 						"%val32_2  = OpFConvert %v2f32 %val16_2\n"
1331 						"%outloc_2 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_2\n"
1332 						"            OpStore %outloc_2 %val32_2\n"
1333 
1334 						"%inloc_3  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_3\n"
1335 						"%val16_3  = OpLoad %v2f16 %inloc_3\n"
1336 						"%val32_3  = OpFConvert %v2f32 %val16_3\n"
1337 						"%outloc_3 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_3\n"
1338 						"            OpStore %outloc_3 %val32_3\n";
1339 				}
1340 
1341 				spec.assembly			= shaderTemplate.specialize(specs);
1342 				spec.numWorkGroups		= IVec3(cTypes[capIdx][tyIdx].count, 1, 1);
1343 				spec.verifyIO			= check32BitFloats;
1344 
1345 				spec.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data)), CAPABILITIES[capIdx].dtype));
1346 				spec.outputs.push_back(Resource(BufferSp(new Float32Buffer(cTypes[capIdx][tyIdx].useConstantIndex ? float32DataConstIdx : float32Data))));
1347 				spec.extensions.push_back("VK_KHR_16bit_storage");
1348 				spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
1349 
1350 				group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
1351 			}
1352 	}
1353 
1354 	{  // Integers
1355 		const char		sintTypes[]		=
1356 			"%i16       = OpTypeInt 16 1\n"
1357 			"%i16ptr    = OpTypePointer Uniform %i16\n"
1358 			"%i16arr    = OpTypeArray %i16 %c_i32_128\n"
1359 			"%v4i16     = OpTypeVector %i16 4\n"
1360 			"%v4i32     = OpTypeVector %i32 4\n"
1361 			"%v4i16ptr  = OpTypePointer Uniform %v4i16\n"
1362 			"%v4i32ptr  = OpTypePointer Uniform %v4i32\n"
1363 			"%v4i16arr  = OpTypeArray %v4i16 %c_i32_32\n"
1364 			"%v4i32arr  = OpTypeArray %v4i32 %c_i32_32\n";
1365 
1366 		const char		uintTypes[]		=
1367 			"%u16       = OpTypeInt 16 0\n"
1368 			"%u16ptr    = OpTypePointer Uniform %u16\n"
1369 			"%u32ptr    = OpTypePointer Uniform %u32\n"
1370 			"%u16arr    = OpTypeArray %u16 %c_i32_128\n"
1371 			"%u32arr    = OpTypeArray %u32 %c_i32_128\n"
1372 			"%v4u16     = OpTypeVector %u16 4\n"
1373 			"%v4u32     = OpTypeVector %u32 4\n"
1374 			"%v4u16ptr  = OpTypePointer Uniform %v4u16\n"
1375 			"%v4u32ptr  = OpTypePointer Uniform %v4u32\n"
1376 			"%v4u16arr  = OpTypeArray %v4u16 %c_i32_32\n"
1377 			"%v4u32arr  = OpTypeArray %v4u32 %c_i32_32\n";
1378 
1379 		struct CompositeType
1380 		{
1381 			const char*	name;
1382 			bool		isSigned;
1383 			const char* types;
1384 			const char*	base32;
1385 			const char*	base16;
1386 			const char* opcode;
1387 			const char*	stride;
1388 			bool		useConstantIndex;
1389 			unsigned	constantIndex;
1390 			unsigned	count;
1391 			unsigned	inputStride;
1392 		};
1393 
1394 		const CompositeType	cTypes[2][8]	=
1395 		{
1396 			{
1397 				{"scalar_sint",				true,	sintTypes,	"i32",		"i16",		"OpSConvert",	"OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",			false,	0,	numElements,		1},
1398 				{"scalar_sint_const_idx_5",	true,	sintTypes,	"i32",		"i16",		"OpSConvert",	"OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",			true,	5,	numElements,		1},
1399 				{"scalar_sint_const_idx_8",	true,	sintTypes,	"i32",		"i16",		"OpSConvert",	"OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",			true,	8,	numElements,		1},
1400 				{"scalar_uint",				false,	uintTypes,	"u32",		"u16",		"OpUConvert",	"OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",			false,	0,	numElements,		1},
1401 				{"scalar_uint_const_idx_5",	false,	uintTypes,	"u32",		"u16",		"OpUConvert",	"OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",			true,	5,	numElements,		1},
1402 				{"scalar_uint_const_idx_8",	false,	uintTypes,	"u32",		"u16",		"OpUConvert",	"OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",			true,	8,	numElements,		1},
1403 				{"vector_sint",				true,	sintTypes,	"v4i32",	"v4i16",	"OpSConvert",	"OpDecorate %v4i32arr ArrayStride 16\nOpDecorate %v4i16arr ArrayStride 8\n",	false,	0,	numElements / 4,	4},
1404 				{"vector_uint",				false,	uintTypes,	"v4u32",	"v4u16",	"OpUConvert",	"OpDecorate %v4u32arr ArrayStride 16\nOpDecorate %v4u16arr ArrayStride 8\n",	false,	0,	numElements / 4,	4}
1405 			},
1406 			{
1407 				{"scalar_sint",				true,	sintTypes,	"i32",		"i16",		"OpSConvert",	"OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 16\n",		false,	0,	numElements,		8},
1408 				{"scalar_sint_const_idx_5",	true,	sintTypes,	"i32",		"i16",		"OpSConvert",	"OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 16\n",		true,	5,	numElements,		8},
1409 				{"scalar_sint_const_idx_8",	true,	sintTypes,	"i32",		"i16",		"OpSConvert",	"OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 16\n",		true,	8,	numElements,		8},
1410 				{"scalar_uint",				false,	uintTypes,	"u32",		"u16",		"OpUConvert",	"OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 16\n",		false,	0,	numElements,		8},
1411 				{"scalar_uint_const_idx_5",	false,	uintTypes,	"u32",		"u16",		"OpUConvert",	"OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 16\n",		true,	5,	numElements,		8},
1412 				{"scalar_uint_const_idx_8",	false,	uintTypes,	"u32",		"u16",		"OpUConvert",	"OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 16\n",		true,	8,	numElements,		8},
1413 				{"vector_sint",				true,	sintTypes,	"v4i32",	"v4i16",	"OpSConvert",	"OpDecorate %v4i32arr ArrayStride 16\nOpDecorate %v4i16arr ArrayStride 16\n",	false,	0,	numElements / 4,	8},
1414 				{"vector_uint",				false,	uintTypes,	"v4u32",	"v4u16",	"OpUConvert",	"OpDecorate %v4u32arr ArrayStride 16\nOpDecorate %v4u16arr ArrayStride 16\n",	false,	0,	numElements / 4,	8}
1415 			}
1416 		};
1417 
1418 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1419 			for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx)
1420 			{
1421 				ComputeShaderSpec	spec;
1422 				map<string, string>	specs;
1423 				string				testName		= string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name;
1424 				const deUint32		inputStride		= cTypes[capIdx][tyIdx].inputStride;
1425 				vector<deInt16>		inputs			= getInt16s(rnd, numElements * inputStride);
1426 				vector<deInt32>		sOutputs;
1427 				vector<deInt32>		uOutputs;
1428 				const deUint16		signBitMask		= 0x8000;
1429 				const deUint32		signExtendMask	= 0xffff0000;
1430 				const deUint32		count			= cTypes[capIdx][tyIdx].count;
1431 				const deUint32		scalarsPerItem	= numElements / count;
1432 
1433 				sOutputs.reserve(numElements);
1434 				uOutputs.reserve(numElements);
1435 
1436 				for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
1437 					for (deUint32 scalarIdx = 0; scalarIdx < scalarsPerItem; ++scalarIdx)
1438 					{
1439 						const deInt16 input = inputs[numNdx * inputStride + scalarIdx];
1440 
1441 						uOutputs.push_back(static_cast<deUint16>(input));
1442 						if (input & signBitMask)
1443 							sOutputs.push_back(static_cast<deInt32>(input | signExtendMask));
1444 						else
1445 							sOutputs.push_back(static_cast<deInt32>(input));
1446 					}
1447 
1448 				vector<deInt32>		intDataConstIdx;
1449 
1450 				if (cTypes[capIdx][tyIdx].useConstantIndex)
1451 				{
1452 					for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
1453 					{
1454 						const deInt32 idx = cTypes[capIdx][tyIdx].constantIndex * scalarsPerItem + numIdx % scalarsPerItem;
1455 
1456 						if (cTypes[capIdx][tyIdx].isSigned)
1457 							intDataConstIdx.push_back(sOutputs[idx]);
1458 						else
1459 							intDataConstIdx.push_back(uOutputs[idx]);
1460 					}
1461 				}
1462 
1463 				specs["capability"]		= CAPABILITIES[capIdx].cap;
1464 				specs["storage"]		= CAPABILITIES[capIdx].decor;
1465 				specs["stride"]			= cTypes[capIdx][tyIdx].stride;
1466 				specs["base32"]			= cTypes[capIdx][tyIdx].base32;
1467 				specs["base16"]			= cTypes[capIdx][tyIdx].base16;
1468 				specs["types"]			= cTypes[capIdx][tyIdx].types;
1469 				specs["convert"]		= cTypes[capIdx][tyIdx].opcode;
1470 				specs["constarrayidx"]	= de::toString(cTypes[capIdx][tyIdx].constantIndex);
1471 				if (cTypes[capIdx][tyIdx].useConstantIndex)
1472 					specs["arrayindex"] = "c_i32_ci";
1473 				else
1474 					specs["arrayindex"] = "x";
1475 
1476 				spec.assembly			= shaderTemplate.specialize(specs);
1477 				spec.numWorkGroups		= IVec3(cTypes[capIdx][tyIdx].count, 1, 1);
1478 
1479 				spec.inputs.push_back(Resource(BufferSp(new Int16Buffer(inputs)), CAPABILITIES[capIdx].dtype));
1480 				if (cTypes[capIdx][tyIdx].useConstantIndex)
1481 					spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(intDataConstIdx))));
1482 				else if (cTypes[capIdx][tyIdx].isSigned)
1483 					spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(sOutputs))));
1484 				else
1485 					spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(uOutputs))));
1486 				spec.extensions.push_back("VK_KHR_16bit_storage");
1487 				spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
1488 
1489 				group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
1490 			}
1491 	}
1492 }
1493 
addCompute16bitStorageUniform16To32ChainAccessGroup(tcu::TestCaseGroup * group)1494 void addCompute16bitStorageUniform16To32ChainAccessGroup (tcu::TestCaseGroup* group)
1495 {
1496 	tcu::TestContext&				testCtx			= group->getTestContext();
1497 	de::Random						rnd				(deStringHash(group->getName()));
1498 	const deUint32					structSize		= 128; // In number of 16bit items. Includes padding.
1499 	vector<deFloat16>				inputDataFloat	= getFloat16s(rnd, structSize * 4);
1500 	vector<deInt16>					inputDataInt	= getInt16s(rnd, structSize * 4);
1501 	vector<float>					outputDataFloat;
1502 	vector<deInt32>					outputDataSInt;
1503 	vector<deInt32>					outputDataUInt;
1504 	vector<tcu::UVec4>				indices;
1505 
1506 	// Input is an array of a struct that varies on 16bit data type being tested:
1507 	//
1508 	// Float:
1509 	//
1510 	// float16 scalars[3]
1511 	// mat4x3  matrix
1512 	// vec3    vector
1513 	//
1514 	// Int:
1515 	//
1516 	// int16 scalars[3]
1517 	// int16 array2D[4][3]
1518 	// ivec3 vector
1519 	//
1520 	// UInt:
1521 	//
1522 	// uint16 scalars[3]
1523 	// uint16 array2D[4][3]
1524 	// uvec3  vector
1525 
1526 	const StringTemplate			shaderTemplate	(
1527 		"                              OpCapability Shader\n"
1528 		"                              OpCapability ${capability}\n"
1529 		"                              OpExtension \"SPV_KHR_16bit_storage\"\n"
1530 		"                         %1 = OpExtInstImport \"GLSL.std.450\"\n"
1531 		"                              OpMemoryModel Logical GLSL450\n"
1532 		"                              OpEntryPoint GLCompute %main \"main\"\n"
1533 		"                              OpExecutionMode %main LocalSize 1 1 1\n"
1534 		"                              OpSource GLSL 430\n"
1535 		"                              OpDecorate %Output BufferBlock\n"
1536 		"                              OpDecorate %dataOutput DescriptorSet 0\n"
1537 		"                              OpDecorate %dataOutput Binding 1\n"
1538 		"                              OpDecorate %scalarArray ArrayStride 16\n"
1539 		"                              OpDecorate %scalarArray2D ArrayStride 48\n"
1540 		"                              OpMemberDecorate %S 0 Offset 0\n"
1541 		"                              OpMemberDecorate %S 1 Offset 48\n"
1542 		"                              ${decoration:opt}\n"
1543 		"                              OpMemberDecorate %S 2 Offset 240\n"
1544 		"                              OpDecorate %_arr_S_uint_4 ArrayStride 256\n"
1545 		"                              OpMemberDecorate %Input 0 Offset 0\n"
1546 		"                              OpMemberDecorate %Output 0 Offset 0\n"
1547 		"                              OpDecorate %Input ${storage}\n"
1548 		"                              OpDecorate %dataInput DescriptorSet 0\n"
1549 		"                              OpDecorate %dataInput Binding 0\n"
1550 		"                       %f16 = OpTypeFloat 16\n"
1551 		"                       %f32 = OpTypeFloat 32\n"
1552 		"                       %i16 = OpTypeInt 16 1\n"
1553 		"                       %i32 = OpTypeInt 32 1\n"
1554 		"                       %u16 = OpTypeInt 16 0\n"
1555 		"                       %u32 = OpTypeInt 32 0\n"
1556 		"                      %void = OpTypeVoid\n"
1557 		"                  %voidFunc = OpTypeFunction %void\n"
1558 		"        %_ptr_Function_uint = OpTypePointer Function %u32\n"
1559 		"                     %v3u32 = OpTypeVector %u32 3\n"
1560 		"          %_ptr_Input_v3u32 = OpTypePointer Input %v3u32\n"
1561 		"                     %int_0 = OpConstant %i32 0\n"
1562 		"                    %uint_3 = OpConstant %u32 3\n"
1563 		"                    %uint_4 = OpConstant %u32 4\n"
1564 		"                        %s0 = OpConstant %u32 ${s0}\n"
1565 		"                        %s1 = OpConstant %u32 ${s1}\n"
1566 		"                        %s2 = OpConstant %u32 ${s2}\n"
1567 		"                        %s3 = OpConstant %u32 ${s3}\n"
1568 		"                    %Output = OpTypeStruct %${type}32\n"
1569 		"       %_ptr_Uniform_Output = OpTypePointer Uniform %Output\n"
1570 		"                %dataOutput = OpVariable %_ptr_Uniform_Output Uniform\n"
1571 		"               %scalarArray = OpTypeArray %${type}16 %uint_3\n"
1572 		"                     %v3f16 = OpTypeVector %f16 3\n"
1573 		"                     %v3i16 = OpTypeVector %i16 3\n"
1574 		"                     %v3u16 = OpTypeVector %u16 3\n"
1575 		"                    %matrix = OpTypeMatrix %v3f16 4\n"
1576 		"             %scalarArray2D = OpTypeArray %scalarArray %uint_4\n"
1577 		"                         %S = OpTypeStruct %scalarArray %${type2D} %v3${type}16\n"
1578 		"             %_arr_S_uint_4 = OpTypeArray %S %uint_4\n"
1579 		"                     %Input = OpTypeStruct %_arr_S_uint_4\n"
1580 		"        %_ptr_Uniform_Input = OpTypePointer Uniform %Input\n"
1581 		"                 %dataInput = OpVariable %_ptr_Uniform_Input Uniform\n"
1582 		"   %_ptr_Uniform_16bit_data = OpTypePointer Uniform %${type}16\n"
1583 		"   %_ptr_Uniform_32bit_data = OpTypePointer Uniform %${type}32\n"
1584 		"                      %main = OpFunction %void None %voidFunc\n"
1585 		"                     %entry = OpLabel\n"
1586 		"                   %dataPtr = ${accessChain}\n"
1587 		"                      %data = OpLoad %${type}16 %dataPtr\n"
1588 		"                 %converted = ${convert}\n"
1589 		"                    %outPtr = OpAccessChain %_ptr_Uniform_32bit_data %dataOutput %int_0\n"
1590 		"                              OpStore %outPtr %converted\n"
1591 		"                              OpReturn\n"
1592 		"                              OpFunctionEnd\n");
1593 
1594 	// Generate constant indices for OpChainAccess. We need to use constant values
1595 	// when indexing into structures. This loop generates all permutations.
1596 	for (deUint32 idx0 = 0; idx0 < 4; ++idx0)
1597 		for (deUint32 idx1 = 0; idx1 < 3; ++idx1)
1598 			for (deUint32 idx2 = 0; idx2 < (idx1 == 1u ? 4u : 3u); ++idx2)
1599 				for (deUint32 idx3 = 0; idx3 < (idx1 == 1u ? 3u : 1u); ++idx3)
1600 					indices.push_back(tcu::UVec4(idx0, idx1, idx2, idx3));
1601 
1602 
1603 	for (deUint32 numIdx = 0; numIdx < (deUint32)indices.size(); ++numIdx)
1604 	{
1605 		const deUint16		signBitMask			= 0x8000;
1606 		const deUint32		signExtendMask		= 0xffff0000;
1607 		// Determine the selected output float for the selected indices.
1608 		const tcu::UVec4	vec					= indices[numIdx];
1609 		// Offsets are in multiples of 16bits. Floats are using matrix as the
1610 		// second field, which has different layout rules than 2D array.
1611 		// Therefore separate offset tables are needed.
1612 		const deUint32		fieldOffsetsFloat[3][3]	=
1613 		{
1614 			{0u,	8u,		0u},
1615 			{24,	24u,	1u},
1616 			{120u,	1u,		0u}
1617 		};
1618 		const deUint32		fieldOffsetsInt[3][3]	=
1619 		{
1620 			{0u,	8u,		0u},
1621 			{24,	24u,	8u},
1622 			{120u,	1u,		0u}
1623 		};
1624 		const deUint32		offsetFloat				= vec.x() * structSize + fieldOffsetsFloat[vec.y()][0] + fieldOffsetsFloat[vec.y()][1] * vec.z() + fieldOffsetsFloat[vec.y()][2] * vec.w();
1625 		const deUint32		offsetInt				= vec.x() * structSize + fieldOffsetsInt[vec.y()][0] + fieldOffsetsInt[vec.y()][1] * vec.z() + fieldOffsetsInt[vec.y()][2] * vec.w();
1626 		const bool			hasSign					= inputDataInt[offsetInt] & signBitMask;
1627 
1628 		outputDataFloat.push_back(deFloat16To32(inputDataFloat[offsetFloat]));
1629 		outputDataUInt.push_back((deUint16)inputDataInt[offsetInt]);
1630 		outputDataSInt.push_back((deInt32)(inputDataInt[offsetInt] | (hasSign ? signExtendMask : 0u)));
1631 	}
1632 
1633 	for (deUint32 indicesIdx = 0; indicesIdx < (deUint32)indices.size(); ++indicesIdx)
1634 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1635 		{
1636 			string						indexString		= de::toString(indices[indicesIdx].x()) + "_" + de::toString(indices[indicesIdx].y()) + "_" + de::toString(indices[indicesIdx].z());
1637 			if (indices[indicesIdx].y() == 1)
1638 				indexString += string("_") + de::toString(indices[indicesIdx].w());
1639 
1640 			const string				testNameBase	= string(CAPABILITIES[capIdx].name) + "_" + indexString + "_";
1641 
1642 			struct DataType
1643 			{
1644 				string		name;
1645 				string		type;
1646 				string		convert;
1647 				string		type2D; // Matrix when using floats. 2D array otherwise.
1648 				BufferSp	inputs;
1649 				BufferSp	outputs;
1650 			};
1651 
1652 			const DataType				dataTypes[]		=
1653 			{
1654 				{ "float",	"f", "OpFConvert %f32 %data",	"matrix",			BufferSp(new Float16Buffer(inputDataFloat)),	BufferSp(new Float32Buffer(vector<float>(1, outputDataFloat[indicesIdx])))	},
1655 				{ "int",	"i", "OpSConvert %i32 %data",	"scalarArray2D",	BufferSp(new Int16Buffer(inputDataInt)),		BufferSp(new Int32Buffer(vector<deInt32>(1, outputDataSInt[indicesIdx])))	},
1656 				{ "uint",	"u", "OpUConvert %u32 %data",	"scalarArray2D",	BufferSp(new Int16Buffer(inputDataInt)),		BufferSp(new Int32Buffer(vector<deInt32>(1, outputDataUInt[indicesIdx])))	}
1657 			};
1658 
1659 			for (deUint32 dataTypeIdx = 0; dataTypeIdx < DE_LENGTH_OF_ARRAY(dataTypes); ++dataTypeIdx)
1660 			{
1661 				const string				testName	= testNameBase + dataTypes[dataTypeIdx].name;
1662 				map<string, string>			specs;
1663 				ComputeShaderSpec			spec;
1664 
1665 				specs["capability"]						= CAPABILITIES[capIdx].cap;
1666 				specs["storage"]						= CAPABILITIES[capIdx].decor;
1667 				specs["s0"]								= de::toString(indices[indicesIdx].x());
1668 				specs["s1"]								= de::toString(indices[indicesIdx].y());
1669 				specs["s2"]								= de::toString(indices[indicesIdx].z());
1670 				specs["s3"]								= de::toString(indices[indicesIdx].w());
1671 				specs["type"]							= dataTypes[dataTypeIdx].type;
1672 				specs["convert"]						= dataTypes[dataTypeIdx].convert;
1673 				specs["type2D"]							= dataTypes[dataTypeIdx].type2D;
1674 
1675 				if (indices[indicesIdx].y() == 1)
1676 					specs["accessChain"]				= "OpAccessChain %_ptr_Uniform_16bit_data %dataInput %int_0 %s0 %s1 %s2 %s3";
1677 				else
1678 					specs["accessChain"]				= "OpAccessChain %_ptr_Uniform_16bit_data %dataInput %int_0 %s0 %s1 %s2";
1679 
1680 				if (dataTypeIdx == 0)
1681 				{
1682 					spec.verifyIO		= check32BitFloats;
1683 					specs["decoration"]	= "OpMemberDecorate %S 1 ColMajor\nOpMemberDecorate %S 1 MatrixStride 48\n";
1684 				}
1685 
1686 				spec.assembly							= shaderTemplate.specialize(specs);
1687 				spec.numWorkGroups						= IVec3(1, 1, 1);
1688 				spec.extensions.push_back				("VK_KHR_16bit_storage");
1689 				spec.requestedVulkanFeatures			= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
1690 				spec.inputs.push_back(Resource(dataTypes[dataTypeIdx].inputs, CAPABILITIES[capIdx].dtype));
1691 				spec.outputs.push_back(Resource(dataTypes[dataTypeIdx].outputs));
1692 
1693 				group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
1694 			}
1695 		}
1696 }
1697 
addCompute16bitStoragePushConstant16To32Group(tcu::TestCaseGroup * group)1698 void addCompute16bitStoragePushConstant16To32Group (tcu::TestCaseGroup* group)
1699 {
1700 	tcu::TestContext&				testCtx			= group->getTestContext();
1701 	de::Random						rnd				(deStringHash(group->getName()));
1702 	const int						numElements		= 64;
1703 
1704 	const StringTemplate			shaderTemplate	(
1705 		"OpCapability Shader\n"
1706 		"OpCapability StoragePushConstant16\n"
1707 		"OpExtension \"SPV_KHR_16bit_storage\"\n"
1708 		"OpMemoryModel Logical GLSL450\n"
1709 		"OpEntryPoint GLCompute %main \"main\" %id\n"
1710 		"OpExecutionMode %main LocalSize 1 1 1\n"
1711 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1712 
1713 		"${stride}"
1714 
1715 		"OpDecorate %PC16 Block\n"
1716 		"OpMemberDecorate %PC16 0 Offset 0\n"
1717 		"OpMemberDecorate %SSBO32 0 Offset 0\n"
1718 		"OpDecorate %SSBO32 BufferBlock\n"
1719 		"OpDecorate %ssbo32 DescriptorSet 0\n"
1720 		"OpDecorate %ssbo32 Binding 0\n"
1721 
1722 		"${matrix_decor:opt}\n"
1723 
1724 		"%void      = OpTypeVoid\n"
1725 		"%voidf     = OpTypeFunction %void\n"
1726 		"%u32       = OpTypeInt 32 0\n"
1727 		"%i32       = OpTypeInt 32 1\n"
1728 		"%f32       = OpTypeFloat 32\n"
1729 		"%v3u32     = OpTypeVector %u32 3\n"
1730 		"%uvec3ptr  = OpTypePointer Input %v3u32\n"
1731 		"%i32ptr    = OpTypePointer Uniform %i32\n"
1732 		"%f32ptr    = OpTypePointer Uniform %f32\n"
1733 
1734 		"%zero      = OpConstant %i32 0\n"
1735 		"%c_i32_1   = OpConstant %i32 1\n"
1736 		"%c_i32_8   = OpConstant %i32 8\n"
1737 		"%c_i32_16  = OpConstant %i32 16\n"
1738 		"%c_i32_32  = OpConstant %i32 32\n"
1739 		"%c_i32_64  = OpConstant %i32 64\n"
1740 		"%c_i32_ci  = OpConstant %i32 ${constarrayidx}\n"
1741 
1742 		"%i32arr    = OpTypeArray %i32 %c_i32_64\n"
1743 		"%f32arr    = OpTypeArray %f32 %c_i32_64\n"
1744 
1745 		"${types}\n"
1746 		"${matrix_types:opt}\n"
1747 
1748 		"%PC16      = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
1749 		"%pp_PC16   = OpTypePointer PushConstant %PC16\n"
1750 		"%pc16      = OpVariable %pp_PC16 PushConstant\n"
1751 		"%SSBO32    = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
1752 		"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1753 		"%ssbo32    = OpVariable %up_SSBO32 Uniform\n"
1754 
1755 		"%id        = OpVariable %uvec3ptr Input\n"
1756 
1757 		"%main      = OpFunction %void None %voidf\n"
1758 		"%label     = OpLabel\n"
1759 		"%idval     = OpLoad %v3u32 %id\n"
1760 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1761 		"%inloc     = OpAccessChain %${base16}ptr %pc16 %zero %${arrayindex} ${index0:opt}\n"
1762 		"%val16     = OpLoad %${base16} %inloc\n"
1763 		"%val32     = ${convert} %${base32} %val16\n"
1764 		"%outloc    = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
1765 		"             OpStore %outloc %val32\n"
1766 		"${matrix_store:opt}\n"
1767 		"             OpReturn\n"
1768 		"             OpFunctionEnd\n");
1769 
1770 	{  // floats
1771 		const char										floatTypes[]	=
1772 			"%f16       = OpTypeFloat 16\n"
1773 			"%f16ptr    = OpTypePointer PushConstant %f16\n"
1774 			"%f16arr    = OpTypeArray %f16 %c_i32_64\n"
1775 			"%v4f16     = OpTypeVector %f16 4\n"
1776 			"%v4f32     = OpTypeVector %f32 4\n"
1777 			"%v4f16ptr  = OpTypePointer PushConstant %v4f16\n"
1778 			"%v4f32ptr  = OpTypePointer Uniform %v4f32\n"
1779 			"%v4f16arr  = OpTypeArray %v4f16 %c_i32_16\n"
1780 			"%v4f32arr  = OpTypeArray %v4f32 %c_i32_16\n";
1781 
1782 		struct CompositeType
1783 		{
1784 			const char*	name;
1785 			const char*	base32;
1786 			const char*	base16;
1787 			const char*	stride;
1788 			bool		useConstantIndex;
1789 			unsigned	constantIndex;
1790 			unsigned	count;
1791 		};
1792 
1793 		const CompositeType	cTypes[]	=
1794 		{
1795 			{"scalar",				"f32",		"f16",		"OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",				false,	0,	numElements},
1796 			{"scalar_const_idx_5",	"f32",		"f16",		"OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",				true,	5,	numElements},
1797 			{"scalar_const_idx_8",	"f32",		"f16",		"OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",				true,	8,	numElements},
1798 			{"vector",				"v4f32",	"v4f16",	"OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n",		false,	0,	numElements / 4},
1799 			{"matrix",				"v4f32",	"v4f16",	"OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n",	false,	0,	numElements / 8},
1800 		};
1801 
1802 		vector<deFloat16>	float16Data			= getFloat16s(rnd, numElements);
1803 		vector<float>		float32Data;
1804 
1805 		float32Data.reserve(numElements);
1806 		for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
1807 			float32Data.push_back(deFloat16To32(float16Data[numIdx]));
1808 
1809 		for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
1810 		{
1811 			ComputeShaderSpec		spec;
1812 			map<string, string>		specs;
1813 			string					testName	= string(cTypes[tyIdx].name) + "_float";
1814 
1815 			vector<float>			float32DataConstIdx;
1816 			if (cTypes[tyIdx].useConstantIndex)
1817 			{
1818 				const deUint32 numFloats = numElements / cTypes[tyIdx].count;
1819 				for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
1820 					float32DataConstIdx.push_back(float32Data[cTypes[tyIdx].constantIndex * numFloats + numIdx % numFloats]);
1821 			}
1822 
1823 			specs["stride"]			= cTypes[tyIdx].stride;
1824 			specs["base32"]			= cTypes[tyIdx].base32;
1825 			specs["base16"]			= cTypes[tyIdx].base16;
1826 			specs["types"]			= floatTypes;
1827 			specs["convert"]		= "OpFConvert";
1828 			specs["constarrayidx"]	= de::toString(cTypes[tyIdx].constantIndex);
1829 			if (cTypes[tyIdx].useConstantIndex)
1830 				specs["arrayindex"] = "c_i32_ci";
1831 			else
1832 				specs["arrayindex"] = "x";
1833 
1834 			if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
1835 			{
1836 				specs["index0"]			= "%zero";
1837 				specs["matrix_prefix"]	= "m2";
1838 				specs["matrix_types"]	=
1839 					"%m2v4f16 = OpTypeMatrix %v4f16 2\n"
1840 					"%m2v4f32 = OpTypeMatrix %v4f32 2\n"
1841 					"%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_8\n"
1842 					"%m2v4f32arr = OpTypeArray %m2v4f32 %c_i32_8\n";
1843 				specs["matrix_decor"]	=
1844 					"OpMemberDecorate %SSBO32 0 ColMajor\n"
1845 					"OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
1846 					"OpMemberDecorate %PC16 0 ColMajor\n"
1847 					"OpMemberDecorate %PC16 0 MatrixStride 8\n";
1848 				specs["matrix_store"]	=
1849 					"%inloc_1  = OpAccessChain %v4f16ptr %pc16 %zero %x %c_i32_1\n"
1850 					"%val16_1  = OpLoad %v4f16 %inloc_1\n"
1851 					"%val32_1  = OpFConvert %v4f32 %val16_1\n"
1852 					"%outloc_1 = OpAccessChain %v4f32ptr %ssbo32 %zero %x %c_i32_1\n"
1853 					"            OpStore %outloc_1 %val32_1\n";
1854 			}
1855 
1856 			spec.assembly			= shaderTemplate.specialize(specs);
1857 			spec.numWorkGroups		= IVec3(cTypes[tyIdx].count, 1, 1);
1858 			spec.verifyIO			= check32BitFloats;
1859 			spec.pushConstants		= BufferSp(new Float16Buffer(float16Data));
1860 
1861 			spec.outputs.push_back(Resource(BufferSp(new Float32Buffer(cTypes[tyIdx].useConstantIndex ? float32DataConstIdx : float32Data))));
1862 			spec.extensions.push_back("VK_KHR_16bit_storage");
1863 			spec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
1864 
1865 			group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
1866 		}
1867 	}
1868 	{// integers
1869 		const char		sintTypes[]		=
1870 			"%i16       = OpTypeInt 16 1\n"
1871 			"%i16ptr    = OpTypePointer PushConstant %i16\n"
1872 			"%i16arr    = OpTypeArray %i16 %c_i32_64\n"
1873 			"%v2i16     = OpTypeVector %i16 2\n"
1874 			"%v2i32     = OpTypeVector %i32 2\n"
1875 			"%v2i16ptr  = OpTypePointer PushConstant %v2i16\n"
1876 			"%v2i32ptr  = OpTypePointer Uniform %v2i32\n"
1877 			"%v2i16arr  = OpTypeArray %v2i16 %c_i32_32\n"
1878 			"%v2i32arr  = OpTypeArray %v2i32 %c_i32_32\n";
1879 
1880 		const char		uintTypes[]		=
1881 			"%u16       = OpTypeInt 16 0\n"
1882 			"%u16ptr    = OpTypePointer PushConstant %u16\n"
1883 			"%u32ptr    = OpTypePointer Uniform %u32\n"
1884 			"%u16arr    = OpTypeArray %u16 %c_i32_64\n"
1885 			"%u32arr    = OpTypeArray %u32 %c_i32_64\n"
1886 			"%v2u16     = OpTypeVector %u16 2\n"
1887 			"%v2u32     = OpTypeVector %u32 2\n"
1888 			"%v2u16ptr  = OpTypePointer PushConstant %v2u16\n"
1889 			"%v2u32ptr  = OpTypePointer Uniform %v2u32\n"
1890 			"%v2u16arr  = OpTypeArray %v2u16 %c_i32_32\n"
1891 			"%v2u32arr  = OpTypeArray %v2u32 %c_i32_32\n";
1892 
1893 		struct CompositeType
1894 		{
1895 			const char*	name;
1896 			bool		isSigned;
1897 			const char* types;
1898 			const char*	base32;
1899 			const char*	base16;
1900 			const char* opcode;
1901 			const char*	stride;
1902 			bool		useConstantIndex;
1903 			unsigned	constantIndex;
1904 			unsigned	count;
1905 		};
1906 
1907 		const CompositeType	cTypes[]	=
1908 		{
1909 			{"scalar_sint",				true,	sintTypes,	"i32",		"i16",		"OpSConvert",	"OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",			false,	0,	numElements},
1910 			{"scalar_sint_const_idx_5",	true,	sintTypes,	"i32",		"i16",		"OpSConvert",	"OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",			true,	5,	numElements},
1911 			{"scalar_sint_const_idx_8",	true,	sintTypes,	"i32",		"i16",		"OpSConvert",	"OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",			true,	8,	numElements},
1912 			{"scalar_uint",				false,	uintTypes,	"u32",		"u16",		"OpUConvert",	"OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",			false,	0,	numElements},
1913 			{"scalar_uint_const_idx_5",	false,	uintTypes,	"u32",		"u16",		"OpUConvert",	"OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",			true,	5,	numElements},
1914 			{"scalar_uint_const_idx_8",	false,	uintTypes,	"u32",		"u16",		"OpUConvert",	"OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",			true,	8,	numElements},
1915 			{"vector_sint",				true,	sintTypes,	"v2i32",	"v2i16",	"OpSConvert",	"OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n",		false,	0,	numElements / 2},
1916 			{"vector_uint",				false,	uintTypes,	"v2u32",	"v2u16",	"OpUConvert",	"OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n",		false,	0,	numElements / 2},
1917 		};
1918 
1919 		vector<deInt16>	inputs			= getInt16s(rnd, numElements);
1920 		vector<deInt32> sOutputs;
1921 		vector<deInt32> uOutputs;
1922 		const deUint16	signBitMask		= 0x8000;
1923 		const deUint32	signExtendMask	= 0xffff0000;
1924 
1925 		sOutputs.reserve(inputs.size());
1926 		uOutputs.reserve(inputs.size());
1927 
1928 		for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
1929 		{
1930 			uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
1931 			if (inputs[numNdx] & signBitMask)
1932 				sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
1933 			else
1934 				sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
1935 		}
1936 
1937 		for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
1938 		{
1939 			ComputeShaderSpec		spec;
1940 			map<string, string>		specs;
1941 			const char*				testName	= cTypes[tyIdx].name;
1942 			vector<deInt32>			intDataConstIdx;
1943 
1944 			if (cTypes[tyIdx].useConstantIndex)
1945 			{
1946 				const deUint32 numInts = numElements / cTypes[tyIdx].count;
1947 
1948 				for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
1949 				{
1950 					const deInt32 idx = cTypes[tyIdx].constantIndex * numInts + numIdx % numInts;
1951 
1952 					if (cTypes[tyIdx].isSigned)
1953 						intDataConstIdx.push_back(sOutputs[idx]);
1954 					else
1955 						intDataConstIdx.push_back(uOutputs[idx]);
1956 				}
1957 			}
1958 
1959 			specs["stride"]			= cTypes[tyIdx].stride;
1960 			specs["base32"]			= cTypes[tyIdx].base32;
1961 			specs["base16"]			= cTypes[tyIdx].base16;
1962 			specs["types"]			= cTypes[tyIdx].types;
1963 			specs["convert"]		= cTypes[tyIdx].opcode;
1964 			specs["constarrayidx"]	= de::toString(cTypes[tyIdx].constantIndex);
1965 			if (cTypes[tyIdx].useConstantIndex)
1966 				specs["arrayindex"] = "c_i32_ci";
1967 			else
1968 				specs["arrayindex"] = "x";
1969 
1970 			spec.assembly			= shaderTemplate.specialize(specs);
1971 			spec.numWorkGroups		= IVec3(cTypes[tyIdx].count, 1, 1);
1972 			spec.pushConstants		= BufferSp(new Int16Buffer(inputs));
1973 
1974 			if (cTypes[tyIdx].useConstantIndex)
1975 				spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(intDataConstIdx))));
1976 			else if (cTypes[tyIdx].isSigned)
1977 				spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(sOutputs))));
1978 			else
1979 				spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(uOutputs))));
1980 			spec.extensions.push_back("VK_KHR_16bit_storage");
1981 			spec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
1982 
1983 			group->addChild(new SpvAsmComputeShaderCase(testCtx, testName, testName, spec));
1984 		}
1985 	}
1986 }
1987 
addGraphics16BitStorageUniformInt32To16Group(tcu::TestCaseGroup * testGroup)1988 void addGraphics16BitStorageUniformInt32To16Group (tcu::TestCaseGroup* testGroup)
1989 {
1990 	de::Random							rnd					(deStringHash(testGroup->getName()));
1991 	map<string, string>					fragments;
1992 	const deUint32						numDataPoints		= 256;
1993 	RGBA								defaultColors[4];
1994 	vector<string>						extensions;
1995 	const StringTemplate				capabilities		("OpCapability ${cap}\n");
1996 	// inputs and outputs are declared to be vectors of signed integers.
1997 	// However, depending on the test, they may be interpreted as unsiged
1998 	// integers. That won't be a problem as long as we passed the bits
1999 	// in faithfully to the pipeline.
2000 	vector<deInt32>						inputs				= getInt32s(rnd, numDataPoints);
2001 	vector<deInt16>						outputs;
2002 
2003 	outputs.reserve(inputs.size());
2004 	for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
2005 		outputs.push_back(static_cast<deInt16>(0xffff & inputs[numNdx]));
2006 
2007 	extensions.push_back("VK_KHR_16bit_storage");
2008 	fragments["extension"]	= "OpExtension \"SPV_KHR_16bit_storage\"";
2009 
2010 	getDefaultColors(defaultColors);
2011 
2012 	struct IntegerFacts
2013 	{
2014 		const char*	name;
2015 		const char*	type32;
2016 		const char*	type16;
2017 		const char* opcode;
2018 		const char*	isSigned;
2019 	};
2020 
2021 	const IntegerFacts	intFacts[]		=
2022 	{
2023 		{"sint",	"%i32",		"%i16",		"OpSConvert",	"1"},
2024 		{"uint",	"%u32",		"%u16",		"OpUConvert",	"0"},
2025 	};
2026 
2027 	const StringTemplate	scalarPreMain(
2028 			"${itype16} = OpTypeInt 16 ${signed}\n"
2029 			"%c_i32_256 = OpConstant %i32 256\n"
2030 			"   %up_i32 = OpTypePointer Uniform ${itype32}\n"
2031 			"   %up_i16 = OpTypePointer Uniform ${itype16}\n"
2032 			"   %ra_i32 = OpTypeArray ${itype32} %c_i32_256\n"
2033 			"   %ra_i16 = OpTypeArray ${itype16} %c_i32_256\n"
2034 			"   %SSBO32 = OpTypeStruct %ra_i32\n"
2035 			"   %SSBO16 = OpTypeStruct %ra_i16\n"
2036 			"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2037 			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
2038 			"   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2039 			"   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
2040 
2041 	const StringTemplate	scalarDecoration(
2042 			"OpDecorate %ra_i32 ArrayStride ${arraystride}\n"
2043 			"OpDecorate %ra_i16 ArrayStride 2\n"
2044 			"OpMemberDecorate %SSBO32 0 Offset 0\n"
2045 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
2046 			"OpDecorate %SSBO32 ${indecor}\n"
2047 			"OpDecorate %SSBO16 BufferBlock\n"
2048 			"OpDecorate %ssbo32 DescriptorSet 0\n"
2049 			"OpDecorate %ssbo16 DescriptorSet 0\n"
2050 			"OpDecorate %ssbo32 Binding 0\n"
2051 			"OpDecorate %ssbo16 Binding 1\n");
2052 
2053 	const StringTemplate	scalarTestFunc(
2054 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
2055 			"    %param = OpFunctionParameter %v4f32\n"
2056 
2057 			"%entry = OpLabel\n"
2058 			"    %i = OpVariable %fp_i32 Function\n"
2059 			"         OpStore %i %c_i32_0\n"
2060 			"         OpBranch %loop\n"
2061 
2062 			" %loop = OpLabel\n"
2063 			"   %15 = OpLoad %i32 %i\n"
2064 			"   %lt = OpSLessThan %bool %15 %c_i32_256\n"
2065 			"         OpLoopMerge %merge %inc None\n"
2066 			"         OpBranchConditional %lt %write %merge\n"
2067 
2068 			"%write = OpLabel\n"
2069 			"   %30 = OpLoad %i32 %i\n"
2070 			"  %src = OpAccessChain %up_i32 %ssbo32 %c_i32_0 %30\n"
2071 			"%val32 = OpLoad ${itype32} %src\n"
2072 			"%val16 = ${convert} ${itype16} %val32\n"
2073 			"  %dst = OpAccessChain %up_i16 %ssbo16 %c_i32_0 %30\n"
2074 			"         OpStore %dst %val16\n"
2075 			"         OpBranch %inc\n"
2076 
2077 			"  %inc = OpLabel\n"
2078 			"   %37 = OpLoad %i32 %i\n"
2079 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2080 			"         OpStore %i %39\n"
2081 			"         OpBranch %loop\n"
2082 
2083 			"%merge = OpLabel\n"
2084 			"         OpReturnValue %param\n"
2085 
2086 			"OpFunctionEnd\n");
2087 
2088 	const StringTemplate	vecPreMain(
2089 			"${itype16} = OpTypeInt 16 ${signed}\n"
2090 			" %c_i32_64 = OpConstant %i32 64\n"
2091 			"%v4itype16 = OpTypeVector ${itype16} 4\n"
2092 			" %up_v4i32 = OpTypePointer Uniform ${v4itype32}\n"
2093 			" %up_v4i16 = OpTypePointer Uniform %v4itype16\n"
2094 			" %ra_v4i32 = OpTypeArray ${v4itype32} %c_i32_64\n"
2095 			" %ra_v4i16 = OpTypeArray %v4itype16 %c_i32_64\n"
2096 			"   %SSBO32 = OpTypeStruct %ra_v4i32\n"
2097 			"   %SSBO16 = OpTypeStruct %ra_v4i16\n"
2098 			"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2099 			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
2100 			"   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2101 			"   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
2102 
2103 	const StringTemplate	vecDecoration(
2104 			"OpDecorate %ra_v4i32 ArrayStride 16\n"
2105 			"OpDecorate %ra_v4i16 ArrayStride 8\n"
2106 			"OpMemberDecorate %SSBO32 0 Offset 0\n"
2107 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
2108 			"OpDecorate %SSBO32 ${indecor}\n"
2109 			"OpDecorate %SSBO16 BufferBlock\n"
2110 			"OpDecorate %ssbo32 DescriptorSet 0\n"
2111 			"OpDecorate %ssbo16 DescriptorSet 0\n"
2112 			"OpDecorate %ssbo32 Binding 0\n"
2113 			"OpDecorate %ssbo16 Binding 1\n");
2114 
2115 	const StringTemplate	vecTestFunc(
2116 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
2117 			"    %param = OpFunctionParameter %v4f32\n"
2118 
2119 			"%entry = OpLabel\n"
2120 			"    %i = OpVariable %fp_i32 Function\n"
2121 			"         OpStore %i %c_i32_0\n"
2122 			"         OpBranch %loop\n"
2123 
2124 			" %loop = OpLabel\n"
2125 			"   %15 = OpLoad %i32 %i\n"
2126 			"   %lt = OpSLessThan %bool %15 %c_i32_64\n"
2127 			"         OpLoopMerge %merge %inc None\n"
2128 			"         OpBranchConditional %lt %write %merge\n"
2129 
2130 			"%write = OpLabel\n"
2131 			"   %30 = OpLoad %i32 %i\n"
2132 			"  %src = OpAccessChain %up_v4i32 %ssbo32 %c_i32_0 %30\n"
2133 			"%val32 = OpLoad ${v4itype32} %src\n"
2134 			"%val16 = ${convert} %v4itype16 %val32\n"
2135 			"  %dst = OpAccessChain %up_v4i16 %ssbo16 %c_i32_0 %30\n"
2136 			"         OpStore %dst %val16\n"
2137 			"         OpBranch %inc\n"
2138 
2139 			"  %inc = OpLabel\n"
2140 			"   %37 = OpLoad %i32 %i\n"
2141 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2142 			"         OpStore %i %39\n"
2143 			"         OpBranch %loop\n"
2144 
2145 			"%merge = OpLabel\n"
2146 			"         OpReturnValue %param\n"
2147 
2148 			"OpFunctionEnd\n");
2149 
2150 	// Scalar
2151 	{
2152 		const deUint32	arrayStrides[]		= {4, 16};
2153 
2154 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2155 			for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
2156 			{
2157 				map<string, string>	specs;
2158 				string				name		= string(CAPABILITIES[capIdx].name) + "_scalar_" + intFacts[factIdx].name;
2159 
2160 				specs["cap"]					= CAPABILITIES[capIdx].cap;
2161 				specs["indecor"]				= CAPABILITIES[capIdx].decor;
2162 				specs["itype32"]				= intFacts[factIdx].type32;
2163 				specs["v4itype32"]				= "%v4" + string(intFacts[factIdx].type32).substr(1);
2164 				specs["itype16"]				= intFacts[factIdx].type16;
2165 				specs["signed"]					= intFacts[factIdx].isSigned;
2166 				specs["convert"]				= intFacts[factIdx].opcode;
2167 				specs["arraystride"]			= de::toString(arrayStrides[capIdx]);
2168 
2169 				fragments["pre_main"]			= scalarPreMain.specialize(specs);
2170 				fragments["testfun"]			= scalarTestFunc.specialize(specs);
2171 				fragments["capability"]			= capabilities.specialize(specs);
2172 				fragments["decoration"]			= scalarDecoration.specialize(specs);
2173 
2174 				vector<deInt32>		inputsPadded;
2175 				for (size_t dataIdx = 0; dataIdx < inputs.size(); ++dataIdx)
2176 				{
2177 					inputsPadded.push_back(inputs[dataIdx]);
2178 					for (deUint32 padIdx = 0; padIdx < arrayStrides[capIdx] / 4 - 1; ++padIdx)
2179 						inputsPadded.push_back(0);
2180 				}
2181 
2182 				GraphicsResources	resources;
2183 				VulkanFeatures		features;
2184 
2185 				resources.inputs.push_back(Resource(BufferSp(new Int32Buffer(inputsPadded)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
2186 				resources.outputs.push_back(Resource(BufferSp(new Int16Buffer(outputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
2187 				resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
2188 
2189 				features												= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
2190 				features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
2191 				features.coreFeatures.fragmentStoresAndAtomics			= true;
2192 
2193 				createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
2194 			}
2195 	}
2196 	// Vector
2197 	{
2198 		GraphicsResources	resources;
2199 		resources.inputs.push_back(Resource(BufferSp(new Int32Buffer(inputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
2200 		resources.outputs.push_back(Resource(BufferSp(new Int16Buffer(outputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
2201 
2202 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2203 			for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
2204 			{
2205 				map<string, string>	specs;
2206 				string				name		= string(CAPABILITIES[capIdx].name) + "_vector_" + intFacts[factIdx].name;
2207 				VulkanFeatures		features;
2208 
2209 				specs["cap"]					= CAPABILITIES[capIdx].cap;
2210 				specs["indecor"]				= CAPABILITIES[capIdx].decor;
2211 				specs["itype32"]				= intFacts[factIdx].type32;
2212 				specs["v4itype32"]				= "%v4" + string(intFacts[factIdx].type32).substr(1);
2213 				specs["itype16"]				= intFacts[factIdx].type16;
2214 				specs["signed"]					= intFacts[factIdx].isSigned;
2215 				specs["convert"]				= intFacts[factIdx].opcode;
2216 
2217 				fragments["pre_main"]			= vecPreMain.specialize(specs);
2218 				fragments["testfun"]			= vecTestFunc.specialize(specs);
2219 				fragments["capability"]			= capabilities.specialize(specs);
2220 				fragments["decoration"]			= vecDecoration.specialize(specs);
2221 
2222 				resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
2223 
2224 				features												= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
2225 				features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
2226 				features.coreFeatures.fragmentStoresAndAtomics			= true;
2227 
2228 				createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
2229 			}
2230 	}
2231 }
2232 
addCompute16bitStorageUniform16To16Group(tcu::TestCaseGroup * group)2233 void addCompute16bitStorageUniform16To16Group (tcu::TestCaseGroup* group)
2234 {
2235 	tcu::TestContext&		testCtx				= group->getTestContext();
2236 	de::Random				rnd					(deStringHash(group->getName()));
2237 	const int				numElements			= 128;
2238 	const vector<deFloat16>	float16Data			= getFloat16s(rnd, numElements);
2239 	const vector<deFloat16>	float16DummyData	(numElements, 0);
2240 	ComputeShaderSpec		spec;
2241 
2242 	std::ostringstream		shaderTemplate;
2243 		shaderTemplate<<"OpCapability Shader\n"
2244 			<< "OpCapability StorageUniformBufferBlock16\n"
2245 			<< "OpExtension \"SPV_KHR_16bit_storage\"\n"
2246 			<< "OpMemoryModel Logical GLSL450\n"
2247 			<< "OpEntryPoint GLCompute %main \"main\" %id\n"
2248 			<< "OpExecutionMode %main LocalSize 1 1 1\n"
2249 			<< "OpDecorate %id BuiltIn GlobalInvocationId\n"
2250 			<< "OpDecorate %f16arr ArrayStride 2\n"
2251 			<< "OpMemberDecorate %SSBO_IN 0 Coherent\n"
2252 			<< "OpMemberDecorate %SSBO_OUT 0 Coherent\n"
2253 			<< "OpMemberDecorate %SSBO_IN 0 Offset 0\n"
2254 			<< "OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
2255 			<< "OpDecorate %SSBO_IN BufferBlock\n"
2256 			<< "OpDecorate %SSBO_OUT BufferBlock\n"
2257 			<< "OpDecorate %ssboIN DescriptorSet 0\n"
2258 			<< "OpDecorate %ssboOUT DescriptorSet 0\n"
2259 			<< "OpDecorate %ssboIN Binding 0\n"
2260 			<< "OpDecorate %ssboOUT Binding 1\n"
2261 			<< "\n"
2262 			<< "%bool      = OpTypeBool\n"
2263 			<< "%void      = OpTypeVoid\n"
2264 			<< "%voidf     = OpTypeFunction %void\n"
2265 			<< "%u32       = OpTypeInt 32 0\n"
2266 			<< "%i32       = OpTypeInt 32 1\n"
2267 			<< "%uvec3     = OpTypeVector %u32 3\n"
2268 			<< "%uvec3ptr  = OpTypePointer Input %uvec3\n"
2269 			<< "%f16       = OpTypeFloat 16\n"
2270 			<< "%f16ptr    = OpTypePointer Uniform %f16\n"
2271 			<< "\n"
2272 			<< "%zero      = OpConstant %i32 0\n"
2273 			<< "%c_size    = OpConstant %i32 " << numElements << "\n"
2274 			<< "\n"
2275 			<< "%f16arr    = OpTypeArray %f16 %c_size\n"
2276 			<< "%SSBO_IN   = OpTypeStruct %f16arr\n"
2277 			<< "%SSBO_OUT  = OpTypeStruct %f16arr\n"
2278 			<< "%up_SSBOIN = OpTypePointer Uniform %SSBO_IN\n"
2279 			<< "%up_SSBOOUT = OpTypePointer Uniform %SSBO_OUT\n"
2280 			<< "%ssboIN    = OpVariable %up_SSBOIN Uniform\n"
2281 			<< "%ssboOUT   = OpVariable %up_SSBOOUT Uniform\n"
2282 			<< "\n"
2283 			<< "%id        = OpVariable %uvec3ptr Input\n"
2284 			<< "%main      = OpFunction %void None %voidf\n"
2285 			<< "%label     = OpLabel\n"
2286 			<< "%idval     = OpLoad %uvec3 %id\n"
2287 			<< "%x         = OpCompositeExtract %u32 %idval 0\n"
2288 			<< "%y         = OpCompositeExtract %u32 %idval 1\n"
2289 			<< "\n"
2290 			<< "%inlocx     = OpAccessChain %f16ptr %ssboIN %zero %x \n"
2291 			<< "%valx       = OpLoad %f16 %inlocx\n"
2292 			<< "%outlocx    = OpAccessChain %f16ptr %ssboOUT %zero %x \n"
2293 			<< "             OpStore %outlocx %valx\n"
2294 
2295 			<< "%inlocy    = OpAccessChain %f16ptr %ssboIN %zero %y \n"
2296 			<< "%valy      = OpLoad %f16 %inlocy\n"
2297 			<< "%outlocy   = OpAccessChain %f16ptr %ssboOUT %zero %y \n"
2298 			<< "             OpStore %outlocy %valy\n"
2299 			<< "\n"
2300 			<< "             OpReturn\n"
2301 			<< "             OpFunctionEnd\n";
2302 
2303 	spec.assembly			= shaderTemplate.str();
2304 	spec.numWorkGroups		= IVec3(numElements, numElements, 1);
2305 	spec.verifyIO			= computeCheckBuffersFloats;
2306 	spec.coherentMemory		= true;
2307 	spec.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data))));
2308 	spec.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16DummyData))));
2309 	spec.extensions.push_back("VK_KHR_16bit_storage");
2310 	spec.requestedVulkanFeatures = get16BitStorageFeatures("uniform_buffer_block");
2311 
2312 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "stress_test", "Granularity stress test", spec));
2313 }
2314 
addCompute16bitStorageUniform32To16Group(tcu::TestCaseGroup * group)2315 void addCompute16bitStorageUniform32To16Group (tcu::TestCaseGroup* group)
2316 {
2317 	tcu::TestContext&				testCtx			= group->getTestContext();
2318 	de::Random						rnd				(deStringHash(group->getName()));
2319 	const int						numElements		= 128;
2320 
2321 	const StringTemplate			shaderTemplate	(
2322 		"OpCapability Shader\n"
2323 		"OpCapability ${capability}\n"
2324 		"OpExtension \"SPV_KHR_16bit_storage\"\n"
2325 		"OpMemoryModel Logical GLSL450\n"
2326 		"OpEntryPoint GLCompute %main \"main\" %id\n"
2327 		"OpExecutionMode %main LocalSize 1 1 1\n"
2328 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2329 
2330 		"${stride}"
2331 
2332 		"OpMemberDecorate %SSBO32 0 Offset 0\n"
2333 		"OpMemberDecorate %SSBO16 0 Offset 0\n"
2334 		"OpDecorate %SSBO32 ${storage}\n"
2335 		"OpDecorate %SSBO16 BufferBlock\n"
2336 		"OpDecorate %ssbo32 DescriptorSet 0\n"
2337 		"OpDecorate %ssbo16 DescriptorSet 0\n"
2338 		"OpDecorate %ssbo32 Binding 0\n"
2339 		"OpDecorate %ssbo16 Binding 1\n"
2340 
2341 		"${matrix_decor:opt}\n"
2342 
2343 		"${rounding:opt}\n"
2344 
2345 		"%bool      = OpTypeBool\n"
2346 		"%void      = OpTypeVoid\n"
2347 		"%voidf     = OpTypeFunction %void\n"
2348 		"%u32       = OpTypeInt 32 0\n"
2349 		"%i32       = OpTypeInt 32 1\n"
2350 		"%f32       = OpTypeFloat 32\n"
2351 		"%uvec3     = OpTypeVector %u32 3\n"
2352 		"%uvec3ptr  = OpTypePointer Input %uvec3\n"
2353 		"%i32ptr    = OpTypePointer Uniform %i32\n"
2354 		"%f32ptr    = OpTypePointer Uniform %f32\n"
2355 
2356 		"%zero      = OpConstant %i32 0\n"
2357 		"%c_i32_1   = OpConstant %i32 1\n"
2358 		"%c_i32_16  = OpConstant %i32 16\n"
2359 		"%c_i32_32  = OpConstant %i32 32\n"
2360 		"%c_i32_64  = OpConstant %i32 64\n"
2361 		"%c_i32_128 = OpConstant %i32 128\n"
2362 
2363 		"%i32arr    = OpTypeArray %i32 %c_i32_128\n"
2364 		"%f32arr    = OpTypeArray %f32 %c_i32_128\n"
2365 
2366 		"${types}\n"
2367 		"${matrix_types:opt}\n"
2368 
2369 		"%SSBO32    = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
2370 		"%SSBO16    = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
2371 		"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2372 		"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
2373 		"%ssbo32    = OpVariable %up_SSBO32 Uniform\n"
2374 		"%ssbo16    = OpVariable %up_SSBO16 Uniform\n"
2375 
2376 		"%id        = OpVariable %uvec3ptr Input\n"
2377 
2378 		"%main      = OpFunction %void None %voidf\n"
2379 		"%label     = OpLabel\n"
2380 		"%idval     = OpLoad %uvec3 %id\n"
2381 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2382 		"%inloc     = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
2383 		"%val32     = OpLoad %${base32} %inloc\n"
2384 		"%val16     = ${convert} %${base16} %val32\n"
2385 		"%outloc    = OpAccessChain %${base16}ptr %ssbo16 %zero %x ${index0:opt}\n"
2386 		"             OpStore %outloc %val16\n"
2387 		"${matrix_store:opt}\n"
2388 		"             OpReturn\n"
2389 		"             OpFunctionEnd\n");
2390 
2391 	{  // Floats
2392 		const char					floatTypes[]	=
2393 			"%f16       = OpTypeFloat 16\n"
2394 			"%f16ptr    = OpTypePointer Uniform %f16\n"
2395 			"%f16arr    = OpTypeArray %f16 %c_i32_128\n"
2396 			"%v4f16     = OpTypeVector %f16 4\n"
2397 			"%v4f32     = OpTypeVector %f32 4\n"
2398 			"%v4f16ptr  = OpTypePointer Uniform %v4f16\n"
2399 			"%v4f32ptr  = OpTypePointer Uniform %v4f32\n"
2400 			"%v4f16arr  = OpTypeArray %v4f16 %c_i32_32\n"
2401 			"%v4f32arr  = OpTypeArray %v4f32 %c_i32_32\n";
2402 
2403 		struct RndMode
2404 		{
2405 			const char*				name;
2406 			const char*				decor;
2407 			VerifyIOFunc			func;
2408 		};
2409 
2410 		const RndMode		rndModes[]		=
2411 		{
2412 			{"rtz",						"OpDecorate %val16  FPRoundingMode RTZ",	computeCheck16BitFloats<ROUNDINGMODE_RTZ>},
2413 			{"rte",						"OpDecorate %val16  FPRoundingMode RTE",	computeCheck16BitFloats<ROUNDINGMODE_RTE>},
2414 			{"unspecified_rnd_mode",	"",											computeCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
2415 		};
2416 
2417 		struct CompositeType
2418 		{
2419 			const char*	name;
2420 			const char*	base32;
2421 			const char*	base16;
2422 			const char*	stride;
2423 			unsigned	count;
2424 			unsigned	inputStride;
2425 		};
2426 
2427 		const CompositeType	cTypes[2][3]	=
2428 		{
2429 			{ // BufferBlock
2430 				{"scalar",	"f32",		"f16",		"OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",				numElements,		1},
2431 				{"vector",	"v4f32",	"v4f16",	"OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n",		numElements / 4,	1},
2432 				{"matrix",	"v4f32",	"v4f16",	"OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n",	numElements / 8,	1}
2433 			},
2434 			{ // Block
2435 				{"scalar",	"f32",		"f16",		"OpDecorate %f32arr ArrayStride 16\nOpDecorate %f16arr ArrayStride 2\n",			numElements,		4},
2436 				{"vector",	"v4f32",	"v4f16",	"OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n",		numElements / 4,	1},
2437 				{"matrix",	"v4f32",	"v4f16",	"OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n",	numElements / 8,	1}
2438 			}
2439 		};
2440 
2441 		vector<deFloat16>	float16DummyData	(numElements, 0);
2442 
2443 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2444 			for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx)
2445 				for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
2446 				{
2447 					ComputeShaderSpec		spec;
2448 					map<string, string>		specs;
2449 					string					testName			= string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name + "_float_" + rndModes[rndModeIdx].name;
2450 					vector<float>			float32Data			= getFloat32s(rnd, numElements * cTypes[capIdx][tyIdx].inputStride);
2451 
2452 					specs["capability"]		= CAPABILITIES[capIdx].cap;
2453 					specs["storage"]		= CAPABILITIES[capIdx].decor;
2454 					specs["stride"]			= cTypes[capIdx][tyIdx].stride;
2455 					specs["base32"]			= cTypes[capIdx][tyIdx].base32;
2456 					specs["base16"]			= cTypes[capIdx][tyIdx].base16;
2457 					specs["rounding"]		= rndModes[rndModeIdx].decor;
2458 					specs["types"]			= floatTypes;
2459 					specs["convert"]		= "OpFConvert";
2460 
2461 					if (strcmp(cTypes[capIdx][tyIdx].name, "matrix") == 0)
2462 					{
2463 						if (strcmp(rndModes[rndModeIdx].name, "rtz") == 0)
2464 							specs["rounding"] += "\nOpDecorate %val16_1  FPRoundingMode RTZ\n";
2465 						else if (strcmp(rndModes[rndModeIdx].name, "rte") == 0)
2466 							specs["rounding"] += "\nOpDecorate %val16_1  FPRoundingMode RTE\n";
2467 
2468 						specs["index0"]			= "%zero";
2469 						specs["matrix_prefix"]	= "m2";
2470 						specs["matrix_types"]	=
2471 							"%m2v4f16 = OpTypeMatrix %v4f16 2\n"
2472 							"%m2v4f32 = OpTypeMatrix %v4f32 2\n"
2473 							"%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_16\n"
2474 							"%m2v4f32arr = OpTypeArray %m2v4f32 %c_i32_16\n";
2475 						specs["matrix_decor"]	=
2476 							"OpMemberDecorate %SSBO32 0 ColMajor\n"
2477 							"OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
2478 							"OpMemberDecorate %SSBO16 0 ColMajor\n"
2479 							"OpMemberDecorate %SSBO16 0 MatrixStride 8\n";
2480 						specs["matrix_store"]	=
2481 							"%inloc_1  = OpAccessChain %v4f32ptr %ssbo32 %zero %x %c_i32_1\n"
2482 							"%val32_1  = OpLoad %v4f32 %inloc_1\n"
2483 							"%val16_1  = OpFConvert %v4f16 %val32_1\n"
2484 							"%outloc_1 = OpAccessChain %v4f16ptr %ssbo16 %zero %x %c_i32_1\n"
2485 							"            OpStore %outloc_1 %val16_1\n";
2486 					}
2487 
2488 					spec.assembly			= shaderTemplate.specialize(specs);
2489 					spec.numWorkGroups		= IVec3(cTypes[capIdx][tyIdx].count, 1, 1);
2490 					spec.verifyIO			= rndModes[rndModeIdx].func;
2491 
2492 					spec.inputs.push_back(Resource(BufferSp(new Float32Buffer(float32Data)), CAPABILITIES[capIdx].dtype));
2493 					// We provided a custom verifyIO in the above in which inputs will be used for checking.
2494 					// So put dummy data in the expected values.
2495 					spec.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16DummyData))));
2496 					spec.extensions.push_back("VK_KHR_16bit_storage");
2497 					spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
2498 
2499 					group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
2500 				}
2501 	}
2502 
2503 	{  // Integers
2504 		const char		sintTypes[]	=
2505 			"%i16       = OpTypeInt 16 1\n"
2506 			"%i16ptr    = OpTypePointer Uniform %i16\n"
2507 			"%i16arr    = OpTypeArray %i16 %c_i32_128\n"
2508 			"%v2i16     = OpTypeVector %i16 2\n"
2509 			"%v2i32     = OpTypeVector %i32 2\n"
2510 			"%v2i16ptr  = OpTypePointer Uniform %v2i16\n"
2511 			"%v2i32ptr  = OpTypePointer Uniform %v2i32\n"
2512 			"%v2i16arr  = OpTypeArray %v2i16 %c_i32_64\n"
2513 			"%v2i32arr  = OpTypeArray %v2i32 %c_i32_64\n";
2514 
2515 		const char		uintTypes[]	=
2516 			"%u16       = OpTypeInt 16 0\n"
2517 			"%u16ptr    = OpTypePointer Uniform %u16\n"
2518 			"%u32ptr    = OpTypePointer Uniform %u32\n"
2519 			"%u16arr    = OpTypeArray %u16 %c_i32_128\n"
2520 			"%u32arr    = OpTypeArray %u32 %c_i32_128\n"
2521 			"%v2u16     = OpTypeVector %u16 2\n"
2522 			"%v2u32     = OpTypeVector %u32 2\n"
2523 			"%v2u16ptr  = OpTypePointer Uniform %v2u16\n"
2524 			"%v2u32ptr  = OpTypePointer Uniform %v2u32\n"
2525 			"%v2u16arr  = OpTypeArray %v2u16 %c_i32_64\n"
2526 			"%v2u32arr  = OpTypeArray %v2u32 %c_i32_64\n";
2527 
2528 		struct CompositeType
2529 		{
2530 			const char*	name;
2531 			const char* types;
2532 			const char*	base32;
2533 			const char*	base16;
2534 			const char* opcode;
2535 			const char*	stride;
2536 			unsigned	count;
2537 			unsigned	inputStride;
2538 		};
2539 
2540 		const CompositeType	cTypes[2][4]	=
2541 		{
2542 			{
2543 				{"scalar_sint",	sintTypes,	"i32",		"i16",		"OpSConvert",	"OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",		numElements,			1},
2544 				{"scalar_uint",	uintTypes,	"u32",		"u16",		"OpUConvert",	"OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",		numElements,			1},
2545 				{"vector_sint",	sintTypes,	"v2i32",	"v2i16",	"OpSConvert",	"OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n",	numElements / 2,		2},
2546 				{"vector_uint",	uintTypes,	"v2u32",	"v2u16",	"OpUConvert",	"OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n",	numElements / 2,		2}
2547 			},
2548 			{
2549 				{"scalar_sint",	sintTypes,	"i32",		"i16",		"OpSConvert",	"OpDecorate %i32arr ArrayStride 16\nOpDecorate %i16arr ArrayStride 2\n",		numElements,		4},
2550 				{"scalar_uint",	uintTypes,	"u32",		"u16",		"OpUConvert",	"OpDecorate %u32arr ArrayStride 16\nOpDecorate %u16arr ArrayStride 2\n",		numElements,		4},
2551 				{"vector_sint",	sintTypes,	"v2i32",	"v2i16",	"OpSConvert",	"OpDecorate %v2i32arr ArrayStride 16\nOpDecorate %v2i16arr ArrayStride 4\n",	numElements / 2,	4},
2552 				{"vector_uint",	uintTypes,	"v2u32",	"v2u16",	"OpUConvert",	"OpDecorate %v2u32arr ArrayStride 16\nOpDecorate %v2u16arr ArrayStride 4\n",	numElements / 2,	4}
2553 			}
2554 		};
2555 
2556 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2557 			for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx)
2558 			{
2559 				ComputeShaderSpec		spec;
2560 				map<string, string>		specs;
2561 				string					testName		= string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name;
2562 				const deUint32			inputStride		= cTypes[capIdx][tyIdx].inputStride;
2563 				const deUint32			count			= cTypes[capIdx][tyIdx].count;
2564 				const deUint32			scalarsPerItem	= numElements / count;
2565 
2566 				vector<deInt32>	inputs					= getInt32s(rnd, numElements * inputStride);
2567 				vector<deInt16> outputs;
2568 
2569 				outputs.reserve(numElements);
2570 				for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
2571 					for (deUint32 scalarIdx = 0; scalarIdx < scalarsPerItem; scalarIdx++)
2572 						outputs.push_back(static_cast<deInt16>(0xffff & inputs[numNdx * inputStride + scalarIdx]));
2573 
2574 				specs["capability"]		= CAPABILITIES[capIdx].cap;
2575 				specs["storage"]		= CAPABILITIES[capIdx].decor;
2576 				specs["stride"]			= cTypes[capIdx][tyIdx].stride;
2577 				specs["base32"]			= cTypes[capIdx][tyIdx].base32;
2578 				specs["base16"]			= cTypes[capIdx][tyIdx].base16;
2579 				specs["types"]			= cTypes[capIdx][tyIdx].types;
2580 				specs["convert"]		= cTypes[capIdx][tyIdx].opcode;
2581 
2582 				spec.assembly			= shaderTemplate.specialize(specs);
2583 				spec.numWorkGroups		= IVec3(cTypes[capIdx][tyIdx].count, 1, 1);
2584 
2585 				spec.inputs.push_back(Resource(BufferSp(new Int32Buffer(inputs)), CAPABILITIES[capIdx].dtype));
2586 				spec.outputs.push_back(Resource(BufferSp(new Int16Buffer(outputs))));
2587 				spec.extensions.push_back("VK_KHR_16bit_storage");
2588 				spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
2589 
2590 				group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
2591 			}
2592 	}
2593 }
2594 
addCompute16bitStorageUniform16StructTo32StructGroup(tcu::TestCaseGroup * group)2595 void addCompute16bitStorageUniform16StructTo32StructGroup (tcu::TestCaseGroup* group)
2596 {
2597 	tcu::TestContext&				testCtx			= group->getTestContext();
2598 	de::Random						rnd				(deStringHash(group->getName()));
2599 	const StringTemplate			shaderTemplate	(
2600 		"OpCapability Shader\n"
2601 		"OpCapability ${capability}\n"
2602 		"OpExtension \"SPV_KHR_16bit_storage\"\n"
2603 		"OpMemoryModel Logical GLSL450\n"
2604 		"OpEntryPoint GLCompute %main \"main\" %id\n"
2605 		"OpExecutionMode %main LocalSize 1 1 1\n"
2606 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2607 		"\n"
2608 		"${strideF16}"
2609 		"\n"
2610 		"${strideF32}"
2611 		"\n"
2612 		"OpMemberDecorate %SSBO_IN 0 Offset 0\n"
2613 		"OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
2614 		"OpDecorate %SSBO_IN ${storage}\n"
2615 		"OpDecorate %SSBO_OUT BufferBlock\n"
2616 		"OpDecorate %ssboIN DescriptorSet 0\n"
2617 		"OpDecorate %ssboOUT DescriptorSet 0\n"
2618 		"OpDecorate %ssboIN Binding 0\n"
2619 		"OpDecorate %ssboOUT Binding 1\n"
2620 		"\n"
2621 		"%bool     = OpTypeBool\n"
2622 		"%void     = OpTypeVoid\n"
2623 		"%voidf    = OpTypeFunction %void\n"
2624 		"%u32      = OpTypeInt 32 0\n"
2625 		"%uvec3    = OpTypeVector %u32 3\n"
2626 		"%uvec3ptr = OpTypePointer Input %uvec3\n"
2627 		"\n"
2628 		"%i32      = OpTypeInt 32 1\n"
2629 		"%v2i32    = OpTypeVector %i32 2\n"
2630 		"%v4i32    = OpTypeVector %i32 4\n"
2631 		"\n"
2632 		"%f32      = OpTypeFloat 32\n"
2633 		"%v2f32    = OpTypeVector %f32 2\n"
2634 		"%v3f32    = OpTypeVector %f32 3\n"
2635 		"%v4f32    = OpTypeVector %f32 4\n"
2636 		"${types}\n"
2637 		"\n"
2638 		"%zero = OpConstant %i32 0\n"
2639 		"%c_i32_1 = OpConstant %i32 1\n"
2640 		"%c_i32_2 = OpConstant %i32 2\n"
2641 		"%c_i32_3 = OpConstant %i32 3\n"
2642 		"%c_i32_4 = OpConstant %i32 4\n"
2643 		"%c_i32_5 = OpConstant %i32 5\n"
2644 		"%c_i32_6 = OpConstant %i32 6\n"
2645 		"%c_i32_7 = OpConstant %i32 7\n"
2646 		"%c_i32_8 = OpConstant %i32 8\n"
2647 		"%c_i32_9 = OpConstant %i32 9\n"
2648 		"\n"
2649 		"%c_u32_1 = OpConstant %u32 1\n"
2650 		"%c_u32_3 = OpConstant %u32 3\n"
2651 		"%c_u32_7 = OpConstant %u32 7\n"
2652 		"%c_u32_11 = OpConstant %u32 11\n"
2653 		"\n"
2654 		"%f16arr3       = OpTypeArray %f16 %c_u32_3\n"
2655 		"%v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
2656 		"%v2f16arr11    = OpTypeArray %v2f16 %c_u32_11\n"
2657 		"%v3f16arr11    = OpTypeArray %v3f16 %c_u32_11\n"
2658 		"%v4f16arr3     = OpTypeArray %v4f16 %c_u32_3\n"
2659 		"%struct16      = OpTypeStruct %f16 %v2f16arr3\n"
2660 		"%struct16arr11 = OpTypeArray %struct16 %c_u32_11\n"
2661 		"%f16Struct = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr11 %v2f16arr11 %f16 %v3f16arr11 %v4f16arr3\n"
2662 		"\n"
2663 		"%f32arr3   = OpTypeArray %f32 %c_u32_3\n"
2664 		"%v2f32arr3 = OpTypeArray %v2f32 %c_u32_3\n"
2665 		"%v2f32arr11 = OpTypeArray %v2f32 %c_u32_11\n"
2666 		"%v3f32arr11 = OpTypeArray %v3f32 %c_u32_11\n"
2667 		"%v4f32arr3 = OpTypeArray %v4f32 %c_u32_3\n"
2668 		"%struct32      = OpTypeStruct %f32 %v2f32arr3\n"
2669 		"%struct32arr11 = OpTypeArray %struct32 %c_u32_11\n"
2670 		"%f32Struct = OpTypeStruct %f32 %v2f32 %v3f32 %v4f32 %f32arr3 %struct32arr11 %v2f32arr11 %f32 %v3f32arr11 %v4f32arr3\n"
2671 		"\n"
2672 		"%f16StructArr7      = OpTypeArray %f16Struct %c_u32_7\n"
2673 		"%f32StructArr7      = OpTypeArray %f32Struct %c_u32_7\n"
2674 		"%SSBO_IN            = OpTypeStruct %f16StructArr7\n"
2675 		"%SSBO_OUT           = OpTypeStruct %f32StructArr7\n"
2676 		"%up_SSBOIN          = OpTypePointer Uniform %SSBO_IN\n"
2677 		"%up_SSBOOUT         = OpTypePointer Uniform %SSBO_OUT\n"
2678 		"%ssboIN             = OpVariable %up_SSBOIN Uniform\n"
2679 		"%ssboOUT            = OpVariable %up_SSBOOUT Uniform\n"
2680 		"\n"
2681 		"%id        = OpVariable %uvec3ptr Input\n"
2682 		"%main      = OpFunction %void None %voidf\n"
2683 		"%label     = OpLabel\n"
2684 		"\n"
2685 		"%idval     = OpLoad %uvec3 %id\n"
2686 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2687 		"%y         = OpCompositeExtract %u32 %idval 1\n"
2688 		"\n"
2689 		"%f16src  = OpAccessChain %f16ptr %ssboIN %zero %x %zero\n"
2690 		"%val_f16 = OpLoad %f16 %f16src\n"
2691 		"%val_f32 = OpFConvert %f32 %val_f16\n"
2692 		"%f32dst  = OpAccessChain %f32ptr %ssboOUT %zero %x %zero\n"
2693 		"OpStore %f32dst %val_f32\n"
2694 		"\n"
2695 		"%v2f16src  = OpAccessChain %v2f16ptr %ssboIN %zero %x %c_i32_1\n"
2696 		"%val_v2f16 = OpLoad %v2f16 %v2f16src\n"
2697 		"%val_v2f32 = OpFConvert %v2f32 %val_v2f16\n"
2698 		"%v2f32dst  = OpAccessChain %v2f32ptr %ssboOUT %zero %x %c_i32_1\n"
2699 		"OpStore %v2f32dst %val_v2f32\n"
2700 		"\n"
2701 		"%v3f16src  = OpAccessChain %v3f16ptr %ssboIN %zero %x %c_i32_2\n"
2702 		"%val_v3f16 = OpLoad %v3f16 %v3f16src\n"
2703 		"%val_v3f32 = OpFConvert %v3f32 %val_v3f16\n"
2704 		"%v3f32dst  = OpAccessChain %v3f32ptr %ssboOUT %zero %x %c_i32_2\n"
2705 		"OpStore %v3f32dst %val_v3f32\n"
2706 		"\n"
2707 		"%v4f16src  = OpAccessChain %v4f16ptr %ssboIN %zero %x %c_i32_3\n"
2708 		"%val_v4f16 = OpLoad %v4f16 %v4f16src\n"
2709 		"%val_v4f32 = OpFConvert %v4f32 %val_v4f16\n"
2710 		"%v4f32dst  = OpAccessChain %v4f32ptr %ssboOUT %zero %x %c_i32_3\n"
2711 		"OpStore %v4f32dst %val_v4f32\n"
2712 		"\n"
2713 		//struct {f16, v2f16[3]}
2714 		"%Sf16src  = OpAccessChain %f16ptr %ssboIN %zero %x %c_i32_5 %y %zero\n"
2715 		"%Sval_f16 = OpLoad %f16 %Sf16src\n"
2716 		"%Sval_f32 = OpFConvert %f32 %Sval_f16\n"
2717 		"%Sf32dst2  = OpAccessChain %f32ptr %ssboOUT %zero %x %c_i32_5 %y %zero\n"
2718 		"OpStore %Sf32dst2 %Sval_f32\n"
2719 		"\n"
2720 		"%Sv2f16src0   = OpAccessChain %v2f16ptr %ssboIN %zero %x %c_i32_5 %y %c_i32_1 %zero\n"
2721 		"%Sv2f16_0     = OpLoad %v2f16 %Sv2f16src0\n"
2722 		"%Sv2f32_0     = OpFConvert %v2f32 %Sv2f16_0\n"
2723 		"%Sv2f32dst_0  = OpAccessChain %v2f32ptr %ssboOUT %zero %x %c_i32_5 %y %c_i32_1 %zero\n"
2724 		"OpStore %Sv2f32dst_0 %Sv2f32_0\n"
2725 		"\n"
2726 		"%Sv2f16src1  = OpAccessChain %v2f16ptr %ssboIN %zero %x %c_i32_5 %y %c_i32_1 %c_i32_1\n"
2727 		"%Sv2f16_1 = OpLoad %v2f16 %Sv2f16src1\n"
2728 		"%Sv2f32_1 = OpFConvert %v2f32 %Sv2f16_1\n"
2729 		"%Sv2f32dst_1  = OpAccessChain %v2f32ptr %ssboOUT %zero %x %c_i32_5 %y %c_i32_1 %c_i32_1\n"
2730 		"OpStore %Sv2f32dst_1 %Sv2f32_1\n"
2731 		"\n"
2732 		"%Sv2f16src2  = OpAccessChain %v2f16ptr %ssboIN %zero %x %c_i32_5 %y %c_i32_1 %c_i32_2\n"
2733 		"%Sv2f16_2 = OpLoad %v2f16 %Sv2f16src2\n"
2734 		"%Sv2f32_2 = OpFConvert %v2f32 %Sv2f16_2\n"
2735 		"%Sv2f32dst_2  = OpAccessChain %v2f32ptr %ssboOUT %zero %x %c_i32_5 %y %c_i32_1 %c_i32_2\n"
2736 		"OpStore %Sv2f32dst_2 %Sv2f32_2\n"
2737 		"\n"
2738 
2739 		"%v2f16src2  = OpAccessChain %v2f16ptr %ssboIN %zero %x %c_i32_6 %y\n"
2740 		"%val2_v2f16 = OpLoad %v2f16 %v2f16src2\n"
2741 		"%val2_v2f32 = OpFConvert %v2f32 %val2_v2f16\n"
2742 		"%v2f32dst2  = OpAccessChain %v2f32ptr %ssboOUT %zero %x %c_i32_6 %y\n"
2743 		"OpStore %v2f32dst2 %val2_v2f32\n"
2744 		"\n"
2745 		"%f16src2  = OpAccessChain %f16ptr %ssboIN %zero %x %c_i32_7\n"
2746 		"%val2_f16 = OpLoad %f16 %f16src2\n"
2747 		"%val2_f32 = OpFConvert %f32 %val2_f16\n"
2748 		"%f32dst2  = OpAccessChain %f32ptr %ssboOUT %zero %x %c_i32_7\n"
2749 		"OpStore %f32dst2 %val2_f32\n"
2750 		"\n"
2751 		"%v3f16src2  = OpAccessChain %v3f16ptr %ssboIN %zero %x %c_i32_8 %y\n"
2752 		"%val2_v3f16 = OpLoad %v3f16 %v3f16src2\n"
2753 		"%val2_v3f32 = OpFConvert %v3f32 %val2_v3f16\n"
2754 		"%v3f32dst2  = OpAccessChain %v3f32ptr %ssboOUT %zero %x %c_i32_8 %y\n"
2755 		"OpStore %v3f32dst2 %val2_v3f32\n"
2756 		"\n"
2757 
2758 		//Array with 3 elements
2759 		"%LessThan3 = OpSLessThan %bool %y %c_i32_3\n"
2760 		"OpSelectionMerge %BlockIf None\n"
2761 		"OpBranchConditional %LessThan3 %LabelIf %BlockIf\n"
2762 		"%LabelIf = OpLabel\n"
2763 		"  %f16src3  = OpAccessChain %f16ptr %ssboIN %zero %x %c_i32_4 %y\n"
2764 		"  %val3_f16 = OpLoad %f16 %f16src3\n"
2765 		"  %val3_f32 = OpFConvert %f32 %val3_f16\n"
2766 		"  %f32dst3  = OpAccessChain %f32ptr %ssboOUT %zero %x %c_i32_4 %y\n"
2767 		"  OpStore %f32dst3 %val3_f32\n"
2768 		"\n"
2769 		"  %v4f16src2  = OpAccessChain %v4f16ptr %ssboIN %zero %x %c_i32_9 %y\n"
2770 		"  %val2_v4f16 = OpLoad %v4f16 %v4f16src2\n"
2771 		"  %val2_v4f32 = OpFConvert %v4f32 %val2_v4f16\n"
2772 		"  %v4f32dst2  = OpAccessChain %v4f32ptr %ssboOUT %zero %x %c_i32_9 %y\n"
2773 		"  OpStore %v4f32dst2 %val2_v4f32\n"
2774 		"OpBranch %BlockIf\n"
2775 		"%BlockIf = OpLabel\n"
2776 
2777 		"   OpReturn\n"
2778 		"   OpFunctionEnd\n");
2779 
2780 	{  // Floats
2781 		vector<float>			float32Data		(getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD430), 0.0f);
2782 
2783 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2784 		{
2785 			vector<deFloat16>		float16DData	= (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? data16bitStd430(rnd) : data16bitStd140(rnd);
2786 			ComputeShaderSpec		spec;
2787 			map<string, string>		specs;
2788 			string					testName		= string(CAPABILITIES[capIdx].name);
2789 
2790 			specs["capability"]		= CAPABILITIES[capIdx].cap;
2791 			specs["storage"]		= CAPABILITIES[capIdx].decor;
2792 			specs["strideF16"]		= getStructShaderComponet((VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? SHADERTEMPLATE_STRIDE16BIT_STD430 : SHADERTEMPLATE_STRIDE16BIT_STD140);
2793 			specs["strideF32"]		= getStructShaderComponet(SHADERTEMPLATE_STRIDE32BIT_STD430);
2794 			specs["types"]			= getStructShaderComponet(SHADERTEMPLATE_TYPES);
2795 
2796 			spec.assembly			= shaderTemplate.specialize(specs);
2797 			spec.numWorkGroups		= IVec3(structData.structArraySize, structData.nestedArraySize, 1);
2798 			spec.verifyIO			= (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? computeCheckStruct<deFloat16, float, SHADERTEMPLATE_STRIDE16BIT_STD430, SHADERTEMPLATE_STRIDE32BIT_STD430>
2799 																										: computeCheckStruct<deFloat16, float, SHADERTEMPLATE_STRIDE16BIT_STD140, SHADERTEMPLATE_STRIDE32BIT_STD430>;
2800 			spec.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DData)), CAPABILITIES[capIdx].dtype));
2801 			spec.outputs.push_back(Resource(BufferSp(new Float32Buffer(float32Data))));
2802 			spec.extensions.push_back("VK_KHR_16bit_storage");
2803 			spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
2804 
2805 			group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
2806 		}
2807 	}
2808 }
2809 
addCompute16bitStorageUniform32StructTo16StructGroup(tcu::TestCaseGroup * group)2810 void addCompute16bitStorageUniform32StructTo16StructGroup (tcu::TestCaseGroup* group)
2811 {
2812 	tcu::TestContext&				testCtx			= group->getTestContext();
2813 	de::Random						rnd				(deStringHash(group->getName()));
2814 
2815 	const StringTemplate			shaderTemplate	(
2816 		"OpCapability Shader\n"
2817 		"OpCapability ${capability}\n"
2818 		"OpExtension \"SPV_KHR_16bit_storage\"\n"
2819 		"OpMemoryModel Logical GLSL450\n"
2820 		"OpEntryPoint GLCompute %main \"main\" %id\n"
2821 		"OpExecutionMode %main LocalSize 1 1 1\n"
2822 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2823 		"\n"
2824 		"${strideF16}"
2825 		"\n"
2826 		"${strideF32}"
2827 		"\n"
2828 		"OpMemberDecorate %SSBO_IN 0 Offset 0\n"
2829 		"OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
2830 		"OpDecorate %SSBO_IN ${storage}\n"
2831 		"OpDecorate %SSBO_OUT BufferBlock\n"
2832 		"OpDecorate %ssboIN DescriptorSet 0\n"
2833 		"OpDecorate %ssboOUT DescriptorSet 0\n"
2834 		"OpDecorate %ssboIN Binding 0\n"
2835 		"OpDecorate %ssboOUT Binding 1\n"
2836 		"\n"
2837 		"%bool     = OpTypeBool\n"
2838 		"%void     = OpTypeVoid\n"
2839 		"%voidf    = OpTypeFunction %void\n"
2840 		"%u32      = OpTypeInt 32 0\n"
2841 		"%uvec3    = OpTypeVector %u32 3\n"
2842 		"%uvec3ptr = OpTypePointer Input %uvec3\n"
2843 		"\n"
2844 		"%i32      = OpTypeInt 32 1\n"
2845 		"%v2i32    = OpTypeVector %i32 2\n"
2846 		"%v4i32    = OpTypeVector %i32 4\n"
2847 		"\n"
2848 		"%f32      = OpTypeFloat 32\n"
2849 		"%v2f32    = OpTypeVector %f32 2\n"
2850 		"%v3f32    = OpTypeVector %f32 3\n"
2851 		"%v4f32    = OpTypeVector %f32 4\n"
2852 		"${types}\n"
2853 		"\n"
2854 		"%zero = OpConstant %i32 0\n"
2855 		"%c_i32_1 = OpConstant %i32 1\n"
2856 		"%c_i32_2 = OpConstant %i32 2\n"
2857 		"%c_i32_3 = OpConstant %i32 3\n"
2858 		"%c_i32_4 = OpConstant %i32 4\n"
2859 		"%c_i32_5 = OpConstant %i32 5\n"
2860 		"%c_i32_6 = OpConstant %i32 6\n"
2861 		"%c_i32_7 = OpConstant %i32 7\n"
2862 		"%c_i32_8 = OpConstant %i32 8\n"
2863 		"%c_i32_9 = OpConstant %i32 9\n"
2864 		"\n"
2865 		"%c_u32_1 = OpConstant %u32 1\n"
2866 		"%c_u32_3 = OpConstant %u32 3\n"
2867 		"%c_u32_7 = OpConstant %u32 7\n"
2868 		"%c_u32_11 = OpConstant %u32 11\n"
2869 		"\n"
2870 		"%f16arr3       = OpTypeArray %f16 %c_u32_3\n"
2871 		"%v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
2872 		"%v2f16arr11    = OpTypeArray %v2f16 %c_u32_11\n"
2873 		"%v3f16arr11    = OpTypeArray %v3f16 %c_u32_11\n"
2874 		"%v4f16arr3     = OpTypeArray %v4f16 %c_u32_3\n"
2875 		"%struct16      = OpTypeStruct %f16 %v2f16arr3\n"
2876 		"%struct16arr11 = OpTypeArray %struct16 %c_u32_11\n"
2877 		"%f16Struct = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr11 %v2f16arr11 %f16 %v3f16arr11 %v4f16arr3\n"
2878 		"\n"
2879 		"%f32arr3   = OpTypeArray %f32 %c_u32_3\n"
2880 		"%v2f32arr3 = OpTypeArray %v2f32 %c_u32_3\n"
2881 		"%v2f32arr11 = OpTypeArray %v2f32 %c_u32_11\n"
2882 		"%v3f32arr11 = OpTypeArray %v3f32 %c_u32_11\n"
2883 		"%v4f32arr3 = OpTypeArray %v4f32 %c_u32_3\n"
2884 		"%struct32      = OpTypeStruct %f32 %v2f32arr3\n"
2885 		"%struct32arr11 = OpTypeArray %struct32 %c_u32_11\n"
2886 		"%f32Struct = OpTypeStruct %f32 %v2f32 %v3f32 %v4f32 %f32arr3 %struct32arr11 %v2f32arr11 %f32 %v3f32arr11 %v4f32arr3\n"
2887 		"\n"
2888 		"%f16StructArr7      = OpTypeArray %f16Struct %c_u32_7\n"
2889 		"%f32StructArr7      = OpTypeArray %f32Struct %c_u32_7\n"
2890 		"%SSBO_IN            = OpTypeStruct %f32StructArr7\n"
2891 		"%SSBO_OUT           = OpTypeStruct %f16StructArr7\n"
2892 		"%up_SSBOIN          = OpTypePointer Uniform %SSBO_IN\n"
2893 		"%up_SSBOOUT         = OpTypePointer Uniform %SSBO_OUT\n"
2894 		"%ssboIN             = OpVariable %up_SSBOIN Uniform\n"
2895 		"%ssboOUT            = OpVariable %up_SSBOOUT Uniform\n"
2896 		"\n"
2897 		"%id        = OpVariable %uvec3ptr Input\n"
2898 		"%main      = OpFunction %void None %voidf\n"
2899 		"%label     = OpLabel\n"
2900 		"\n"
2901 		"%idval     = OpLoad %uvec3 %id\n"
2902 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2903 		"%y         = OpCompositeExtract %u32 %idval 1\n"
2904 		"\n"
2905 		"%f32src  = OpAccessChain %f32ptr %ssboIN %zero %x %zero\n"
2906 		"%val_f32 = OpLoad %f32 %f32src\n"
2907 		"%val_f16 = OpFConvert %f16 %val_f32\n"
2908 		"%f16dst  = OpAccessChain %f16ptr %ssboOUT %zero %x %zero\n"
2909 		"OpStore %f16dst %val_f16\n"
2910 		"\n"
2911 		"%v2f32src  = OpAccessChain %v2f32ptr %ssboIN %zero %x %c_i32_1\n"
2912 		"%val_v2f32 = OpLoad %v2f32 %v2f32src\n"
2913 		"%val_v2f16 = OpFConvert %v2f16 %val_v2f32\n"
2914 		"%v2f16dst  = OpAccessChain %v2f16ptr %ssboOUT %zero %x %c_i32_1\n"
2915 		"OpStore %v2f16dst %val_v2f16\n"
2916 		"\n"
2917 		"%v3f32src  = OpAccessChain %v3f32ptr %ssboIN %zero %x %c_i32_2\n"
2918 		"%val_v3f32 = OpLoad %v3f32 %v3f32src\n"
2919 		"%val_v3f16 = OpFConvert %v3f16 %val_v3f32\n"
2920 		"%v3f16dst  = OpAccessChain %v3f16ptr %ssboOUT %zero %x %c_i32_2\n"
2921 		"OpStore %v3f16dst %val_v3f16\n"
2922 		"\n"
2923 		"%v4f32src  = OpAccessChain %v4f32ptr %ssboIN %zero %x %c_i32_3\n"
2924 		"%val_v4f32 = OpLoad %v4f32 %v4f32src\n"
2925 		"%val_v4f16 = OpFConvert %v4f16 %val_v4f32\n"
2926 		"%v4f16dst  = OpAccessChain %v4f16ptr %ssboOUT %zero %x %c_i32_3\n"
2927 		"OpStore %v4f16dst %val_v4f16\n"
2928 		"\n"
2929 
2930 		//struct {f16, v2f16[3]}
2931 		"%Sf32src  = OpAccessChain %f32ptr %ssboIN %zero %x %c_i32_5 %y %zero\n"
2932 		"%Sval_f32 = OpLoad %f32 %Sf32src\n"
2933 		"%Sval_f16 = OpFConvert %f16 %Sval_f32\n"
2934 		"%Sf16dst2  = OpAccessChain %f16ptr %ssboOUT %zero %x %c_i32_5 %y %zero\n"
2935 		"OpStore %Sf16dst2 %Sval_f16\n"
2936 		"\n"
2937 		"%Sv2f32src0   = OpAccessChain %v2f32ptr %ssboIN %zero %x %c_i32_5 %y %c_i32_1 %zero\n"
2938 		"%Sv2f32_0     = OpLoad %v2f32 %Sv2f32src0\n"
2939 		"%Sv2f16_0     = OpFConvert %v2f16 %Sv2f32_0\n"
2940 		"%Sv2f16dst_0  = OpAccessChain %v2f16ptr %ssboOUT %zero %x %c_i32_5 %y %c_i32_1 %zero\n"
2941 		"OpStore %Sv2f16dst_0 %Sv2f16_0\n"
2942 		"\n"
2943 		"%Sv2f32src1  = OpAccessChain %v2f32ptr %ssboIN %zero %x %c_i32_5 %y %c_i32_1 %c_i32_1\n"
2944 		"%Sv2f32_1 = OpLoad %v2f32 %Sv2f32src1\n"
2945 		"%Sv2f16_1 = OpFConvert %v2f16 %Sv2f32_1\n"
2946 		"%Sv2f16dst_1  = OpAccessChain %v2f16ptr %ssboOUT %zero %x %c_i32_5 %y %c_i32_1 %c_i32_1\n"
2947 		"OpStore %Sv2f16dst_1 %Sv2f16_1\n"
2948 		"\n"
2949 		"%Sv2f32src2  = OpAccessChain %v2f32ptr %ssboIN %zero %x %c_i32_5 %y %c_i32_1 %c_i32_2\n"
2950 		"%Sv2f32_2 = OpLoad %v2f32 %Sv2f32src2\n"
2951 		"%Sv2f16_2 = OpFConvert %v2f16 %Sv2f32_2\n"
2952 		"%Sv2f16dst_2  = OpAccessChain %v2f16ptr %ssboOUT %zero %x %c_i32_5 %y %c_i32_1 %c_i32_2\n"
2953 		"OpStore %Sv2f16dst_2 %Sv2f16_2\n"
2954 		"\n"
2955 
2956 		"%v2f32src2  = OpAccessChain %v2f32ptr %ssboIN %zero %x %c_i32_6 %y\n"
2957 		"%val2_v2f32 = OpLoad %v2f32 %v2f32src2\n"
2958 		"%val2_v2f16 = OpFConvert %v2f16 %val2_v2f32\n"
2959 		"%v2f16dst2  = OpAccessChain %v2f16ptr %ssboOUT %zero %x %c_i32_6 %y\n"
2960 		"OpStore %v2f16dst2 %val2_v2f16\n"
2961 		"\n"
2962 		"%f32src2  = OpAccessChain %f32ptr %ssboIN %zero %x %c_i32_7\n"
2963 		"%val2_f32 = OpLoad %f32 %f32src2\n"
2964 		"%val2_f16 = OpFConvert %f16 %val2_f32\n"
2965 		"%f16dst2  = OpAccessChain %f16ptr %ssboOUT %zero %x %c_i32_7\n"
2966 		"OpStore %f16dst2 %val2_f16\n"
2967 		"\n"
2968 		"%v3f32src2  = OpAccessChain %v3f32ptr %ssboIN %zero %x %c_i32_8 %y\n"
2969 		"%val2_v3f32 = OpLoad %v3f32 %v3f32src2\n"
2970 		"%val2_v3f16 = OpFConvert %v3f16 %val2_v3f32\n"
2971 		"%v3f16dst2  = OpAccessChain %v3f16ptr %ssboOUT %zero %x %c_i32_8 %y\n"
2972 		"OpStore %v3f16dst2 %val2_v3f16\n"
2973 		"\n"
2974 
2975 		//Array with 3 elements
2976 		"%LessThan3 = OpSLessThan %bool %y %c_i32_3\n"
2977 		"OpSelectionMerge %BlockIf None\n"
2978 		"OpBranchConditional %LessThan3 %LabelIf %BlockIf\n"
2979 		"  %LabelIf = OpLabel\n"
2980 		"  %f32src3  = OpAccessChain %f32ptr %ssboIN %zero %x %c_i32_4 %y\n"
2981 		"  %val3_f32 = OpLoad %f32 %f32src3\n"
2982 		"  %val3_f16 = OpFConvert %f16 %val3_f32\n"
2983 		"  %f16dst3  = OpAccessChain %f16ptr %ssboOUT %zero %x %c_i32_4 %y\n"
2984 		"  OpStore %f16dst3 %val3_f16\n"
2985 		"\n"
2986 		"  %v4f32src2  = OpAccessChain %v4f32ptr %ssboIN %zero %x %c_i32_9 %y\n"
2987 		"  %val2_v4f32 = OpLoad %v4f32 %v4f32src2\n"
2988 		"  %val2_v4f16 = OpFConvert %v4f16 %val2_v4f32\n"
2989 		"  %v4f16dst2  = OpAccessChain %v4f16ptr %ssboOUT %zero %x %c_i32_9 %y\n"
2990 		"  OpStore %v4f16dst2 %val2_v4f16\n"
2991 		"OpBranch %BlockIf\n"
2992 		"%BlockIf = OpLabel\n"
2993 
2994 		"   OpReturn\n"
2995 		"   OpFunctionEnd\n");
2996 
2997 	{  // Floats
2998 		vector<deFloat16>		float16Data		(getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD430), 0u);
2999 
3000 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3001 		{
3002 			ComputeShaderSpec		spec;
3003 			map<string, string>		specs;
3004 			string					testName		= string(CAPABILITIES[capIdx].name);
3005 			vector<float>			float32DData	= (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? data32bitStd430(rnd) : data32bitStd140(rnd);
3006 
3007 			specs["capability"]		= CAPABILITIES[capIdx].cap;
3008 			specs["storage"]		= CAPABILITIES[capIdx].decor;
3009 			specs["strideF16"]		= getStructShaderComponet(SHADERTEMPLATE_STRIDE16BIT_STD430);
3010 			specs["strideF32"]		= getStructShaderComponet((VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? SHADERTEMPLATE_STRIDE32BIT_STD430 : SHADERTEMPLATE_STRIDE32BIT_STD140);
3011 			specs["types"]			= getStructShaderComponet(SHADERTEMPLATE_TYPES);
3012 
3013 			spec.assembly			= shaderTemplate.specialize(specs);
3014 			spec.numWorkGroups		= IVec3(structData.structArraySize, structData.nestedArraySize, 1);
3015 			spec.verifyIO			= (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? computeCheckStruct<float, deFloat16, SHADERTEMPLATE_STRIDE32BIT_STD430, SHADERTEMPLATE_STRIDE16BIT_STD430> : computeCheckStruct<float, deFloat16, SHADERTEMPLATE_STRIDE32BIT_STD140, SHADERTEMPLATE_STRIDE16BIT_STD430>;
3016 
3017 			spec.inputs.push_back(Resource(BufferSp(new Float32Buffer(float32DData)), CAPABILITIES[capIdx].dtype));
3018 			spec.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data))));
3019 			spec.extensions.push_back("VK_KHR_16bit_storage");
3020 			spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
3021 
3022 			group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
3023 		}
3024 	}
3025 }
3026 
addCompute16bitStructMixedTypesGroup(tcu::TestCaseGroup * group)3027 void addCompute16bitStructMixedTypesGroup (tcu::TestCaseGroup* group)
3028 {
3029 	tcu::TestContext&		testCtx			= group->getTestContext();
3030 	de::Random				rnd				(deStringHash(group->getName()));
3031 	vector<deInt16>			outData			(getStructSize(SHADERTEMPLATE_STRIDEMIX_STD430), 0u);
3032 
3033 	const StringTemplate	shaderTemplate	(
3034 		"OpCapability Shader\n"
3035 		"OpCapability StorageUniformBufferBlock16\n"
3036 		"${capability}\n"
3037 		"OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n"
3038 		"OpExtension \"SPV_KHR_16bit_storage\"\n"
3039 		"OpMemoryModel Logical GLSL450\n"
3040 		"OpEntryPoint GLCompute %main \"main\" %id\n"
3041 		"OpExecutionMode %main LocalSize 1 1 1\n"
3042 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3043 		"${OutOffsets}"
3044 		"${InOffsets}"
3045 		"\n"//SSBO IN
3046 		"OpMemberDecorate %SSBO_IN 0 Offset 0\n"
3047 		"OpDecorate %ssboIN DescriptorSet 0\n"
3048 		"OpDecorate %SSBO_IN ${storage}\n"
3049 		"OpDecorate %SSBO_OUT BufferBlock\n"
3050 		"OpDecorate %ssboIN Binding 0\n"
3051 		"\n"//SSBO OUT
3052 		"OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
3053 		"OpDecorate %ssboOUT DescriptorSet 0\n"
3054 		"OpDecorate %ssboOUT Binding 1\n"
3055 		"\n"//Types
3056 		"%void  = OpTypeVoid\n"
3057 		"%bool  = OpTypeBool\n"
3058 		"%i16   = OpTypeInt 16 1\n"
3059 		"%v2i16 = OpTypeVector %i16 2\n"
3060 		"%v3i16 = OpTypeVector %i16 3\n"
3061 		"%v4i16 = OpTypeVector %i16 4\n"
3062 		"%i32   = OpTypeInt 32 1\n"
3063 		"%v2i32 = OpTypeVector %i32 2\n"
3064 		"%v3i32 = OpTypeVector %i32 3\n"
3065 		"%v4i32 = OpTypeVector %i32 4\n"
3066 		"%u32   = OpTypeInt 32 0\n"
3067 		"%uvec3 = OpTypeVector %u32 3\n"
3068 		"%f32   = OpTypeFloat 32\n"
3069 		"%v4f32 = OpTypeVector %f32  4\n"
3070 		"%voidf = OpTypeFunction %void\n"
3071 		"\n"//Consta value
3072 		"%zero     = OpConstant %i32 0\n"
3073 		"%c_i32_1  = OpConstant %i32 1\n"
3074 		"%c_i32_2  = OpConstant %i32 2\n"
3075 		"%c_i32_3  = OpConstant %i32 3\n"
3076 		"%c_i32_4  = OpConstant %i32 4\n"
3077 		"%c_i32_5  = OpConstant %i32 5\n"
3078 		"%c_i32_6  = OpConstant %i32 6\n"
3079 		"%c_i32_7  = OpConstant %i32 7\n"
3080 		"%c_i32_8  = OpConstant %i32 8\n"
3081 		"%c_i32_9  = OpConstant %i32 9\n"
3082 		"%c_i32_10 = OpConstant %i32 10\n"
3083 		"%c_i32_11 = OpConstant %i32 11\n"
3084 		"%c_u32_1  = OpConstant %u32 1\n"
3085 		"%c_u32_7  = OpConstant %u32 7\n"
3086 		"%c_u32_11 = OpConstant %u32 11\n"
3087 		"\n"//Arrays & Structs
3088 		"%v2b16NestedArr11In  = OpTypeArray %v2i16 %c_u32_11\n"
3089 		"%b32NestedArr11In    = OpTypeArray %i32 %c_u32_11\n"
3090 		"%sb16Arr11In         = OpTypeArray %i16 %c_u32_11\n"
3091 		"%sb32Arr11In         = OpTypeArray %i32 %c_u32_11\n"
3092 		"%sNestedIn           = OpTypeStruct %i16 %i32 %v2b16NestedArr11In %b32NestedArr11In\n"
3093 		"%sNestedArr11In      = OpTypeArray %sNestedIn %c_u32_11\n"
3094 		"%structIn            = OpTypeStruct %i16 %i32 %v2i16 %v2i32 %v3i16 %v3i32 %v4i16 %v4i32 %sNestedArr11In %sb16Arr11In %sb32Arr11In\n"
3095 		"%structArr7In        = OpTypeArray %structIn %c_u32_7\n"
3096 		"%v2b16NestedArr11Out = OpTypeArray %v2i16 %c_u32_11\n"
3097 		"%b32NestedArr11Out   = OpTypeArray %i32 %c_u32_11\n"
3098 		"%sb16Arr11Out        = OpTypeArray %i16 %c_u32_11\n"
3099 		"%sb32Arr11Out        = OpTypeArray %i32 %c_u32_11\n"
3100 		"%sNestedOut          = OpTypeStruct %i16 %i32 %v2b16NestedArr11Out %b32NestedArr11Out\n"
3101 		"%sNestedArr11Out     = OpTypeArray %sNestedOut %c_u32_11\n"
3102 		"%structOut           = OpTypeStruct %i16 %i32 %v2i16 %v2i32 %v3i16 %v3i32 %v4i16 %v4i32 %sNestedArr11Out %sb16Arr11Out %sb32Arr11Out\n"
3103 		"%structArr7Out       = OpTypeArray %structOut %c_u32_7\n"
3104 		"\n"//Pointers
3105 		"%i16outPtr   = OpTypePointer Uniform %i16\n"
3106 		"%v2i16outPtr = OpTypePointer Uniform %v2i16\n"
3107 		"%v3i16outPtr = OpTypePointer Uniform %v3i16\n"
3108 		"%v4i16outPtr = OpTypePointer Uniform %v4i16\n"
3109 		"%i32outPtr   = OpTypePointer Uniform %i32\n"
3110 		"%v2i32outPtr = OpTypePointer Uniform %v2i32\n"
3111 		"%v3i32outPtr = OpTypePointer Uniform %v3i32\n"
3112 		"%v4i32outPtr = OpTypePointer Uniform %v4i32\n"
3113 		"%fp_i32      = OpTypePointer Function %i32\n"
3114 		"%uvec3ptr    = OpTypePointer Input %uvec3\n"
3115 		"\n"//SSBO IN
3116 		"%SSBO_IN    = OpTypeStruct %structArr7In\n"
3117 		"%up_SSBOIN  = OpTypePointer Uniform %SSBO_IN\n"
3118 		"%ssboIN     = OpVariable %up_SSBOIN Uniform\n"
3119 		"\n"//SSBO OUT
3120 		"%SSBO_OUT   = OpTypeStruct %structArr7Out\n"
3121 		"%up_SSBOOUT = OpTypePointer Uniform %SSBO_OUT\n"
3122 		"%ssboOUT    = OpVariable %up_SSBOOUT Uniform\n"
3123 		"\n"//MAIN
3124 		"%id      = OpVariable %uvec3ptr Input\n"
3125 		"%main    = OpFunction %void None %voidf\n"
3126 		"%label   = OpLabel\n"
3127 		"%ndxArrz = OpVariable %fp_i32  Function\n"
3128 		"%idval   = OpLoad %uvec3 %id\n"
3129 		"%x       = OpCompositeExtract %u32 %idval 0\n"
3130 		"%y       = OpCompositeExtract %u32 %idval 1\n"
3131 		"\n"//strutOut.b16 = strutIn.b16
3132 		"%inP1  = OpAccessChain %i16${inPtr} %ssboIN %zero %x %zero\n"
3133 		"%inV1  = OpLoad %i16 %inP1\n"
3134 		"%outP1 = OpAccessChain %i16outPtr %ssboOUT %zero %x %zero\n"
3135 		"OpStore %outP1 %inV1\n"
3136 		"\n"//strutOut.b32 = strutIn.b32
3137 		"%inP2  = OpAccessChain %i32${inPtr} %ssboIN %zero %x %c_i32_1\n"
3138 		"%inV2  = OpLoad %i32 %inP2\n"
3139 		"%outP2 = OpAccessChain %i32outPtr %ssboOUT %zero %x %c_i32_1\n"
3140 		"OpStore %outP2 %inV2\n"
3141 		"\n"//strutOut.v2b16 = strutIn.v2b16
3142 		"%inP3  = OpAccessChain %v2i16${inPtr} %ssboIN %zero %x %c_i32_2\n"
3143 		"%inV3  = OpLoad %v2i16 %inP3\n"
3144 		"%outP3 = OpAccessChain %v2i16outPtr %ssboOUT %zero %x %c_i32_2\n"
3145 		"OpStore %outP3 %inV3\n"
3146 		"\n"//strutOut.v2b32 = strutIn.v2b32
3147 		"%inP4  = OpAccessChain %v2i32${inPtr} %ssboIN %zero %x %c_i32_3\n"
3148 		"%inV4  = OpLoad %v2i32 %inP4\n"
3149 		"%outP4 = OpAccessChain %v2i32outPtr %ssboOUT %zero %x %c_i32_3\n"
3150 		"OpStore %outP4 %inV4\n"
3151 		"\n"//strutOut.v3b16 = strutIn.v3b16
3152 		"%inP5  = OpAccessChain %v3i16${inPtr} %ssboIN %zero %x %c_i32_4\n"
3153 		"%inV5  = OpLoad %v3i16 %inP5\n"
3154 		"%outP5 = OpAccessChain %v3i16outPtr %ssboOUT %zero %x %c_i32_4\n"
3155 		"OpStore %outP5 %inV5\n"
3156 		"\n"//strutOut.v3b32 = strutIn.v3b32
3157 		"%inP6  = OpAccessChain %v3i32${inPtr} %ssboIN %zero %x %c_i32_5\n"
3158 		"%inV6  = OpLoad %v3i32 %inP6\n"
3159 		"%outP6 = OpAccessChain %v3i32outPtr %ssboOUT %zero %x %c_i32_5\n"
3160 		"OpStore %outP6 %inV6\n"
3161 		"\n"//strutOut.v4b16 = strutIn.v4b16
3162 		"%inP7  = OpAccessChain %v4i16${inPtr} %ssboIN %zero %x %c_i32_6\n"
3163 		"%inV7  = OpLoad %v4i16 %inP7\n"
3164 		"%outP7 = OpAccessChain %v4i16outPtr %ssboOUT %zero %x %c_i32_6\n"
3165 		"OpStore %outP7 %inV7\n"
3166 		"\n"//strutOut.v4b32 = strutIn.v4b32
3167 		"%inP8  = OpAccessChain %v4i32${inPtr} %ssboIN %zero %x %c_i32_7\n"
3168 		"%inV8  = OpLoad %v4i32 %inP8\n"
3169 		"%outP8 = OpAccessChain %v4i32outPtr %ssboOUT %zero %x %c_i32_7\n"
3170 		"OpStore %outP8 %inV8\n"
3171 		"\n"//strutOut.b16[y] = strutIn.b16[y]
3172 		"%inP9  = OpAccessChain %i16${inPtr} %ssboIN %zero %x %c_i32_9 %y\n"
3173 		"%inV9  = OpLoad %i16 %inP9\n"
3174 		"%outP9 = OpAccessChain %i16outPtr %ssboOUT %zero %x %c_i32_9 %y\n"
3175 		"OpStore %outP9 %inV9\n"
3176 		"\n"//strutOut.b32[y] = strutIn.b32[y]
3177 		"%inP10  = OpAccessChain %i32${inPtr} %ssboIN %zero %x %c_i32_10 %y\n"
3178 		"%inV10  = OpLoad %i32 %inP10\n"
3179 		"%outP10 = OpAccessChain %i32outPtr %ssboOUT %zero %x %c_i32_10 %y\n"
3180 		"OpStore %outP10 %inV10\n"
3181 		"\n"//strutOut.strutNestedOut[y].b16 = strutIn.strutNestedIn[y].b16
3182 		"%inP11 = OpAccessChain %i16${inPtr} %ssboIN %zero %x %c_i32_8 %y %zero\n"
3183 		"%inV11 = OpLoad %i16 %inP11\n"
3184 		"%outP11 = OpAccessChain %i16outPtr %ssboOUT %zero %x %c_i32_8 %y %zero\n"
3185 		"OpStore %outP11 %inV11\n"
3186 		"\n"//strutOut.strutNestedOut[y].b32 = strutIn.strutNestedIn[y].b32
3187 		"%inP12 = OpAccessChain %i32${inPtr} %ssboIN %zero %x %c_i32_8 %y %c_i32_1\n"
3188 		"%inV12 = OpLoad %i32 %inP12\n"
3189 		"%outP12 = OpAccessChain %i32outPtr %ssboOUT %zero %x %c_i32_8 %y %c_i32_1\n"
3190 		"OpStore %outP12 %inV12\n"
3191 		"\n"
3192 		"${zBeginLoop}"
3193 		"\n"//strutOut.strutNestedOut[y].v2b16[valNdx] = strutIn.strutNestedIn[y].v2b16[valNdx]
3194 		"%inP13  = OpAccessChain %v2i16${inPtr} %ssboIN %zero %x %c_i32_8 %y %c_i32_2 %Valz\n"
3195 		"%inV13  = OpLoad %v2i16 %inP13\n"
3196 		"%outP13 = OpAccessChain %v2i16outPtr %ssboOUT %zero %x %c_i32_8 %y %c_i32_2 %Valz\n"
3197 		"OpStore %outP13 %inV13\n"
3198 		"\n"//strutOut.strutNestedOut[y].b32[valNdx] = strutIn.strutNestedIn[y].b32[valNdx]
3199 		"%inP14  = OpAccessChain %i32${inPtr} %ssboIN %zero %x %c_i32_8 %y %c_i32_3 %Valz\n"
3200 		"%inV14  = OpLoad %i32 %inP14\n"
3201 		"%outP14 = OpAccessChain %i32outPtr %ssboOUT %zero %x %c_i32_8 %y %c_i32_3 %Valz\n"
3202 		"OpStore %outP14 %inV14\n"
3203 		"\n${zEndLoop}\n"
3204 		"OpBranch %exitLabel\n"
3205 		"%exitLabel = OpLabel\n"
3206 		"OpReturn\n"
3207 		"OpFunctionEnd\n");
3208 
3209 	for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3210 	{  // int
3211 		const bool				isUniform	= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER == CAPABILITIES[capIdx].dtype;
3212 		vector<deInt16>			inData		= isUniform ? dataMixStd140(rnd) : dataMixStd430(rnd);
3213 		ComputeShaderSpec		spec;
3214 		map<string, string>		specsOffset;
3215 		map<string, string>		specsLoop;
3216 		map<string, string>		specs;
3217 		string					testName	= string(CAPABILITIES[capIdx].name);
3218 
3219 		specsLoop["exeCount"]	= "c_i32_11";
3220 		specsLoop["loopName"]	= "z";
3221 		specs["zBeginLoop"]		= beginLoop(specsLoop);
3222 		specs["zEndLoop"]		= endLoop(specsLoop);
3223 		specs["capability"]		= isUniform ? "OpCapability " + string(CAPABILITIES[capIdx].cap) : " ";
3224 		specs["inPtr"]			= "outPtr";
3225 		specs["storage"]		= isUniform ? "Block" : "BufferBlock";
3226 		specsOffset["InOut"]	= "In";
3227 		specs["InOffsets"]		= StringTemplate(isUniform ? getStructShaderComponet(SHADERTEMPLATE_STRIDEMIX_STD140) : getStructShaderComponet(SHADERTEMPLATE_STRIDEMIX_STD430)).specialize(specsOffset);
3228 		specsOffset["InOut"]	= "Out";
3229 		specs["OutOffsets"]		= StringTemplate(getStructShaderComponet(SHADERTEMPLATE_STRIDEMIX_STD430)).specialize(specsOffset);
3230 
3231 		spec.assembly					= shaderTemplate.specialize(specs);
3232 		spec.numWorkGroups				= IVec3(structData.structArraySize, structData.nestedArraySize, 1);
3233 		spec.verifyIO					= isUniform ? computeCheckStruct<deInt16, deInt16, SHADERTEMPLATE_STRIDEMIX_STD140, SHADERTEMPLATE_STRIDEMIX_STD430> : computeCheckStruct<deInt16, deInt16, SHADERTEMPLATE_STRIDEMIX_STD430, SHADERTEMPLATE_STRIDEMIX_STD430>;
3234 		spec.inputs.push_back			(Resource(BufferSp(new Int16Buffer(inData)), CAPABILITIES[capIdx].dtype));
3235 		spec.outputs.push_back			(Resource(BufferSp(new Int16Buffer(outData))));
3236 		spec.extensions.push_back		("VK_KHR_16bit_storage");
3237 		spec.requestedVulkanFeatures	= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
3238 
3239 		group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
3240 	}
3241 }
3242 
addGraphics16BitStorageUniformFloat32To16Group(tcu::TestCaseGroup * testGroup)3243 void addGraphics16BitStorageUniformFloat32To16Group (tcu::TestCaseGroup* testGroup)
3244 {
3245 	de::Random							rnd					(deStringHash(testGroup->getName()));
3246 	map<string, string>					fragments;
3247 	vector<string>						extensions;
3248 	const deUint32						numDataPoints		= 256;
3249 	RGBA								defaultColors[4];
3250 	const vector<float>					float32Data			= getFloat32s(rnd, numDataPoints);
3251 	vector<float>						float32DataPadded;
3252 	vector<deFloat16>					float16DummyData	(numDataPoints, 0);
3253 	const StringTemplate				capabilities		("OpCapability ${cap}\n");
3254 
3255 	for (size_t dataIdx = 0; dataIdx < float32Data.size(); ++dataIdx)
3256 	{
3257 		float32DataPadded.push_back(float32Data[dataIdx]);
3258 		float32DataPadded.push_back(0.0f);
3259 		float32DataPadded.push_back(0.0f);
3260 		float32DataPadded.push_back(0.0f);
3261 	}
3262 
3263 	extensions.push_back("VK_KHR_16bit_storage");
3264 	fragments["extension"]	= "OpExtension \"SPV_KHR_16bit_storage\"";
3265 
3266 	struct RndMode
3267 	{
3268 		const char*				name;
3269 		const char*				decor;
3270 		VerifyIOFunc			f;
3271 	};
3272 
3273 	getDefaultColors(defaultColors);
3274 
3275 	{  // scalar cases
3276 		fragments["pre_main"]				=
3277 			"      %f16 = OpTypeFloat 16\n"
3278 			"%c_i32_256 = OpConstant %i32 256\n"
3279 			"   %up_f32 = OpTypePointer Uniform %f32\n"
3280 			"   %up_f16 = OpTypePointer Uniform %f16\n"
3281 			"   %ra_f32 = OpTypeArray %f32 %c_i32_256\n"
3282 			"   %ra_f16 = OpTypeArray %f16 %c_i32_256\n"
3283 			"   %SSBO32 = OpTypeStruct %ra_f32\n"
3284 			"   %SSBO16 = OpTypeStruct %ra_f16\n"
3285 			"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
3286 			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
3287 			"   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
3288 			"   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
3289 
3290 		const StringTemplate decoration		(
3291 			"OpDecorate %ra_f32 ArrayStride ${arraystride}\n"
3292 			"OpDecorate %ra_f16 ArrayStride 2\n"
3293 			"OpMemberDecorate %SSBO32 0 Offset 0\n"
3294 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
3295 			"OpDecorate %SSBO32 ${indecor}\n"
3296 			"OpDecorate %SSBO16 BufferBlock\n"
3297 			"OpDecorate %ssbo32 DescriptorSet 0\n"
3298 			"OpDecorate %ssbo16 DescriptorSet 0\n"
3299 			"OpDecorate %ssbo32 Binding 0\n"
3300 			"OpDecorate %ssbo16 Binding 1\n"
3301 			"${rounddecor}\n");
3302 
3303 		fragments["testfun"]				=
3304 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
3305 			"    %param = OpFunctionParameter %v4f32\n"
3306 
3307 			"%entry = OpLabel\n"
3308 			"    %i = OpVariable %fp_i32 Function\n"
3309 			"         OpStore %i %c_i32_0\n"
3310 			"         OpBranch %loop\n"
3311 
3312 			" %loop = OpLabel\n"
3313 			"   %15 = OpLoad %i32 %i\n"
3314 			"   %lt = OpSLessThan %bool %15 %c_i32_256\n"
3315 			"         OpLoopMerge %merge %inc None\n"
3316 			"         OpBranchConditional %lt %write %merge\n"
3317 
3318 			"%write = OpLabel\n"
3319 			"   %30 = OpLoad %i32 %i\n"
3320 			"  %src = OpAccessChain %up_f32 %ssbo32 %c_i32_0 %30\n"
3321 			"%val32 = OpLoad %f32 %src\n"
3322 			"%val16 = OpFConvert %f16 %val32\n"
3323 			"  %dst = OpAccessChain %up_f16 %ssbo16 %c_i32_0 %30\n"
3324 			"         OpStore %dst %val16\n"
3325 			"         OpBranch %inc\n"
3326 
3327 			"  %inc = OpLabel\n"
3328 			"   %37 = OpLoad %i32 %i\n"
3329 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
3330 			"         OpStore %i %39\n"
3331 			"         OpBranch %loop\n"
3332 
3333 			"%merge = OpLabel\n"
3334 			"         OpReturnValue %param\n"
3335 
3336 			"OpFunctionEnd\n";
3337 
3338 		const RndMode	rndModes[]			=
3339 		{
3340 			{"rtz",						"OpDecorate %val16  FPRoundingMode RTZ",	graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
3341 			{"rte",						"OpDecorate %val16  FPRoundingMode RTE",	graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
3342 			{"unspecified_rnd_mode",	"",											graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
3343 		};
3344 
3345 		const deUint32	arrayStrides[]		= {4, 16};
3346 
3347 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3348 			for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
3349 			{
3350 				map<string, string>	specs;
3351 				string				testName	= string(CAPABILITIES[capIdx].name) + "_scalar_float_" + rndModes[rndModeIdx].name;
3352 				GraphicsResources	resources;
3353 				VulkanFeatures		features;
3354 
3355 				resources.inputs.push_back(Resource(BufferSp(new Float32Buffer(arrayStrides[capIdx] == 4 ? float32Data : float32DataPadded)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3356 				// We use a custom verifyIO to check the result via computing directly from inputs; the contents in outputs do not matter.
3357 				resources.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16DummyData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3358 
3359 				specs["cap"]					= CAPABILITIES[capIdx].cap;
3360 				specs["indecor"]				= CAPABILITIES[capIdx].decor;
3361 				specs["arraystride"]			= de::toString(arrayStrides[capIdx]);
3362 				specs["rounddecor"]				= rndModes[rndModeIdx].decor;
3363 
3364 				fragments["capability"]			= capabilities.specialize(specs);
3365 				fragments["decoration"]			= decoration.specialize(specs);
3366 
3367 				resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
3368 				resources.verifyIO				= rndModes[rndModeIdx].f;
3369 
3370 				features												= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
3371 				features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
3372 				features.coreFeatures.fragmentStoresAndAtomics			= true;
3373 
3374 				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
3375 			}
3376 	}
3377 
3378 	// Non-scalar cases can use the same resources.
3379 	GraphicsResources	resources;
3380 	resources.inputs.push_back(Resource(BufferSp(new Float32Buffer(float32Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3381 	// We use a custom verifyIO to check the result via computing directly from inputs; the contents in outputs do not matter.
3382 	resources.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16DummyData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3383 
3384 	{  // vector cases
3385 		fragments["pre_main"]				=
3386 			"      %f16 = OpTypeFloat 16\n"
3387 			" %c_i32_64 = OpConstant %i32 64\n"
3388 			"	 %v4f16 = OpTypeVector %f16 4\n"
3389 			" %up_v4f32 = OpTypePointer Uniform %v4f32\n"
3390 			" %up_v4f16 = OpTypePointer Uniform %v4f16\n"
3391 			" %ra_v4f32 = OpTypeArray %v4f32 %c_i32_64\n"
3392 			" %ra_v4f16 = OpTypeArray %v4f16 %c_i32_64\n"
3393 			"   %SSBO32 = OpTypeStruct %ra_v4f32\n"
3394 			"   %SSBO16 = OpTypeStruct %ra_v4f16\n"
3395 			"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
3396 			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
3397 			"   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
3398 			"   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
3399 
3400 		const StringTemplate decoration		(
3401 			"OpDecorate %ra_v4f32 ArrayStride 16\n"
3402 			"OpDecorate %ra_v4f16 ArrayStride 8\n"
3403 			"OpMemberDecorate %SSBO32 0 Offset 0\n"
3404 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
3405 			"OpDecorate %SSBO32 ${indecor}\n"
3406 			"OpDecorate %SSBO16 BufferBlock\n"
3407 			"OpDecorate %ssbo32 DescriptorSet 0\n"
3408 			"OpDecorate %ssbo16 DescriptorSet 0\n"
3409 			"OpDecorate %ssbo32 Binding 0\n"
3410 			"OpDecorate %ssbo16 Binding 1\n"
3411 			"${rounddecor}\n");
3412 
3413 		// ssbo16[] <- convert ssbo32[] to 16bit float
3414 		fragments["testfun"]				=
3415 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
3416 			"    %param = OpFunctionParameter %v4f32\n"
3417 
3418 			"%entry = OpLabel\n"
3419 			"    %i = OpVariable %fp_i32 Function\n"
3420 			"         OpStore %i %c_i32_0\n"
3421 			"         OpBranch %loop\n"
3422 
3423 			" %loop = OpLabel\n"
3424 			"   %15 = OpLoad %i32 %i\n"
3425 			"   %lt = OpSLessThan %bool %15 %c_i32_64\n"
3426 			"         OpLoopMerge %merge %inc None\n"
3427 			"         OpBranchConditional %lt %write %merge\n"
3428 
3429 			"%write = OpLabel\n"
3430 			"   %30 = OpLoad %i32 %i\n"
3431 			"  %src = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30\n"
3432 			"%val32 = OpLoad %v4f32 %src\n"
3433 			"%val16 = OpFConvert %v4f16 %val32\n"
3434 			"  %dst = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30\n"
3435 			"         OpStore %dst %val16\n"
3436 			"         OpBranch %inc\n"
3437 
3438 			"  %inc = OpLabel\n"
3439 			"   %37 = OpLoad %i32 %i\n"
3440 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
3441 			"         OpStore %i %39\n"
3442 			"         OpBranch %loop\n"
3443 
3444 			"%merge = OpLabel\n"
3445 			"         OpReturnValue %param\n"
3446 
3447 			"OpFunctionEnd\n";
3448 
3449 		const RndMode	rndModes[] =
3450 		{
3451 			{"rtz",						"OpDecorate %val16  FPRoundingMode RTZ",	graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
3452 			{"rte",						"OpDecorate %val16  FPRoundingMode RTE",	graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
3453 			{"unspecified_rnd_mode",	"",											graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
3454 		};
3455 
3456 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3457 			for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
3458 			{
3459 				map<string, string>	specs;
3460 				VulkanFeatures		features;
3461 				string				testName	= string(CAPABILITIES[capIdx].name) + "_vector_float_" + rndModes[rndModeIdx].name;
3462 
3463 				specs["cap"]					= CAPABILITIES[capIdx].cap;
3464 				specs["indecor"]				= CAPABILITIES[capIdx].decor;
3465 				specs["rounddecor"]				= rndModes[rndModeIdx].decor;
3466 
3467 				fragments["capability"]			= capabilities.specialize(specs);
3468 				fragments["decoration"]			= decoration.specialize(specs);
3469 
3470 				resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
3471 				resources.verifyIO				= rndModes[rndModeIdx].f;
3472 
3473 				features												= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
3474 				features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
3475 				features.coreFeatures.fragmentStoresAndAtomics			= true;
3476 
3477 				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
3478 			}
3479 	}
3480 
3481 	{  // matrix cases
3482 		fragments["pre_main"]				=
3483 			"       %f16 = OpTypeFloat 16\n"
3484 			"  %c_i32_16 = OpConstant %i32 16\n"
3485 			"     %v4f16 = OpTypeVector %f16 4\n"
3486 			"   %m4x4f32 = OpTypeMatrix %v4f32 4\n"
3487 			"   %m4x4f16 = OpTypeMatrix %v4f16 4\n"
3488 			"  %up_v4f32 = OpTypePointer Uniform %v4f32\n"
3489 			"  %up_v4f16 = OpTypePointer Uniform %v4f16\n"
3490 			"%a16m4x4f32 = OpTypeArray %m4x4f32 %c_i32_16\n"
3491 			"%a16m4x4f16 = OpTypeArray %m4x4f16 %c_i32_16\n"
3492 			"    %SSBO32 = OpTypeStruct %a16m4x4f32\n"
3493 			"    %SSBO16 = OpTypeStruct %a16m4x4f16\n"
3494 			" %up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
3495 			" %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
3496 			"    %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
3497 			"    %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
3498 
3499 		const StringTemplate decoration		(
3500 			"OpDecorate %a16m4x4f32 ArrayStride 64\n"
3501 			"OpDecorate %a16m4x4f16 ArrayStride 32\n"
3502 			"OpMemberDecorate %SSBO32 0 Offset 0\n"
3503 			"OpMemberDecorate %SSBO32 0 ColMajor\n"
3504 			"OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
3505 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
3506 			"OpMemberDecorate %SSBO16 0 ColMajor\n"
3507 			"OpMemberDecorate %SSBO16 0 MatrixStride 8\n"
3508 			"OpDecorate %SSBO32 ${indecor}\n"
3509 			"OpDecorate %SSBO16 BufferBlock\n"
3510 			"OpDecorate %ssbo32 DescriptorSet 0\n"
3511 			"OpDecorate %ssbo16 DescriptorSet 0\n"
3512 			"OpDecorate %ssbo32 Binding 0\n"
3513 			"OpDecorate %ssbo16 Binding 1\n"
3514 			"${rounddecor}\n");
3515 
3516 		fragments["testfun"]				=
3517 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
3518 			"    %param = OpFunctionParameter %v4f32\n"
3519 
3520 			"%entry = OpLabel\n"
3521 			"    %i = OpVariable %fp_i32 Function\n"
3522 			"         OpStore %i %c_i32_0\n"
3523 			"         OpBranch %loop\n"
3524 
3525 			" %loop = OpLabel\n"
3526 			"   %15 = OpLoad %i32 %i\n"
3527 			"   %lt = OpSLessThan %bool %15 %c_i32_16\n"
3528 			"         OpLoopMerge %merge %inc None\n"
3529 			"         OpBranchConditional %lt %write %merge\n"
3530 
3531 			"  %write = OpLabel\n"
3532 			"     %30 = OpLoad %i32 %i\n"
3533 			"  %src_0 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_0\n"
3534 			"  %src_1 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_1\n"
3535 			"  %src_2 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_2\n"
3536 			"  %src_3 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_3\n"
3537 			"%val32_0 = OpLoad %v4f32 %src_0\n"
3538 			"%val32_1 = OpLoad %v4f32 %src_1\n"
3539 			"%val32_2 = OpLoad %v4f32 %src_2\n"
3540 			"%val32_3 = OpLoad %v4f32 %src_3\n"
3541 			"%val16_0 = OpFConvert %v4f16 %val32_0\n"
3542 			"%val16_1 = OpFConvert %v4f16 %val32_1\n"
3543 			"%val16_2 = OpFConvert %v4f16 %val32_2\n"
3544 			"%val16_3 = OpFConvert %v4f16 %val32_3\n"
3545 			"  %dst_0 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_0\n"
3546 			"  %dst_1 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_1\n"
3547 			"  %dst_2 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_2\n"
3548 			"  %dst_3 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_3\n"
3549 			"           OpStore %dst_0 %val16_0\n"
3550 			"           OpStore %dst_1 %val16_1\n"
3551 			"           OpStore %dst_2 %val16_2\n"
3552 			"           OpStore %dst_3 %val16_3\n"
3553 			"           OpBranch %inc\n"
3554 
3555 			"  %inc = OpLabel\n"
3556 			"   %37 = OpLoad %i32 %i\n"
3557 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
3558 			"         OpStore %i %39\n"
3559 			"         OpBranch %loop\n"
3560 
3561 			"%merge = OpLabel\n"
3562 			"         OpReturnValue %param\n"
3563 
3564 			"OpFunctionEnd\n";
3565 
3566 		const RndMode	rndModes[] =
3567 		{
3568 			{"rte",						"OpDecorate %val16_0  FPRoundingMode RTE\nOpDecorate %val16_1  FPRoundingMode RTE\nOpDecorate %val16_2  FPRoundingMode RTE\nOpDecorate %val16_3  FPRoundingMode RTE",	graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
3569 			{"rtz",						"OpDecorate %val16_0  FPRoundingMode RTZ\nOpDecorate %val16_1  FPRoundingMode RTZ\nOpDecorate %val16_2  FPRoundingMode RTZ\nOpDecorate %val16_3  FPRoundingMode RTZ",	graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
3570 			{"unspecified_rnd_mode",	"",																																										graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
3571 		};
3572 
3573 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3574 			for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
3575 			{
3576 				map<string, string>	specs;
3577 				VulkanFeatures		features;
3578 				string				testName	= string(CAPABILITIES[capIdx].name) + "_matrix_float_" + rndModes[rndModeIdx].name;
3579 
3580 				specs["cap"]					= CAPABILITIES[capIdx].cap;
3581 				specs["indecor"]				= CAPABILITIES[capIdx].decor;
3582 				specs["rounddecor"]				= rndModes[rndModeIdx].decor;
3583 
3584 				fragments["capability"]			= capabilities.specialize(specs);
3585 				fragments["decoration"]			= decoration.specialize(specs);
3586 
3587 				resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
3588 				resources.verifyIO				= rndModes[rndModeIdx].f;
3589 
3590 				features												= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
3591 				features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
3592 				features.coreFeatures.fragmentStoresAndAtomics			= true;
3593 
3594 				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
3595 			}
3596 	}
3597 }
3598 
addGraphics16BitStorageInputOutputFloat32To16Group(tcu::TestCaseGroup * testGroup)3599 void addGraphics16BitStorageInputOutputFloat32To16Group (tcu::TestCaseGroup* testGroup)
3600 {
3601 	de::Random			rnd					(deStringHash(testGroup->getName()));
3602 	RGBA				defaultColors[4];
3603 	vector<string>		extensions;
3604 	map<string, string>	fragments			= passthruFragments();
3605 	const deUint32		numDataPoints		= 64;
3606 	vector<float>		float32Data			= getFloat32s(rnd, numDataPoints);
3607 
3608 	extensions.push_back("VK_KHR_16bit_storage");
3609 
3610 	fragments["capability"]				= "OpCapability StorageInputOutput16\n";
3611 	fragments["extension"]				= "OpExtension \"SPV_KHR_16bit_storage\"\n";
3612 
3613 	getDefaultColors(defaultColors);
3614 
3615 	struct RndMode
3616 	{
3617 		const char*				name;
3618 		const char*				decor;
3619 		const char*				decor_tessc;
3620 		RoundingModeFlags		flags;
3621 	};
3622 
3623 	const RndMode		rndModes[]		=
3624 	{
3625 		{"rtz",
3626 		 "OpDecorate %ret0  FPRoundingMode RTZ\n",
3627 		 "OpDecorate %ret1  FPRoundingMode RTZ\n"
3628 		 "OpDecorate %ret2  FPRoundingMode RTZ\n",
3629 		 ROUNDINGMODE_RTZ},
3630 		{"rte",
3631 		 "OpDecorate %ret0  FPRoundingMode RTE\n",
3632 		 "OpDecorate %ret1  FPRoundingMode RTE\n"
3633 		 "OpDecorate %ret2  FPRoundingMode RTE\n",
3634 		  ROUNDINGMODE_RTE},
3635 		{"unspecified_rnd_mode",	"",		"",			RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)},
3636 	};
3637 
3638 	struct Case
3639 	{
3640 		const char*	name;
3641 		const char*	interfaceOpCall;
3642 		const char*	interfaceOpFunc;
3643 		const char* postInterfaceOp;
3644 		const char* postInterfaceOpGeom;
3645 		const char* postInterfaceOpTessc;
3646 		const char*	preMain;
3647 		const char*	inputType;
3648 		const char*	outputType;
3649 		deUint32	numPerCase;
3650 		deUint32	numElements;
3651 	};
3652 
3653 	const Case	cases[]		=
3654 	{
3655 		{ // Scalar cases
3656 			"scalar",
3657 			"OpFConvert %f16",
3658 			"",
3659 
3660 			"             %ret0 = OpFConvert %f16 %IF_input_val\n"
3661 			"                OpStore %IF_output %ret0\n",
3662 
3663 			"             %ret0 = OpFConvert %f16 %IF_input_val0\n"
3664 			"                OpStore %IF_output %ret0\n",
3665 
3666 			"             %ret0 = OpFConvert %f16 %IF_input_val0\n"
3667 			"                OpStore %IF_output_ptr0 %ret0\n"
3668 			"             %ret1 = OpFConvert %f16 %IF_input_val1\n"
3669 			"                OpStore %IF_output_ptr1 %ret1\n"
3670 			"             %ret2 = OpFConvert %f16 %IF_input_val2\n"
3671 			"                OpStore %IF_output_ptr2 %ret2\n",
3672 
3673 			"             %f16 = OpTypeFloat 16\n"
3674 			"          %op_f16 = OpTypePointer Output %f16\n"
3675 			"           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
3676 			"        %op_a3f16 = OpTypePointer Output %a3f16\n"
3677 			"%f16_f32_function = OpTypeFunction %f16 %f32\n"
3678 			"           %a3f32 = OpTypeArray %f32 %c_i32_3\n"
3679 			"        %ip_a3f32 = OpTypePointer Input %a3f32\n",
3680 
3681 			"f32",
3682 			"f16",
3683 			4,
3684 			1,
3685 		},
3686 		{ // Vector cases
3687 			"vector",
3688 
3689 			"OpFConvert %v2f16",
3690 			"",
3691 
3692 			"             %ret0 = OpFConvert %v2f16 %IF_input_val\n"
3693 			"                OpStore %IF_output %ret0\n",
3694 
3695 			"             %ret0 = OpFConvert %v2f16 %IF_input_val0\n"
3696 			"                OpStore %IF_output %ret0\n",
3697 
3698 			"             %ret0 = OpFConvert %v2f16 %IF_input_val0\n"
3699 			"                OpStore %IF_output_ptr0 %ret0\n"
3700 			"             %ret1 = OpFConvert %v2f16 %IF_input_val1\n"
3701 			"                OpStore %IF_output_ptr1 %ret1\n"
3702 			"             %ret2 = OpFConvert %v2f16 %IF_input_val2\n"
3703 			"                OpStore %IF_output_ptr2 %ret2\n",
3704 
3705 			"                 %f16 = OpTypeFloat 16\n"
3706 			"               %v2f16 = OpTypeVector %f16 2\n"
3707 			"            %op_v2f16 = OpTypePointer Output %v2f16\n"
3708 			"             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
3709 			"          %op_a3v2f16 = OpTypePointer Output %a3v2f16\n"
3710 			"%v2f16_v2f32_function = OpTypeFunction %v2f16 %v2f32\n"
3711 			"             %a3v2f32 = OpTypeArray %v2f32 %c_i32_3\n"
3712 			"          %ip_a3v2f32 = OpTypePointer Input %a3v2f32\n",
3713 
3714 			"v2f32",
3715 			"v2f16",
3716 			2 * 4,
3717 			2,
3718 		}
3719 	};
3720 
3721 	VulkanFeatures	requiredFeatures;
3722 	requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
3723 
3724 	for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
3725 		for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
3726 		{
3727 			fragments["interface_op_call"]			= cases[caseIdx].interfaceOpCall;
3728 			fragments["interface_op_func"]			= cases[caseIdx].interfaceOpFunc;
3729 			fragments["post_interface_op_frag"]		= cases[caseIdx].postInterfaceOp;
3730 			fragments["post_interface_op_vert"]		= cases[caseIdx].postInterfaceOp;
3731 			fragments["post_interface_op_geom"]		= cases[caseIdx].postInterfaceOpGeom;
3732 			fragments["post_interface_op_tesse"]	= cases[caseIdx].postInterfaceOpGeom;
3733 			fragments["post_interface_op_tessc"]	= cases[caseIdx].postInterfaceOpTessc;
3734 			fragments["pre_main"]					= cases[caseIdx].preMain;
3735 			fragments["decoration"]					= rndModes[rndModeIdx].decor;
3736 			fragments["decoration_tessc"]			= rndModes[rndModeIdx].decor_tessc;
3737 
3738 			fragments["input_type"]					= cases[caseIdx].inputType;
3739 			fragments["output_type"]				= cases[caseIdx].outputType;
3740 
3741 			GraphicsInterfaces	interfaces;
3742 			const deUint32		numPerCase	= cases[caseIdx].numPerCase;
3743 			vector<float>		subInputs	(numPerCase);
3744 			vector<deFloat16>	subOutputs	(numPerCase);
3745 
3746 			// The pipeline need this to call compare16BitFloat() when checking the result.
3747 			interfaces.setRoundingMode(rndModes[rndModeIdx].flags);
3748 
3749 			for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
3750 			{
3751 				string		testName	= string(cases[caseIdx].name) + numberToString(caseNdx) + "_" + rndModes[rndModeIdx].name;
3752 
3753 				for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
3754 				{
3755 					subInputs[numNdx]	= float32Data[caseNdx * numPerCase + numNdx];
3756 					// We derive the expected result from inputs directly in the graphics pipeline.
3757 					subOutputs[numNdx]	= 0;
3758 				}
3759 				interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT32), BufferSp(new Float32Buffer(subInputs))),
3760 										  std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16), BufferSp(new Float16Buffer(subOutputs))));
3761 				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
3762 			}
3763 		}
3764 }
3765 
addGraphics16BitStorageInputOutputFloat16To32Group(tcu::TestCaseGroup * testGroup)3766 void addGraphics16BitStorageInputOutputFloat16To32Group (tcu::TestCaseGroup* testGroup)
3767 {
3768 	de::Random				rnd					(deStringHash(testGroup->getName()));
3769 	RGBA					defaultColors[4];
3770 	vector<string>			extensions;
3771 	map<string, string>		fragments			= passthruFragments();
3772 	const deUint32			numDataPoints		= 64;
3773 	vector<deFloat16>		float16Data			(getFloat16s(rnd, numDataPoints));
3774 	vector<float>			float32Data;
3775 
3776 	float32Data.reserve(numDataPoints);
3777 	for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
3778 		float32Data.push_back(deFloat16To32(float16Data[numIdx]));
3779 
3780 	extensions.push_back("VK_KHR_16bit_storage");
3781 
3782 	fragments["capability"]				= "OpCapability StorageInputOutput16\n";
3783 	fragments["extension"]				= "OpExtension \"SPV_KHR_16bit_storage\"\n";
3784 
3785 	getDefaultColors(defaultColors);
3786 
3787 	struct Case
3788 	{
3789 		const char*	name;
3790 		const char*	interfaceOpCall;
3791 		const char*	interfaceOpFunc;
3792 		const char*	preMain;
3793 		const char*	inputType;
3794 		const char*	outputType;
3795 		deUint32	numPerCase;
3796 		deUint32	numElements;
3797 	};
3798 
3799 	Case	cases[]		=
3800 	{
3801 		{ // Scalar cases
3802 			"scalar",
3803 
3804 			"OpFConvert %f32",
3805 			"",
3806 
3807 			"             %f16 = OpTypeFloat 16\n"
3808 			"          %ip_f16 = OpTypePointer Input %f16\n"
3809 			"           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
3810 			"        %ip_a3f16 = OpTypePointer Input %a3f16\n"
3811 			"%f32_f16_function = OpTypeFunction %f32 %f16\n"
3812 			"           %a3f32 = OpTypeArray %f32 %c_i32_3\n"
3813 			"        %op_a3f32 = OpTypePointer Output %a3f32\n",
3814 
3815 			"f16",
3816 			"f32",
3817 			4,
3818 			1,
3819 		},
3820 		{ // Vector cases
3821 			"vector",
3822 
3823 			"OpFConvert %v2f32",
3824 			"",
3825 
3826 			"                 %f16 = OpTypeFloat 16\n"
3827 			"		        %v2f16 = OpTypeVector %f16 2\n"
3828 			"            %ip_v2f16 = OpTypePointer Input %v2f16\n"
3829 			"             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
3830 			"          %ip_a3v2f16 = OpTypePointer Input %a3v2f16\n"
3831 			"%v2f32_v2f16_function = OpTypeFunction %v2f32 %v2f16\n"
3832 			"             %a3v2f32 = OpTypeArray %v2f32 %c_i32_3\n"
3833 			"          %op_a3v2f32 = OpTypePointer Output %a3v2f32\n",
3834 
3835 			"v2f16",
3836 			"v2f32",
3837 			2 * 4,
3838 			2,
3839 		}
3840 	};
3841 
3842 	VulkanFeatures	requiredFeatures;
3843 	requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
3844 
3845 	for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
3846 	{
3847 		fragments["interface_op_call"]  = cases[caseIdx].interfaceOpCall;
3848 		fragments["interface_op_func"]	= cases[caseIdx].interfaceOpFunc;
3849 		fragments["pre_main"]			= cases[caseIdx].preMain;
3850 
3851 		fragments["input_type"]			= cases[caseIdx].inputType;
3852 		fragments["output_type"]		= cases[caseIdx].outputType;
3853 
3854 		GraphicsInterfaces	interfaces;
3855 		const deUint32		numPerCase	= cases[caseIdx].numPerCase;
3856 		vector<deFloat16>	subInputs	(numPerCase);
3857 		vector<float>		subOutputs	(numPerCase);
3858 
3859 		for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
3860 		{
3861 			string			testName	= string(cases[caseIdx].name) + numberToString(caseNdx);
3862 
3863 			for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
3864 			{
3865 				subInputs[numNdx]	= float16Data[caseNdx * numPerCase + numNdx];
3866 				subOutputs[numNdx]	= float32Data[caseNdx * numPerCase + numNdx];
3867 			}
3868 			interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16), BufferSp(new Float16Buffer(subInputs))),
3869 									  std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT32), BufferSp(new Float32Buffer(subOutputs))));
3870 			createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
3871 		}
3872 	}
3873 }
3874 
addGraphics16BitStorageInputOutputFloat16To16Group(tcu::TestCaseGroup * testGroup)3875 void addGraphics16BitStorageInputOutputFloat16To16Group (tcu::TestCaseGroup* testGroup)
3876 {
3877 	de::Random			rnd					(deStringHash(testGroup->getName()));
3878 	RGBA				defaultColors[4];
3879 	vector<string>		extensions;
3880 	map<string, string>	fragments			= passthruFragments();
3881 	const deUint32		numDataPoints		= 64;
3882 	vector<deFloat16>	float16Data			(getFloat16s(rnd, numDataPoints));
3883 	VulkanFeatures		requiredFeatures;
3884 
3885 	requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
3886 	extensions.push_back("VK_KHR_16bit_storage");
3887 
3888 	fragments["capability"]					= "OpCapability StorageInputOutput16\n";
3889 	fragments["extension"]					= "OpExtension \"SPV_KHR_16bit_storage\"\n";
3890 
3891 	getDefaultColors(defaultColors);
3892 
3893 	struct Case
3894 	{
3895 		const char*	name;
3896 		const char*	interfaceOpCall;
3897 		const char*	interfaceOpFunc;
3898 		const char*	preMain;
3899 		const char*	inputType;
3900 		const char*	outputType;
3901 		deUint32	numPerCase;
3902 		deUint32	numElements;
3903 	};
3904 
3905 	Case				cases[]				=
3906 	{
3907 		{ // Scalar cases
3908 			"scalar",
3909 
3910 			"OpCopyObject %f16",
3911 			"",
3912 
3913 			"             %f16 = OpTypeFloat 16\n"
3914 			"          %ip_f16 = OpTypePointer Input %f16\n"
3915 			"           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
3916 			"        %ip_a3f16 = OpTypePointer Input %a3f16\n"
3917 			"%f16_f16_function = OpTypeFunction %f16 %f16\n"
3918 			"          %op_f16 = OpTypePointer Output %f16\n"
3919 			"        %op_a3f16 = OpTypePointer Output %a3f16\n",
3920 
3921 			"f16",
3922 			"f16",
3923 			4,
3924 			1,
3925 		},
3926 		{ // Vector cases
3927 			"vector",
3928 
3929 			"OpCopyObject %v2f16",
3930 			"",
3931 
3932 			"                 %f16 = OpTypeFloat 16\n"
3933 			"               %v2f16 = OpTypeVector %f16 2\n"
3934 			"            %ip_v2f16 = OpTypePointer Input %v2f16\n"
3935 			"             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
3936 			"          %ip_a3v2f16 = OpTypePointer Input %a3v2f16\n"
3937 			"%v2f16_v2f16_function = OpTypeFunction %v2f16 %v2f16\n"
3938 			"            %op_v2f16 = OpTypePointer Output %v2f16\n"
3939 			"          %op_a3v2f16 = OpTypePointer Output %a3v2f16\n",
3940 
3941 			"v2f16",
3942 			"v2f16",
3943 			2 * 4,
3944 			2,
3945 		}
3946 	};
3947 
3948 	for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
3949 	{
3950 		fragments["interface_op_call"]			= cases[caseIdx].interfaceOpCall;
3951 		fragments["interface_op_func"]			= cases[caseIdx].interfaceOpFunc;
3952 		fragments["pre_main"]					= cases[caseIdx].preMain;
3953 
3954 		fragments["input_type"]					= cases[caseIdx].inputType;
3955 		fragments["output_type"]				= cases[caseIdx].outputType;
3956 
3957 		GraphicsInterfaces	interfaces;
3958 		const deUint32		numPerCase			= cases[caseIdx].numPerCase;
3959 		vector<deFloat16>	subInputsOutputs	(numPerCase);
3960 
3961 		for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
3962 		{
3963 			string testName = string(cases[caseIdx].name) + numberToString(caseNdx);
3964 
3965 			for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
3966 				subInputsOutputs[numNdx] = float16Data[caseNdx * numPerCase + numNdx];
3967 
3968 			interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16), BufferSp(new Float16Buffer(subInputsOutputs))),
3969 									  std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16), BufferSp(new Float16Buffer(subInputsOutputs))));
3970 
3971 			createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
3972 		}
3973 	}
3974 }
3975 
addShaderCode16BitStorageInputOutput16To16x2(vk::SourceCollections & dst,TestDefinition def)3976 void addShaderCode16BitStorageInputOutput16To16x2 (vk::SourceCollections& dst, TestDefinition def)
3977 {
3978 	SpirvVersion			targetSpirvVersion	= def.instanceContext.resources.spirvVersion;
3979 	const deUint32			vulkanVersion		= dst.usedVulkanVersion;
3980 	map<string, string>		spec;
3981 
3982 	switch(def.dataType)
3983 	{
3984 		case DATATYPE_FLOAT:
3985 			spec["type"]			= "f";
3986 			spec["convert"]			= "OpFConvert";
3987 			spec["scale"]			= "%x = OpCopyObject %f32 %dataIn0_converted\n%y = OpCopyObject %f32 %dataIn1_converted\n";
3988 			spec["colorConstruct"]	= "OpCompositeConstruct %v4f32 %x %y %c_f32_1 %c_f32_1";
3989 			spec["interpolation0"]	= spec["interpolation1"] = "";
3990 			break;
3991 
3992 		case DATATYPE_VEC2:
3993 			spec["type"]			= "v2f";
3994 			spec["convert"]			= "OpFConvert";
3995 			spec["scale"]			= "%xy = OpCopyObject %v2f32 %dataIn0_converted\n%zw = OpCopyObject %v2f32 %dataIn1_converted\n";
3996 			spec["colorConstruct"]	= "OpCompositeConstruct %v4f32 %xy %zw";
3997 			spec["interpolation0"]	= spec["interpolation1"] = "";
3998 			break;
3999 
4000 		case DATATYPE_INT:
4001 			spec["type"]			= "i";
4002 			spec["convert"]			= "OpSConvert";
4003 			spec["scale"]			= "%x_unscaled = OpConvertSToF %f32 %dataIn0_converted\n%x = OpFDiv %f32 %x_unscaled %scale_f32\n%y_unscaled = OpConvertSToF %f32 %dataIn1_converted\n%y = OpFDiv %f32 %y_unscaled %scale_f32\n";
4004 			spec["colorConstruct"]	= "OpCompositeConstruct %v4f32 %x %y %c_f32_1 %c_f32_1";
4005 			spec["interpolation0"]	= "OpDecorate %dataIn0 Flat";
4006 			spec["interpolation1"]	= "OpDecorate %dataIn1 Flat";
4007 			break;
4008 
4009 		case DATATYPE_UINT:
4010 			spec["type"]			= "u";
4011 			spec["convert"]			= "OpUConvert";
4012 			spec["scale"]			= "%x_unscaled = OpConvertUToF %f32 %dataIn0_converted\n%x = OpFDiv %f32 %x_unscaled %scale_f32\n%y_unscaled = OpConvertUToF %f32 %dataIn1_converted\n%y = OpFDiv %f32 %y_unscaled %scale_f32\n";
4013 			spec["colorConstruct"]	= "OpCompositeConstruct %v4f32 %x %y %c_f32_1 %c_f32_1";
4014 			spec["interpolation0"]	= "OpDecorate %dataIn0 Flat";
4015 			spec["interpolation1"]	= "OpDecorate %dataIn1 Flat";
4016 			break;
4017 
4018 		case DATATYPE_IVEC2:
4019 			spec["type"]			= "v2i";
4020 			spec["convert"]			= "OpSConvert";
4021 			spec["scale"]			= "%xy_unscaled = OpConvertSToF %v2f32 %dataIn0_converted\n%xy = OpFDiv %v2f32 %xy_unscaled %scale_v2f32\n%zw_unscaled = OpConvertSToF %v2f32 %dataIn1_converted\n%zw = OpFDiv %v2f32 %zw_unscaled %scale_v2f32\n";
4022 			spec["colorConstruct"]	= "OpCompositeConstruct %v4f32 %xy %zw";
4023 			spec["interpolation0"]	= "OpDecorate %dataIn0 Flat";
4024 			spec["interpolation1"]	= "OpDecorate %dataIn1 Flat";
4025 			break;
4026 
4027 		case DATATYPE_UVEC2:
4028 			spec["type"]			= "v2u";
4029 			spec["convert"]			= "OpUConvert";
4030 			spec["scale"]			= "%xy_unscaled = OpConvertUToF %v2f32 %dataIn0_converted\n%xy = OpFDiv %v2f32 %xy_unscaled %scale_v2f32\n%zw_unscaled = OpConvertUToF %v2f32 %dataIn1_converted\n%zw = OpFDiv %v2f32 %zw_unscaled %scale_v2f32\n";
4031 			spec["colorConstruct"]	= "OpCompositeConstruct %v4f32 %xy %zw";
4032 			spec["interpolation0"]	= "OpDecorate %dataIn0 Flat";
4033 			spec["interpolation1"]	= "OpDecorate %dataIn1 Flat";
4034 			break;
4035 
4036 		default:
4037 			DE_FATAL("Unexpected data type");
4038 			break;
4039 	};
4040 
4041 	// Read input data from binding 1, location 2. Should have value(s) of 0.5 in 16bit float or 32767 in 16bit int.
4042 	// Store the value to two outputs (dataOut0 and 1).
4043 	StringTemplate			vertexShader		(
4044 		"                             OpCapability Shader\n"
4045 		"                             OpCapability StorageInputOutput16\n"
4046 		"                             OpExtension \"SPV_KHR_16bit_storage\"\n"
4047 		"                        %1 = OpExtInstImport \"GLSL.std.450\"\n"
4048 		"                             OpMemoryModel Logical GLSL450\n"
4049 		"                             OpEntryPoint Vertex %main \"main\" %_ %position %vtxColor %dataIn %color %dataOut0 %dataOut1\n"
4050 		"                             OpSource GLSL 430\n"
4051 		"                             OpMemberDecorate %gl_PerVertex 0 BuiltIn Position\n"
4052 		"                             OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize\n"
4053 		"                             OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance\n"
4054 		"                             OpDecorate %gl_PerVertex Block\n"
4055 		"                             OpDecorate %position Location 0\n"
4056 		"                             OpDecorate %vtxColor Location 1\n"
4057 		"                             OpDecorate %dataIn Binding 1\n"
4058 		"                             OpDecorate %dataIn Location 2\n"
4059 		"                             OpDecorate %color Location 1\n"
4060 		"                             OpDecorate %dataOut0 Location 2\n"
4061 		"                             OpDecorate %dataOut1 Location 3\n"
4062 		"                     %void = OpTypeVoid\n"
4063 		"                %void_func = OpTypeFunction %void\n"
4064 		"                      %f32 = OpTypeFloat 32\n"
4065 		"                      %f16 = OpTypeFloat 16\n"
4066 		"                      %i32 = OpTypeInt 32 1\n"
4067 		"                      %i16 = OpTypeInt 16 1\n"
4068 		"                      %u32 = OpTypeInt 32 0\n"
4069 		"                      %u16 = OpTypeInt 16 0\n"
4070 		"                    %v4f32 = OpTypeVector %f32 4\n"
4071 		"                    %v2f32 = OpTypeVector %f32 2\n"
4072 		"                    %v2f16 = OpTypeVector %f16 2\n"
4073 		"                    %v2i32 = OpTypeVector %i32 2\n"
4074 		"                    %v2i16 = OpTypeVector %i16 2\n"
4075 		"                    %v2u32 = OpTypeVector %u32 2\n"
4076 		"                    %v2u16 = OpTypeVector %u16 2\n"
4077 		"                    %u32_0 = OpConstant %u32 0\n"
4078 		"                    %u32_1 = OpConstant %u32 1\n"
4079 		"           %_arr_f32_u32_1 = OpTypeArray %f32 %u32_1\n"
4080 		"             %gl_PerVertex = OpTypeStruct %v4f32 %f32 %_arr_f32_u32_1\n"
4081 		" %_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex\n"
4082 		"        %_ptr_Output_v4f32 = OpTypePointer Output %v4f32\n"
4083 		"    %_ptr_Output_${type}16 = OpTypePointer Output %${type}16\n"
4084 		"     %_ptr_Input_${type}16 = OpTypePointer Input %${type}16\n"
4085 		"         %_ptr_Input_v4f32 = OpTypePointer Input %v4f32\n"
4086 		"                        %_ = OpVariable %_ptr_Output_gl_PerVertex Output\n"
4087 		"                   %dataIn = OpVariable %_ptr_Input_${type}16 Input\n"
4088 		"                 %position = OpVariable %_ptr_Input_v4f32 Input\n"
4089 		"                    %color = OpVariable %_ptr_Input_v4f32 Input\n"
4090 		"                 %vtxColor = OpVariable %_ptr_Output_v4f32 Output\n"
4091 		"                 %dataOut0 = OpVariable %_ptr_Output_${type}16 Output\n"
4092 		"                 %dataOut1 = OpVariable %_ptr_Output_${type}16 Output\n"
4093 		"                     %main = OpFunction %void None %void_func\n"
4094 		"                    %entry = OpLabel\n"
4095 		"                  %posData = OpLoad %v4f32 %position\n"
4096 		"             %posOutputPtr = OpAccessChain %_ptr_Output_v4f32 %_ %u32_0\n"
4097 		"                             OpStore %posOutputPtr %posData\n"
4098 		"                %colorData = OpLoad %v4f32 %color\n"
4099 		"                             OpStore %vtxColor %colorData\n"
4100 		"                        %d = OpLoad %${type}16 %dataIn\n"
4101 		"                             OpStore %dataOut0 %d\n"
4102 		"                             OpStore %dataOut1 %d\n"
4103 		"                             OpReturn\n"
4104 		"                             OpFunctionEnd\n");
4105 
4106 	// Scalar:
4107 	// Read two 16bit values from vertex shader. Convert to 32bit and store as
4108 	// fragment color of (val0, val1, 1.0, 1.0). Val0 and 1 should equal to 0.5.
4109 	// Vector:
4110 	// Read two 16bit vec2s from vertex shader. Convert to 32bit and store as
4111 	// fragment color of (val0.x, val0.y, val1.x, val1.y). Val0 and 1 should equal to (0.5, 0.5).
4112 	StringTemplate			fragmentShader		(
4113 		"                             OpCapability Shader\n"
4114 		"                             OpCapability StorageInputOutput16\n"
4115 		"                             OpExtension \"SPV_KHR_16bit_storage\"\n"
4116 		"                        %1 = OpExtInstImport \"GLSL.std.450\"\n"
4117 		"                             OpMemoryModel Logical GLSL450\n"
4118 		"                             OpEntryPoint Fragment %main \"main\" %fragColor %dataOut %vtxColor %dataIn0 %dataIn1\n"
4119 		"                             OpExecutionMode %main OriginUpperLeft\n"
4120 		"                             OpSource GLSL 430\n"
4121 		"                             OpDecorate %vtxColor Location 1\n"
4122 		"                             OpDecorate %dataIn0 Location 2\n"
4123 		"                             OpDecorate %dataIn1 Location 3\n"
4124 		"                             ${interpolation0}\n"
4125 		"                             ${interpolation1}\n"
4126 		"                             OpDecorate %fragColor Location 0\n"
4127 		"                             OpDecorate %dataOut Location 1\n"
4128 		"                     %void = OpTypeVoid\n"
4129 		"                %void_func = OpTypeFunction %void\n"
4130 		"                      %f32 = OpTypeFloat 32\n"
4131 		"                      %f16 = OpTypeFloat 16\n"
4132 		"                      %i32 = OpTypeInt 32 1\n"
4133 		"                      %i16 = OpTypeInt 16 1\n"
4134 		"                      %u32 = OpTypeInt 32 0\n"
4135 		"                      %u16 = OpTypeInt 16 0\n"
4136 		"                    %v2f32 = OpTypeVector %f32 2\n"
4137 		"                    %v2f16 = OpTypeVector %f16 2\n"
4138 		"                    %v4f32 = OpTypeVector %f32 4\n"
4139 		"                    %v2i32 = OpTypeVector %i32 2\n"
4140 		"                    %v2i16 = OpTypeVector %i16 2\n"
4141 		"                    %v2u32 = OpTypeVector %u32 2\n"
4142 		"                    %v2u16 = OpTypeVector %u16 2\n"
4143 		"        %_ptr_Output_v4f32 = OpTypePointer Output %v4f32\n"
4144 		"    %_ptr_Output_${type}16 = OpTypePointer Output %${type}16\n"
4145 		"                %fragColor = OpVariable %_ptr_Output_v4f32 Output\n"
4146 		"                  %dataOut = OpVariable %_ptr_Output_${type}16 Output\n"
4147 		"     %_ptr_Input_${type}16 = OpTypePointer Input %${type}16\n"
4148 		"         %_ptr_Input_v4f32 = OpTypePointer Input %v4f32\n"
4149 		"                 %vtxColor = OpVariable %_ptr_Input_v4f32 Input\n"
4150 		"                  %dataIn0 = OpVariable %_ptr_Input_${type}16 Input\n"
4151 		"                  %dataIn1 = OpVariable %_ptr_Input_${type}16 Input\n"
4152 		"                  %c_f32_1 = OpConstant %f32 1\n"
4153 		"                %scale_f32 = OpConstant %f32 65534.0\n"
4154 		"              %scale_v2f32 = OpConstantComposite %v2f32 %scale_f32 %scale_f32\n"
4155 		"                     %main = OpFunction %void None %void_func\n"
4156 		"                    %entry = OpLabel\n"
4157 		"              %dataIn0_val = OpLoad %${type}16 %dataIn0\n"
4158 		"              %dataIn1_val = OpLoad %${type}16 %dataIn1\n"
4159 		"        %dataIn0_converted = ${convert} %${type}32 %dataIn0_val\n"
4160 		"        %dataIn1_converted = ${convert} %${type}32 %dataIn1_val\n"
4161 		"${scale}"
4162 		"                    %color = ${colorConstruct}\n"
4163 		"                             OpStore %fragColor %color\n"
4164 		"                             OpStore %dataOut %dataIn0_val\n"
4165 		"                             OpReturn\n"
4166 		"                             OpFunctionEnd\n");
4167 
4168 	dst.spirvAsmSources.add("vert", DE_NULL) << vertexShader.specialize(spec) << SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4169 	dst.spirvAsmSources.add("frag", DE_NULL) << fragmentShader.specialize(spec) << SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4170 }
4171 
runAndVerifyDefaultPipeline(Context & context,TestDefinition testDef)4172 TestStatus runAndVerifyDefaultPipeline (Context& context, TestDefinition testDef)
4173 {
4174 	return runAndVerifyDefaultPipeline (context, testDef.instanceContext);
4175 }
4176 
addGraphics16BitStorageInputOutputFloat16To16x2Group(tcu::TestCaseGroup * testGroup)4177 void addGraphics16BitStorageInputOutputFloat16To16x2Group (tcu::TestCaseGroup* testGroup)
4178 {
4179 	RGBA					defaultColors[4];
4180 	SpecConstants			noSpecConstants;
4181 	PushConstants			noPushConstants;
4182 	vector<string>			extensions;
4183 	map<string, string>		noFragments;
4184 	GraphicsResources		noResources;
4185 	StageToSpecConstantMap	specConstantMap;
4186 	VulkanFeatures			requiredFeatures;
4187 
4188 	const ShaderElement		pipelineStages[]		=
4189 	{
4190 		ShaderElement("vert", "main", VK_SHADER_STAGE_VERTEX_BIT),
4191 		ShaderElement("frag", "main", VK_SHADER_STAGE_FRAGMENT_BIT),
4192 	};
4193 
4194 	specConstantMap[VK_SHADER_STAGE_VERTEX_BIT]		= noSpecConstants;
4195 	specConstantMap[VK_SHADER_STAGE_FRAGMENT_BIT]	= noSpecConstants;
4196 
4197 	getDefaultColors(defaultColors);
4198 
4199 	extensions.push_back("VK_KHR_16bit_storage");
4200 	requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
4201 
4202 	const struct
4203 	{
4204 		string			name;
4205 		deUint32		numElements;
4206 		TestDefDataType	dataType;
4207 		NumberType		numberType;
4208 		bool			isVector;
4209 	} cases[] =
4210 	{
4211 		{ "scalar",	1,	DATATYPE_FLOAT,	NUMBERTYPE_FLOAT16,	false	},
4212 		{ "vec2",	2,	DATATYPE_VEC2,	NUMBERTYPE_FLOAT16,	true	},
4213 	};
4214 
4215 	for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
4216 	{
4217 		const RGBA				outColor			(128u, 128u, cases[caseIdx].isVector ? 128u : 255u, cases[caseIdx].isVector ? 128u : 255u);
4218 		RGBA					outputColors[4]		= {outColor, outColor, outColor, outColor};
4219 		vector<deFloat16>		float16Data			(4 * cases[caseIdx].numElements, deFloat32To16(0.5f));
4220 		GraphicsInterfaces		interfaces;
4221 
4222 		interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, cases[caseIdx].numberType), BufferSp(new Float16Buffer(float16Data))),
4223 								  std::make_pair(IFDataType(cases[caseIdx].numElements, cases[caseIdx].numberType), BufferSp(new Float16Buffer(float16Data))));
4224 
4225 		const InstanceContext&	instanceContext		= createInstanceContext(pipelineStages,
4226 																			defaultColors,
4227 																			outputColors,
4228 																			noFragments,
4229 																			specConstantMap,
4230 																			noPushConstants,
4231 																			noResources,
4232 																			interfaces,
4233 																			extensions,
4234 																			requiredFeatures,
4235 																			VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
4236 																			QP_TEST_RESULT_FAIL,
4237 																			string());
4238 
4239 		TestDefinition	testDef						= {instanceContext, cases[caseIdx].dataType};
4240 
4241 		addFunctionCaseWithPrograms<TestDefinition>(testGroup,
4242 													cases[caseIdx].name,
4243 													"",
4244 													addShaderCode16BitStorageInputOutput16To16x2,
4245 													runAndVerifyDefaultPipeline,
4246 													testDef);
4247 	}
4248 }
4249 
addGraphics16BitStorageInputOutputInt16To16x2Group(tcu::TestCaseGroup * testGroup)4250 void addGraphics16BitStorageInputOutputInt16To16x2Group (tcu::TestCaseGroup* testGroup)
4251 {
4252 	map<string, string>		fragments;
4253 	RGBA					defaultColors[4];
4254 	SpecConstants			noSpecConstants;
4255 	PushConstants			noPushConstants;
4256 	vector<string>			extensions;
4257 	GraphicsResources		noResources;
4258 	StageToSpecConstantMap	specConstantMap;
4259 	VulkanFeatures			requiredFeatures;
4260 
4261 	const ShaderElement		pipelineStages[]		=
4262 	{
4263 		ShaderElement("vert", "main", VK_SHADER_STAGE_VERTEX_BIT),
4264 		ShaderElement("frag", "main", VK_SHADER_STAGE_FRAGMENT_BIT),
4265 	};
4266 
4267 	specConstantMap[VK_SHADER_STAGE_VERTEX_BIT]		= noSpecConstants;
4268 	specConstantMap[VK_SHADER_STAGE_FRAGMENT_BIT]	= noSpecConstants;
4269 
4270 	getDefaultColors(defaultColors);
4271 
4272 	extensions.push_back("VK_KHR_16bit_storage");
4273 	requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
4274 	requiredFeatures.coreFeatures.shaderInt16 = DE_TRUE;
4275 
4276 	const struct
4277 	{
4278 		string			name;
4279 		deUint32		numElements;
4280 		TestDefDataType	dataType;
4281 		NumberType		numberType;
4282 		bool			isVector;
4283 	} cases[] =
4284 	{
4285 		{ "scalar_int",		1,	DATATYPE_INT,	NUMBERTYPE_INT16,	false	},
4286 		{ "scalar_uint",	1,	DATATYPE_UINT,	NUMBERTYPE_UINT16,	false	},
4287 		{ "ivec2",			2,	DATATYPE_IVEC2,	NUMBERTYPE_INT16,	true	},
4288 		{ "uvec2",			2,	DATATYPE_UVEC2,	NUMBERTYPE_UINT16,	true	}
4289 	};
4290 
4291 	for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
4292 	{
4293 		const RGBA				outColor			(128u, 128u, cases[caseIdx].isVector ? 128u : 255u, cases[caseIdx].isVector ? 128u : 255u);
4294 		RGBA					outputColors[4]		= {outColor, outColor, outColor, outColor};
4295 		vector<deInt16>			int16Data			(4 * cases[caseIdx].numElements, 32767);
4296 		GraphicsInterfaces		interfaces;
4297 
4298 		interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, cases[caseIdx].numberType), BufferSp(new Int16Buffer(int16Data))),
4299 								  std::make_pair(IFDataType(cases[caseIdx].numElements, cases[caseIdx].numberType), BufferSp(new Int16Buffer(int16Data))));
4300 
4301 		const InstanceContext&	instanceContext		= createInstanceContext(pipelineStages,
4302 																			defaultColors,
4303 																			outputColors,
4304 																			fragments,
4305 																			specConstantMap,
4306 																			noPushConstants,
4307 																			noResources,
4308 																			interfaces,
4309 																			extensions,
4310 																			requiredFeatures,
4311 																			VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
4312 																			QP_TEST_RESULT_FAIL,
4313 																			string());
4314 
4315 		TestDefinition	testDef						= {instanceContext, cases[caseIdx].dataType};
4316 
4317 		addFunctionCaseWithPrograms<TestDefinition>(testGroup,
4318 													cases[caseIdx].name,
4319 													"",
4320 													addShaderCode16BitStorageInputOutput16To16x2,
4321 													runAndVerifyDefaultPipeline,
4322 													testDef);
4323 	}
4324 }
4325 
addGraphics16BitStorageInputOutputInt32To16Group(tcu::TestCaseGroup * testGroup)4326 void addGraphics16BitStorageInputOutputInt32To16Group (tcu::TestCaseGroup* testGroup)
4327 {
4328 	de::Random							rnd					(deStringHash(testGroup->getName()));
4329 	RGBA								defaultColors[4];
4330 	vector<string>						extensions;
4331 	map<string, string>					fragments			= passthruFragments();
4332 	const deUint32						numDataPoints		= 64;
4333 	// inputs and outputs are declared to be vectors of signed integers.
4334 	// However, depending on the test, they may be interpreted as unsiged
4335 	// integers. That won't be a problem as long as we passed the bits
4336 	// in faithfully to the pipeline.
4337 	vector<deInt32>						inputs				= getInt32s(rnd, numDataPoints);
4338 	vector<deInt16>						outputs;
4339 
4340 	outputs.reserve(inputs.size());
4341 	for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
4342 		outputs.push_back(static_cast<deInt16>(0xffff & inputs[numNdx]));
4343 
4344 	extensions.push_back("VK_KHR_16bit_storage");
4345 
4346 	fragments["capability"]				= "OpCapability StorageInputOutput16\n";
4347 	fragments["extension"]				= "OpExtension \"SPV_KHR_16bit_storage\"\n";
4348 
4349 	getDefaultColors(defaultColors);
4350 
4351 	const StringTemplate scalarInterfaceOpCall(
4352 			"${convert} %${type16}");
4353 
4354 	const StringTemplate	scalarInterfaceOpFunc("");
4355 
4356 	const StringTemplate	scalarPreMain(
4357 			"             %${type16} = OpTypeInt 16 ${signed}\n"
4358 			"          %op_${type16} = OpTypePointer Output %${type16}\n"
4359 			"           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
4360 			"        %op_a3${type16} = OpTypePointer Output %a3${type16}\n"
4361 			"%${type16}_${type32}_function = OpTypeFunction %${type16} %${type32}\n"
4362 			"           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
4363 			"        %ip_a3${type32} = OpTypePointer Input %a3${type32}\n");
4364 
4365 	const StringTemplate vecInterfaceOpCall(
4366 			"${convert} %${type16}");
4367 
4368 	const StringTemplate	vecInterfaceOpFunc("");
4369 
4370 	const StringTemplate	vecPreMain(
4371 			"	                %i16 = OpTypeInt 16 1\n"
4372 			"	                %u16 = OpTypeInt 16 0\n"
4373 			"                 %v4i16 = OpTypeVector %i16 4\n"
4374 			"                 %v4u16 = OpTypeVector %u16 4\n"
4375 			"          %op_${type16} = OpTypePointer Output %${type16}\n"
4376 			"           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
4377 			"        %op_a3${type16} = OpTypePointer Output %a3${type16}\n"
4378 			"%${type16}_${type32}_function = OpTypeFunction %${type16} %${type32}\n"
4379 			"           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
4380 			"        %ip_a3${type32} = OpTypePointer Input %a3${type32}\n");
4381 
4382 	struct Case
4383 	{
4384 		const char*				name;
4385 		const StringTemplate&	interfaceOpCall;
4386 		const StringTemplate&	interfaceOpFunc;
4387 		const StringTemplate&	preMain;
4388 		const char*				type32;
4389 		const char*				type16;
4390 		const char*				sign;
4391 		const char*				opcode;
4392 		deUint32				numPerCase;
4393 		deUint32				numElements;
4394 	};
4395 
4396 	Case	cases[]		=
4397 	{
4398 		{"scalar_sint",	scalarInterfaceOpCall, scalarInterfaceOpFunc,	scalarPreMain,	"i32",		"i16",		"1",	"OpSConvert",	4,		1},
4399 		{"scalar_uint",	scalarInterfaceOpCall, scalarInterfaceOpFunc,	scalarPreMain,	"u32",		"u16",		"0",	"OpUConvert",	4,		1},
4400 		{"vector_sint",	vecInterfaceOpCall,	vecInterfaceOpFunc,		vecPreMain,		"v4i32",	"v4i16",	"1",	"OpSConvert",	4 * 4,	4},
4401 		{"vector_uint",	vecInterfaceOpCall,	vecInterfaceOpFunc,		vecPreMain,		"v4u32",	"v4u16",	"0",	"OpUConvert",	4 * 4,	4},
4402 	};
4403 
4404 	VulkanFeatures	requiredFeatures;
4405 	requiredFeatures.coreFeatures.shaderInt16 = DE_TRUE;
4406 	requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
4407 
4408 	for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
4409 	{
4410 		map<string, string>				specs;
4411 
4412 		specs["type32"]					= cases[caseIdx].type32;
4413 		specs["type16"]					= cases[caseIdx].type16;
4414 		specs["signed"]					= cases[caseIdx].sign;
4415 		specs["convert"]				= cases[caseIdx].opcode;
4416 
4417 		fragments["pre_main"]			= cases[caseIdx].preMain.specialize(specs);
4418 		fragments["interface_op_call"]  = cases[caseIdx].interfaceOpCall.specialize(specs);
4419 		fragments["interface_op_func"]	= cases[caseIdx].interfaceOpFunc.specialize(specs);
4420 		fragments["input_type"]			= cases[caseIdx].type32;
4421 		fragments["output_type"]		= cases[caseIdx].type16;
4422 
4423 		GraphicsInterfaces				interfaces;
4424 		const deUint32					numPerCase	= cases[caseIdx].numPerCase;
4425 		vector<deInt32>					subInputs	(numPerCase);
4426 		vector<deInt16>					subOutputs	(numPerCase);
4427 
4428 		for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
4429 		{
4430 			string			testName	= string(cases[caseIdx].name) + numberToString(caseNdx);
4431 
4432 			for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
4433 			{
4434 				subInputs[numNdx]	= inputs[caseNdx * numPerCase + numNdx];
4435 				subOutputs[numNdx]	= outputs[caseNdx * numPerCase + numNdx];
4436 			}
4437 			if (strcmp(cases[caseIdx].sign, "1") == 0)
4438 			{
4439 				interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT32), BufferSp(new Int32Buffer(subInputs))),
4440 										  std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT16), BufferSp(new Int16Buffer(subOutputs))));
4441 			}
4442 			else
4443 			{
4444 				interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT32), BufferSp(new Int32Buffer(subInputs))),
4445 										  std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT16), BufferSp(new Int16Buffer(subOutputs))));
4446 			}
4447 			createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
4448 		}
4449 	}
4450 }
4451 
addGraphics16BitStorageInputOutputInt16To32Group(tcu::TestCaseGroup * testGroup)4452 void addGraphics16BitStorageInputOutputInt16To32Group (tcu::TestCaseGroup* testGroup)
4453 {
4454 	de::Random							rnd					(deStringHash(testGroup->getName()));
4455 	RGBA								defaultColors[4];
4456 	vector<string>						extensions;
4457 	map<string, string>					fragments			= passthruFragments();
4458 	const deUint32						numDataPoints		= 64;
4459 	// inputs and outputs are declared to be vectors of signed integers.
4460 	// However, depending on the test, they may be interpreted as unsiged
4461 	// integers. That won't be a problem as long as we passed the bits
4462 	// in faithfully to the pipeline.
4463 	vector<deInt16>						inputs				= getInt16s(rnd, numDataPoints);
4464 	vector<deInt32>						sOutputs;
4465 	vector<deInt32>						uOutputs;
4466 	const deUint16						signBitMask			= 0x8000;
4467 	const deUint32						signExtendMask		= 0xffff0000;
4468 
4469 	sOutputs.reserve(inputs.size());
4470 	uOutputs.reserve(inputs.size());
4471 
4472 	for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
4473 	{
4474 		uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
4475 		if (inputs[numNdx] & signBitMask)
4476 			sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
4477 		else
4478 			sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
4479 	}
4480 
4481 	extensions.push_back("VK_KHR_16bit_storage");
4482 
4483 	fragments["capability"]				= "OpCapability StorageInputOutput16\n";
4484 	fragments["extension"]				= "OpExtension \"SPV_KHR_16bit_storage\"\n";
4485 
4486 	getDefaultColors(defaultColors);
4487 
4488 	const StringTemplate scalarIfOpCall (
4489 			"${convert} %${type32}");
4490 
4491 	const StringTemplate scalarIfOpFunc	("");
4492 
4493 	const StringTemplate scalarPreMain	(
4494 			"             %${type16} = OpTypeInt 16 ${signed}\n"
4495 			"          %ip_${type16} = OpTypePointer Input %${type16}\n"
4496 			"           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
4497 			"        %ip_a3${type16} = OpTypePointer Input %a3${type16}\n"
4498 			"%${type32}_${type16}_function = OpTypeFunction %${type32} %${type16}\n"
4499 			"           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
4500 			"        %op_a3${type32} = OpTypePointer Output %a3${type32}\n");
4501 
4502 	const StringTemplate vecIfOpCall (
4503 			"${convert} %${type32}");
4504 
4505 	const StringTemplate vecIfOpFunc	("");
4506 
4507 	const StringTemplate vecPreMain	(
4508 			"	                %i16 = OpTypeInt 16 1\n"
4509 			"	                %u16 = OpTypeInt 16 0\n"
4510 			"                 %v4i16 = OpTypeVector %i16 4\n"
4511 			"                 %v4u16 = OpTypeVector %u16 4\n"
4512 			"          %ip_${type16} = OpTypePointer Input %${type16}\n"
4513 			"           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
4514 			"        %ip_a3${type16} = OpTypePointer Input %a3${type16}\n"
4515 			"%${type32}_${type16}_function = OpTypeFunction %${type32} %${type16}\n"
4516 			"           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
4517 			"        %op_a3${type32} = OpTypePointer Output %a3${type32}\n");
4518 
4519 	struct Case
4520 	{
4521 		const char*				name;
4522 		const StringTemplate&	interfaceOpCall;
4523 		const StringTemplate&	interfaceOpFunc;
4524 		const StringTemplate&	preMain;
4525 		const char*				type32;
4526 		const char*				type16;
4527 		const char*				sign;
4528 		const char*				opcode;
4529 		deUint32				numPerCase;
4530 		deUint32				numElements;
4531 	};
4532 
4533 	Case	cases[]		=
4534 	{
4535 		{"scalar_sint",	scalarIfOpCall, scalarIfOpFunc,	scalarPreMain,	"i32",		"i16",		"1",	"OpSConvert",	4,		1},
4536 		{"scalar_uint",	scalarIfOpCall, scalarIfOpFunc,	scalarPreMain,	"u32",		"u16",		"0",	"OpUConvert",	4,		1},
4537 		{"vector_sint",	vecIfOpCall,	vecIfOpFunc,	vecPreMain,		"v4i32",	"v4i16",	"1",	"OpSConvert",	4 * 4,	4},
4538 		{"vector_uint",	vecIfOpCall,	vecIfOpFunc,	vecPreMain,		"v4u32",	"v4u16",	"0",	"OpUConvert",	4 * 4,	4},
4539 	};
4540 
4541 	VulkanFeatures	requiredFeatures;
4542 	requiredFeatures.coreFeatures.shaderInt16 = DE_TRUE;
4543 	requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
4544 
4545 	for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
4546 	{
4547 		map<string, string>				specs;
4548 
4549 		specs["type32"]					= cases[caseIdx].type32;
4550 		specs["type16"]					= cases[caseIdx].type16;
4551 		specs["signed"]					= cases[caseIdx].sign;
4552 		specs["convert"]				= cases[caseIdx].opcode;
4553 
4554 		fragments["pre_main"]			= cases[caseIdx].preMain.specialize(specs);
4555 		fragments["interface_op_call"]	= cases[caseIdx].interfaceOpCall.specialize(specs);
4556 		fragments["interface_op_func"]	= cases[caseIdx].interfaceOpFunc.specialize(specs);
4557 		fragments["input_type"]			= cases[caseIdx].type16;
4558 		fragments["output_type"]		= cases[caseIdx].type32;
4559 
4560 		GraphicsInterfaces				interfaces;
4561 		const deUint32					numPerCase	= cases[caseIdx].numPerCase;
4562 		vector<deInt16>					subInputs	(numPerCase);
4563 		vector<deInt32>					subOutputs	(numPerCase);
4564 
4565 		for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
4566 		{
4567 			string			testName	= string(cases[caseIdx].name) + numberToString(caseNdx);
4568 
4569 			for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
4570 			{
4571 				subInputs[numNdx]	= inputs[caseNdx * numPerCase + numNdx];
4572 				if (cases[caseIdx].sign[0] == '1')
4573 					subOutputs[numNdx]	= sOutputs[caseNdx * numPerCase + numNdx];
4574 				else
4575 					subOutputs[numNdx]	= uOutputs[caseNdx * numPerCase + numNdx];
4576 			}
4577 			if (strcmp(cases[caseIdx].sign, "1") == 0)
4578 			{
4579 				interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT16), BufferSp(new Int16Buffer(subInputs))),
4580 										  std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT32), BufferSp(new Int32Buffer(subOutputs))));
4581 			}
4582 			else
4583 			{
4584 				interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT16), BufferSp(new Int16Buffer(subInputs))),
4585 										  std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT32), BufferSp(new Int32Buffer(subOutputs))));
4586 			}
4587 			createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
4588 		}
4589 	}
4590 }
4591 
addGraphics16BitStorageInputOutputInt16To16Group(tcu::TestCaseGroup * testGroup)4592 void addGraphics16BitStorageInputOutputInt16To16Group (tcu::TestCaseGroup* testGroup)
4593 {
4594 	de::Random				rnd					(deStringHash(testGroup->getName()));
4595 	RGBA					defaultColors[4];
4596 	vector<string>			extensions;
4597 	map<string, string>		fragments			= passthruFragments();
4598 	const deUint32			numDataPoints		= 64;
4599 	// inputs and outputs are declared to be vectors of signed integers.
4600 	// However, depending on the test, they may be interpreted as unsiged
4601 	// integers. That won't be a problem as long as we passed the bits
4602 	// in faithfully to the pipeline.
4603 	vector<deInt16>			inputs				= getInt16s(rnd, numDataPoints);
4604 	VulkanFeatures			requiredFeatures;
4605 
4606 	requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
4607 	extensions.push_back("VK_KHR_16bit_storage");
4608 
4609 	fragments["capability"]						= "OpCapability StorageInputOutput16\n";
4610 	fragments["extension"]						= "OpExtension \"SPV_KHR_16bit_storage\"\n";
4611 
4612 	getDefaultColors(defaultColors);
4613 
4614     const StringTemplate  scalarIfOpCall    (
4615 			"OpCopyObject %${type16}");
4616 
4617 	const StringTemplate	scalarIfOpFunc		("");
4618 
4619 	const StringTemplate	scalarPreMain		(
4620 			"             %${type16} = OpTypeInt 16 ${signed}\n"
4621 			"          %ip_${type16} = OpTypePointer Input %${type16}\n"
4622 			"           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
4623 			"        %ip_a3${type16} = OpTypePointer Input %a3${type16}\n"
4624 			"%${type16}_${type16}_function = OpTypeFunction %${type16} %${type16}\n"
4625 			"          %op_${type16} = OpTypePointer Output %${type16}\n"
4626 			"        %op_a3${type16} = OpTypePointer Output %a3${type16}\n");
4627 
4628 	const StringTemplate  vecIfOpCall     (
4629 			"OpCopyObject %${type16}");
4630 
4631 	const StringTemplate	vecIfOpFunc			("");
4632 
4633 	const StringTemplate	vecPreMain			(
4634 			"                   %i16 = OpTypeInt 16 1\n"
4635 			"                   %u16 = OpTypeInt 16 0\n"
4636 			"                 %v4i16 = OpTypeVector %i16 4\n"
4637 			"                 %v4u16 = OpTypeVector %u16 4\n"
4638 			"          %ip_${type16} = OpTypePointer Input %${type16}\n"
4639 			"           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
4640 			"        %ip_a3${type16} = OpTypePointer Input %a3${type16}\n"
4641 			"%${type16}_${type16}_function = OpTypeFunction %${type16} %${type16}\n"
4642 			"          %op_${type16} = OpTypePointer Output %${type16}\n"
4643 			"        %op_a3${type16} = OpTypePointer Output %a3${type16}\n");
4644 
4645 	struct Case
4646 	{
4647 		const char*				name;
4648 		const StringTemplate&	interfaceOpCall;
4649 		const StringTemplate&	interfaceOpFunc;
4650 		const StringTemplate&	preMain;
4651 		const char*				type16;
4652 		const char*				sign;
4653 		deUint32				numPerCase;
4654 		deUint32				numElements;
4655 	};
4656 
4657 	Case					cases[]				=
4658 	{
4659 		{"scalar_sint",	scalarIfOpCall, scalarIfOpFunc,	scalarPreMain,	"i16",		"1",	4,		1},
4660 		{"scalar_uint",	scalarIfOpCall, scalarIfOpFunc,	scalarPreMain,	"u16",		"0",	4,		1},
4661 		{"vector_sint",	vecIfOpCall,    vecIfOpFunc,	vecPreMain,		"v4i16",	"1",	4 * 4,	4},
4662 		{"vector_uint",	vecIfOpCall,    vecIfOpFunc,	vecPreMain,		"v4u16",	"0",	4 * 4,	4},
4663 	};
4664 
4665 	for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
4666 	{
4667 		map<string, string>				specs;
4668 
4669 		specs["type16"]					= cases[caseIdx].type16;
4670 		specs["signed"]					= cases[caseIdx].sign;
4671 
4672 		fragments["pre_main"]			= cases[caseIdx].preMain.specialize(specs);
4673 		fragments["interface_op_call"]	= cases[caseIdx].interfaceOpCall.specialize(specs);
4674 		fragments["interface_op_func"]	= cases[caseIdx].interfaceOpFunc.specialize(specs);
4675 		fragments["input_type"]			= cases[caseIdx].type16;
4676 		fragments["output_type"]		= cases[caseIdx].type16;
4677 
4678 		GraphicsInterfaces				interfaces;
4679 		const deUint32					numPerCase			= cases[caseIdx].numPerCase;
4680 		vector<deInt16>					subInputsOutputs	(numPerCase);
4681 		const NumberType				numberType			= strcmp(cases[caseIdx].sign, "1") == 0 ? NUMBERTYPE_INT16 : NUMBERTYPE_UINT16;
4682 
4683 		for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
4684 		{
4685 			string testName = string(cases[caseIdx].name) + numberToString(caseNdx);
4686 
4687 			for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
4688 				subInputsOutputs[numNdx] = inputs[caseNdx * numPerCase + numNdx];
4689 
4690 			interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, numberType), BufferSp(new Int16Buffer(subInputsOutputs))),
4691 									  std::make_pair(IFDataType(cases[caseIdx].numElements, numberType), BufferSp(new Int16Buffer(subInputsOutputs))));
4692 
4693 			createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
4694 		}
4695 	}
4696 }
4697 
addGraphics16BitStoragePushConstantFloat16To32Group(tcu::TestCaseGroup * testGroup)4698 void addGraphics16BitStoragePushConstantFloat16To32Group (tcu::TestCaseGroup* testGroup)
4699 {
4700 	de::Random							rnd					(deStringHash(testGroup->getName()));
4701 	map<string, string>					fragments;
4702 	RGBA								defaultColors[4];
4703 	vector<string>						extensions;
4704 	GraphicsResources					resources;
4705 	PushConstants						pcs;
4706 	const deUint32						numDataPoints		= 64;
4707 	vector<deFloat16>					float16Data			(getFloat16s(rnd, numDataPoints));
4708 	vector<float>						float32Data;
4709 	VulkanFeatures						requiredFeatures;
4710 
4711 	struct ConstantIndex
4712 	{
4713 		bool		useConstantIndex;
4714 		deUint32	constantIndex;
4715 	};
4716 
4717 	ConstantIndex	constantIndices[] =
4718 	{
4719 		{ false,	0 },
4720 		{ true,		4 },
4721 		{ true,		5 },
4722 		{ true,		6 }
4723 	};
4724 
4725 	float32Data.reserve(numDataPoints);
4726 	for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
4727 		float32Data.push_back(deFloat16To32(float16Data[numIdx]));
4728 
4729 	extensions.push_back("VK_KHR_16bit_storage");
4730 
4731 	requiredFeatures.coreFeatures.vertexPipelineStoresAndAtomics	= true;
4732 	requiredFeatures.coreFeatures.fragmentStoresAndAtomics			= true;
4733 	requiredFeatures.ext16BitStorage								= EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
4734 
4735 	fragments["capability"]				= "OpCapability StoragePushConstant16\n";
4736 	fragments["extension"]				= "OpExtension \"SPV_KHR_16bit_storage\"";
4737 
4738 	pcs.setPushConstant(BufferSp(new Float16Buffer(float16Data)));
4739 	resources.verifyIO = check32BitFloats;
4740 
4741 	getDefaultColors(defaultColors);
4742 
4743 	const StringTemplate	testFun		(
4744 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
4745 		"    %param = OpFunctionParameter %v4f32\n"
4746 
4747 		"%entry = OpLabel\n"
4748 		"    %i = OpVariable %fp_i32 Function\n"
4749 		"         OpStore %i %c_i32_0\n"
4750 		"         OpBranch %loop\n"
4751 
4752 		" %loop = OpLabel\n"
4753 		"   %15 = OpLoad %i32 %i\n"
4754 		"   %lt = OpSLessThan %bool %15 ${count}\n"
4755 		"         OpLoopMerge %merge %inc None\n"
4756 		"         OpBranchConditional %lt %write %merge\n"
4757 
4758 		"%write = OpLabel\n"
4759 		"   %30 = OpLoad %i32 %i\n"
4760 		"  %src = OpAccessChain ${pp_type16} %pc16 %c_i32_0 %${arrayindex} ${index0:opt}\n"
4761 		"%val16 = OpLoad ${f_type16} %src\n"
4762 		"%val32 = OpFConvert ${f_type32} %val16\n"
4763 		"  %dst = OpAccessChain ${up_type32} %ssbo32 %c_i32_0 %30 ${index0:opt}\n"
4764 		"         OpStore %dst %val32\n"
4765 
4766 		"${store:opt}\n"
4767 
4768 		"         OpBranch %inc\n"
4769 
4770 		"  %inc = OpLabel\n"
4771 		"   %37 = OpLoad %i32 %i\n"
4772 		"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
4773 		"         OpStore %i %39\n"
4774 		"         OpBranch %loop\n"
4775 
4776 		"%merge = OpLabel\n"
4777 		"         OpReturnValue %param\n"
4778 
4779 		"OpFunctionEnd\n");
4780 
4781 	{  // Scalar cases
4782 		const StringTemplate	preMain		(
4783 			"      %f16 = OpTypeFloat 16\n"
4784 			" %c_i32_64 = OpConstant %i32 64\n"
4785 			" %c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
4786 			"  %a64f16 = OpTypeArray %f16 %c_i32_64\n"
4787 			"  %a64f32 = OpTypeArray %f32 %c_i32_64\n"
4788 			"   %pp_f16 = OpTypePointer PushConstant %f16\n"
4789 			"   %up_f32 = OpTypePointer Uniform %f32\n"
4790 			"   %SSBO32 = OpTypeStruct %a64f32\n"
4791 			"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
4792 			"   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
4793 			"     %PC16 = OpTypeStruct %a64f16\n"
4794 			"  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
4795 			"     %pc16 = OpVariable %pp_PC16 PushConstant\n");
4796 
4797 		fragments["decoration"]				=
4798 			"OpDecorate %a64f16 ArrayStride 2\n"
4799 			"OpDecorate %a64f32 ArrayStride 4\n"
4800 			"OpDecorate %SSBO32 BufferBlock\n"
4801 			"OpMemberDecorate %SSBO32 0 Offset 0\n"
4802 			"OpDecorate %PC16 Block\n"
4803 			"OpMemberDecorate %PC16 0 Offset 0\n"
4804 			"OpDecorate %ssbo32 DescriptorSet 0\n"
4805 			"OpDecorate %ssbo32 Binding 0\n";
4806 
4807 		map<string, string>		specs;
4808 
4809 		specs["count"]			= "%c_i32_64";
4810 		specs["pp_type16"]		= "%pp_f16";
4811 		specs["f_type16"]		= "%f16";
4812 		specs["f_type32"]		= "%f32";
4813 		specs["up_type32"]		= "%up_f32";
4814 
4815 		for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
4816 		{
4817 			bool			useConstIdx		= constantIndices[constIndexIdx].useConstantIndex;
4818 			deUint32		constIdx		= constantIndices[constIndexIdx].constantIndex;
4819 			string			testName		= "scalar";
4820 			vector<float>	float32ConstIdxData;
4821 
4822 			if (useConstIdx)
4823 			{
4824 				float32ConstIdxData.reserve(numDataPoints);
4825 
4826 				for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
4827 					float32ConstIdxData.push_back(float32Data[constIdx]);
4828 			}
4829 
4830 			specs["constarrayidx"]	= de::toString(constIdx);
4831 			if (useConstIdx)
4832 				specs["arrayindex"] = "c_i32_ci";
4833 			else
4834 				specs["arrayindex"] = "30";
4835 
4836 			resources.outputs.clear();
4837 			resources.outputs.push_back(Resource(BufferSp(new Float32Buffer(useConstIdx ? float32ConstIdxData : float32Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4838 
4839 			fragments["pre_main"]		= preMain.specialize(specs);
4840 			fragments["testfun"]		= testFun.specialize(specs);
4841 
4842 			if (useConstIdx)
4843 				testName += string("_const_idx_") + de::toString(constIdx);
4844 
4845 			createTestsForAllStages(testName.c_str(), defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
4846 		}
4847 	}
4848 
4849 	{  // Vector cases
4850 		const StringTemplate	preMain		(
4851 			"      %f16 = OpTypeFloat 16\n"
4852 			"    %v4f16 = OpTypeVector %f16 4\n"
4853 			" %c_i32_16 = OpConstant %i32 16\n"
4854 			" %c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
4855 			" %a16v4f16 = OpTypeArray %v4f16 %c_i32_16\n"
4856 			" %a16v4f32 = OpTypeArray %v4f32 %c_i32_16\n"
4857 			" %pp_v4f16 = OpTypePointer PushConstant %v4f16\n"
4858 			" %up_v4f32 = OpTypePointer Uniform %v4f32\n"
4859 			"   %SSBO32 = OpTypeStruct %a16v4f32\n"
4860 			"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
4861 			"   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
4862 			"     %PC16 = OpTypeStruct %a16v4f16\n"
4863 			"  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
4864 			"     %pc16 = OpVariable %pp_PC16 PushConstant\n");
4865 
4866 		fragments["decoration"]				=
4867 			"OpDecorate %a16v4f16 ArrayStride 8\n"
4868 			"OpDecorate %a16v4f32 ArrayStride 16\n"
4869 			"OpDecorate %SSBO32 BufferBlock\n"
4870 			"OpMemberDecorate %SSBO32 0 Offset 0\n"
4871 			"OpDecorate %PC16 Block\n"
4872 			"OpMemberDecorate %PC16 0 Offset 0\n"
4873 			"OpDecorate %ssbo32 DescriptorSet 0\n"
4874 			"OpDecorate %ssbo32 Binding 0\n";
4875 
4876 		map<string, string>		specs;
4877 
4878 		specs["count"]			= "%c_i32_16";
4879 		specs["pp_type16"]		= "%pp_v4f16";
4880 		specs["f_type16"]		= "%v4f16";
4881 		specs["f_type32"]		= "%v4f32";
4882 		specs["up_type32"]		= "%up_v4f32";
4883 
4884 		for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
4885 		{
4886 			bool			useConstIdx			= constantIndices[constIndexIdx].useConstantIndex;
4887 			deUint32		constIdx			= constantIndices[constIndexIdx].constantIndex;
4888 			string			testName			= "vector";
4889 			vector<float>	float32ConstIdxData;
4890 
4891 			if (useConstIdx)
4892 			{
4893 				float32ConstIdxData.reserve(numDataPoints);
4894 
4895 				for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
4896 					float32ConstIdxData.push_back(float32Data[constIdx * 4 + numIdx % 4]);
4897 			}
4898 
4899 			specs["constarrayidx"]	= de::toString(constIdx);
4900 			if (useConstIdx)
4901 				specs["arrayindex"] = "c_i32_ci";
4902 			else
4903 				specs["arrayindex"] = "30";
4904 
4905 			resources.outputs.clear();
4906 			resources.outputs.push_back(Resource(BufferSp(new Float32Buffer(useConstIdx ? float32ConstIdxData : float32Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4907 
4908 			fragments["pre_main"]	= preMain.specialize(specs);
4909 			fragments["testfun"]	= testFun.specialize(specs);
4910 
4911 			if (useConstIdx)
4912 				testName += string("_const_idx_") + de::toString(constIdx);
4913 
4914 			createTestsForAllStages(testName.c_str(), defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
4915 		}
4916 	}
4917 
4918 	{  // Matrix cases
4919 		const StringTemplate	preMain		(
4920 			"   %c_i32_8 = OpConstant %i32 8\n"
4921 			"  %c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
4922 			"      %f16  = OpTypeFloat 16\n"
4923 			"    %v4f16  = OpTypeVector %f16 4\n"
4924 			"  %m2v4f16  = OpTypeMatrix %v4f16 2\n"
4925 			"  %m2v4f32  = OpTypeMatrix %v4f32 2\n"
4926 			" %a8m2v4f16 = OpTypeArray %m2v4f16 %c_i32_8\n"
4927 			" %a8m2v4f32 = OpTypeArray %m2v4f32 %c_i32_8\n"
4928 			" %pp_v4f16  = OpTypePointer PushConstant %v4f16\n"
4929 			" %up_v4f32  = OpTypePointer Uniform %v4f32\n"
4930 			"   %SSBO32  = OpTypeStruct %a8m2v4f32\n"
4931 			"%up_SSBO32  = OpTypePointer Uniform %SSBO32\n"
4932 			"   %ssbo32  = OpVariable %up_SSBO32 Uniform\n"
4933 			"     %PC16  = OpTypeStruct %a8m2v4f16\n"
4934 			"  %pp_PC16  = OpTypePointer PushConstant %PC16\n"
4935 			"     %pc16  = OpVariable %pp_PC16 PushConstant\n");
4936 
4937 		fragments["decoration"]				=
4938 			"OpDecorate %a8m2v4f16 ArrayStride 16\n"
4939 			"OpDecorate %a8m2v4f32 ArrayStride 32\n"
4940 			"OpDecorate %SSBO32 BufferBlock\n"
4941 			"OpMemberDecorate %SSBO32 0 Offset 0\n"
4942 			"OpMemberDecorate %SSBO32 0 ColMajor\n"
4943 			"OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
4944 			"OpDecorate %PC16 Block\n"
4945 			"OpMemberDecorate %PC16 0 Offset 0\n"
4946 			"OpMemberDecorate %PC16 0 ColMajor\n"
4947 			"OpMemberDecorate %PC16 0 MatrixStride 8\n"
4948 			"OpDecorate %ssbo32 DescriptorSet 0\n"
4949 			"OpDecorate %ssbo32 Binding 0\n";
4950 
4951 		map<string, string>		specs;
4952 
4953 		specs["count"]			= "%c_i32_8";
4954 		specs["pp_type16"]		= "%pp_v4f16";
4955 		specs["up_type32"]		= "%up_v4f32";
4956 		specs["f_type16"]		= "%v4f16";
4957 		specs["f_type32"]		= "%v4f32";
4958 		specs["index0"]			= "%c_i32_0";
4959 
4960 		for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
4961 		{
4962 			bool					useConstIdx			= constantIndices[constIndexIdx].useConstantIndex;
4963 			deUint32				constIdx			= constantIndices[constIndexIdx].constantIndex;
4964 			string					testName			= "matrix";
4965 			vector<float>			float32ConstIdxData;
4966 			const StringTemplate	store				(
4967 				"  %src_1 = OpAccessChain %pp_v4f16 %pc16 %c_i32_0 %${arrayindex} %c_i32_1\n"
4968 				"%val16_1 = OpLoad %v4f16 %src_1\n"
4969 				"%val32_1 = OpFConvert %v4f32 %val16_1\n"
4970 				"  %dst_1 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_1\n"
4971 				"           OpStore %dst_1 %val32_1\n");
4972 
4973 			if (useConstIdx)
4974 			{
4975 				float32ConstIdxData.reserve(numDataPoints);
4976 
4977 				for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
4978 					float32ConstIdxData.push_back(float32Data[constIdx * 8 + numIdx % 8]);
4979 			}
4980 
4981 			specs["constarrayidx"]	= de::toString(constIdx);
4982 			if (useConstIdx)
4983 				specs["arrayindex"] = "c_i32_ci";
4984 			else
4985 				specs["arrayindex"] = "30";
4986 
4987 			specs["store"] = store.specialize(specs);
4988 
4989 			resources.outputs.clear();
4990 			resources.outputs.push_back(Resource(BufferSp(new Float32Buffer(useConstIdx ? float32ConstIdxData : float32Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4991 
4992 			fragments["pre_main"]		= preMain.specialize(specs);
4993 			fragments["testfun"]		= testFun.specialize(specs);
4994 
4995 			if (useConstIdx)
4996 				testName += string("_const_idx_") + de::toString(constIdx);
4997 
4998 			createTestsForAllStages(testName.c_str(), defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
4999 		}
5000 	}
5001 }
5002 
addGraphics16BitStoragePushConstantInt16To32Group(tcu::TestCaseGroup * testGroup)5003 void addGraphics16BitStoragePushConstantInt16To32Group (tcu::TestCaseGroup* testGroup)
5004 {
5005 	de::Random							rnd					(deStringHash(testGroup->getName()));
5006 	map<string, string>					fragments;
5007 	RGBA								defaultColors[4];
5008 	const deUint32						numDataPoints		= 64;
5009 	vector<deInt16>						inputs				= getInt16s(rnd, numDataPoints);
5010 	vector<deInt32>						sOutputs;
5011 	vector<deInt32>						uOutputs;
5012 	PushConstants						pcs;
5013 	GraphicsResources					resources;
5014 	vector<string>						extensions;
5015 	const deUint16						signBitMask			= 0x8000;
5016 	const deUint32						signExtendMask		= 0xffff0000;
5017 	VulkanFeatures						requiredFeatures;
5018 
5019 	struct ConstantIndex
5020 	{
5021 		bool		useConstantIndex;
5022 		deUint32	constantIndex;
5023 	};
5024 
5025 	ConstantIndex	constantIndices[] =
5026 	{
5027 		{ false,	0 },
5028 		{ true,		4 },
5029 		{ true,		5 },
5030 		{ true,		6 }
5031 	};
5032 
5033 	sOutputs.reserve(inputs.size());
5034 	uOutputs.reserve(inputs.size());
5035 
5036 	for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
5037 	{
5038 		uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
5039 		if (inputs[numNdx] & signBitMask)
5040 			sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
5041 		else
5042 			sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
5043 	}
5044 
5045 	extensions.push_back("VK_KHR_16bit_storage");
5046 
5047 	requiredFeatures.coreFeatures.vertexPipelineStoresAndAtomics	= true;
5048 	requiredFeatures.coreFeatures.fragmentStoresAndAtomics			= true;
5049 	requiredFeatures.ext16BitStorage								= EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
5050 
5051 	fragments["capability"]				= "OpCapability StoragePushConstant16\n";
5052 	fragments["extension"]				= "OpExtension \"SPV_KHR_16bit_storage\"";
5053 
5054 	pcs.setPushConstant(BufferSp(new Int16Buffer(inputs)));
5055 
5056 	getDefaultColors(defaultColors);
5057 
5058 	const StringTemplate	testFun		(
5059 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
5060 		"    %param = OpFunctionParameter %v4f32\n"
5061 
5062 		"%entry = OpLabel\n"
5063 		"    %i = OpVariable %fp_i32 Function\n"
5064 		"         OpStore %i %c_i32_0\n"
5065 		"         OpBranch %loop\n"
5066 
5067 		" %loop = OpLabel\n"
5068 		"   %15 = OpLoad %i32 %i\n"
5069 		"   %lt = OpSLessThan %bool %15 %c_i32_${count}\n"
5070 		"         OpLoopMerge %merge %inc None\n"
5071 		"         OpBranchConditional %lt %write %merge\n"
5072 
5073 		"%write = OpLabel\n"
5074 		"   %30 = OpLoad %i32 %i\n"
5075 		"  %src = OpAccessChain %pp_${type16} %pc16 %c_i32_0 %${arrayindex}\n"
5076 		"%val16 = OpLoad %${type16} %src\n"
5077 		"%val32 = ${convert} %${type32} %val16\n"
5078 		"  %dst = OpAccessChain %up_${type32} %ssbo32 %c_i32_0 %30\n"
5079 		"         OpStore %dst %val32\n"
5080 		"         OpBranch %inc\n"
5081 
5082 		"  %inc = OpLabel\n"
5083 		"   %37 = OpLoad %i32 %i\n"
5084 		"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
5085 		"         OpStore %i %39\n"
5086 		"         OpBranch %loop\n"
5087 
5088 		"%merge = OpLabel\n"
5089 		"         OpReturnValue %param\n"
5090 
5091 		"OpFunctionEnd\n");
5092 
5093 	{  // Scalar cases
5094 		const StringTemplate	preMain		(
5095 			"         %${type16} = OpTypeInt 16 ${signed}\n"
5096 			"    %c_i32_${count} = OpConstant %i32 ${count}\n"					// Should be the same as numDataPoints
5097 			"          %c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
5098 			"%a${count}${type16} = OpTypeArray %${type16} %c_i32_${count}\n"
5099 			"%a${count}${type32} = OpTypeArray %${type32} %c_i32_${count}\n"
5100 			"      %pp_${type16} = OpTypePointer PushConstant %${type16}\n"
5101 			"      %up_${type32} = OpTypePointer Uniform      %${type32}\n"
5102 			"            %SSBO32 = OpTypeStruct %a${count}${type32}\n"
5103 			"         %up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
5104 			"            %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
5105 			"              %PC16 = OpTypeStruct %a${count}${type16}\n"
5106 			"           %pp_PC16 = OpTypePointer PushConstant %PC16\n"
5107 			"              %pc16 = OpVariable %pp_PC16 PushConstant\n");
5108 
5109 		const StringTemplate	decoration	(
5110 			"OpDecorate %a${count}${type16} ArrayStride 2\n"
5111 			"OpDecorate %a${count}${type32} ArrayStride 4\n"
5112 			"OpDecorate %SSBO32 BufferBlock\n"
5113 			"OpMemberDecorate %SSBO32 0 Offset 0\n"
5114 			"OpDecorate %PC16 Block\n"
5115 			"OpMemberDecorate %PC16 0 Offset 0\n"
5116 			"OpDecorate %ssbo32 DescriptorSet 0\n"
5117 			"OpDecorate %ssbo32 Binding 0\n");
5118 
5119 		{  // signed int
5120 			map<string, string>		specs;
5121 
5122 			specs["type16"]			= "i16";
5123 			specs["type32"]			= "i32";
5124 			specs["signed"]			= "1";
5125 			specs["count"]			= "64";
5126 			specs["convert"]		= "OpSConvert";
5127 
5128 			for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
5129 			{
5130 				bool			useConstIdx		= constantIndices[constIndexIdx].useConstantIndex;
5131 				deUint32		constIdx		= constantIndices[constIndexIdx].constantIndex;
5132 				string			testName		= "sint_scalar";
5133 				vector<deInt32>	constIdxData;
5134 
5135 				if (useConstIdx)
5136 				{
5137 					constIdxData.reserve(numDataPoints);
5138 
5139 					for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
5140 						constIdxData.push_back(sOutputs[constIdx]);
5141 				}
5142 
5143 				specs["constarrayidx"]	= de::toString(constIdx);
5144 				if (useConstIdx)
5145 					specs["arrayindex"] = "c_i32_ci";
5146 				else
5147 					specs["arrayindex"] = "30";
5148 
5149 				if (useConstIdx)
5150 					testName += string("_const_idx_") + de::toString(constIdx);
5151 
5152 				resources.outputs.clear();
5153 				resources.outputs.push_back(Resource(BufferSp(new Int32Buffer(useConstIdx ? constIdxData : sOutputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5154 
5155 				fragments["testfun"]	= testFun.specialize(specs);
5156 				fragments["pre_main"]	= preMain.specialize(specs);
5157 				fragments["decoration"]	= decoration.specialize(specs);
5158 
5159 				createTestsForAllStages(testName.c_str(), defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
5160 			}
5161 		}
5162 		{  // unsigned int
5163 			map<string, string>		specs;
5164 
5165 			specs["type16"]			= "u16";
5166 			specs["type32"]			= "u32";
5167 			specs["signed"]			= "0";
5168 			specs["count"]			= "64";
5169 			specs["convert"]		= "OpUConvert";
5170 
5171 			for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
5172 			{
5173 				bool			useConstIdx		= constantIndices[constIndexIdx].useConstantIndex;
5174 				deUint32		constIdx		= constantIndices[constIndexIdx].constantIndex;
5175 				string			testName		= "uint_scalar";
5176 				vector<deInt32>	constIdxData;
5177 
5178 				if (useConstIdx)
5179 				{
5180 					constIdxData.reserve(numDataPoints);
5181 
5182 					for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
5183 						constIdxData.push_back(uOutputs[constIdx]);
5184 				}
5185 
5186 				specs["constarrayidx"]	= de::toString(constIdx);
5187 				if (useConstIdx)
5188 					specs["arrayindex"] = "c_i32_ci";
5189 				else
5190 					specs["arrayindex"] = "30";
5191 
5192 				if (useConstIdx)
5193 					testName += string("_const_idx_") + de::toString(constIdx);
5194 
5195 				resources.outputs.clear();
5196 				resources.outputs.push_back(Resource(BufferSp(new Int32Buffer(useConstIdx ? constIdxData : uOutputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5197 
5198 				fragments["testfun"]	= testFun.specialize(specs);
5199 				fragments["pre_main"]	= preMain.specialize(specs);
5200 				fragments["decoration"]	= decoration.specialize(specs);
5201 
5202 				createTestsForAllStages(testName.c_str(), defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
5203 			}
5204 		}
5205 	}
5206 
5207 	{  // Vector cases
5208 		const StringTemplate	preMain		(
5209 			"    %${base_type16} = OpTypeInt 16 ${signed}\n"
5210 			"         %${type16} = OpTypeVector %${base_type16} 2\n"
5211 			"    %c_i32_${count} = OpConstant %i32 ${count}\n"
5212 			"          %c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
5213 			"%a${count}${type16} = OpTypeArray %${type16} %c_i32_${count}\n"
5214 			"%a${count}${type32} = OpTypeArray %${type32} %c_i32_${count}\n"
5215 			"      %pp_${type16} = OpTypePointer PushConstant %${type16}\n"
5216 			"      %up_${type32} = OpTypePointer Uniform      %${type32}\n"
5217 			"            %SSBO32 = OpTypeStruct %a${count}${type32}\n"
5218 			"         %up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
5219 			"            %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
5220 			"              %PC16 = OpTypeStruct %a${count}${type16}\n"
5221 			"           %pp_PC16 = OpTypePointer PushConstant %PC16\n"
5222 			"              %pc16 = OpVariable %pp_PC16 PushConstant\n");
5223 
5224 		const StringTemplate	decoration	(
5225 			"OpDecorate %a${count}${type16} ArrayStride 4\n"
5226 			"OpDecorate %a${count}${type32} ArrayStride 8\n"
5227 			"OpDecorate %SSBO32 BufferBlock\n"
5228 			"OpMemberDecorate %SSBO32 0 Offset 0\n"
5229 			"OpDecorate %PC16 Block\n"
5230 			"OpMemberDecorate %PC16 0 Offset 0\n"
5231 			"OpDecorate %ssbo32 DescriptorSet 0\n"
5232 			"OpDecorate %ssbo32 Binding 0\n");
5233 
5234 		{  // signed int
5235 			map<string, string>		specs;
5236 
5237 			specs["base_type16"]	= "i16";
5238 			specs["type16"]			= "v2i16";
5239 			specs["type32"]			= "v2i32";
5240 			specs["signed"]			= "1";
5241 			specs["count"]			= "32";
5242 			specs["convert"]		= "OpSConvert";
5243 
5244 			for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
5245 			{
5246 				bool			useConstIdx		= constantIndices[constIndexIdx].useConstantIndex;
5247 				deUint32		constIdx		= constantIndices[constIndexIdx].constantIndex;
5248 				string			testName		= "sint_vector";
5249 				vector<deInt32>	constIdxData;
5250 
5251 				if (useConstIdx)
5252 				{
5253 					constIdxData.reserve(numDataPoints);
5254 
5255 					for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
5256 						constIdxData.push_back(sOutputs[constIdx * 2 + numIdx % 2]);
5257 				}
5258 
5259 				specs["constarrayidx"]	= de::toString(constIdx);
5260 				if (useConstIdx)
5261 					specs["arrayindex"] = "c_i32_ci";
5262 				else
5263 					specs["arrayindex"] = "30";
5264 
5265 				if (useConstIdx)
5266 					testName += string("_const_idx_") + de::toString(constIdx);
5267 
5268 				resources.outputs.clear();
5269 				resources.outputs.push_back(Resource(BufferSp(new Int32Buffer(useConstIdx ? constIdxData : sOutputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5270 
5271 				fragments["testfun"]	= testFun.specialize(specs);
5272 				fragments["pre_main"]	= preMain.specialize(specs);
5273 				fragments["decoration"]	= decoration.specialize(specs);
5274 
5275 				createTestsForAllStages(testName.c_str(), defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
5276 			}
5277 		}
5278 		{  // unsigned int
5279 			map<string, string>		specs;
5280 
5281 			specs["base_type16"]	= "u16";
5282 			specs["type16"]			= "v2u16";
5283 			specs["type32"]			= "v2u32";
5284 			specs["signed"]			= "0";
5285 			specs["count"]			= "32";
5286 			specs["convert"]		= "OpUConvert";
5287 
5288 			for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
5289 			{
5290 				bool			useConstIdx		= constantIndices[constIndexIdx].useConstantIndex;
5291 				deUint32		constIdx		= constantIndices[constIndexIdx].constantIndex;
5292 				string			testName		= "uint_vector";
5293 				vector<deInt32>	constIdxData;
5294 
5295 				if (useConstIdx)
5296 				{
5297 					constIdxData.reserve(numDataPoints);
5298 
5299 					for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
5300 						constIdxData.push_back(uOutputs[constIdx * 2 + numIdx % 2]);
5301 				}
5302 
5303 				specs["constarrayidx"]	= de::toString(constIdx);
5304 				if (useConstIdx)
5305 					specs["arrayindex"] = "c_i32_ci";
5306 				else
5307 					specs["arrayindex"] = "30";
5308 
5309 				if (useConstIdx)
5310 					testName += string("_const_idx_") + de::toString(constIdx);
5311 
5312 				resources.outputs.clear();
5313 				resources.outputs.push_back(Resource(BufferSp(new Int32Buffer(useConstIdx ? constIdxData : uOutputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5314 
5315 				fragments["testfun"]	= testFun.specialize(specs);
5316 				fragments["pre_main"]	= preMain.specialize(specs);
5317 				fragments["decoration"]	= decoration.specialize(specs);
5318 
5319 				createTestsForAllStages(testName.c_str(), defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
5320 			}
5321 		}
5322 	}
5323 }
5324 
addGraphics16BitStorageUniformInt16To32Group(tcu::TestCaseGroup * testGroup)5325 void addGraphics16BitStorageUniformInt16To32Group (tcu::TestCaseGroup* testGroup)
5326 {
5327 	de::Random							rnd					(deStringHash(testGroup->getName()));
5328 	map<string, string>					fragments;
5329 	const deUint32						numDataPoints		= 256;
5330 	RGBA								defaultColors[4];
5331 	vector<deInt16>						inputs				= getInt16s(rnd, numDataPoints);
5332 	vector<deInt32>						sOutputs;
5333 	vector<deInt32>						uOutputs;
5334 	vector<string>						extensions;
5335 	const deUint16						signBitMask			= 0x8000;
5336 	const deUint32						signExtendMask		= 0xffff0000;
5337 	const StringTemplate				capabilities		("OpCapability ${cap}\n");
5338 
5339 	sOutputs.reserve(inputs.size());
5340 	uOutputs.reserve(inputs.size());
5341 
5342 	for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
5343 	{
5344 		uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
5345 		if (inputs[numNdx] & signBitMask)
5346 			sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
5347 		else
5348 			sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
5349 	}
5350 
5351 	extensions.push_back("VK_KHR_16bit_storage");
5352 	fragments["extension"]	= "OpExtension \"SPV_KHR_16bit_storage\"";
5353 
5354 	getDefaultColors(defaultColors);
5355 
5356 	struct IntegerFacts
5357 	{
5358 		const char*	name;
5359 		const char*	type32;
5360 		const char*	type16;
5361 		const char* opcode;
5362 		bool		isSigned;
5363 	};
5364 
5365 	const IntegerFacts	intFacts[]	=
5366 	{
5367 		{"sint",	"%i32",		"%i16",		"OpSConvert",	true},
5368 		{"uint",	"%u32",		"%u16",		"OpUConvert",	false},
5369 	};
5370 
5371 	struct ConstantIndex
5372 	{
5373 		bool		useConstantIndex;
5374 		deUint32	constantIndex;
5375 	};
5376 
5377 	ConstantIndex	constantIndices[] =
5378 	{
5379 		{ false,	0 },
5380 		{ true,		4 },
5381 		{ true,		5 },
5382 		{ true,		6 }
5383 	};
5384 
5385 	const StringTemplate scalarPreMain		(
5386 			"${itype16} = OpTypeInt 16 ${signed}\n"
5387 			"%c_i32_256 = OpConstant %i32 256\n"
5388 			"%c_i32_ci  = OpConstant %i32 ${constarrayidx}\n"
5389 			"   %up_i32 = OpTypePointer Uniform ${itype32}\n"
5390 			"   %up_i16 = OpTypePointer Uniform ${itype16}\n"
5391 			"   %ra_i32 = OpTypeArray ${itype32} %c_i32_256\n"
5392 			"   %ra_i16 = OpTypeArray ${itype16} %c_i32_256\n"
5393 			"   %SSBO32 = OpTypeStruct %ra_i32\n"
5394 			"   %SSBO16 = OpTypeStruct %ra_i16\n"
5395 			"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
5396 			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
5397 			"   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
5398 			"   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
5399 
5400 	const StringTemplate scalarDecoration		(
5401 			"OpDecorate %ra_i32 ArrayStride 4\n"
5402 			"OpDecorate %ra_i16 ArrayStride ${arraystride}\n"
5403 			"OpMemberDecorate %SSBO32 0 Offset 0\n"
5404 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
5405 			"OpDecorate %SSBO32 BufferBlock\n"
5406 			"OpDecorate %SSBO16 ${indecor}\n"
5407 			"OpDecorate %ssbo32 DescriptorSet 0\n"
5408 			"OpDecorate %ssbo16 DescriptorSet 0\n"
5409 			"OpDecorate %ssbo32 Binding 1\n"
5410 			"OpDecorate %ssbo16 Binding 0\n");
5411 
5412 	const StringTemplate scalarTestFunc	(
5413 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
5414 			"    %param = OpFunctionParameter %v4f32\n"
5415 
5416 			"%entry = OpLabel\n"
5417 			"    %i = OpVariable %fp_i32 Function\n"
5418 			"         OpStore %i %c_i32_0\n"
5419 			"         OpBranch %loop\n"
5420 
5421 			" %loop = OpLabel\n"
5422 			"   %15 = OpLoad %i32 %i\n"
5423 			"   %lt = OpSLessThan %bool %15 %c_i32_256\n"
5424 			"         OpLoopMerge %merge %inc None\n"
5425 			"         OpBranchConditional %lt %write %merge\n"
5426 
5427 			"%write = OpLabel\n"
5428 			"   %30 = OpLoad %i32 %i\n"
5429 			"  %src = OpAccessChain %up_i16 %ssbo16 %c_i32_0 %${arrayindex}\n"
5430 			"%val16 = OpLoad ${itype16} %src\n"
5431 			"%val32 = ${convert} ${itype32} %val16\n"
5432 			"  %dst = OpAccessChain %up_i32 %ssbo32 %c_i32_0 %30\n"
5433 			"         OpStore %dst %val32\n"
5434 			"         OpBranch %inc\n"
5435 
5436 			"  %inc = OpLabel\n"
5437 			"   %37 = OpLoad %i32 %i\n"
5438 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
5439 			"         OpStore %i %39\n"
5440 			"         OpBranch %loop\n"
5441 			"%merge = OpLabel\n"
5442 			"         OpReturnValue %param\n"
5443 
5444 			"OpFunctionEnd\n");
5445 
5446 	const StringTemplate vecPreMain		(
5447 			"${itype16} = OpTypeInt 16 ${signed}\n"
5448 			"%c_i32_128 = OpConstant %i32 128\n"
5449 			"%c_i32_ci  = OpConstant %i32 ${constarrayidx}\n"
5450 			"%v2itype16 = OpTypeVector ${itype16} 2\n"
5451 			" %up_v2i32 = OpTypePointer Uniform ${v2itype32}\n"
5452 			" %up_v2i16 = OpTypePointer Uniform %v2itype16\n"
5453 			" %ra_v2i32 = OpTypeArray ${v2itype32} %c_i32_128\n"
5454 			" %ra_v2i16 = OpTypeArray %v2itype16 %c_i32_128\n"
5455 			"   %SSBO32 = OpTypeStruct %ra_v2i32\n"
5456 			"   %SSBO16 = OpTypeStruct %ra_v2i16\n"
5457 			"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
5458 			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
5459 			"   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
5460 			"   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
5461 
5462 	const StringTemplate vecDecoration		(
5463 			"OpDecorate %ra_v2i32 ArrayStride 8\n"
5464 			"OpDecorate %ra_v2i16 ArrayStride ${arraystride}\n"
5465 			"OpMemberDecorate %SSBO32 0 Offset 0\n"
5466 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
5467 			"OpDecorate %SSBO32 BufferBlock\n"
5468 			"OpDecorate %SSBO16 ${indecor}\n"
5469 			"OpDecorate %ssbo32 DescriptorSet 0\n"
5470 			"OpDecorate %ssbo16 DescriptorSet 0\n"
5471 			"OpDecorate %ssbo32 Binding 1\n"
5472 			"OpDecorate %ssbo16 Binding 0\n");
5473 
5474 	const StringTemplate vecTestFunc		(
5475 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
5476 			"    %param = OpFunctionParameter %v4f32\n"
5477 
5478 			"%entry = OpLabel\n"
5479 			"    %i = OpVariable %fp_i32 Function\n"
5480 			"         OpStore %i %c_i32_0\n"
5481 			"         OpBranch %loop\n"
5482 
5483 			" %loop = OpLabel\n"
5484 			"   %15 = OpLoad %i32 %i\n"
5485 			"   %lt = OpSLessThan %bool %15 %c_i32_128\n"
5486 			"         OpLoopMerge %merge %inc None\n"
5487 			"         OpBranchConditional %lt %write %merge\n"
5488 
5489 			"%write = OpLabel\n"
5490 			"   %30 = OpLoad %i32 %i\n"
5491 			"  %src = OpAccessChain %up_v2i16 %ssbo16 %c_i32_0 %${arrayindex}\n"
5492 			"%val16 = OpLoad %v2itype16 %src\n"
5493 			"%val32 = ${convert} ${v2itype32} %val16\n"
5494 			"  %dst = OpAccessChain %up_v2i32 %ssbo32 %c_i32_0 %30\n"
5495 			"         OpStore %dst %val32\n"
5496 			"         OpBranch %inc\n"
5497 
5498 			"  %inc = OpLabel\n"
5499 			"   %37 = OpLoad %i32 %i\n"
5500 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
5501 			"         OpStore %i %39\n"
5502 			"         OpBranch %loop\n"
5503 			"%merge = OpLabel\n"
5504 			"         OpReturnValue %param\n"
5505 
5506 			"OpFunctionEnd\n");
5507 
5508 	struct Category
5509 	{
5510 		const char*				name;
5511 		const StringTemplate&	preMain;
5512 		const StringTemplate&	decoration;
5513 		const StringTemplate&	testFunction;
5514 		const deUint32			numElements;
5515 	};
5516 
5517 	const Category		categories[]		=
5518 	{
5519 		{"scalar",	scalarPreMain,	scalarDecoration,	scalarTestFunc,	1},
5520 		{"vector",	vecPreMain,		vecDecoration,		vecTestFunc,	2},
5521 	};
5522 
5523 	const deUint32		minArrayStride[]	= {2, 16};
5524 
5525 	for (deUint32 catIdx = 0; catIdx < DE_LENGTH_OF_ARRAY(categories); ++catIdx)
5526 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
5527 			for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
5528 				for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
5529 				{
5530 					bool				useConstIdx		= constantIndices[constIndexIdx].useConstantIndex;
5531 					deUint32			constIdx		= constantIndices[constIndexIdx].constantIndex;
5532 					map<string, string>	specs;
5533 					string				name			= string(CAPABILITIES[capIdx].name) + "_" + categories[catIdx].name + "_" + intFacts[factIdx].name;
5534 					const deUint32		numElements		= categories[catIdx].numElements;
5535 					const deUint32		arrayStride		= de::max(numElements * 2, minArrayStride[capIdx]);
5536 
5537 					specs["cap"]						= CAPABILITIES[capIdx].cap;
5538 					specs["indecor"]					= CAPABILITIES[capIdx].decor;
5539 					specs["arraystride"]				= de::toString(arrayStride);
5540 					specs["itype32"]					= intFacts[factIdx].type32;
5541 					specs["v2itype32"]					= "%v2" + string(intFacts[factIdx].type32).substr(1);
5542 					specs["v3itype32"]					= "%v3" + string(intFacts[factIdx].type32).substr(1);
5543 					specs["itype16"]					= intFacts[factIdx].type16;
5544 					if (intFacts[factIdx].isSigned)
5545 						specs["signed"]					= "1";
5546 					else
5547 						specs["signed"]					= "0";
5548 					specs["convert"]					= intFacts[factIdx].opcode;
5549 					specs["constarrayidx"]				= de::toString(constIdx);
5550 					if (useConstIdx)
5551 						specs["arrayindex"] = "c_i32_ci";
5552 					else
5553 						specs["arrayindex"] = "30";
5554 
5555 					fragments["pre_main"]				= categories[catIdx].preMain.specialize(specs);
5556 					fragments["testfun"]				= categories[catIdx].testFunction.specialize(specs);
5557 					fragments["capability"]				= capabilities.specialize(specs);
5558 					fragments["decoration"]				= categories[catIdx].decoration.specialize(specs);
5559 
5560 					GraphicsResources	resources;
5561 					vector<deInt16>		inputsPadded;
5562 					VulkanFeatures		features;
5563 
5564 					for (size_t dataIdx = 0; dataIdx < inputs.size() / numElements; ++dataIdx)
5565 					{
5566 						for (deUint32 elementIdx = 0; elementIdx < numElements; ++elementIdx)
5567 							inputsPadded.push_back(inputs[dataIdx * numElements + elementIdx]);
5568 						for (deUint32 padIdx = 0; padIdx < arrayStride / 2 - numElements; ++padIdx)
5569 							inputsPadded.push_back(0);
5570 					}
5571 
5572 					resources.inputs.push_back(Resource(BufferSp(new Int16Buffer(inputsPadded)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5573 
5574 					vector<deInt32>		constIdxOutputs;
5575 					if (useConstIdx)
5576 					{
5577 						name += string("_const_idx_") + de::toString(constIdx);
5578 						for (deUint32 i = 0; i < numDataPoints; i++)
5579 						{
5580 							deUint32 idx = constIdx * numElements + i % numElements;
5581 							constIdxOutputs.push_back(intFacts[factIdx].isSigned ? sOutputs[idx] : uOutputs[idx]);
5582 						}
5583 					}
5584 
5585 					resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
5586 					resources.outputs.clear();
5587 					if (useConstIdx)
5588 						resources.outputs.push_back(Resource(BufferSp(new Int32Buffer(constIdxOutputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5589 					else if (intFacts[factIdx].isSigned)
5590 						resources.outputs.push_back(Resource(BufferSp(new Int32Buffer(sOutputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5591 					else
5592 						resources.outputs.push_back(Resource(BufferSp(new Int32Buffer(uOutputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5593 
5594 					features												= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
5595 					features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
5596 					features.coreFeatures.fragmentStoresAndAtomics			= true;
5597 
5598 					createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
5599 				}
5600 }
5601 
addGraphics16BitStorageUniformFloat16To32Group(tcu::TestCaseGroup * testGroup)5602 void addGraphics16BitStorageUniformFloat16To32Group (tcu::TestCaseGroup* testGroup)
5603 {
5604 	de::Random							rnd					(deStringHash(testGroup->getName()));
5605 	map<string, string>					fragments;
5606 	vector<string>						extensions;
5607 	const deUint32						numDataPoints		= 256;
5608 	RGBA								defaultColors[4];
5609 	const StringTemplate				capabilities		("OpCapability ${cap}\n");
5610 	vector<deFloat16>					float16Data			= getFloat16s(rnd, numDataPoints);
5611 
5612 	struct ConstantIndex
5613 	{
5614 		bool		useConstantIndex;
5615 		deUint32	constantIndex;
5616 	};
5617 
5618 	ConstantIndex	constantIndices[] =
5619 	{
5620 		{ false,	0 },
5621 		{ true,		4 },
5622 		{ true,		5 },
5623 		{ true,		6 }
5624 	};
5625 
5626 	extensions.push_back("VK_KHR_16bit_storage");
5627 	fragments["extension"]	= "OpExtension \"SPV_KHR_16bit_storage\"";
5628 
5629 	getDefaultColors(defaultColors);
5630 
5631 	{ // scalar cases
5632 		const StringTemplate preMain		(
5633 			"      %f16 = OpTypeFloat 16\n"
5634 			"%c_i32_256 = OpConstant %i32 256\n"
5635 			" %c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
5636 			"   %up_f32 = OpTypePointer Uniform %f32\n"
5637 			"   %up_f16 = OpTypePointer Uniform %f16\n"
5638 			"   %ra_f32 = OpTypeArray %f32 %c_i32_256\n"
5639 			"   %ra_f16 = OpTypeArray %f16 %c_i32_256\n"
5640 			"   %SSBO32 = OpTypeStruct %ra_f32\n"
5641 			"   %SSBO16 = OpTypeStruct %ra_f16\n"
5642 			"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
5643 			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
5644 			"   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
5645 			"   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
5646 
5647 		const StringTemplate decoration		(
5648 			"OpDecorate %ra_f32 ArrayStride 4\n"
5649 			"OpDecorate %ra_f16 ArrayStride ${arraystride}\n"
5650 			"OpMemberDecorate %SSBO32 0 Offset 0\n"
5651 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
5652 			"OpDecorate %SSBO32 BufferBlock\n"
5653 			"OpDecorate %SSBO16 ${indecor}\n"
5654 			"OpDecorate %ssbo32 DescriptorSet 0\n"
5655 			"OpDecorate %ssbo16 DescriptorSet 0\n"
5656 			"OpDecorate %ssbo32 Binding 1\n"
5657 			"OpDecorate %ssbo16 Binding 0\n");
5658 
5659 		// ssbo32[] <- convert ssbo16[] to 32bit float
5660 		const StringTemplate testFun		(
5661 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
5662 			"    %param = OpFunctionParameter %v4f32\n"
5663 
5664 			"%entry = OpLabel\n"
5665 			"    %i = OpVariable %fp_i32 Function\n"
5666 			"         OpStore %i %c_i32_0\n"
5667 			"         OpBranch %loop\n"
5668 
5669 			" %loop = OpLabel\n"
5670 			"   %15 = OpLoad %i32 %i\n"
5671 			"   %lt = OpSLessThan %bool %15 %c_i32_256\n"
5672 			"         OpLoopMerge %merge %inc None\n"
5673 			"         OpBranchConditional %lt %write %merge\n"
5674 
5675 			"%write = OpLabel\n"
5676 			"   %30 = OpLoad %i32 %i\n"
5677 			"  %src = OpAccessChain %up_f16 %ssbo16 %c_i32_0 %${arrayindex}\n"
5678 			"%val16 = OpLoad %f16 %src\n"
5679 			"%val32 = OpFConvert %f32 %val16\n"
5680 			"  %dst = OpAccessChain %up_f32 %ssbo32 %c_i32_0 %30\n"
5681 			"         OpStore %dst %val32\n"
5682 			"         OpBranch %inc\n"
5683 
5684 			"  %inc = OpLabel\n"
5685 			"   %37 = OpLoad %i32 %i\n"
5686 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
5687 			"         OpStore %i %39\n"
5688 			"         OpBranch %loop\n"
5689 
5690 			"%merge = OpLabel\n"
5691 			"         OpReturnValue %param\n"
5692 
5693 			"OpFunctionEnd\n");
5694 
5695 		const deUint32	arrayStrides[]		= {2, 16};
5696 
5697 		for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
5698 		{
5699 			for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
5700 			{
5701 				GraphicsResources	resources;
5702 				map<string, string>	specs;
5703 				VulkanFeatures		features;
5704 				string				testName	= string(CAPABILITIES[capIdx].name) + "_scalar_float";
5705 				bool				useConstIdx	= constantIndices[constIndexIdx].useConstantIndex;
5706 				deUint32			constIdx	= constantIndices[constIndexIdx].constantIndex;
5707 
5708 				specs["cap"]					= CAPABILITIES[capIdx].cap;
5709 				specs["indecor"]				= CAPABILITIES[capIdx].decor;
5710 				specs["arraystride"]			= de::toString(arrayStrides[capIdx]);
5711 				specs["constarrayidx"]			= de::toString(constIdx);
5712 				if (useConstIdx)
5713 					specs["arrayindex"] = "c_i32_ci";
5714 				else
5715 					specs["arrayindex"] = "30";
5716 
5717 				fragments["capability"]			= capabilities.specialize(specs);
5718 				fragments["decoration"]			= decoration.specialize(specs);
5719 				fragments["pre_main"]			= preMain.specialize(specs);
5720 				fragments["testfun"]			= testFun.specialize(specs);
5721 
5722 				vector<deFloat16>	inputData;
5723 				for (size_t dataIdx = 0; dataIdx < float16Data.size(); ++dataIdx)
5724 				{
5725 					inputData.push_back(float16Data[dataIdx]);
5726 					for (deUint32 padIdx = 0; padIdx < arrayStrides[capIdx] / 2 - 1; ++padIdx)
5727 						inputData.push_back(deFloat16(0.0f));
5728 				}
5729 
5730 				vector<float>		float32Data;
5731 				float32Data.reserve(numDataPoints);
5732 				for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
5733 					float32Data.push_back(deFloat16To32(float16Data[useConstIdx ? constIdx : numIdx]));
5734 
5735 				resources.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5736 				resources.outputs.push_back(Resource(BufferSp(new Float32Buffer(float32Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5737 				resources.verifyIO = check32BitFloats;
5738 				resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
5739 
5740 				features												= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
5741 				features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
5742 				features.coreFeatures.fragmentStoresAndAtomics			= true;
5743 
5744 				if (useConstIdx)
5745 					testName += string("_const_idx_") + de::toString(constIdx);
5746 
5747 				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
5748 			}
5749 		}
5750 	}
5751 
5752 	{ // vector cases
5753 		const StringTemplate preMain		(
5754 			"      %f16 = OpTypeFloat 16\n"
5755 			"%c_i32_128 = OpConstant %i32 128\n"
5756 			"%c_i32_ci  = OpConstant %i32 ${constarrayidx}\n"
5757 			"	 %v2f16 = OpTypeVector %f16 2\n"
5758 			" %up_v2f32 = OpTypePointer Uniform %v2f32\n"
5759 			" %up_v2f16 = OpTypePointer Uniform %v2f16\n"
5760 			" %ra_v2f32 = OpTypeArray %v2f32 %c_i32_128\n"
5761 			" %ra_v2f16 = OpTypeArray %v2f16 %c_i32_128\n"
5762 			"   %SSBO32 = OpTypeStruct %ra_v2f32\n"
5763 			"   %SSBO16 = OpTypeStruct %ra_v2f16\n"
5764 			"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
5765 			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
5766 			"   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
5767 			"   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
5768 
5769 		const StringTemplate decoration		(
5770 			"OpDecorate %ra_v2f32 ArrayStride 8\n"
5771 			"OpDecorate %ra_v2f16 ArrayStride ${arraystride}\n"
5772 			"OpMemberDecorate %SSBO32 0 Offset 0\n"
5773 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
5774 			"OpDecorate %SSBO32 BufferBlock\n"
5775 			"OpDecorate %SSBO16 ${indecor}\n"
5776 			"OpDecorate %ssbo32 DescriptorSet 0\n"
5777 			"OpDecorate %ssbo16 DescriptorSet 0\n"
5778 			"OpDecorate %ssbo32 Binding 1\n"
5779 			"OpDecorate %ssbo16 Binding 0\n");
5780 
5781 		// ssbo32[] <- convert ssbo16[] to 32bit float
5782 		const StringTemplate testFun		(
5783 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
5784 			"    %param = OpFunctionParameter %v4f32\n"
5785 
5786 			"%entry = OpLabel\n"
5787 			"    %i = OpVariable %fp_i32 Function\n"
5788 			"         OpStore %i %c_i32_0\n"
5789 			"         OpBranch %loop\n"
5790 
5791 			" %loop = OpLabel\n"
5792 			"   %15 = OpLoad %i32 %i\n"
5793 			"   %lt = OpSLessThan %bool %15 %c_i32_128\n"
5794 			"         OpLoopMerge %merge %inc None\n"
5795 			"         OpBranchConditional %lt %write %merge\n"
5796 
5797 			"%write = OpLabel\n"
5798 			"   %30 = OpLoad %i32 %i\n"
5799 			"  %src = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %${arrayindex}\n"
5800 			"%val16 = OpLoad %v2f16 %src\n"
5801 			"%val32 = OpFConvert %v2f32 %val16\n"
5802 			"  %dst = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30\n"
5803 			"         OpStore %dst %val32\n"
5804 			"         OpBranch %inc\n"
5805 
5806 			"  %inc = OpLabel\n"
5807 			"   %37 = OpLoad %i32 %i\n"
5808 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
5809 			"         OpStore %i %39\n"
5810 			"         OpBranch %loop\n"
5811 
5812 			"%merge = OpLabel\n"
5813 			"         OpReturnValue %param\n"
5814 
5815 			"OpFunctionEnd\n");
5816 
5817 		const deUint32	arrayStrides[]		= {4, 16};
5818 
5819 		for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
5820 		{
5821 			for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
5822 			{
5823 				GraphicsResources	resources;
5824 				map<string, string>	specs;
5825 				VulkanFeatures		features;
5826 				string				testName	= string(CAPABILITIES[capIdx].name) + "_vector_float";
5827 				bool				useConstIdx	= constantIndices[constIndexIdx].useConstantIndex;
5828 				deUint32			constIdx	= constantIndices[constIndexIdx].constantIndex;
5829 
5830 				specs["cap"]					= CAPABILITIES[capIdx].cap;
5831 				specs["indecor"]				= CAPABILITIES[capIdx].decor;
5832 				specs["arraystride"]			= de::toString(arrayStrides[capIdx]);
5833 				specs["constarrayidx"]			= de::toString(constIdx);
5834 				if (useConstIdx)
5835 					specs["arrayindex"] = "c_i32_ci";
5836 				else
5837 					specs["arrayindex"] = "30";
5838 
5839 				fragments["capability"]			= capabilities.specialize(specs);
5840 				fragments["decoration"]			= decoration.specialize(specs);
5841 				fragments["pre_main"]			= preMain.specialize(specs);
5842 				fragments["testfun"]			= testFun.specialize(specs);
5843 
5844 				vector<deFloat16>	inputData;
5845 				for (size_t dataIdx = 0; dataIdx < float16Data.size() / 2; ++dataIdx)
5846 				{
5847 					inputData.push_back(float16Data[dataIdx * 2]);
5848 					inputData.push_back(float16Data[dataIdx * 2 + 1]);
5849 					for (deUint32 padIdx = 0; padIdx < arrayStrides[capIdx] / 2 - 2; ++padIdx)
5850 						inputData.push_back(deFloat16(0.0f));
5851 				}
5852 
5853 				vector<float>		float32Data;
5854 				float32Data.reserve(numDataPoints);
5855 				for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
5856 					float32Data.push_back(deFloat16To32(float16Data[constantIndices[constIndexIdx].useConstantIndex ? (constantIndices[constIndexIdx].constantIndex * 2 + numIdx % 2) : numIdx]));
5857 
5858 				resources.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5859 				resources.outputs.push_back(Resource(BufferSp(new Float32Buffer(float32Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5860 				resources.verifyIO = check32BitFloats;
5861 				resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
5862 
5863 				features												= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
5864 				features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
5865 				features.coreFeatures.fragmentStoresAndAtomics			= true;
5866 
5867 				if (constantIndices[constIndexIdx].useConstantIndex)
5868 					testName += string("_const_idx_") + de::toString(constantIndices[constIndexIdx].constantIndex);
5869 
5870 				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
5871 			}
5872 		}
5873 	}
5874 
5875 	{ // matrix cases
5876 		fragments["pre_main"]				=
5877 			" %c_i32_32 = OpConstant %i32 32\n"
5878 			"      %f16 = OpTypeFloat 16\n"
5879 			"    %v2f16 = OpTypeVector %f16 2\n"
5880 			"  %m4x2f32 = OpTypeMatrix %v2f32 4\n"
5881 			"  %m4x2f16 = OpTypeMatrix %v2f16 4\n"
5882 			" %up_v2f32 = OpTypePointer Uniform %v2f32\n"
5883 			" %up_v2f16 = OpTypePointer Uniform %v2f16\n"
5884 			"%a8m4x2f32 = OpTypeArray %m4x2f32 %c_i32_32\n"
5885 			"%a8m4x2f16 = OpTypeArray %m4x2f16 %c_i32_32\n"
5886 			"   %SSBO32 = OpTypeStruct %a8m4x2f32\n"
5887 			"   %SSBO16 = OpTypeStruct %a8m4x2f16\n"
5888 			"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
5889 			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
5890 			"   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
5891 			"   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
5892 
5893 		const StringTemplate decoration		(
5894 			"OpDecorate %a8m4x2f32 ArrayStride 32\n"
5895 			"OpDecorate %a8m4x2f16 ArrayStride 16\n"
5896 			"OpMemberDecorate %SSBO32 0 Offset 0\n"
5897 			"OpMemberDecorate %SSBO32 0 ColMajor\n"
5898 			"OpMemberDecorate %SSBO32 0 MatrixStride 8\n"
5899 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
5900 			"OpMemberDecorate %SSBO16 0 ColMajor\n"
5901 			"OpMemberDecorate %SSBO16 0 MatrixStride 4\n"
5902 			"OpDecorate %SSBO32 BufferBlock\n"
5903 			"OpDecorate %SSBO16 ${indecor}\n"
5904 			"OpDecorate %ssbo32 DescriptorSet 0\n"
5905 			"OpDecorate %ssbo16 DescriptorSet 0\n"
5906 			"OpDecorate %ssbo32 Binding 1\n"
5907 			"OpDecorate %ssbo16 Binding 0\n");
5908 
5909 		fragments["testfun"]				=
5910 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
5911 			"    %param = OpFunctionParameter %v4f32\n"
5912 
5913 			"%entry = OpLabel\n"
5914 			"    %i = OpVariable %fp_i32 Function\n"
5915 			"         OpStore %i %c_i32_0\n"
5916 			"         OpBranch %loop\n"
5917 
5918 			" %loop = OpLabel\n"
5919 			"   %15 = OpLoad %i32 %i\n"
5920 			"   %lt = OpSLessThan %bool %15 %c_i32_32\n"
5921 			"         OpLoopMerge %merge %inc None\n"
5922 			"         OpBranchConditional %lt %write %merge\n"
5923 
5924 			"  %write = OpLabel\n"
5925 			"     %30 = OpLoad %i32 %i\n"
5926 			"  %src_0 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_0\n"
5927 			"  %src_1 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_1\n"
5928 			"  %src_2 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_2\n"
5929 			"  %src_3 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_3\n"
5930 			"%val16_0 = OpLoad %v2f16 %src_0\n"
5931 			"%val16_1 = OpLoad %v2f16 %src_1\n"
5932 			"%val16_2 = OpLoad %v2f16 %src_2\n"
5933 			"%val16_3 = OpLoad %v2f16 %src_3\n"
5934 			"%val32_0 = OpFConvert %v2f32 %val16_0\n"
5935 			"%val32_1 = OpFConvert %v2f32 %val16_1\n"
5936 			"%val32_2 = OpFConvert %v2f32 %val16_2\n"
5937 			"%val32_3 = OpFConvert %v2f32 %val16_3\n"
5938 			"  %dst_0 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_0\n"
5939 			"  %dst_1 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_1\n"
5940 			"  %dst_2 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_2\n"
5941 			"  %dst_3 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_3\n"
5942 			"           OpStore %dst_0 %val32_0\n"
5943 			"           OpStore %dst_1 %val32_1\n"
5944 			"           OpStore %dst_2 %val32_2\n"
5945 			"           OpStore %dst_3 %val32_3\n"
5946 			"           OpBranch %inc\n"
5947 
5948 			"  %inc = OpLabel\n"
5949 			"   %37 = OpLoad %i32 %i\n"
5950 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
5951 			"         OpStore %i %39\n"
5952 			"         OpBranch %loop\n"
5953 
5954 			"%merge = OpLabel\n"
5955 			"         OpReturnValue %param\n"
5956 
5957 			"OpFunctionEnd\n";
5958 
5959 			for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
5960 			{
5961 				GraphicsResources	resources;
5962 				map<string, string>	specs;
5963 				VulkanFeatures		features;
5964 				string				testName	= string(CAPABILITIES[capIdx].name) + "_matrix_float";
5965 
5966 				specs["cap"]					= CAPABILITIES[capIdx].cap;
5967 				specs["indecor"]				= CAPABILITIES[capIdx].decor;
5968 
5969 				fragments["capability"]			= capabilities.specialize(specs);
5970 				fragments["decoration"]			= decoration.specialize(specs);
5971 
5972 				vector<float>		float32Data;
5973 				float32Data.reserve(numDataPoints);
5974 				for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
5975 					float32Data.push_back(deFloat16To32(float16Data[numIdx]));
5976 
5977 				resources.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5978 				resources.outputs.push_back(Resource(BufferSp(new Float32Buffer(float32Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5979 				resources.verifyIO = check32BitFloats;
5980 				resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
5981 
5982 				features												= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
5983 				features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
5984 				features.coreFeatures.fragmentStoresAndAtomics			= true;
5985 
5986 				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
5987 		}
5988 	}
5989 }
5990 
addGraphics16BitStorageUniformStructFloat16To32Group(tcu::TestCaseGroup * testGroup)5991 void addGraphics16BitStorageUniformStructFloat16To32Group (tcu::TestCaseGroup* testGroup)
5992 {
5993 	de::Random							rnd					(deStringHash(testGroup->getName()));
5994 	map<string, string>					fragments;
5995 	vector<string>						extensions;
5996 	RGBA								defaultColors[4];
5997 	const StringTemplate				capabilities		("OpCapability ${cap}\n");
5998 	vector<float>						float32Data			(getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD430), 0.0f);
5999 
6000 	extensions.push_back("VK_KHR_16bit_storage");
6001 	fragments["extension"]	= "OpExtension \"SPV_KHR_16bit_storage\"";
6002 
6003 	getDefaultColors(defaultColors);
6004 
6005 	const StringTemplate preMain		(
6006 		"\n"
6007 		"${types}\n"
6008 		"\n"
6009 		"%zero = OpConstant %i32 0\n"
6010 		"%c_i32_5 = OpConstant %i32 5\n"
6011 		"%c_i32_6 = OpConstant %i32 6\n"
6012 		"%c_i32_7 = OpConstant %i32 7\n"
6013 		"%c_i32_8 = OpConstant %i32 8\n"
6014 		"%c_i32_9 = OpConstant %i32 9\n"
6015 		"%c_i32_11 = OpConstant %i32 11\n"
6016 		"\n"
6017 		"%c_u32_7 = OpConstant %u32 7\n"
6018 		"%c_u32_11 = OpConstant %u32 11\n"
6019 		"\n"
6020 		"%f16arr3       = OpTypeArray %f16 %c_u32_3\n"
6021 		"%v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
6022 		"%v2f16arr11    = OpTypeArray %v2f16 %c_u32_11\n"
6023 		"%v3f16arr11    = OpTypeArray %v3f16 %c_u32_11\n"
6024 		"%v4f16arr3     = OpTypeArray %v4f16 %c_u32_3\n"
6025 		"%struct16      = OpTypeStruct %f16 %v2f16arr3\n"
6026 		"%struct16arr11 = OpTypeArray %struct16 %c_u32_11\n"
6027 		"%f16Struct = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr11 %v2f16arr11 %f16 %v3f16arr11 %v4f16arr3\n"
6028 		"\n"
6029 		"%f32arr3   = OpTypeArray %f32 %c_u32_3\n"
6030 		"%v2f32arr3 = OpTypeArray %v2f32 %c_u32_3\n"
6031 		"%v2f32arr11 = OpTypeArray %v2f32 %c_u32_11\n"
6032 		"%v3f32arr11 = OpTypeArray %v3f32 %c_u32_11\n"
6033 		"%v4f32arr3 = OpTypeArray %v4f32 %c_u32_3\n"
6034 		"%struct32      = OpTypeStruct %f32 %v2f32arr3\n"
6035 		"%struct32arr11 = OpTypeArray %struct32 %c_u32_11\n"
6036 		"%f32Struct = OpTypeStruct %f32 %v2f32 %v3f32 %v4f32 %f32arr3 %struct32arr11 %v2f32arr11 %f32 %v3f32arr11 %v4f32arr3\n"
6037 		"\n"
6038 		"%f16StructArr7      = OpTypeArray %f16Struct %c_u32_7\n"
6039 		"%f32StructArr7      = OpTypeArray %f32Struct %c_u32_7\n"
6040 		"%SSBO_IN            = OpTypeStruct %f16StructArr7\n"
6041 		"%SSBO_OUT           = OpTypeStruct %f32StructArr7\n"
6042 		"%up_SSBOIN          = OpTypePointer Uniform %SSBO_IN\n"
6043 		"%up_SSBOOUT         = OpTypePointer Uniform %SSBO_OUT\n"
6044 		"%ssboIN             = OpVariable %up_SSBOIN Uniform\n"
6045 		"%ssboOUT            = OpVariable %up_SSBOOUT Uniform\n"
6046 		"\n");
6047 
6048 	const StringTemplate decoration		(
6049 		"${strideF16}"
6050 		"\n"
6051 		"${strideF32}"
6052 		"\n"
6053 		"OpMemberDecorate %SSBO_IN 0 Offset 0\n"
6054 		"OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
6055 		"OpDecorate %SSBO_IN ${indecor}\n"
6056 		"OpDecorate %SSBO_OUT BufferBlock\n"
6057 		"OpDecorate %ssboIN DescriptorSet 0\n"
6058 		"OpDecorate %ssboOUT DescriptorSet 0\n"
6059 		"OpDecorate %ssboIN Binding 0\n"
6060 		"OpDecorate %ssboOUT Binding 1\n"
6061 		"\n");
6062 
6063 	fragments["testfun"]			=
6064 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
6065 		"    %param = OpFunctionParameter %v4f32\n"
6066 		"%label     = OpLabel\n"
6067 		"%loopNdx    = OpVariable %fp_i32 Function\n"
6068 		"%insideLoopNdx = OpVariable %fp_i32 Function\n"
6069 
6070 		"OpStore %loopNdx %zero\n"
6071 		"OpBranch %loop\n"
6072 		"%loop = OpLabel\n"
6073 		"OpLoopMerge %merge %13 None\n"
6074 		"OpBranch %14\n"
6075 		"%14 = OpLabel\n"
6076 		"%valLoopNdx = OpLoad %i32 %loopNdx\n"
6077 		"%18 = OpSLessThan %bool %valLoopNdx %c_i32_7\n"
6078 		"OpBranchConditional %18 %11 %merge\n"
6079 		"%11 = OpLabel\n"
6080 		"\n"
6081 		"%f16src  = OpAccessChain %f16ptr %ssboIN %zero %valLoopNdx %zero\n"
6082 		"%val_f16 = OpLoad %f16 %f16src\n"
6083 		"%val_f32 = OpFConvert %f32 %val_f16\n"
6084 		"%f32dst  = OpAccessChain %f32ptr %ssboOUT %zero %valLoopNdx %zero\n"
6085 		"OpStore %f32dst %val_f32\n"
6086 		"\n"
6087 		"%v2f16src  = OpAccessChain %v2f16ptr %ssboIN %zero %valLoopNdx %c_i32_1\n"
6088 		"%val_v2f16 = OpLoad %v2f16 %v2f16src\n"
6089 		"%val_v2f32 = OpFConvert %v2f32 %val_v2f16\n"
6090 		"%v2f32dst  = OpAccessChain %v2f32ptr %ssboOUT %zero %valLoopNdx %c_i32_1\n"
6091 		"OpStore %v2f32dst %val_v2f32\n"
6092 		"\n"
6093 		"%v3f16src  = OpAccessChain %v3f16ptr %ssboIN %zero %valLoopNdx %c_i32_2\n"
6094 		"%val_v3f16 = OpLoad %v3f16 %v3f16src\n"
6095 		"%val_v3f32 = OpFConvert %v3f32 %val_v3f16\n"
6096 		"%v3f32dst  = OpAccessChain %v3f32ptr %ssboOUT %zero %valLoopNdx %c_i32_2\n"
6097 		"OpStore %v3f32dst %val_v3f32\n"
6098 		"\n"
6099 		"%v4f16src  = OpAccessChain %v4f16ptr %ssboIN %zero %valLoopNdx %c_i32_3\n"
6100 		"%val_v4f16 = OpLoad %v4f16 %v4f16src\n"
6101 		"%val_v4f32 = OpFConvert %v4f32 %val_v4f16\n"
6102 		"%v4f32dst  = OpAccessChain %v4f32ptr %ssboOUT %zero %valLoopNdx %c_i32_3\n"
6103 		"OpStore %v4f32dst %val_v4f32\n"
6104 		"\n"
6105 		"%f16src2  = OpAccessChain %f16ptr %ssboIN %zero %valLoopNdx %c_i32_7\n"
6106 		"%val2_f16 = OpLoad %f16 %f16src2\n"
6107 		"%val2_f32 = OpFConvert %f32 %val2_f16\n"
6108 		"%f32dst2  = OpAccessChain %f32ptr %ssboOUT %zero %valLoopNdx %c_i32_7\n"
6109 		"OpStore %f32dst2 %val2_f32\n"
6110 		"\n"
6111 		"OpStore %insideLoopNdx %zero\n"
6112 		"OpBranch %loopInside\n"
6113 		"%loopInside = OpLabel\n"
6114 		"OpLoopMerge %92 %93 None\n"
6115 		"OpBranch %94\n"
6116 		"%94 = OpLabel\n"
6117 		"%valInsideLoopNdx = OpLoad %i32 %insideLoopNdx\n"
6118 		"%96 = OpSLessThan %bool %valInsideLoopNdx %c_i32_11\n"
6119 		"OpBranchConditional %96 %91 %92\n"
6120 		"\n"
6121 		"%91 = OpLabel\n"
6122 		"\n"
6123 		"%v2f16src2  = OpAccessChain %v2f16ptr %ssboIN %zero %valLoopNdx %c_i32_6 %valInsideLoopNdx\n"
6124 		"%val2_v2f16 = OpLoad %v2f16 %v2f16src2\n"
6125 		"%val2_v2f32 = OpFConvert %v2f32 %val2_v2f16\n"
6126 		"%v2f32dst2  = OpAccessChain %v2f32ptr %ssboOUT %zero %valLoopNdx %c_i32_6 %valInsideLoopNdx\n"
6127 		"OpStore %v2f32dst2 %val2_v2f32\n"
6128 		"\n"
6129 		"%v3f16src2  = OpAccessChain %v3f16ptr %ssboIN %zero %valLoopNdx %c_i32_8 %valInsideLoopNdx\n"
6130 		"%val2_v3f16 = OpLoad %v3f16 %v3f16src2\n"
6131 		"%val2_v3f32 = OpFConvert %v3f32 %val2_v3f16\n"
6132 		"%v3f32dst2  = OpAccessChain %v3f32ptr %ssboOUT %zero %valLoopNdx %c_i32_8 %valInsideLoopNdx\n"
6133 		"OpStore %v3f32dst2 %val2_v3f32\n"
6134 		"\n"
6135 		//struct {f16, v2f16[3]}
6136 		"%Sf16src  = OpAccessChain %f16ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %zero\n"
6137 		"%Sval_f16 = OpLoad %f16 %Sf16src\n"
6138 		"%Sval_f32 = OpFConvert %f32 %Sval_f16\n"
6139 		"%Sf32dst2  = OpAccessChain %f32ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %zero\n"
6140 		"OpStore %Sf32dst2 %Sval_f32\n"
6141 		"\n"
6142 		"%Sv2f16src0   = OpAccessChain %v2f16ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %zero\n"
6143 		"%Sv2f16_0     = OpLoad %v2f16 %Sv2f16src0\n"
6144 		"%Sv2f32_0     = OpFConvert %v2f32 %Sv2f16_0\n"
6145 		"%Sv2f32dst_0  = OpAccessChain %v2f32ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %zero\n"
6146 		"OpStore %Sv2f32dst_0 %Sv2f32_0\n"
6147 		"\n"
6148 		"%Sv2f16src1  = OpAccessChain %v2f16ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %c_i32_1\n"
6149 		"%Sv2f16_1 = OpLoad %v2f16 %Sv2f16src1\n"
6150 		"%Sv2f32_1 = OpFConvert %v2f32 %Sv2f16_1\n"
6151 		"%Sv2f32dst_1  = OpAccessChain %v2f32ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %c_i32_1\n"
6152 		"OpStore %Sv2f32dst_1 %Sv2f32_1\n"
6153 		"\n"
6154 		"%Sv2f16src2  = OpAccessChain %v2f16ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %c_i32_2\n"
6155 		"%Sv2f16_2 = OpLoad %v2f16 %Sv2f16src2\n"
6156 		"%Sv2f32_2 = OpFConvert %v2f32 %Sv2f16_2\n"
6157 		"%Sv2f32dst_2  = OpAccessChain %v2f32ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %c_i32_2\n"
6158 		"OpStore %Sv2f32dst_2 %Sv2f32_2\n"
6159 		"\n"
6160 		//Array with 3 elements
6161 		"%LessThan3 = OpSLessThan %bool %valInsideLoopNdx %c_i32_3\n"
6162 		"OpSelectionMerge %BlockIf None\n"
6163 		"OpBranchConditional %LessThan3 %LabelIf %BlockIf\n"
6164 		"%LabelIf = OpLabel\n"
6165 		"  %f16src3  = OpAccessChain %f16ptr %ssboIN %zero %valLoopNdx %c_i32_4 %valInsideLoopNdx\n"
6166 		"  %val3_f16 = OpLoad %f16 %f16src3\n"
6167 		"  %val3_f32 = OpFConvert %f32 %val3_f16\n"
6168 		"  %f32dst3  = OpAccessChain %f32ptr %ssboOUT %zero %valLoopNdx %c_i32_4 %valInsideLoopNdx\n"
6169 		"  OpStore %f32dst3 %val3_f32\n"
6170 		"\n"
6171 		"  %v4f16src2  = OpAccessChain %v4f16ptr %ssboIN %zero %valLoopNdx %c_i32_9 %valInsideLoopNdx\n"
6172 		"  %val2_v4f16 = OpLoad %v4f16 %v4f16src2\n"
6173 		"  %val2_v4f32 = OpFConvert %v4f32 %val2_v4f16\n"
6174 		"  %v4f32dst2  = OpAccessChain %v4f32ptr %ssboOUT %zero %valLoopNdx %c_i32_9 %valInsideLoopNdx\n"
6175 		"  OpStore %v4f32dst2 %val2_v4f32\n"
6176 		"OpBranch %BlockIf\n"
6177 		"%BlockIf = OpLabel\n"
6178 		"\n"
6179 		"OpBranch %93\n"
6180 		"%93 = OpLabel\n"
6181 		"%132 = OpLoad %i32 %insideLoopNdx\n"
6182 		"%133 = OpIAdd %i32 %132 %c_i32_1\n"
6183 		"OpStore %insideLoopNdx %133\n"
6184 		"OpBranch %loopInside\n"
6185 		"\n"
6186 		"%92 = OpLabel\n"
6187 		"OpBranch %13\n"
6188 		"%13 = OpLabel\n"
6189 		"%134 = OpLoad %i32 %loopNdx\n"
6190 		"%135 = OpIAdd %i32 %134 %c_i32_1\n"
6191 		"OpStore %loopNdx %135\n"
6192 		"OpBranch %loop\n"
6193 
6194 		"%merge = OpLabel\n"
6195 		"         OpReturnValue %param\n"
6196 		"         OpFunctionEnd\n";
6197 
6198 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
6199 		{
6200 			vector<deFloat16>	float16Data	= (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? data16bitStd430(rnd) :  data16bitStd140(rnd);
6201 			GraphicsResources	resources;
6202 			map<string, string>	specs;
6203 			VulkanFeatures		features;
6204 			string				testName	= string(CAPABILITIES[capIdx].name);
6205 
6206 			specs["cap"]					= CAPABILITIES[capIdx].cap;
6207 			specs["indecor"]				= CAPABILITIES[capIdx].decor;
6208 			specs["strideF16"]				= getStructShaderComponet((VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? SHADERTEMPLATE_STRIDE16BIT_STD430 : SHADERTEMPLATE_STRIDE16BIT_STD140);
6209 			specs["strideF32"]				= getStructShaderComponet(SHADERTEMPLATE_STRIDE32BIT_STD430);
6210 			specs["types"]					= getStructShaderComponet(SHADERTEMPLATE_TYPES);
6211 
6212 			fragments["capability"]			= capabilities.specialize(specs);
6213 			fragments["decoration"]			= decoration.specialize(specs);
6214 			fragments["pre_main"]			= preMain.specialize(specs);
6215 
6216 			resources.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data)), CAPABILITIES[capIdx].dtype));
6217 			resources.outputs.push_back(Resource(BufferSp(new Float32Buffer(float32Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
6218 			resources.verifyIO = (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? graphicsCheckStruct<deFloat16, float, SHADERTEMPLATE_STRIDE16BIT_STD430, SHADERTEMPLATE_STRIDE32BIT_STD430> : graphicsCheckStruct<deFloat16, float, SHADERTEMPLATE_STRIDE16BIT_STD140, SHADERTEMPLATE_STRIDE32BIT_STD430>;
6219 
6220 			features												= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
6221 			features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
6222 			features.coreFeatures.fragmentStoresAndAtomics			= true;
6223 
6224 			createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
6225 		}
6226 }
6227 
addGraphics16BitStorageUniformStructFloat32To16Group(tcu::TestCaseGroup * testGroup)6228 void addGraphics16BitStorageUniformStructFloat32To16Group (tcu::TestCaseGroup* testGroup)
6229 {
6230 	de::Random							rnd					(deStringHash(testGroup->getName()));
6231 	map<string, string>					fragments;
6232 	vector<string>						extensions;
6233 	RGBA								defaultColors[4];
6234 	const StringTemplate				capabilities		("OpCapability ${cap}\n");
6235 	vector<deUint16>					float16Data			(getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD430), 0u);
6236 
6237 	extensions.push_back("VK_KHR_16bit_storage");
6238 	fragments["extension"]	= "OpExtension \"SPV_KHR_16bit_storage\"";
6239 
6240 	getDefaultColors(defaultColors);
6241 
6242 	const StringTemplate preMain		(
6243 		"\n"
6244 		"${types}\n"
6245 		"\n"
6246 		"%zero = OpConstant %i32 0\n"
6247 		"%c_i32_5 = OpConstant %i32 5\n"
6248 		"%c_i32_6 = OpConstant %i32 6\n"
6249 		"%c_i32_7 = OpConstant %i32 7\n"
6250 		"%c_i32_8 = OpConstant %i32 8\n"
6251 		"%c_i32_9 = OpConstant %i32 9\n"
6252 		"%c_i32_11 = OpConstant %i32 11\n"
6253 		"\n"
6254 		"%c_u32_7 = OpConstant %u32 7\n"
6255 		"%c_u32_11 = OpConstant %u32 11\n"
6256 		"\n"
6257 		"%f16arr3       = OpTypeArray %f16 %c_u32_3\n"
6258 		"%v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
6259 		"%v2f16arr11    = OpTypeArray %v2f16 %c_u32_11\n"
6260 		"%v3f16arr11    = OpTypeArray %v3f16 %c_u32_11\n"
6261 		"%v4f16arr3     = OpTypeArray %v4f16 %c_u32_3\n"
6262 		"%struct16      = OpTypeStruct %f16 %v2f16arr3\n"
6263 		"%struct16arr11 = OpTypeArray %struct16 %c_u32_11\n"
6264 		"%f16Struct = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr11 %v2f16arr11 %f16 %v3f16arr11 %v4f16arr3\n"
6265 		"\n"
6266 		"%f32arr3   = OpTypeArray %f32 %c_u32_3\n"
6267 		"%v2f32arr3 = OpTypeArray %v2f32 %c_u32_3\n"
6268 		"%v2f32arr11 = OpTypeArray %v2f32 %c_u32_11\n"
6269 		"%v3f32arr11 = OpTypeArray %v3f32 %c_u32_11\n"
6270 		"%v4f32arr3 = OpTypeArray %v4f32 %c_u32_3\n"
6271 		"%struct32      = OpTypeStruct %f32 %v2f32arr3\n"
6272 		"%struct32arr11 = OpTypeArray %struct32 %c_u32_11\n"
6273 		"%f32Struct = OpTypeStruct %f32 %v2f32 %v3f32 %v4f32 %f32arr3 %struct32arr11 %v2f32arr11 %f32 %v3f32arr11 %v4f32arr3\n"
6274 		"\n"
6275 		"%f16StructArr7      = OpTypeArray %f16Struct %c_u32_7\n"
6276 		"%f32StructArr7      = OpTypeArray %f32Struct %c_u32_7\n"
6277 		"%SSBO_IN            = OpTypeStruct %f32StructArr7\n"
6278 		"%SSBO_OUT           = OpTypeStruct %f16StructArr7\n"
6279 		"%up_SSBOIN          = OpTypePointer Uniform %SSBO_IN\n"
6280 		"%up_SSBOOUT         = OpTypePointer Uniform %SSBO_OUT\n"
6281 		"%ssboIN             = OpVariable %up_SSBOIN Uniform\n"
6282 		"%ssboOUT            = OpVariable %up_SSBOOUT Uniform\n"
6283 		"\n");
6284 
6285 	const StringTemplate decoration		(
6286 		"${strideF16}"
6287 		"\n"
6288 		"${strideF32}"
6289 		"\n"
6290 		"OpMemberDecorate %SSBO_IN 0 Offset 0\n"
6291 		"OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
6292 		"OpDecorate %SSBO_IN ${indecor}\n"
6293 		"OpDecorate %SSBO_OUT BufferBlock\n"
6294 		"OpDecorate %ssboIN DescriptorSet 0\n"
6295 		"OpDecorate %ssboOUT DescriptorSet 0\n"
6296 		"OpDecorate %ssboIN Binding 0\n"
6297 		"OpDecorate %ssboOUT Binding 1\n"
6298 		"\n");
6299 
6300 	fragments["testfun"]			=
6301 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
6302 		"%param = OpFunctionParameter %v4f32\n"
6303 		"%label     = OpLabel\n"
6304 		"%loopNdx    = OpVariable %fp_i32 Function\n"
6305 		"%insideLoopNdx = OpVariable %fp_i32 Function\n"
6306 
6307 		"OpStore %loopNdx %zero\n"
6308 		"OpBranch %loop\n"
6309 		"%loop = OpLabel\n"
6310 		"OpLoopMerge %merge %13 None\n"
6311 		"OpBranch %14\n"
6312 		"%14 = OpLabel\n"
6313 		"%valLoopNdx = OpLoad %i32 %loopNdx\n"
6314 		"%18 = OpSLessThan %bool %valLoopNdx %c_i32_7\n"
6315 		"OpBranchConditional %18 %11 %merge\n"
6316 		"%11 = OpLabel\n"
6317 		"\n"
6318 		"%f32src  = OpAccessChain %f32ptr %ssboIN %zero %valLoopNdx %zero\n"
6319 		"%val_f32 = OpLoad %f32 %f32src\n"
6320 		"%val_f16 = OpFConvert %f16 %val_f32\n"
6321 		"%f16dst  = OpAccessChain %f16ptr %ssboOUT %zero %valLoopNdx %zero\n"
6322 		"OpStore %f16dst %val_f16\n"
6323 		"\n"
6324 		"%v2f32src  = OpAccessChain %v2f32ptr %ssboIN %zero %valLoopNdx %c_i32_1\n"
6325 		"%val_v2f32 = OpLoad %v2f32 %v2f32src\n"
6326 		"%val_v2f16 = OpFConvert %v2f16 %val_v2f32\n"
6327 		"%v2f16dst  = OpAccessChain %v2f16ptr %ssboOUT %zero %valLoopNdx %c_i32_1\n"
6328 		"OpStore %v2f16dst %val_v2f16\n"
6329 		"\n"
6330 		"%v3f32src  = OpAccessChain %v3f32ptr %ssboIN %zero %valLoopNdx %c_i32_2\n"
6331 		"%val_v3f32 = OpLoad %v3f32 %v3f32src\n"
6332 		"%val_v3f16 = OpFConvert %v3f16 %val_v3f32\n"
6333 		"%v3f16dst  = OpAccessChain %v3f16ptr %ssboOUT %zero %valLoopNdx %c_i32_2\n"
6334 		"OpStore %v3f16dst %val_v3f16\n"
6335 		"\n"
6336 		"%v4f32src  = OpAccessChain %v4f32ptr %ssboIN %zero %valLoopNdx %c_i32_3\n"
6337 		"%val_v4f32 = OpLoad %v4f32 %v4f32src\n"
6338 		"%val_v4f16 = OpFConvert %v4f16 %val_v4f32\n"
6339 		"%v4f16dst  = OpAccessChain %v4f16ptr %ssboOUT %zero %valLoopNdx %c_i32_3\n"
6340 		"OpStore %v4f16dst %val_v4f16\n"
6341 		"\n"
6342 		"%f32src2  = OpAccessChain %f32ptr %ssboIN %zero %valLoopNdx %c_i32_7\n"
6343 		"%val2_f32 = OpLoad %f32 %f32src2\n"
6344 		"%val2_f16 = OpFConvert %f16 %val2_f32\n"
6345 		"%f16dst2  = OpAccessChain %f16ptr %ssboOUT %zero %valLoopNdx %c_i32_7\n"
6346 		"OpStore %f16dst2 %val2_f16\n"
6347 		"\n"
6348 		"OpStore %insideLoopNdx %zero\n"
6349 		"OpBranch %loopInside\n"
6350 		"%loopInside = OpLabel\n"
6351 		"OpLoopMerge %92 %93 None\n"
6352 		"OpBranch %94\n"
6353 		"%94 = OpLabel\n"
6354 		"%valInsideLoopNdx = OpLoad %i32 %insideLoopNdx\n"
6355 		"%96 = OpSLessThan %bool %valInsideLoopNdx %c_i32_11\n"
6356 		"OpBranchConditional %96 %91 %92\n"
6357 		"\n"
6358 		"%91 = OpLabel\n"
6359 		"\n"
6360 		//struct {f16, v2f16[3]}
6361 		"%Sf32src  = OpAccessChain %f32ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %zero\n"
6362 		"%Sval_f32 = OpLoad %f32 %Sf32src\n"
6363 		"%Sval_f16 = OpFConvert %f16 %Sval_f32\n"
6364 		"%Sf16dst2  = OpAccessChain %f16ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %zero\n"
6365 		"OpStore %Sf16dst2 %Sval_f16\n"
6366 		"\n"
6367 		"%Sv2f32src0   = OpAccessChain %v2f32ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %zero\n"
6368 		"%Sv2f32_0     = OpLoad %v2f32 %Sv2f32src0\n"
6369 		"%Sv2f16_0     = OpFConvert %v2f16 %Sv2f32_0\n"
6370 		"%Sv2f16dst_0  = OpAccessChain %v2f16ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %zero\n"
6371 		"OpStore %Sv2f16dst_0 %Sv2f16_0\n"
6372 		"\n"
6373 		"%Sv2f32src1  = OpAccessChain %v2f32ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %c_i32_1\n"
6374 		"%Sv2f32_1 = OpLoad %v2f32 %Sv2f32src1\n"
6375 		"%Sv2f16_1 = OpFConvert %v2f16 %Sv2f32_1\n"
6376 		"%Sv2f16dst_1  = OpAccessChain %v2f16ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %c_i32_1\n"
6377 		"OpStore %Sv2f16dst_1 %Sv2f16_1\n"
6378 		"\n"
6379 		"%Sv2f32src2  = OpAccessChain %v2f32ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %c_i32_2\n"
6380 		"%Sv2f32_2 = OpLoad %v2f32 %Sv2f32src2\n"
6381 		"%Sv2f16_2 = OpFConvert %v2f16 %Sv2f32_2\n"
6382 		"%Sv2f16dst_2  = OpAccessChain %v2f16ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %c_i32_2\n"
6383 		"OpStore %Sv2f16dst_2 %Sv2f16_2\n"
6384 		"\n"
6385 
6386 		"%v2f32src2  = OpAccessChain %v2f32ptr %ssboIN %zero %valLoopNdx %c_i32_6 %valInsideLoopNdx\n"
6387 		"%val2_v2f32 = OpLoad %v2f32 %v2f32src2\n"
6388 		"%val2_v2f16 = OpFConvert %v2f16 %val2_v2f32\n"
6389 		"%v2f16dst2  = OpAccessChain %v2f16ptr %ssboOUT %zero %valLoopNdx %c_i32_6 %valInsideLoopNdx\n"
6390 		"OpStore %v2f16dst2 %val2_v2f16\n"
6391 		"\n"
6392 		"%v3f32src2  = OpAccessChain %v3f32ptr %ssboIN %zero %valLoopNdx %c_i32_8 %valInsideLoopNdx\n"
6393 		"%val2_v3f32 = OpLoad %v3f32 %v3f32src2\n"
6394 		"%val2_v3f16 = OpFConvert %v3f16 %val2_v3f32\n"
6395 		"%v3f16dst2  = OpAccessChain %v3f16ptr %ssboOUT %zero %valLoopNdx %c_i32_8 %valInsideLoopNdx\n"
6396 		"OpStore %v3f16dst2 %val2_v3f16\n"
6397 		"\n"
6398 
6399 		//Array with 3 elements
6400 		"%LessThan3 = OpSLessThan %bool %valInsideLoopNdx %c_i32_3\n"
6401 		"OpSelectionMerge %BlockIf None\n"
6402 		"OpBranchConditional %LessThan3 %LabelIf %BlockIf\n"
6403 		"  %LabelIf = OpLabel\n"
6404 		"  %f32src3  = OpAccessChain %f32ptr %ssboIN %zero %valLoopNdx %c_i32_4 %valInsideLoopNdx\n"
6405 		"  %val3_f32 = OpLoad %f32 %f32src3\n"
6406 		"  %val3_f16 = OpFConvert %f16 %val3_f32\n"
6407 		"  %f16dst3  = OpAccessChain %f16ptr %ssboOUT %zero %valLoopNdx %c_i32_4 %valInsideLoopNdx\n"
6408 		"  OpStore %f16dst3 %val3_f16\n"
6409 		"\n"
6410 		"  %v4f32src2  = OpAccessChain %v4f32ptr %ssboIN %zero %valLoopNdx %c_i32_9 %valInsideLoopNdx\n"
6411 		"  %val2_v4f32 = OpLoad %v4f32 %v4f32src2\n"
6412 		"  %val2_v4f16 = OpFConvert %v4f16 %val2_v4f32\n"
6413 		"  %v4f16dst2  = OpAccessChain %v4f16ptr %ssboOUT %zero %valLoopNdx %c_i32_9 %valInsideLoopNdx\n"
6414 		"  OpStore %v4f16dst2 %val2_v4f16\n"
6415 		"OpBranch %BlockIf\n"
6416 		"%BlockIf = OpLabel\n"
6417 
6418 		"OpBranch %93\n"
6419 		"%93 = OpLabel\n"
6420 		"%132 = OpLoad %i32 %insideLoopNdx\n"
6421 		"%133 = OpIAdd %i32 %132 %c_i32_1\n"
6422 		"OpStore %insideLoopNdx %133\n"
6423 		"OpBranch %loopInside\n"
6424 		"\n"
6425 		"%92 = OpLabel\n"
6426 		"OpBranch %13\n"
6427 		"%13 = OpLabel\n"
6428 		"%134 = OpLoad %i32 %loopNdx\n"
6429 		"%135 = OpIAdd %i32 %134 %c_i32_1\n"
6430 		"OpStore %loopNdx %135\n"
6431 		"OpBranch %loop\n"
6432 
6433 		"%merge = OpLabel\n"
6434 		"         OpReturnValue %param\n"
6435 		"         OpFunctionEnd\n";
6436 
6437 	for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
6438 	{
6439 		map<string, string>	specs;
6440 		string				testName	= string(CAPABILITIES[capIdx].name);
6441 		vector<float>		float32Data	= (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? data32bitStd430(rnd) : data32bitStd140(rnd);
6442 		GraphicsResources	resources;
6443 
6444 		specs["cap"]					= "StorageUniformBufferBlock16";
6445 		specs["indecor"]				= CAPABILITIES[capIdx].decor;
6446 		specs["strideF16"]				= getStructShaderComponet(SHADERTEMPLATE_STRIDE16BIT_STD430);
6447 		specs["strideF32"]				= getStructShaderComponet((VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? SHADERTEMPLATE_STRIDE32BIT_STD430 : SHADERTEMPLATE_STRIDE32BIT_STD140);
6448 		specs["types"]					= getStructShaderComponet(SHADERTEMPLATE_TYPES);
6449 
6450 		fragments["capability"]			= capabilities.specialize(specs);
6451 		fragments["decoration"]			= decoration.specialize(specs);
6452 		fragments["pre_main"]			= preMain.specialize(specs);
6453 
6454 		resources.inputs.push_back(Resource(BufferSp(new Float32Buffer(float32Data)), CAPABILITIES[capIdx].dtype));
6455 		resources.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
6456 		resources.verifyIO				=  (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? graphicsCheckStruct<float, deFloat16, SHADERTEMPLATE_STRIDE32BIT_STD430, SHADERTEMPLATE_STRIDE16BIT_STD430> : graphicsCheckStruct<float, deFloat16, SHADERTEMPLATE_STRIDE32BIT_STD140, SHADERTEMPLATE_STRIDE16BIT_STD430>;
6457 
6458 		VulkanFeatures features;
6459 
6460 		features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
6461 		features.coreFeatures.fragmentStoresAndAtomics			= true;
6462 		features.ext16BitStorage								= EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
6463 
6464 		createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
6465 	}
6466 }
6467 
addGraphics16bitStructMixedTypesGroup(tcu::TestCaseGroup * group)6468 void addGraphics16bitStructMixedTypesGroup (tcu::TestCaseGroup* group)
6469 {
6470 	de::Random							rnd					(deStringHash(group->getName()));
6471 	map<string, string>					fragments;
6472 	vector<string>						extensions;
6473 	RGBA								defaultColors[4];
6474 	const StringTemplate				capabilities		("OpCapability StorageUniformBufferBlock16\n"
6475 															"${cap}\n");
6476 	vector<deInt16>						outData				(getStructSize(SHADERTEMPLATE_STRIDEMIX_STD430), 0u);
6477 
6478 	extensions.push_back("VK_KHR_16bit_storage");
6479 	fragments["extension"]	= "OpExtension \"SPV_KHR_16bit_storage\"\n";
6480 
6481 	getDefaultColors(defaultColors);
6482 
6483 	const StringTemplate				preMain				(
6484 		"\n"//Types
6485 		"%i16    = OpTypeInt 16 1\n"
6486 		"%v2i16  = OpTypeVector %i16 2\n"
6487 		"%v3i16  = OpTypeVector %i16 3\n"
6488 		"%v4i16  = OpTypeVector %i16 4\n"
6489 		"\n"//Consta value
6490 		"%zero     = OpConstant %i32 0\n"
6491 		"%c_i32_5  = OpConstant %i32 5\n"
6492 		"%c_i32_6  = OpConstant %i32 6\n"
6493 		"%c_i32_7  = OpConstant %i32 7\n"
6494 		"%c_i32_8  = OpConstant %i32 8\n"
6495 		"%c_i32_9  = OpConstant %i32 9\n"
6496 		"%c_i32_10 = OpConstant %i32 10\n"
6497 		"%c_i32_11 = OpConstant %i32 11\n"
6498 		"%c_u32_7  = OpConstant %u32 7\n"
6499 		"%c_u32_11 = OpConstant %u32 11\n"
6500 		"\n"//Arrays & Structs
6501 		"%v2b16NestedArr11In  = OpTypeArray %v2i16 %c_u32_11\n"
6502 		"%b32NestedArr11In   = OpTypeArray %i32 %c_u32_11\n"
6503 		"%sb16Arr11In         = OpTypeArray %i16 %c_u32_11\n"
6504 		"%sb32Arr11In        = OpTypeArray %i32 %c_u32_11\n"
6505 		"%sNestedIn          = OpTypeStruct %i16 %i32 %v2b16NestedArr11In %b32NestedArr11In\n"
6506 		"%sNestedArr11In     = OpTypeArray %sNestedIn %c_u32_11\n"
6507 		"%structIn           = OpTypeStruct %i16 %i32 %v2i16 %v2i32 %v3i16 %v3i32 %v4i16 %v4i32 %sNestedArr11In %sb16Arr11In %sb32Arr11In\n"
6508 		"%structArr7In       = OpTypeArray %structIn %c_u32_7\n"
6509 		"%v2b16NestedArr11Out = OpTypeArray %v2i16 %c_u32_11\n"
6510 		"%b32NestedArr11Out  = OpTypeArray %i32 %c_u32_11\n"
6511 		"%sb16Arr11Out        = OpTypeArray %i16 %c_u32_11\n"
6512 		"%sb32Arr11Out       = OpTypeArray %i32 %c_u32_11\n"
6513 		"%sNestedOut         = OpTypeStruct %i16 %i32 %v2b16NestedArr11Out %b32NestedArr11Out\n"
6514 		"%sNestedArr11Out    = OpTypeArray %sNestedOut %c_u32_11\n"
6515 		"%structOut          = OpTypeStruct %i16 %i32 %v2i16 %v2i32 %v3i16 %v3i32 %v4i16 %v4i32 %sNestedArr11Out %sb16Arr11Out %sb32Arr11Out\n"
6516 		"%structArr7Out      = OpTypeArray %structOut %c_u32_7\n"
6517 		"\n"//Pointers
6518 		"%i16outPtr    = OpTypePointer Uniform %i16\n"
6519 		"%v2i16outPtr  = OpTypePointer Uniform %v2i16\n"
6520 		"%v3i16outPtr  = OpTypePointer Uniform %v3i16\n"
6521 		"%v4i16outPtr  = OpTypePointer Uniform %v4i16\n"
6522 		"%i32outPtr   = OpTypePointer Uniform %i32\n"
6523 		"%v2i32outPtr = OpTypePointer Uniform %v2i32\n"
6524 		"%v3i32outPtr = OpTypePointer Uniform %v3i32\n"
6525 		"%v4i32outPtr = OpTypePointer Uniform %v4i32\n"
6526 		"%uvec3ptr = OpTypePointer Input %v3u32\n"
6527 		"\n"//SSBO IN
6528 		"%SSBO_IN    = OpTypeStruct %structArr7In\n"
6529 		"%up_SSBOIN  = OpTypePointer Uniform %SSBO_IN\n"
6530 		"%ssboIN     = OpVariable %up_SSBOIN Uniform\n"
6531 		"\n"//SSBO OUT
6532 		"%SSBO_OUT   = OpTypeStruct %structArr7Out\n"
6533 		"%up_SSBOOUT = OpTypePointer Uniform %SSBO_OUT\n"
6534 		"%ssboOUT    = OpVariable %up_SSBOOUT Uniform\n");
6535 
6536 		const StringTemplate			decoration			(
6537 		"${OutOffsets}"
6538 		"${InOffsets}"
6539 		"\n"//SSBO IN
6540 		"OpMemberDecorate %SSBO_IN 0 Offset 0\n"
6541 		"OpDecorate %ssboIN DescriptorSet 0\n"
6542 		"OpDecorate %SSBO_IN ${storage}\n"
6543 		"OpDecorate %SSBO_OUT BufferBlock\n"
6544 		"OpDecorate %ssboIN Binding 0\n"
6545 		"\n"//SSBO OUT
6546 		"OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
6547 		"OpDecorate %ssboOUT DescriptorSet 0\n"
6548 		"OpDecorate %ssboOUT Binding 1\n");
6549 
6550 		const StringTemplate			testFun				(
6551 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
6552 		"%param     = OpFunctionParameter %v4f32\n"
6553 		"%label     = OpLabel\n"
6554 		"%ndxArrx   = OpVariable %fp_i32  Function\n"
6555 		"%ndxArry   = OpVariable %fp_i32  Function\n"
6556 		"%ndxArrz   = OpVariable %fp_i32  Function\n"
6557 		"${xBeginLoop}"
6558 		"\n"//strutOut.b16 = strutIn.b16
6559 		"%inP1  = OpAccessChain %i16${inPtr} %ssboIN %zero %Valx %zero\n"
6560 		"%inV1  = OpLoad %i16 %inP1\n"
6561 		"%outP1 = OpAccessChain %i16outPtr %ssboOUT %zero %Valx %zero\n"
6562 		"OpStore %outP1 %inV1\n"
6563 		"\n"//strutOut.b32 = strutIn.b32
6564 		"%inP2  = OpAccessChain %i32${inPtr} %ssboIN %zero %Valx %c_i32_1\n"
6565 		"%inV2  = OpLoad %i32 %inP2\n"
6566 		"%outP2 = OpAccessChain %i32outPtr %ssboOUT %zero %Valx %c_i32_1\n"
6567 		"OpStore %outP2 %inV2\n"
6568 		"\n"//strutOut.v2b16 = strutIn.v2b16
6569 		"%inP3  = OpAccessChain %v2i16${inPtr} %ssboIN %zero %Valx %c_i32_2\n"
6570 		"%inV3  = OpLoad %v2i16 %inP3\n"
6571 		"%outP3 = OpAccessChain %v2i16outPtr %ssboOUT %zero %Valx %c_i32_2\n"
6572 		"OpStore %outP3 %inV3\n"
6573 		"\n"//strutOut.v2b32 = strutIn.v2b32
6574 		"%inP4  = OpAccessChain %v2i32${inPtr} %ssboIN %zero %Valx %c_i32_3\n"
6575 		"%inV4  = OpLoad %v2i32 %inP4\n"
6576 		"%outP4 = OpAccessChain %v2i32outPtr %ssboOUT %zero %Valx %c_i32_3\n"
6577 		"OpStore %outP4 %inV4\n"
6578 		"\n"//strutOut.v3b16 = strutIn.v3b16
6579 		"%inP5  = OpAccessChain %v3i16${inPtr} %ssboIN %zero %Valx %c_i32_4\n"
6580 		"%inV5  = OpLoad %v3i16 %inP5\n"
6581 		"%outP5 = OpAccessChain %v3i16outPtr %ssboOUT %zero %Valx %c_i32_4\n"
6582 		"OpStore %outP5 %inV5\n"
6583 		"\n"//strutOut.v3b32 = strutIn.v3b32
6584 		"%inP6  = OpAccessChain %v3i32${inPtr} %ssboIN %zero %Valx %c_i32_5\n"
6585 		"%inV6  = OpLoad %v3i32 %inP6\n"
6586 		"%outP6 = OpAccessChain %v3i32outPtr %ssboOUT %zero %Valx %c_i32_5\n"
6587 		"OpStore %outP6 %inV6\n"
6588 		"\n"//strutOut.v4b16 = strutIn.v4b16
6589 		"%inP7  = OpAccessChain %v4i16${inPtr} %ssboIN %zero %Valx %c_i32_6\n"
6590 		"%inV7  = OpLoad %v4i16 %inP7\n"
6591 		"%outP7 = OpAccessChain %v4i16outPtr %ssboOUT %zero %Valx %c_i32_6\n"
6592 		"OpStore %outP7 %inV7\n"
6593 		"\n"//strutOut.v4b32 = strutIn.v4b32
6594 		"%inP8  = OpAccessChain %v4i32${inPtr} %ssboIN %zero %Valx %c_i32_7\n"
6595 		"%inV8  = OpLoad %v4i32 %inP8\n"
6596 		"%outP8 = OpAccessChain %v4i32outPtr %ssboOUT %zero %Valx %c_i32_7\n"
6597 		"OpStore %outP8 %inV8\n"
6598 		"${yBeginLoop}"
6599 		"\n"//strutOut.b16[y] = strutIn.b16[y]
6600 		"%inP9  = OpAccessChain %i16${inPtr} %ssboIN %zero %Valx %c_i32_9 %Valy\n"
6601 		"%inV9  = OpLoad %i16 %inP9\n"
6602 		"%outP9 = OpAccessChain %i16outPtr %ssboOUT %zero %Valx %c_i32_9 %Valy\n"
6603 		"OpStore %outP9 %inV9\n"
6604 		"\n"//strutOut.b32[y] = strutIn.b32[y]
6605 		"%inP10  = OpAccessChain %i32${inPtr} %ssboIN %zero %Valx %c_i32_10 %Valy\n"
6606 		"%inV10  = OpLoad %i32 %inP10\n"
6607 		"%outP10 = OpAccessChain %i32outPtr %ssboOUT %zero %Valx %c_i32_10 %Valy\n"
6608 		"OpStore %outP10 %inV10\n"
6609 		"\n"//strutOut.strutNestedOut[y].b16 = strutIn.strutNestedIn[y].b16
6610 		"%inP11 = OpAccessChain %i16${inPtr} %ssboIN %zero %Valx %c_i32_8 %Valy %zero\n"
6611 		"%inV11 = OpLoad %i16 %inP11\n"
6612 		"%outP11 = OpAccessChain %i16outPtr %ssboOUT %zero %Valx %c_i32_8 %Valy %zero\n"
6613 		"OpStore %outP11 %inV11\n"
6614 		"\n"//strutOut.strutNestedOut[y].b32 = strutIn.strutNestedIn[y].b32
6615 		"%inP12 = OpAccessChain %i32${inPtr} %ssboIN %zero %Valx %c_i32_8 %Valy %c_i32_1\n"
6616 		"%inV12 = OpLoad %i32 %inP12\n"
6617 		"%outP12 = OpAccessChain %i32outPtr %ssboOUT %zero %Valx %c_i32_8 %Valy %c_i32_1\n"
6618 		"OpStore %outP12 %inV12\n"
6619 		"${zBeginLoop}"
6620 		"\n"//strutOut.strutNestedOut[y].v2b16[valNdx] = strutIn.strutNestedIn[y].v2b16[valNdx]
6621 		"%inP13  = OpAccessChain %v2i16${inPtr} %ssboIN %zero %Valx %c_i32_8 %Valy %c_i32_2 %Valz\n"
6622 		"%inV13  = OpLoad %v2i16 %inP13\n"
6623 		"%outP13 = OpAccessChain %v2i16outPtr %ssboOUT %zero %Valx %c_i32_8 %Valy %c_i32_2 %Valz\n"
6624 		"OpStore %outP13 %inV13\n"
6625 		"\n"//strutOut.strutNestedOut[y].b32[valNdx] = strutIn.strutNestedIn[y].b32[valNdx]
6626 		"%inP14  = OpAccessChain %i32${inPtr} %ssboIN %zero %Valx %c_i32_8 %Valy %c_i32_3 %Valz\n"
6627 		"%inV14  = OpLoad %i32 %inP14\n"
6628 		"%outP14 = OpAccessChain %i32outPtr %ssboOUT %zero %Valx %c_i32_8 %Valy %c_i32_3 %Valz\n"
6629 		"OpStore %outP14 %inV14\n"
6630 		"${zEndLoop}"
6631 		"${yEndLoop}"
6632 		"${xEndLoop}"
6633 		"\n"
6634 		"OpBranch %ExitLabel\n"
6635 		"%ExitLabel = OpLabel\n"
6636 		"OpReturnValue %param\n"
6637 		"OpFunctionEnd\n");
6638 
6639 	for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
6640 	{  // int
6641 		const bool				isUniform	= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER == CAPABILITIES[capIdx].dtype;
6642 		vector<deInt16>			inData		= isUniform ? dataMixStd140(rnd) : dataMixStd430(rnd);
6643 		GraphicsResources		resources;
6644 		map<string, string>		specsLoop;
6645 		map<string, string>		specsOffset;
6646 		map<string, string>		specs;
6647 		VulkanFeatures			features;
6648 		string					testName	= string(CAPABILITIES[capIdx].name);
6649 
6650 		specsLoop["exeCount"]	= "c_i32_7";
6651 		specsLoop["loopName"]	= "x";
6652 		specs["xBeginLoop"]		= beginLoop(specsLoop);
6653 		specs["xEndLoop"]		= endLoop(specsLoop);
6654 
6655 		specsLoop["exeCount"]	= "c_i32_11";
6656 		specsLoop["loopName"]	= "y";
6657 		specs["yBeginLoop"]		= beginLoop(specsLoop);
6658 		specs["yEndLoop"]		= endLoop(specsLoop);
6659 
6660 		specsLoop["exeCount"]	= "c_i32_11";
6661 		specsLoop["loopName"]	= "z";
6662 		specs["zBeginLoop"]		= beginLoop(specsLoop);
6663 		specs["zEndLoop"]		= endLoop(specsLoop);
6664 
6665 		specs["storage"]		= isUniform ? "Block" : "BufferBlock";
6666 		specs["cap"]			= isUniform ?"OpCapability " + string( CAPABILITIES[capIdx].cap) : "";
6667 		specs["inPtr"]			= "outPtr";
6668 		specsOffset["InOut"]	= "In";
6669 		specs["InOffsets"]		= StringTemplate(isUniform ? getStructShaderComponet(SHADERTEMPLATE_STRIDEMIX_STD140) : getStructShaderComponet(SHADERTEMPLATE_STRIDEMIX_STD430)).specialize(specsOffset);
6670 		specsOffset["InOut"]	= "Out";
6671 		specs["OutOffsets"]		= StringTemplate(getStructShaderComponet(SHADERTEMPLATE_STRIDEMIX_STD430)).specialize(specsOffset);
6672 
6673 		fragments["capability"]			= capabilities.specialize(specs);
6674 		fragments["decoration"]			= decoration.specialize(specs);
6675 		fragments["pre_main"]			= preMain.specialize(specs);
6676 		fragments["testfun"]			= testFun.specialize(specs);
6677 
6678 		resources.verifyIO				= isUniform ? graphicsCheckStruct<deInt16, deInt16, SHADERTEMPLATE_STRIDEMIX_STD140, SHADERTEMPLATE_STRIDEMIX_STD430> : graphicsCheckStruct<deInt16, deInt16, SHADERTEMPLATE_STRIDEMIX_STD430, SHADERTEMPLATE_STRIDEMIX_STD430>;
6679 		resources.inputs.push_back(Resource(BufferSp(new Int16Buffer(inData)), CAPABILITIES[capIdx].dtype));
6680 		resources.outputs.push_back(Resource(BufferSp(new Int16Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
6681 
6682 		features												= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
6683 		features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
6684 		features.coreFeatures.fragmentStoresAndAtomics			= true;
6685 
6686 		createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, group, features);
6687 	}
6688 }
6689 
addGraphics16BitStorageInputOutputFloat16To64Group(tcu::TestCaseGroup * testGroup)6690 void addGraphics16BitStorageInputOutputFloat16To64Group (tcu::TestCaseGroup* testGroup)
6691 {
6692 	de::Random				rnd					(deStringHash(testGroup->getName()));
6693 	RGBA					defaultColors[4];
6694 	vector<string>			extensions;
6695 	map<string, string>		fragments			= passthruFragments();
6696 	const deUint32			numDataPoints		= 64;
6697 	vector<deFloat16>		float16Data			(getFloat16s(rnd, numDataPoints));
6698 	vector<double>			float64Data;
6699 
6700 	float64Data.reserve(numDataPoints);
6701 	for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
6702 		float64Data.push_back(deFloat16To64(float16Data[numIdx]));
6703 
6704 	extensions.push_back("VK_KHR_16bit_storage");
6705 
6706 	fragments["capability"]				=
6707 		"OpCapability StorageInputOutput16\n"
6708 		"OpCapability Float64\n";
6709 	fragments["extension"]				= "OpExtension \"SPV_KHR_16bit_storage\"\n";
6710 
6711 	getDefaultColors(defaultColors);
6712 
6713 	struct Case
6714 	{
6715 		const char*	name;
6716 		const char*	interfaceOpCall;
6717 		const char*	interfaceOpFunc;
6718 		const char*	preMain;
6719 		const char*	inputType;
6720 		const char*	outputType;
6721 		deUint32	numPerCase;
6722 		deUint32	numElements;
6723 	};
6724 
6725 	Case	cases[]		=
6726 	{
6727 		{ // Scalar cases
6728 			"scalar",
6729 
6730 			"OpFConvert %f64",
6731 			"",
6732 
6733 			"             %f16 = OpTypeFloat 16\n"
6734 			"             %f64 = OpTypeFloat 64\n"
6735 			"		        %v4f64 = OpTypeVector %f64 4\n"
6736 			"          %ip_f16 = OpTypePointer Input %f16\n"
6737 			"           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
6738 			"        %ip_a3f16 = OpTypePointer Input %a3f16\n"
6739 			"%f64_f16_function = OpTypeFunction %f64 %f16\n"
6740 			"           %a3f64 = OpTypeArray %f64 %c_i32_3\n"
6741 			"            %op_f64 = OpTypePointer Output %f64\n"
6742 			"        %op_a3f64 = OpTypePointer Output %a3f64\n",
6743 
6744 			"f16",
6745 			"f64",
6746 			4,
6747 			1,
6748 		},
6749 		{ // Vector cases
6750 			"vector",
6751 
6752 			"OpFConvert %v2f64",
6753 			"",
6754 
6755 			"                 %f16 = OpTypeFloat 16\n"
6756 			"		        %v2f16 = OpTypeVector %f16 2\n"
6757 			"                 %f64 = OpTypeFloat 64\n"
6758 			"		        %v2f64 = OpTypeVector %f64 2\n"
6759 			"		        %v4f64 = OpTypeVector %f64 4\n"
6760 			"            %ip_v2f16 = OpTypePointer Input %v2f16\n"
6761 			"             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
6762 			"          %ip_a3v2f16 = OpTypePointer Input %a3v2f16\n"
6763 			"%v2f64_v2f16_function = OpTypeFunction %v2f64 %v2f16\n"
6764 			"             %a3v2f64 = OpTypeArray %v2f64 %c_i32_3\n"
6765 			"            %op_f64 = OpTypePointer Output %f64\n"
6766 			"            %op_v2f64 = OpTypePointer Output %v2f64\n"
6767 			"            %op_v4f64 = OpTypePointer Output %v4f64\n"
6768 			"          %op_a3v2f64 = OpTypePointer Output %a3v2f64\n",
6769 
6770 			"v2f16",
6771 			"v2f64",
6772 			2 * 4,
6773 			2,
6774 		}
6775 	};
6776 
6777 	VulkanFeatures	requiredFeatures;
6778 
6779 	requiredFeatures.coreFeatures.shaderFloat64	= DE_TRUE;
6780 	requiredFeatures.ext16BitStorage			= EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
6781 
6782 	for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
6783 	{
6784 		fragments["interface_op_call"]	= cases[caseIdx].interfaceOpCall;
6785 		fragments["interface_op_func"]	= cases[caseIdx].interfaceOpFunc;
6786 		fragments["pre_main"]			= cases[caseIdx].preMain;
6787 
6788 		fragments["input_type"]			= cases[caseIdx].inputType;
6789 		fragments["output_type"]		= cases[caseIdx].outputType;
6790 
6791 		GraphicsInterfaces	interfaces;
6792 		const deUint32		numPerCase	= cases[caseIdx].numPerCase;
6793 		vector<deFloat16>	subInputs	(numPerCase);
6794 		vector<double>		subOutputs	(numPerCase);
6795 
6796 		for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
6797 		{
6798 			string			testName	= string(cases[caseIdx].name) + numberToString(caseNdx);
6799 
6800 			for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
6801 			{
6802 				subInputs[numNdx]	= float16Data[caseNdx * numPerCase + numNdx];
6803 				subOutputs[numNdx]	= float64Data[caseNdx * numPerCase + numNdx];
6804 			}
6805 			interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16), BufferSp(new Float16Buffer(subInputs))),
6806 									  std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT64), BufferSp(new Float64Buffer(subOutputs))));
6807 			createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
6808 		}
6809 	}
6810 }
6811 
addGraphics16BitStorageUniformFloat16To64Group(tcu::TestCaseGroup * testGroup)6812 void addGraphics16BitStorageUniformFloat16To64Group (tcu::TestCaseGroup* testGroup)
6813 {
6814 	de::Random							rnd					(deStringHash(testGroup->getName()));
6815 	map<string, string>					fragments;
6816 	vector<string>						extensions;
6817 	const deUint32						numDataPoints		= 256;
6818 	RGBA								defaultColors[4];
6819 	const StringTemplate				capabilities		("OpCapability ${cap}\n"
6820 															 "OpCapability Float64\n");
6821 	vector<deFloat16>					float16Data			= getFloat16s(rnd, numDataPoints);
6822 
6823 	struct ConstantIndex
6824 	{
6825 		bool		useConstantIndex;
6826 		deUint32	constantIndex;
6827 	};
6828 
6829 	ConstantIndex	constantIndices[] =
6830 	{
6831 		{ false,	0 },
6832 		{ true,		4 },
6833 		{ true,		5 },
6834 		{ true,		6 }
6835 	};
6836 
6837 	extensions.push_back("VK_KHR_16bit_storage");
6838 
6839 	fragments["extension"]	= "OpExtension \"SPV_KHR_16bit_storage\"";
6840 
6841 	getDefaultColors(defaultColors);
6842 
6843 	{ // scalar cases
6844 		const StringTemplate preMain		(
6845 			"      %f16 = OpTypeFloat 16\n"
6846 			"      %f64 = OpTypeFloat 64\n"
6847 			"%c_i32_256 = OpConstant %i32 256\n"
6848 			" %c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
6849 			"   %up_f64 = OpTypePointer Uniform %f64\n"
6850 			"   %up_f16 = OpTypePointer Uniform %f16\n"
6851 			"   %ra_f64 = OpTypeArray %f64 %c_i32_256\n"
6852 			"   %ra_f16 = OpTypeArray %f16 %c_i32_256\n"
6853 			"   %SSBO64 = OpTypeStruct %ra_f64\n"
6854 			"   %SSBO16 = OpTypeStruct %ra_f16\n"
6855 			"%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
6856 			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
6857 			"   %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
6858 			"   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
6859 
6860 		const StringTemplate decoration		(
6861 			"OpDecorate %ra_f64 ArrayStride 8\n"
6862 			"OpDecorate %ra_f16 ArrayStride ${stride16}\n"
6863 			"OpMemberDecorate %SSBO64 0 Offset 0\n"
6864 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
6865 			"OpDecorate %SSBO64 BufferBlock\n"
6866 			"OpDecorate %SSBO16 ${indecor}\n"
6867 			"OpDecorate %ssbo64 DescriptorSet 0\n"
6868 			"OpDecorate %ssbo16 DescriptorSet 0\n"
6869 			"OpDecorate %ssbo64 Binding 1\n"
6870 			"OpDecorate %ssbo16 Binding 0\n");
6871 
6872 		// ssbo64[] <- convert ssbo16[] to 64bit float
6873 		const StringTemplate testFun		(
6874 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
6875 			"    %param = OpFunctionParameter %v4f32\n"
6876 
6877 			"%entry = OpLabel\n"
6878 			"    %i = OpVariable %fp_i32 Function\n"
6879 			"         OpStore %i %c_i32_0\n"
6880 			"         OpBranch %loop\n"
6881 
6882 			" %loop = OpLabel\n"
6883 			"   %15 = OpLoad %i32 %i\n"
6884 			"   %lt = OpSLessThan %bool %15 %c_i32_256\n"
6885 			"         OpLoopMerge %merge %inc None\n"
6886 			"         OpBranchConditional %lt %write %merge\n"
6887 
6888 			"%write = OpLabel\n"
6889 			"   %30 = OpLoad %i32 %i\n"
6890 			"  %src = OpAccessChain %up_f16 %ssbo16 %c_i32_0 %${arrayindex}\n"
6891 			"%val16 = OpLoad %f16 %src\n"
6892 			"%val64 = OpFConvert %f64 %val16\n"
6893 			"  %dst = OpAccessChain %up_f64 %ssbo64 %c_i32_0 %30\n"
6894 			"         OpStore %dst %val64\n"
6895 			"         OpBranch %inc\n"
6896 
6897 			"  %inc = OpLabel\n"
6898 			"   %37 = OpLoad %i32 %i\n"
6899 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
6900 			"         OpStore %i %39\n"
6901 			"         OpBranch %loop\n"
6902 
6903 			"%merge = OpLabel\n"
6904 			"         OpReturnValue %param\n"
6905 
6906 			"OpFunctionEnd\n");
6907 
6908 		for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
6909 		{
6910 			for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
6911 			{
6912 				GraphicsResources	resources;
6913 				map<string, string>	specs;
6914 				string				testName	= string(CAPABILITIES[capIdx].name) + "_scalar_float";
6915 				bool				useConstIdx	= constantIndices[constIndexIdx].useConstantIndex;
6916 				deUint32			constIdx	= constantIndices[constIndexIdx].constantIndex;
6917 				const bool			isUBO		= CAPABILITIES[capIdx].dtype == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
6918 
6919 				specs["cap"]					= CAPABILITIES[capIdx].cap;
6920 				specs["indecor"]				= CAPABILITIES[capIdx].decor;
6921 				specs["constarrayidx"]			= de::toString(constIdx);
6922 				specs["stride16"]				= isUBO ? "16" : "2";
6923 
6924 				if (useConstIdx)
6925 					specs["arrayindex"] = "c_i32_ci";
6926 				else
6927 					specs["arrayindex"] = "30";
6928 
6929 				fragments["capability"]			= capabilities.specialize(specs);
6930 				fragments["decoration"]			= decoration.specialize(specs);
6931 				fragments["pre_main"]			= preMain.specialize(specs);
6932 				fragments["testfun"]			= testFun.specialize(specs);
6933 
6934 				vector<double>		float64Data;
6935 				float64Data.reserve(numDataPoints);
6936 				for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
6937 					float64Data.push_back(deFloat16To64(float16Data[useConstIdx ? constIdx : numIdx]));
6938 
6939 				resources.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data, isUBO ? 14 : 0)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
6940 				resources.outputs.push_back(Resource(BufferSp(new Float64Buffer(float64Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
6941 				resources.verifyIO = check64BitFloats;
6942 				resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
6943 
6944 				if (useConstIdx)
6945 					testName += string("_const_idx_") + de::toString(constIdx);
6946 
6947 				VulkanFeatures features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
6948 
6949 				features.coreFeatures.shaderFloat64	= DE_TRUE;
6950 
6951 				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
6952 			}
6953 		}
6954 	}
6955 
6956 	{ // vector cases
6957 		const StringTemplate preMain		(
6958 			"      %f16 = OpTypeFloat 16\n"
6959 			"      %f64 = OpTypeFloat 64\n"
6960 			"%c_i32_128 = OpConstant %i32 128\n"
6961 			"%c_i32_ci  = OpConstant %i32 ${constarrayidx}\n"
6962 			"	 %v2f16 = OpTypeVector %f16 2\n"
6963 			"	 %v2f64 = OpTypeVector %f64 2\n"
6964 			" %up_v2f64 = OpTypePointer Uniform %v2f64\n"
6965 			" %up_v2f16 = OpTypePointer Uniform %v2f16\n"
6966 			" %ra_v2f64 = OpTypeArray %v2f64 %c_i32_128\n"
6967 			" %ra_v2f16 = OpTypeArray %v2f16 %c_i32_128\n"
6968 			"   %SSBO64 = OpTypeStruct %ra_v2f64\n"
6969 			"   %SSBO16 = OpTypeStruct %ra_v2f16\n"
6970 			"%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
6971 			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
6972 			"   %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
6973 			"   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
6974 
6975 		const StringTemplate decoration		(
6976 			"OpDecorate %ra_v2f64 ArrayStride 16\n"
6977 			"OpDecorate %ra_v2f16 ArrayStride ${stride16}\n"
6978 			"OpMemberDecorate %SSBO64 0 Offset 0\n"
6979 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
6980 			"OpDecorate %SSBO64 BufferBlock\n"
6981 			"OpDecorate %SSBO16 ${indecor}\n"
6982 			"OpDecorate %ssbo64 DescriptorSet 0\n"
6983 			"OpDecorate %ssbo16 DescriptorSet 0\n"
6984 			"OpDecorate %ssbo64 Binding 1\n"
6985 			"OpDecorate %ssbo16 Binding 0\n");
6986 
6987 		// ssbo64[] <- convert ssbo16[] to 64bit float
6988 		const StringTemplate testFun		(
6989 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
6990 			"    %param = OpFunctionParameter %v4f32\n"
6991 
6992 			"%entry = OpLabel\n"
6993 			"    %i = OpVariable %fp_i32 Function\n"
6994 			"         OpStore %i %c_i32_0\n"
6995 			"         OpBranch %loop\n"
6996 
6997 			" %loop = OpLabel\n"
6998 			"   %15 = OpLoad %i32 %i\n"
6999 			"   %lt = OpSLessThan %bool %15 %c_i32_128\n"
7000 			"         OpLoopMerge %merge %inc None\n"
7001 			"         OpBranchConditional %lt %write %merge\n"
7002 
7003 			"%write = OpLabel\n"
7004 			"   %30 = OpLoad %i32 %i\n"
7005 			"  %src = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %${arrayindex}\n"
7006 			"%val16 = OpLoad %v2f16 %src\n"
7007 			"%val64 = OpFConvert %v2f64 %val16\n"
7008 			"  %dst = OpAccessChain %up_v2f64 %ssbo64 %c_i32_0 %30\n"
7009 			"         OpStore %dst %val64\n"
7010 			"         OpBranch %inc\n"
7011 
7012 			"  %inc = OpLabel\n"
7013 			"   %37 = OpLoad %i32 %i\n"
7014 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
7015 			"         OpStore %i %39\n"
7016 			"         OpBranch %loop\n"
7017 
7018 			"%merge = OpLabel\n"
7019 			"         OpReturnValue %param\n"
7020 
7021 			"OpFunctionEnd\n");
7022 
7023 		for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
7024 		{
7025 			for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
7026 			{
7027 				GraphicsResources	resources;
7028 				map<string, string>	specs;
7029 				string				testName	= string(CAPABILITIES[capIdx].name) + "_vector_float";
7030 				bool				useConstIdx	= constantIndices[constIndexIdx].useConstantIndex;
7031 				deUint32			constIdx	= constantIndices[constIndexIdx].constantIndex;
7032 				const bool			isUBO		= CAPABILITIES[capIdx].dtype == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
7033 
7034 				specs["cap"]					= CAPABILITIES[capIdx].cap;
7035 				specs["indecor"]				= CAPABILITIES[capIdx].decor;
7036 				specs["constarrayidx"]			= de::toString(constIdx);
7037 				specs["stride16"]				= isUBO ? "16" : "4";
7038 
7039 				if (useConstIdx)
7040 					specs["arrayindex"] = "c_i32_ci";
7041 				else
7042 					specs["arrayindex"] = "30";
7043 
7044 				fragments["capability"]			= capabilities.specialize(specs);
7045 				fragments["decoration"]			= decoration.specialize(specs);
7046 				fragments["pre_main"]			= preMain.specialize(specs);
7047 				fragments["testfun"]			= testFun.specialize(specs);
7048 
7049 				vector<double>		float64Data;
7050 				float64Data.reserve(numDataPoints);
7051 				for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
7052 					float64Data.push_back(deFloat16To64(float16Data[constantIndices[constIndexIdx].useConstantIndex ? (constantIndices[constIndexIdx].constantIndex * 2 + numIdx % 2) : numIdx]));
7053 
7054 				vector<tcu::Vector<deFloat16, 2> >	float16Vec2Data(float16Data.size() / 2);
7055 				for (size_t elemIdx = 0; elemIdx < float16Data.size(); elemIdx++)
7056 				{
7057 					float16Vec2Data[elemIdx / 2][elemIdx % 2] = float16Data[elemIdx];
7058 				}
7059 				typedef Buffer<tcu::Vector<deFloat16, 2> > Float16Vec2Buffer;
7060 				resources.inputs.push_back(Resource(BufferSp(new Float16Vec2Buffer(float16Vec2Data, isUBO ? 12 : 0)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
7061 				resources.outputs.push_back(Resource(BufferSp(new Float64Buffer(float64Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
7062 				resources.verifyIO = check64BitFloats;
7063 				resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
7064 
7065 				if (constantIndices[constIndexIdx].useConstantIndex)
7066 					testName += string("_const_idx_") + de::toString(constantIndices[constIndexIdx].constantIndex);
7067 
7068 				VulkanFeatures features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
7069 
7070 				features.coreFeatures.shaderFloat64	= DE_TRUE;
7071 
7072 				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
7073 			}
7074 		}
7075 	}
7076 
7077 	{ // matrix cases
7078 		fragments["pre_main"]				=
7079 			" %c_i32_32 = OpConstant %i32 32\n"
7080 			"      %f16 = OpTypeFloat 16\n"
7081 			"      %f64 = OpTypeFloat 64\n"
7082 			"    %v2f16 = OpTypeVector %f16 2\n"
7083 			"    %v2f64 = OpTypeVector %f64 2\n"
7084 			"  %m4x2f64 = OpTypeMatrix %v2f64 4\n"
7085 			"  %m4x2f16 = OpTypeMatrix %v2f16 4\n"
7086 			" %up_v2f64 = OpTypePointer Uniform %v2f64\n"
7087 			" %up_v2f16 = OpTypePointer Uniform %v2f16\n"
7088 			"%a8m4x2f64 = OpTypeArray %m4x2f64 %c_i32_32\n"
7089 			"%a8m4x2f16 = OpTypeArray %m4x2f16 %c_i32_32\n"
7090 			"   %SSBO64 = OpTypeStruct %a8m4x2f64\n"
7091 			"   %SSBO16 = OpTypeStruct %a8m4x2f16\n"
7092 			"%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7093 			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
7094 			"   %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
7095 			"   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
7096 
7097 		const StringTemplate decoration		(
7098 			"OpDecorate %a8m4x2f64 ArrayStride 64\n"
7099 			"OpDecorate %a8m4x2f16 ArrayStride 16\n"
7100 			"OpMemberDecorate %SSBO64 0 Offset 0\n"
7101 			"OpMemberDecorate %SSBO64 0 ColMajor\n"
7102 			"OpMemberDecorate %SSBO64 0 MatrixStride 16\n"
7103 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
7104 			"OpMemberDecorate %SSBO16 0 ColMajor\n"
7105 			"OpMemberDecorate %SSBO16 0 MatrixStride 4\n"
7106 			"OpDecorate %SSBO64 BufferBlock\n"
7107 			"OpDecorate %SSBO16 ${indecor}\n"
7108 			"OpDecorate %ssbo64 DescriptorSet 0\n"
7109 			"OpDecorate %ssbo16 DescriptorSet 0\n"
7110 			"OpDecorate %ssbo64 Binding 1\n"
7111 			"OpDecorate %ssbo16 Binding 0\n");
7112 
7113 		fragments["testfun"]				=
7114 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7115 			"    %param = OpFunctionParameter %v4f32\n"
7116 
7117 			"%entry = OpLabel\n"
7118 			"    %i = OpVariable %fp_i32 Function\n"
7119 			"         OpStore %i %c_i32_0\n"
7120 			"         OpBranch %loop\n"
7121 
7122 			" %loop = OpLabel\n"
7123 			"   %15 = OpLoad %i32 %i\n"
7124 			"   %lt = OpSLessThan %bool %15 %c_i32_32\n"
7125 			"         OpLoopMerge %merge %inc None\n"
7126 			"         OpBranchConditional %lt %write %merge\n"
7127 
7128 			"  %write = OpLabel\n"
7129 			"     %30 = OpLoad %i32 %i\n"
7130 			"  %src_0 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_0\n"
7131 			"  %src_1 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_1\n"
7132 			"  %src_2 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_2\n"
7133 			"  %src_3 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_3\n"
7134 			"%val16_0 = OpLoad %v2f16 %src_0\n"
7135 			"%val16_1 = OpLoad %v2f16 %src_1\n"
7136 			"%val16_2 = OpLoad %v2f16 %src_2\n"
7137 			"%val16_3 = OpLoad %v2f16 %src_3\n"
7138 			"%val64_0 = OpFConvert %v2f64 %val16_0\n"
7139 			"%val64_1 = OpFConvert %v2f64 %val16_1\n"
7140 			"%val64_2 = OpFConvert %v2f64 %val16_2\n"
7141 			"%val64_3 = OpFConvert %v2f64 %val16_3\n"
7142 			"  %dst_0 = OpAccessChain %up_v2f64 %ssbo64 %c_i32_0 %30 %c_i32_0\n"
7143 			"  %dst_1 = OpAccessChain %up_v2f64 %ssbo64 %c_i32_0 %30 %c_i32_1\n"
7144 			"  %dst_2 = OpAccessChain %up_v2f64 %ssbo64 %c_i32_0 %30 %c_i32_2\n"
7145 			"  %dst_3 = OpAccessChain %up_v2f64 %ssbo64 %c_i32_0 %30 %c_i32_3\n"
7146 			"           OpStore %dst_0 %val64_0\n"
7147 			"           OpStore %dst_1 %val64_1\n"
7148 			"           OpStore %dst_2 %val64_2\n"
7149 			"           OpStore %dst_3 %val64_3\n"
7150 			"           OpBranch %inc\n"
7151 
7152 			"  %inc = OpLabel\n"
7153 			"   %37 = OpLoad %i32 %i\n"
7154 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
7155 			"         OpStore %i %39\n"
7156 			"         OpBranch %loop\n"
7157 
7158 			"%merge = OpLabel\n"
7159 			"         OpReturnValue %param\n"
7160 
7161 			"OpFunctionEnd\n";
7162 
7163 			for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
7164 			{
7165 				GraphicsResources	resources;
7166 				map<string, string>	specs;
7167 				string				testName	= string(CAPABILITIES[capIdx].name) + "_matrix_float";
7168 
7169 				specs["cap"]					= CAPABILITIES[capIdx].cap;
7170 				specs["indecor"]				= CAPABILITIES[capIdx].decor;
7171 
7172 				fragments["capability"]			= capabilities.specialize(specs);
7173 				fragments["decoration"]			= decoration.specialize(specs);
7174 
7175 				vector<double>		float64Data;
7176 				float64Data.reserve(numDataPoints);
7177 				for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
7178 					float64Data.push_back(deFloat16To64(float16Data[numIdx]));
7179 
7180 				resources.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
7181 				resources.outputs.push_back(Resource(BufferSp(new Float64Buffer(float64Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
7182 				resources.verifyIO = check64BitFloats;
7183 				resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
7184 
7185 				VulkanFeatures features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
7186 
7187 				features.coreFeatures.shaderFloat64	= DE_TRUE;
7188 
7189 				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
7190 		}
7191 	}
7192 }
7193 
addGraphics16BitStoragePushConstantFloat16To64Group(tcu::TestCaseGroup * testGroup)7194 void addGraphics16BitStoragePushConstantFloat16To64Group (tcu::TestCaseGroup* testGroup)
7195 {
7196 	de::Random							rnd					(deStringHash(testGroup->getName()));
7197 	map<string, string>					fragments;
7198 	RGBA								defaultColors[4];
7199 	vector<string>						extensions;
7200 	GraphicsResources					resources;
7201 	PushConstants						pcs;
7202 	const deUint32						numDataPoints		= 64;
7203 	vector<deFloat16>					float16Data			(getFloat16s(rnd, numDataPoints));
7204 	vector<double>						float64Data;
7205 	VulkanFeatures						requiredFeatures;
7206 
7207 	float64Data.reserve(numDataPoints);
7208 	for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
7209 		float64Data.push_back(deFloat16To64(float16Data[numIdx]));
7210 
7211 	extensions.push_back("VK_KHR_16bit_storage");
7212 
7213 	requiredFeatures.coreFeatures.shaderFloat64	= DE_TRUE;
7214 	requiredFeatures.ext16BitStorage			= EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
7215 
7216 	fragments["capability"]						=
7217 		"OpCapability StoragePushConstant16\n"
7218 		"OpCapability Float64\n";
7219 
7220 	fragments["extension"]						= "OpExtension \"SPV_KHR_16bit_storage\"";
7221 
7222 	pcs.setPushConstant(BufferSp(new Float16Buffer(float16Data)));
7223 	resources.outputs.push_back(Resource(BufferSp(new Float64Buffer(float64Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
7224 	resources.verifyIO = check64BitFloats;
7225 
7226 	getDefaultColors(defaultColors);
7227 
7228 	const StringTemplate	testFun		(
7229 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7230 		"    %param = OpFunctionParameter %v4f32\n"
7231 
7232 		"%entry = OpLabel\n"
7233 		"    %i = OpVariable %fp_i32 Function\n"
7234 		"         OpStore %i %c_i32_0\n"
7235 		"         OpBranch %loop\n"
7236 
7237 		" %loop = OpLabel\n"
7238 		"   %15 = OpLoad %i32 %i\n"
7239 		"   %lt = OpSLessThan %bool %15 ${count}\n"
7240 		"         OpLoopMerge %merge %inc None\n"
7241 		"         OpBranchConditional %lt %write %merge\n"
7242 
7243 		"%write = OpLabel\n"
7244 		"   %30 = OpLoad %i32 %i\n"
7245 		"  %src = OpAccessChain ${pp_type16} %pc16 %c_i32_0 %30 ${index0:opt}\n"
7246 		"%val16 = OpLoad ${f_type16} %src\n"
7247 		"%val64 = OpFConvert ${f_type64} %val16\n"
7248 		"  %dst = OpAccessChain ${up_type64} %ssbo64 %c_i32_0 %30 ${index0:opt}\n"
7249 		"         OpStore %dst %val64\n"
7250 
7251 		"${store:opt}\n"
7252 
7253 		"         OpBranch %inc\n"
7254 
7255 		"  %inc = OpLabel\n"
7256 		"   %37 = OpLoad %i32 %i\n"
7257 		"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
7258 		"         OpStore %i %39\n"
7259 		"         OpBranch %loop\n"
7260 
7261 		"%merge = OpLabel\n"
7262 		"         OpReturnValue %param\n"
7263 
7264 		"OpFunctionEnd\n");
7265 
7266 	{  // Scalar cases
7267 		fragments["pre_main"]				=
7268 			"           %f16 = OpTypeFloat 16\n"
7269 			"           %f64 = OpTypeFloat 64\n"
7270 			"      %c_i32_64 = OpConstant %i32 64\n"					// Should be the same as numDataPoints
7271 			"         %v4f64 = OpTypeVector %f64 4\n"
7272 			"        %a64f16 = OpTypeArray %f16 %c_i32_64\n"
7273 			"        %a64f64 = OpTypeArray %f64 %c_i32_64\n"
7274 			"        %pp_f16 = OpTypePointer PushConstant %f16\n"
7275 			"        %up_f64 = OpTypePointer Uniform %f64\n"
7276 			"        %SSBO64 = OpTypeStruct %a64f64\n"
7277 			"     %up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7278 			"        %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
7279 			"          %PC16 = OpTypeStruct %a64f16\n"
7280 			"       %pp_PC16 = OpTypePointer PushConstant %PC16\n"
7281 			"          %pc16 = OpVariable %pp_PC16 PushConstant\n";
7282 
7283 		fragments["decoration"]				=
7284 			"OpDecorate %a64f16 ArrayStride 2\n"
7285 			"OpDecorate %a64f64 ArrayStride 8\n"
7286 			"OpDecorate %SSBO64 BufferBlock\n"
7287 			"OpMemberDecorate %SSBO64 0 Offset 0\n"
7288 			"OpDecorate %PC16 Block\n"
7289 			"OpMemberDecorate %PC16 0 Offset 0\n"
7290 			"OpDecorate %ssbo64 DescriptorSet 0\n"
7291 			"OpDecorate %ssbo64 Binding 0\n";
7292 
7293 		map<string, string>		specs;
7294 
7295 		specs["count"]			= "%c_i32_64";
7296 		specs["pp_type16"]		= "%pp_f16";
7297 		specs["f_type16"]		= "%f16";
7298 		specs["f_type64"]		= "%f64";
7299 		specs["up_type64"]		= "%up_f64";
7300 
7301 		fragments["testfun"]	= testFun.specialize(specs);
7302 
7303 		createTestsForAllStages("scalar", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
7304 	}
7305 
7306 	{  // Vector cases
7307 		fragments["pre_main"]				=
7308 			"      %f16 = OpTypeFloat 16\n"
7309 			"      %f64 = OpTypeFloat 64\n"
7310 			"    %v4f16 = OpTypeVector %f16 4\n"
7311 			"    %v4f64 = OpTypeVector %f64 4\n"
7312 			"    %v2f64 = OpTypeVector %f64 2\n"
7313 			" %c_i32_16 = OpConstant %i32 16\n"
7314 			" %a16v4f16 = OpTypeArray %v4f16 %c_i32_16\n"
7315 			" %a16v4f64 = OpTypeArray %v4f64 %c_i32_16\n"
7316 			" %pp_v4f16 = OpTypePointer PushConstant %v4f16\n"
7317 			" %up_v4f64 = OpTypePointer Uniform %v4f64\n"
7318 			"   %SSBO64 = OpTypeStruct %a16v4f64\n"
7319 			"%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7320 			"   %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
7321 			"     %PC16 = OpTypeStruct %a16v4f16\n"
7322 			"  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
7323 			"     %pc16 = OpVariable %pp_PC16 PushConstant\n";
7324 
7325 		fragments["decoration"]				=
7326 			"OpDecorate %a16v4f16 ArrayStride 8\n"
7327 			"OpDecorate %a16v4f64 ArrayStride 32\n"
7328 			"OpDecorate %SSBO64 BufferBlock\n"
7329 			"OpMemberDecorate %SSBO64 0 Offset 0\n"
7330 			"OpDecorate %PC16 Block\n"
7331 			"OpMemberDecorate %PC16 0 Offset 0\n"
7332 			"OpDecorate %ssbo64 DescriptorSet 0\n"
7333 			"OpDecorate %ssbo64 Binding 0\n";
7334 
7335 		map<string, string>		specs;
7336 
7337 		specs["count"]			= "%c_i32_16";
7338 		specs["pp_type16"]		= "%pp_v4f16";
7339 		specs["f_type16"]		= "%v4f16";
7340 		specs["f_type64"]		= "%v4f64";
7341 		specs["up_type64"]		= "%up_v4f64";
7342 
7343 		fragments["testfun"]	= testFun.specialize(specs);
7344 
7345 		createTestsForAllStages("vector", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
7346 	}
7347 
7348 	{  // Matrix cases
7349 		fragments["pre_main"]				=
7350 			"  %c_i32_8 = OpConstant %i32 8\n"
7351 			"      %f16 = OpTypeFloat 16\n"
7352 			"    %v4f16 = OpTypeVector %f16 4\n"
7353 			"      %f64 = OpTypeFloat 64\n"
7354 			"    %v4f64 = OpTypeVector %f64 4\n"
7355 			"  %m2v4f16 = OpTypeMatrix %v4f16 2\n"
7356 			"  %m2v4f64 = OpTypeMatrix %v4f64 2\n"
7357 			"%a8m2v4f16 = OpTypeArray %m2v4f16 %c_i32_8\n"
7358 			"%a8m2v4f64 = OpTypeArray %m2v4f64 %c_i32_8\n"
7359 			" %pp_v4f16 = OpTypePointer PushConstant %v4f16\n"
7360 			" %up_v4f64 = OpTypePointer Uniform %v4f64\n"
7361 			"   %SSBO64 = OpTypeStruct %a8m2v4f64\n"
7362 			"%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7363 			"   %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
7364 			"     %PC16 = OpTypeStruct %a8m2v4f16\n"
7365 			"  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
7366 			"     %pc16 = OpVariable %pp_PC16 PushConstant\n";
7367 
7368 		fragments["decoration"]				=
7369 			"OpDecorate %a8m2v4f16 ArrayStride 16\n"
7370 			"OpDecorate %a8m2v4f64 ArrayStride 64\n"
7371 			"OpDecorate %SSBO64 BufferBlock\n"
7372 			"OpMemberDecorate %SSBO64 0 Offset 0\n"
7373 			"OpMemberDecorate %SSBO64 0 ColMajor\n"
7374 			"OpMemberDecorate %SSBO64 0 MatrixStride 32\n"
7375 			"OpDecorate %PC16 Block\n"
7376 			"OpMemberDecorate %PC16 0 Offset 0\n"
7377 			"OpMemberDecorate %PC16 0 ColMajor\n"
7378 			"OpMemberDecorate %PC16 0 MatrixStride 8\n"
7379 			"OpDecorate %ssbo64 DescriptorSet 0\n"
7380 			"OpDecorate %ssbo64 Binding 0\n";
7381 
7382 		map<string, string>		specs;
7383 
7384 		specs["count"]			= "%c_i32_8";
7385 		specs["pp_type16"]		= "%pp_v4f16";
7386 		specs["up_type64"]		= "%up_v4f64";
7387 		specs["f_type16"]		= "%v4f16";
7388 		specs["f_type64"]		= "%v4f64";
7389 		specs["index0"]			= "%c_i32_0";
7390 		specs["store"]			=
7391 			"  %src_1 = OpAccessChain %pp_v4f16 %pc16 %c_i32_0 %30 %c_i32_1\n"
7392 			"%val16_1 = OpLoad %v4f16 %src_1\n"
7393 			"%val64_1 = OpFConvert %v4f64 %val16_1\n"
7394 			"  %dst_1 = OpAccessChain %up_v4f64 %ssbo64 %c_i32_0 %30 %c_i32_1\n"
7395 			"           OpStore %dst_1 %val64_1\n";
7396 
7397 		fragments["testfun"]	= testFun.specialize(specs);
7398 
7399 		createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
7400 	}
7401 }
7402 
addCompute16bitStorageUniform64To16Group(tcu::TestCaseGroup * group)7403 void addCompute16bitStorageUniform64To16Group (tcu::TestCaseGroup* group)
7404 {
7405 	tcu::TestContext&				testCtx			= group->getTestContext();
7406 	de::Random						rnd				(deStringHash(group->getName()));
7407 	const int						numElements		= 128;
7408 
7409 	const StringTemplate			shaderTemplate	(
7410 		"OpCapability Shader\n"
7411 		"OpCapability ${capability}\n"
7412 		"OpCapability Float64\n"
7413 		"OpExtension \"SPV_KHR_16bit_storage\"\n"
7414 		"OpMemoryModel Logical GLSL450\n"
7415 		"OpEntryPoint GLCompute %main \"main\" %id\n"
7416 		"OpExecutionMode %main LocalSize 1 1 1\n"
7417 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
7418 
7419 		"${stride}\n"
7420 
7421 		"OpMemberDecorate %SSBO64 0 Offset 0\n"
7422 		"OpMemberDecorate %SSBO16 0 Offset 0\n"
7423 		"OpDecorate %SSBO64 ${storage}\n"
7424 		"OpDecorate %SSBO16 BufferBlock\n"
7425 		"OpDecorate %ssbo64 DescriptorSet 0\n"
7426 		"OpDecorate %ssbo16 DescriptorSet 0\n"
7427 		"OpDecorate %ssbo64 Binding 0\n"
7428 		"OpDecorate %ssbo16 Binding 1\n"
7429 
7430 		"${matrix_decor:opt}\n"
7431 
7432 		"${rounding:opt}\n"
7433 
7434 		"%bool      = OpTypeBool\n"
7435 		"%void      = OpTypeVoid\n"
7436 		"%voidf     = OpTypeFunction %void\n"
7437 		"%u32       = OpTypeInt 32 0\n"
7438 		"%i32       = OpTypeInt 32 1\n"
7439 		"%f32       = OpTypeFloat 32\n"
7440 		"%f64       = OpTypeFloat 64\n"
7441 		"%uvec3     = OpTypeVector %u32 3\n"
7442 		"%fvec3     = OpTypeVector %f32 3\n"
7443 		"%uvec3ptr  = OpTypePointer Input %uvec3\n"
7444 		"%i32ptr    = OpTypePointer Uniform %i32\n"
7445 		"%f64ptr    = OpTypePointer Uniform %f64\n"
7446 
7447 		"%zero      = OpConstant %i32 0\n"
7448 		"%c_i32_1   = OpConstant %i32 1\n"
7449 		"%c_i32_16  = OpConstant %i32 16\n"
7450 		"%c_i32_32  = OpConstant %i32 32\n"
7451 		"%c_i32_64  = OpConstant %i32 64\n"
7452 		"%c_i32_128 = OpConstant %i32 128\n"
7453 
7454 		"%i32arr    = OpTypeArray %i32 %c_i32_128\n"
7455 		"%f64arr    = OpTypeArray %f64 %c_i32_128\n"
7456 
7457 		"${types}\n"
7458 		"${matrix_types:opt}\n"
7459 
7460 		"%SSBO64    = OpTypeStruct %${matrix_prefix:opt}${base64}arr\n"
7461 		"%SSBO16    = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
7462 		"%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7463 		"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
7464 		"%ssbo64    = OpVariable %up_SSBO64 Uniform\n"
7465 		"%ssbo16    = OpVariable %up_SSBO16 Uniform\n"
7466 
7467 		"%id        = OpVariable %uvec3ptr Input\n"
7468 
7469 		"%main      = OpFunction %void None %voidf\n"
7470 		"%label     = OpLabel\n"
7471 		"%idval     = OpLoad %uvec3 %id\n"
7472 		"%x         = OpCompositeExtract %u32 %idval 0\n"
7473 		"%inloc     = OpAccessChain %${base64}ptr %ssbo64 %zero %x ${index0:opt}\n"
7474 		"%val64     = OpLoad %${base64} %inloc\n"
7475 		"%val16     = ${convert} %${base16} %val64\n"
7476 		"%outloc    = OpAccessChain %${base16}ptr %ssbo16 %zero %x ${index0:opt}\n"
7477 		"             OpStore %outloc %val16\n"
7478 		"${matrix_store:opt}\n"
7479 		"             OpReturn\n"
7480 		"             OpFunctionEnd\n");
7481 
7482 	{  // Floats
7483 		const char						floatTypes[]	=
7484 			"%f16       = OpTypeFloat 16\n"
7485 			"%f16ptr    = OpTypePointer Uniform %f16\n"
7486 			"%f16arr    = OpTypeArray %f16 %c_i32_128\n"
7487 			"%v4f16     = OpTypeVector %f16 4\n"
7488 			"%v4f64     = OpTypeVector %f64 4\n"
7489 			"%v4f16ptr  = OpTypePointer Uniform %v4f16\n"
7490 			"%v4f64ptr  = OpTypePointer Uniform %v4f64\n"
7491 			"%v4f16arr  = OpTypeArray %v4f16 %c_i32_32\n"
7492 			"%v4f64arr  = OpTypeArray %v4f64 %c_i32_32\n";
7493 
7494 		struct RndMode
7495 		{
7496 			const char*				name;
7497 			const char*				decor;
7498 			VerifyIOFunc			func;
7499 		};
7500 
7501 		const RndMode		rndModes[]		=
7502 		{
7503 			{"rtz",						"OpDecorate %val16  FPRoundingMode RTZ",	computeCheck16BitFloats64<ROUNDINGMODE_RTZ>},
7504 			{"rte",						"OpDecorate %val16  FPRoundingMode RTE",	computeCheck16BitFloats64<ROUNDINGMODE_RTE>},
7505 			{"unspecified_rnd_mode",	"",											computeCheck16BitFloats64<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
7506 		};
7507 
7508 		struct CompositeType
7509 		{
7510 			const char*	name;
7511 			const char*	base64;
7512 			const char*	base16;
7513 			const char*	strideStr;
7514 			const char* stride64UBO;
7515 			unsigned	padding64UBO;
7516 			const char* stride64SSBO;
7517 			unsigned	padding64SSBO;
7518 			unsigned	count;
7519 		};
7520 
7521 		const CompositeType	cTypes[]	=
7522 		{
7523 			{"scalar",	"f64",		"f16",		"OpDecorate %f16arr ArrayStride 2\nOpDecorate %f64arr ArrayStride ",			"16",	8,	"8",	0,	numElements},
7524 			{"vector",	"v4f64",	"v4f16",	"OpDecorate %v4f16arr ArrayStride 8\nOpDecorate %v4f64arr ArrayStride ",		"32",	0,	"32",	0,	numElements / 4},
7525 			{"matrix",	"v4f64",	"v4f16",	"OpDecorate %m2v4f16arr ArrayStride 16\nOpDecorate %m2v4f64arr ArrayStride ",	"64",	0,	"64",	0,	numElements / 8},
7526 		};
7527 
7528 		vector<double>		float64Data			= getFloat64s(rnd, numElements);
7529 		vector<deFloat16>	float16DummyData	(numElements, 0);
7530 
7531 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
7532 			for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
7533 				for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
7534 				{
7535 					ComputeShaderSpec		spec;
7536 					map<string, string>		specs;
7537 					string					testName	= string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name + "_float_" + rndModes[rndModeIdx].name;
7538 					const bool				isUBO		= CAPABILITIES[capIdx].dtype == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
7539 
7540 					specs["capability"]		= CAPABILITIES[capIdx].cap;
7541 					specs["storage"]		= CAPABILITIES[capIdx].decor;
7542 					specs["stride"]			= cTypes[tyIdx].strideStr;
7543 					specs["base64"]			= cTypes[tyIdx].base64;
7544 					specs["base16"]			= cTypes[tyIdx].base16;
7545 					specs["rounding"]		= rndModes[rndModeIdx].decor;
7546 					specs["types"]			= floatTypes;
7547 					specs["convert"]		= "OpFConvert";
7548 
7549 					if (isUBO)
7550 						specs["stride"] += cTypes[tyIdx].stride64UBO;
7551 					else
7552 						specs["stride"] += cTypes[tyIdx].stride64SSBO;
7553 
7554 					if (deStringEqual(cTypes[tyIdx].name, "matrix"))
7555 					{
7556 						if (strcmp(rndModes[rndModeIdx].name, "rtz") == 0)
7557 							specs["rounding"] += "\nOpDecorate %val16_1  FPRoundingMode RTZ\n";
7558 						else if (strcmp(rndModes[rndModeIdx].name, "rte") == 0)
7559 							specs["rounding"] += "\nOpDecorate %val16_1  FPRoundingMode RTE\n";
7560 
7561 						specs["index0"]			= "%zero";
7562 						specs["matrix_prefix"]	= "m2";
7563 						specs["matrix_types"]	=
7564 							"%m2v4f16 = OpTypeMatrix %v4f16 2\n"
7565 							"%m2v4f64 = OpTypeMatrix %v4f64 2\n"
7566 							"%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_16\n"
7567 							"%m2v4f64arr = OpTypeArray %m2v4f64 %c_i32_16\n";
7568 						specs["matrix_decor"]	=
7569 							"OpMemberDecorate %SSBO64 0 ColMajor\n"
7570 							"OpMemberDecorate %SSBO64 0 MatrixStride 32\n"
7571 							"OpMemberDecorate %SSBO16 0 ColMajor\n"
7572 							"OpMemberDecorate %SSBO16 0 MatrixStride 8\n";
7573 						specs["matrix_store"]	=
7574 							"%inloc_1  = OpAccessChain %v4f64ptr %ssbo64 %zero %x %c_i32_1\n"
7575 							"%val64_1  = OpLoad %v4f64 %inloc_1\n"
7576 							"%val16_1  = OpFConvert %v4f16 %val64_1\n"
7577 							"%outloc_1 = OpAccessChain %v4f16ptr %ssbo16 %zero %x %c_i32_1\n"
7578 							"            OpStore %outloc_1 %val16_1\n";
7579 					}
7580 
7581 					spec.assembly			= shaderTemplate.specialize(specs);
7582 					spec.numWorkGroups		= IVec3(cTypes[tyIdx].count, 1, 1);
7583 					spec.verifyIO			= rndModes[rndModeIdx].func;
7584 					const unsigned padding	= isUBO ? cTypes[tyIdx].padding64UBO : cTypes[tyIdx].padding64SSBO;
7585 
7586 					spec.inputs.push_back(Resource(BufferSp(new Float64Buffer(float64Data, padding)), CAPABILITIES[capIdx].dtype));
7587 
7588 					// We provided a custom verifyIO in the above in which inputs will be used for checking.
7589 					// So put dummy data in the expected values.
7590 					spec.outputs.push_back(BufferSp(new Float16Buffer(float16DummyData)));
7591 
7592 					spec.extensions.push_back("VK_KHR_16bit_storage");
7593 
7594 					spec.requestedVulkanFeatures							= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
7595 					spec.requestedVulkanFeatures.coreFeatures.shaderFloat64	= VK_TRUE;
7596 
7597 					group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
7598 				}
7599 	}
7600 }
7601 
addGraphics16BitStorageUniformFloat64To16Group(tcu::TestCaseGroup * testGroup)7602 void addGraphics16BitStorageUniformFloat64To16Group (tcu::TestCaseGroup* testGroup)
7603 {
7604 	de::Random							rnd					(deStringHash(testGroup->getName()));
7605 	map<string, string>					fragments;
7606 	GraphicsResources					resources;
7607 	vector<string>						extensions;
7608 	const deUint32						numDataPoints		= 256;
7609 	RGBA								defaultColors[4];
7610 	vector<double>						float64Data			= getFloat64s(rnd, numDataPoints);
7611 	vector<deFloat16>					float16DummyData	(numDataPoints, 0);
7612 	const StringTemplate				capabilities		("OpCapability Float64\n"
7613 															 "OpCapability ${cap}\n");
7614 	// We use a custom verifyIO to check the result via computing directly from inputs; the contents in outputs do not matter.
7615 	resources.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16DummyData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
7616 
7617 	extensions.push_back("VK_KHR_16bit_storage");
7618 
7619 	fragments["extension"]	= "OpExtension \"SPV_KHR_16bit_storage\"";
7620 
7621 	struct RndMode
7622 	{
7623 		const char*				name;
7624 		const char*				decor;
7625 		VerifyIOFunc			f;
7626 	};
7627 
7628 	getDefaultColors(defaultColors);
7629 
7630 	{  // scalar cases
7631 		fragments["pre_main"]				=
7632 			"      %f16 = OpTypeFloat 16\n"
7633 			"      %f64 = OpTypeFloat 64\n"
7634 			"%c_i32_256 = OpConstant %i32 256\n"
7635 			"   %up_f64 = OpTypePointer Uniform %f64\n"
7636 			"   %up_f16 = OpTypePointer Uniform %f16\n"
7637 			"   %ra_f64 = OpTypeArray %f64 %c_i32_256\n"
7638 			"   %ra_f16 = OpTypeArray %f16 %c_i32_256\n"
7639 			"   %SSBO64 = OpTypeStruct %ra_f64\n"
7640 			"   %SSBO16 = OpTypeStruct %ra_f16\n"
7641 			"%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7642 			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
7643 			"   %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
7644 			"   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
7645 
7646 		const StringTemplate decoration		(
7647 			"OpDecorate %ra_f64 ArrayStride ${stride64}\n"
7648 			"OpDecorate %ra_f16 ArrayStride 2\n"
7649 			"OpMemberDecorate %SSBO64 0 Offset 0\n"
7650 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
7651 			"OpDecorate %SSBO64 ${indecor}\n"
7652 			"OpDecorate %SSBO16 BufferBlock\n"
7653 			"OpDecorate %ssbo64 DescriptorSet 0\n"
7654 			"OpDecorate %ssbo16 DescriptorSet 0\n"
7655 			"OpDecorate %ssbo64 Binding 0\n"
7656 			"OpDecorate %ssbo16 Binding 1\n"
7657 			"${rounddecor}\n");
7658 
7659 		fragments["testfun"]				=
7660 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7661 			"    %param = OpFunctionParameter %v4f32\n"
7662 
7663 			"%entry = OpLabel\n"
7664 			"    %i = OpVariable %fp_i32 Function\n"
7665 			"         OpStore %i %c_i32_0\n"
7666 			"         OpBranch %loop\n"
7667 
7668 			" %loop = OpLabel\n"
7669 			"   %15 = OpLoad %i32 %i\n"
7670 			"   %lt = OpSLessThan %bool %15 %c_i32_256\n"
7671 			"         OpLoopMerge %merge %inc None\n"
7672 			"         OpBranchConditional %lt %write %merge\n"
7673 
7674 			"%write = OpLabel\n"
7675 			"   %30 = OpLoad %i32 %i\n"
7676 			"  %src = OpAccessChain %up_f64 %ssbo64 %c_i32_0 %30\n"
7677 			"%val64 = OpLoad %f64 %src\n"
7678 			"%val16 = OpFConvert %f16 %val64\n"
7679 			"  %dst = OpAccessChain %up_f16 %ssbo16 %c_i32_0 %30\n"
7680 			"         OpStore %dst %val16\n"
7681 			"         OpBranch %inc\n"
7682 
7683 			"  %inc = OpLabel\n"
7684 			"   %37 = OpLoad %i32 %i\n"
7685 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
7686 			"         OpStore %i %39\n"
7687 			"         OpBranch %loop\n"
7688 
7689 			"%merge = OpLabel\n"
7690 			"         OpReturnValue %param\n"
7691 
7692 			"OpFunctionEnd\n";
7693 
7694 		const RndMode	rndModes[] =
7695 		{
7696 			{"rtz",						"OpDecorate %val16  FPRoundingMode RTZ",	graphicsCheck16BitFloats64<ROUNDINGMODE_RTZ>},
7697 			{"rte",						"OpDecorate %val16  FPRoundingMode RTE",	graphicsCheck16BitFloats64<ROUNDINGMODE_RTE>},
7698 			{"unspecified_rnd_mode",	"",											graphicsCheck16BitFloats64<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
7699 		};
7700 
7701 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
7702 			for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
7703 			{
7704 				map<string, string>	specs;
7705 				string				testName	= string(CAPABILITIES[capIdx].name) + "_scalar_float_" + rndModes[rndModeIdx].name;
7706 				const bool			isUBO		= CAPABILITIES[capIdx].dtype == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
7707 				VulkanFeatures		features;
7708 
7709 				specs["cap"]						= CAPABILITIES[capIdx].cap;
7710 				specs["indecor"]					= CAPABILITIES[capIdx].decor;
7711 				specs["rounddecor"]					= rndModes[rndModeIdx].decor;
7712 				specs["stride64"]					= isUBO ? "16" : "8";
7713 
7714 				fragments["capability"]				= capabilities.specialize(specs);
7715 				fragments["decoration"]				= decoration.specialize(specs);
7716 
7717 				resources.inputs.clear();
7718 				resources.inputs.push_back(Resource(BufferSp(new Float64Buffer(float64Data, isUBO ? 8 : 0)), CAPABILITIES[capIdx].dtype));
7719 
7720 				resources.verifyIO					= rndModes[rndModeIdx].f;
7721 
7722 				features							= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
7723 				features.coreFeatures.shaderFloat64 = DE_TRUE;
7724 
7725 
7726 				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
7727 			}
7728 	}
7729 
7730 	{  // vector cases
7731 		fragments["pre_main"]				=
7732 			"      %f16 = OpTypeFloat 16\n"
7733 			"      %f64 = OpTypeFloat 64\n"
7734 			" %c_i32_64 = OpConstant %i32 64\n"
7735 			"	 %v4f16 = OpTypeVector %f16 4\n"
7736 			"	 %v4f64 = OpTypeVector %f64 4\n"
7737 			" %up_v4f64 = OpTypePointer Uniform %v4f64\n"
7738 			" %up_v4f16 = OpTypePointer Uniform %v4f16\n"
7739 			" %ra_v4f64 = OpTypeArray %v4f64 %c_i32_64\n"
7740 			" %ra_v4f16 = OpTypeArray %v4f16 %c_i32_64\n"
7741 			"   %SSBO64 = OpTypeStruct %ra_v4f64\n"
7742 			"   %SSBO16 = OpTypeStruct %ra_v4f16\n"
7743 			"%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7744 			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
7745 			"   %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
7746 			"   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
7747 
7748 		const StringTemplate decoration		(
7749 			"OpDecorate %ra_v4f64 ArrayStride 32\n"
7750 			"OpDecorate %ra_v4f16 ArrayStride 8\n"
7751 			"OpMemberDecorate %SSBO64 0 Offset 0\n"
7752 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
7753 			"OpDecorate %SSBO64 ${indecor}\n"
7754 			"OpDecorate %SSBO16 BufferBlock\n"
7755 			"OpDecorate %ssbo64 DescriptorSet 0\n"
7756 			"OpDecorate %ssbo16 DescriptorSet 0\n"
7757 			"OpDecorate %ssbo64 Binding 0\n"
7758 			"OpDecorate %ssbo16 Binding 1\n"
7759 			"${rounddecor}\n");
7760 
7761 		// ssbo16[] <- convert ssbo64[] to 16bit float
7762 		fragments["testfun"]				=
7763 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7764 			"    %param = OpFunctionParameter %v4f32\n"
7765 
7766 			"%entry = OpLabel\n"
7767 			"    %i = OpVariable %fp_i32 Function\n"
7768 			"         OpStore %i %c_i32_0\n"
7769 			"         OpBranch %loop\n"
7770 
7771 			" %loop = OpLabel\n"
7772 			"   %15 = OpLoad %i32 %i\n"
7773 			"   %lt = OpSLessThan %bool %15 %c_i32_64\n"
7774 			"         OpLoopMerge %merge %inc None\n"
7775 			"         OpBranchConditional %lt %write %merge\n"
7776 
7777 			"%write = OpLabel\n"
7778 			"   %30 = OpLoad %i32 %i\n"
7779 			"  %src = OpAccessChain %up_v4f64 %ssbo64 %c_i32_0 %30\n"
7780 			"%val64 = OpLoad %v4f64 %src\n"
7781 			"%val16 = OpFConvert %v4f16 %val64\n"
7782 			"  %dst = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30\n"
7783 			"         OpStore %dst %val16\n"
7784 			"         OpBranch %inc\n"
7785 
7786 			"  %inc = OpLabel\n"
7787 			"   %37 = OpLoad %i32 %i\n"
7788 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
7789 			"         OpStore %i %39\n"
7790 			"         OpBranch %loop\n"
7791 
7792 			"%merge = OpLabel\n"
7793 			"         OpReturnValue %param\n"
7794 
7795 			"OpFunctionEnd\n";
7796 
7797 		const RndMode	rndModes[] =
7798 		{
7799 			{"rtz",						"OpDecorate %val16  FPRoundingMode RTZ",	graphicsCheck16BitFloats64<ROUNDINGMODE_RTZ>},
7800 			{"rte",						"OpDecorate %val16  FPRoundingMode RTE",	graphicsCheck16BitFloats64<ROUNDINGMODE_RTE>},
7801 			{"unspecified_rnd_mode",	"",											graphicsCheck16BitFloats64<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
7802 		};
7803 
7804 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
7805 			for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
7806 			{
7807 				map<string, string>	specs;
7808 				VulkanFeatures		features;
7809 				string				testName = string(CAPABILITIES[capIdx].name) + "_vector_float_" + rndModes[rndModeIdx].name;
7810 
7811 				specs["cap"]						= CAPABILITIES[capIdx].cap;
7812 				specs["indecor"]					= CAPABILITIES[capIdx].decor;
7813 				specs["rounddecor"]					= rndModes[rndModeIdx].decor;
7814 
7815 				fragments["capability"]				= capabilities.specialize(specs);
7816 				fragments["decoration"]				= decoration.specialize(specs);
7817 
7818 				resources.inputs.clear();
7819 				resources.inputs.push_back(Resource(BufferSp(new Float64Buffer(float64Data)), CAPABILITIES[capIdx].dtype));
7820 				resources.verifyIO					= rndModes[rndModeIdx].f;
7821 
7822 				features							= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
7823 				features.coreFeatures.shaderFloat64	= DE_TRUE;
7824 
7825 				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
7826 			}
7827 	}
7828 
7829 	{  // matrix cases
7830 		fragments["pre_main"]				=
7831 			"       %f16 = OpTypeFloat 16\n"
7832 			"       %f64 = OpTypeFloat 64\n"
7833 			"  %c_i32_16 = OpConstant %i32 16\n"
7834 			"     %v4f16 = OpTypeVector %f16 4\n"
7835 			"     %v4f64 = OpTypeVector %f64 4\n"
7836 			"   %m4x4f64 = OpTypeMatrix %v4f64 4\n"
7837 			"   %m4x4f16 = OpTypeMatrix %v4f16 4\n"
7838 			"  %up_v4f64 = OpTypePointer Uniform %v4f64\n"
7839 			"  %up_v4f16 = OpTypePointer Uniform %v4f16\n"
7840 			"%a16m4x4f64 = OpTypeArray %m4x4f64 %c_i32_16\n"
7841 			"%a16m4x4f16 = OpTypeArray %m4x4f16 %c_i32_16\n"
7842 			"    %SSBO64 = OpTypeStruct %a16m4x4f64\n"
7843 			"    %SSBO16 = OpTypeStruct %a16m4x4f16\n"
7844 			" %up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7845 			" %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
7846 			"    %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
7847 			"    %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
7848 
7849 		const StringTemplate decoration		(
7850 			"OpDecorate %a16m4x4f64 ArrayStride 128\n"
7851 			"OpDecorate %a16m4x4f16 ArrayStride 32\n"
7852 			"OpMemberDecorate %SSBO64 0 Offset 0\n"
7853 			"OpMemberDecorate %SSBO64 0 ColMajor\n"
7854 			"OpMemberDecorate %SSBO64 0 MatrixStride 32\n"
7855 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
7856 			"OpMemberDecorate %SSBO16 0 ColMajor\n"
7857 			"OpMemberDecorate %SSBO16 0 MatrixStride 8\n"
7858 			"OpDecorate %SSBO64 ${indecor}\n"
7859 			"OpDecorate %SSBO16 BufferBlock\n"
7860 			"OpDecorate %ssbo64 DescriptorSet 0\n"
7861 			"OpDecorate %ssbo16 DescriptorSet 0\n"
7862 			"OpDecorate %ssbo64 Binding 0\n"
7863 			"OpDecorate %ssbo16 Binding 1\n"
7864 			"${rounddecor}\n");
7865 
7866 		fragments["testfun"]				=
7867 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7868 			"    %param = OpFunctionParameter %v4f32\n"
7869 
7870 			"%entry = OpLabel\n"
7871 			"    %i = OpVariable %fp_i32 Function\n"
7872 			"         OpStore %i %c_i32_0\n"
7873 			"         OpBranch %loop\n"
7874 
7875 			" %loop = OpLabel\n"
7876 			"   %15 = OpLoad %i32 %i\n"
7877 			"   %lt = OpSLessThan %bool %15 %c_i32_16\n"
7878 			"         OpLoopMerge %merge %inc None\n"
7879 			"         OpBranchConditional %lt %write %merge\n"
7880 
7881 			"  %write = OpLabel\n"
7882 			"     %30 = OpLoad %i32 %i\n"
7883 			"  %src_0 = OpAccessChain %up_v4f64 %ssbo64 %c_i32_0 %30 %c_i32_0\n"
7884 			"  %src_1 = OpAccessChain %up_v4f64 %ssbo64 %c_i32_0 %30 %c_i32_1\n"
7885 			"  %src_2 = OpAccessChain %up_v4f64 %ssbo64 %c_i32_0 %30 %c_i32_2\n"
7886 			"  %src_3 = OpAccessChain %up_v4f64 %ssbo64 %c_i32_0 %30 %c_i32_3\n"
7887 			"%val64_0 = OpLoad %v4f64 %src_0\n"
7888 			"%val64_1 = OpLoad %v4f64 %src_1\n"
7889 			"%val64_2 = OpLoad %v4f64 %src_2\n"
7890 			"%val64_3 = OpLoad %v4f64 %src_3\n"
7891 			"%val16_0 = OpFConvert %v4f16 %val64_0\n"
7892 			"%val16_1 = OpFConvert %v4f16 %val64_1\n"
7893 			"%val16_2 = OpFConvert %v4f16 %val64_2\n"
7894 			"%val16_3 = OpFConvert %v4f16 %val64_3\n"
7895 			"  %dst_0 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_0\n"
7896 			"  %dst_1 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_1\n"
7897 			"  %dst_2 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_2\n"
7898 			"  %dst_3 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_3\n"
7899 			"           OpStore %dst_0 %val16_0\n"
7900 			"           OpStore %dst_1 %val16_1\n"
7901 			"           OpStore %dst_2 %val16_2\n"
7902 			"           OpStore %dst_3 %val16_3\n"
7903 			"           OpBranch %inc\n"
7904 
7905 			"  %inc = OpLabel\n"
7906 			"   %37 = OpLoad %i32 %i\n"
7907 			"   %39 = OpIAdd %i32 %37 %c_i32_1\n"
7908 			"         OpStore %i %39\n"
7909 			"         OpBranch %loop\n"
7910 
7911 			"%merge = OpLabel\n"
7912 			"         OpReturnValue %param\n"
7913 
7914 			"OpFunctionEnd\n";
7915 
7916 		const RndMode	rndModes[] =
7917 		{
7918 			{"rte",						"OpDecorate %val16_0  FPRoundingMode RTE\nOpDecorate %val16_1  FPRoundingMode RTE\nOpDecorate %val16_2  FPRoundingMode RTE\nOpDecorate %val16_3  FPRoundingMode RTE",	graphicsCheck16BitFloats64<ROUNDINGMODE_RTE>},
7919 			{"rtz",						"OpDecorate %val16_0  FPRoundingMode RTZ\nOpDecorate %val16_1  FPRoundingMode RTZ\nOpDecorate %val16_2  FPRoundingMode RTZ\nOpDecorate %val16_3  FPRoundingMode RTZ",	graphicsCheck16BitFloats64<ROUNDINGMODE_RTZ>},
7920 			{"unspecified_rnd_mode",	"",																																										graphicsCheck16BitFloats64<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
7921 		};
7922 
7923 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
7924 			for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
7925 			{
7926 				map<string, string>	specs;
7927 				VulkanFeatures		features;
7928 				string				testName = string(CAPABILITIES[capIdx].name) + "_matrix_float_" + rndModes[rndModeIdx].name;
7929 
7930 				specs["cap"]						= CAPABILITIES[capIdx].cap;
7931 				specs["indecor"]					= CAPABILITIES[capIdx].decor;
7932 				specs["rounddecor"]					= rndModes[rndModeIdx].decor;
7933 
7934 				fragments["capability"]				= capabilities.specialize(specs);
7935 				fragments["decoration"]				= decoration.specialize(specs);
7936 
7937 				resources.inputs.clear();
7938 				resources.inputs.push_back(Resource(BufferSp(new Float64Buffer(float64Data)), CAPABILITIES[capIdx].dtype));
7939 				resources.verifyIO					= rndModes[rndModeIdx].f;
7940 
7941 				features							= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
7942 				features.coreFeatures.shaderFloat64	= DE_TRUE;
7943 
7944 				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
7945 			}
7946 	}
7947 }
7948 
addGraphics16BitStorageInputOutputFloat64To16Group(tcu::TestCaseGroup * testGroup)7949 void addGraphics16BitStorageInputOutputFloat64To16Group (tcu::TestCaseGroup* testGroup)
7950 {
7951 	de::Random			rnd					(deStringHash(testGroup->getName()));
7952 	RGBA				defaultColors[4];
7953 	vector<string>		extensions;
7954 	map<string, string>	fragments			= passthruFragments();
7955 	const deUint32		numDataPoints		= 64;
7956 	vector<double>		float64Data			= getFloat64s(rnd, numDataPoints);
7957 
7958 	extensions.push_back("VK_KHR_16bit_storage");
7959 
7960 	fragments["capability"]				=
7961 		"OpCapability StorageInputOutput16\n"
7962 		"OpCapability Float64\n";
7963 	fragments["extension"]				= "OpExtension \"SPV_KHR_16bit_storage\"\n";
7964 
7965 	getDefaultColors(defaultColors);
7966 
7967 	struct RndMode
7968 	{
7969 		const char*				name;
7970 		const char*				decor;
7971 		const char*				decor_tessc;
7972 		RoundingModeFlags		flags;
7973 	};
7974 
7975 	const RndMode		rndModes[]		=
7976 	{
7977 		{"rtz",
7978 		 "OpDecorate %ret0  FPRoundingMode RTZ\n",
7979 		 "OpDecorate %ret1  FPRoundingMode RTZ\n"
7980 		 "OpDecorate %ret2  FPRoundingMode RTZ\n",
7981 		 ROUNDINGMODE_RTZ},
7982 		{"rte",
7983 		 "OpDecorate %ret0  FPRoundingMode RTE\n",
7984 		 "OpDecorate %ret1  FPRoundingMode RTE\n"
7985 		 "OpDecorate %ret2  FPRoundingMode RTE\n",
7986 		  ROUNDINGMODE_RTE},
7987 		{"unspecified_rnd_mode",	"",		"",			RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)},
7988 	};
7989 
7990 	struct Case
7991 	{
7992 		const char*	name;
7993 		const char*	interfaceOpCall;
7994 		const char*	interfaceOpFunc;
7995 		const char* postInterfaceOp;
7996 		const char* postInterfaceOpGeom;
7997 		const char* postInterfaceOpTessc;
7998 		const char*	preMain;
7999 		const char*	inputType;
8000 		const char*	outputType;
8001 		deUint32	numPerCase;
8002 		deUint32	numElements;
8003 	};
8004 
8005 	const Case		cases[]				=
8006 	{
8007 		{ // Scalar cases
8008 			"scalar",
8009 
8010 			"OpFConvert %f16",
8011 
8012 			"",
8013 
8014 			"             %ret0 = OpFConvert %f16 %IF_input_val\n"
8015 			"                OpStore %IF_output %ret0\n",
8016 
8017 			"             %ret0 = OpFConvert %f16 %IF_input_val0\n"
8018 			"                OpStore %IF_output %ret0\n",
8019 
8020 			"             %ret0 = OpFConvert %f16 %IF_input_val0\n"
8021 			"                OpStore %IF_output_ptr0 %ret0\n"
8022 			"             %ret1 = OpFConvert %f16 %IF_input_val1\n"
8023 			"                OpStore %IF_output_ptr1 %ret1\n"
8024 			"             %ret2 = OpFConvert %f16 %IF_input_val2\n"
8025 			"                OpStore %IF_output_ptr2 %ret2\n",
8026 
8027 			"             %f16 = OpTypeFloat 16\n"
8028 			"             %f64 = OpTypeFloat 64\n"
8029 			"          %op_f16 = OpTypePointer Output %f16\n"
8030 			"           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
8031 			"        %op_a3f16 = OpTypePointer Output %a3f16\n"
8032 			"%f16_f64_function = OpTypeFunction %f16 %f64\n"
8033 			"           %a3f64 = OpTypeArray %f64 %c_i32_3\n"
8034 			"        %ip_a3f64 = OpTypePointer Input %a3f64\n"
8035 			"          %ip_f64 = OpTypePointer Input %f64\n",
8036 
8037 			"f64",
8038 			"f16",
8039 			4,
8040 			1,
8041 		},
8042 		{ // Vector cases
8043 			"vector",
8044 
8045 			"OpFConvert %v2f16",
8046 
8047 			"",
8048 
8049 			"             %ret0 = OpFConvert %v2f16 %IF_input_val\n"
8050 			"                OpStore %IF_output %ret0\n",
8051 
8052 			"             %ret0 = OpFConvert %v2f16 %IF_input_val0\n"
8053 			"                OpStore %IF_output %ret0\n",
8054 
8055 			"             %ret0 = OpFConvert %v2f16 %IF_input_val0\n"
8056 			"                OpStore %IF_output_ptr0 %ret0\n"
8057 			"             %ret1 = OpFConvert %v2f16 %IF_input_val1\n"
8058 			"                OpStore %IF_output_ptr1 %ret1\n"
8059 			"             %ret2 = OpFConvert %v2f16 %IF_input_val2\n"
8060 			"                OpStore %IF_output_ptr2 %ret2\n",
8061 
8062 			"                 %f16 = OpTypeFloat 16\n"
8063 			"                 %f64 = OpTypeFloat 64\n"
8064 			"               %v2f16 = OpTypeVector %f16 2\n"
8065 			"               %v2f64 = OpTypeVector %f64 2\n"
8066 			"            %op_v2f16 = OpTypePointer Output %v2f16\n"
8067 			"             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
8068 			"          %op_a3v2f16 = OpTypePointer Output %a3v2f16\n"
8069 			"%v2f16_v2f64_function = OpTypeFunction %v2f16 %v2f64\n"
8070 			"             %a3v2f64 = OpTypeArray %v2f64 %c_i32_3\n"
8071 			"          %ip_a3v2f64 = OpTypePointer Input %a3v2f64\n"
8072 			"          %ip_v2f64 = OpTypePointer Input %v2f64\n",
8073 
8074 			"v2f64",
8075 			"v2f16",
8076 			2 * 4,
8077 			2,
8078 		}
8079 	};
8080 
8081 	VulkanFeatures	requiredFeatures;
8082 
8083 	requiredFeatures.coreFeatures.shaderFloat64	= DE_TRUE;
8084 	requiredFeatures.ext16BitStorage			= EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
8085 
8086 	for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
8087 		for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
8088 		{
8089 			fragments["interface_op_func"]			= cases[caseIdx].interfaceOpFunc;
8090 			fragments["interface_op_call"]      = cases[caseIdx].interfaceOpCall;
8091 			fragments["post_interface_op_frag"]		= cases[caseIdx].postInterfaceOp;
8092 			fragments["post_interface_op_vert"]		= cases[caseIdx].postInterfaceOp;
8093 			fragments["post_interface_op_geom"]		= cases[caseIdx].postInterfaceOpGeom;
8094 			fragments["post_interface_op_tesse"]	= cases[caseIdx].postInterfaceOpGeom;
8095 			fragments["post_interface_op_tessc"]	= cases[caseIdx].postInterfaceOpTessc;
8096 			fragments["pre_main"]					= cases[caseIdx].preMain;
8097 			fragments["decoration"]					= rndModes[rndModeIdx].decor;
8098 			fragments["decoration_tessc"]			= rndModes[rndModeIdx].decor_tessc;
8099 
8100 			fragments["input_type"]			= cases[caseIdx].inputType;
8101 			fragments["output_type"]		= cases[caseIdx].outputType;
8102 
8103 			GraphicsInterfaces	interfaces;
8104 			const deUint32		numPerCase	= cases[caseIdx].numPerCase;
8105 			vector<double>		subInputs	(numPerCase);
8106 			vector<deFloat16>	subOutputs	(numPerCase);
8107 
8108 			// The pipeline need this to call compare16BitFloat() when checking the result.
8109 			interfaces.setRoundingMode(rndModes[rndModeIdx].flags);
8110 
8111 			for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
8112 			{
8113 				string			testName	= string(cases[caseIdx].name) + numberToString(caseNdx) + "_" + rndModes[rndModeIdx].name;
8114 
8115 				for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
8116 				{
8117 					subInputs[numNdx]	= float64Data[caseNdx * numPerCase + numNdx];
8118 					// We derive the expected result from inputs directly in the graphics pipeline.
8119 					subOutputs[numNdx]	= 0;
8120 				}
8121 				interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT64), BufferSp(new Float64Buffer(subInputs))),
8122 										  std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16), BufferSp(new Float16Buffer(subOutputs))));
8123 				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
8124 			}
8125 		}
8126 }
8127 
addCompute16bitStorageUniform16To64Group(tcu::TestCaseGroup * group)8128 void addCompute16bitStorageUniform16To64Group (tcu::TestCaseGroup* group)
8129 {
8130 	tcu::TestContext&				testCtx			= group->getTestContext();
8131 	de::Random						rnd				(deStringHash(group->getName()));
8132 	const int						numElements		= 128;
8133 
8134 	const StringTemplate			shaderTemplate	(
8135 		"OpCapability Shader\n"
8136 		"OpCapability Float64\n"
8137 		"OpCapability ${capability}\n"
8138 		"OpExtension \"SPV_KHR_16bit_storage\"\n"
8139 		"OpMemoryModel Logical GLSL450\n"
8140 		"OpEntryPoint GLCompute %main \"main\" %id\n"
8141 		"OpExecutionMode %main LocalSize 1 1 1\n"
8142 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
8143 
8144 		"${stride}\n"
8145 
8146 		"OpMemberDecorate %SSBO64 0 Offset 0\n"
8147 		"OpMemberDecorate %SSBO16 0 Offset 0\n"
8148 		"OpDecorate %SSBO64 BufferBlock\n"
8149 		"OpDecorate %SSBO16 ${storage}\n"
8150 		"OpDecorate %ssbo64 DescriptorSet 0\n"
8151 		"OpDecorate %ssbo16 DescriptorSet 0\n"
8152 		"OpDecorate %ssbo64 Binding 1\n"
8153 		"OpDecorate %ssbo16 Binding 0\n"
8154 
8155 		"${matrix_decor:opt}\n"
8156 
8157 		"%bool      = OpTypeBool\n"
8158 		"%void      = OpTypeVoid\n"
8159 		"%voidf     = OpTypeFunction %void\n"
8160 		"%u32       = OpTypeInt 32 0\n"
8161 		"%i32       = OpTypeInt 32 1\n"
8162 		"%f64       = OpTypeFloat 64\n"
8163 		"%v3u32     = OpTypeVector %u32 3\n"
8164 		"%uvec3ptr  = OpTypePointer Input %v3u32\n"
8165 		"%i32ptr    = OpTypePointer Uniform %i32\n"
8166 		"%f64ptr    = OpTypePointer Uniform %f64\n"
8167 
8168 		"%zero      = OpConstant %i32 0\n"
8169 		"%c_i32_1   = OpConstant %i32 1\n"
8170 		"%c_i32_2   = OpConstant %i32 2\n"
8171 		"%c_i32_3   = OpConstant %i32 3\n"
8172 		"%c_i32_16  = OpConstant %i32 16\n"
8173 		"%c_i32_32  = OpConstant %i32 32\n"
8174 		"%c_i32_64  = OpConstant %i32 64\n"
8175 		"%c_i32_128 = OpConstant %i32 128\n"
8176 		"%c_i32_ci  = OpConstant %i32 ${constarrayidx}\n"
8177 
8178 		"%i32arr    = OpTypeArray %i32 %c_i32_128\n"
8179 		"%f64arr    = OpTypeArray %f64 %c_i32_128\n"
8180 
8181 		"${types}\n"
8182 		"${matrix_types:opt}\n"
8183 
8184 		"%SSBO64    = OpTypeStruct %${matrix_prefix:opt}${base64}arr\n"
8185 		"%SSBO16    = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
8186 		"%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
8187 		"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
8188 		"%ssbo64    = OpVariable %up_SSBO64 Uniform\n"
8189 		"%ssbo16    = OpVariable %up_SSBO16 Uniform\n"
8190 
8191 		"%id        = OpVariable %uvec3ptr Input\n"
8192 
8193 		"%main      = OpFunction %void None %voidf\n"
8194 		"%label     = OpLabel\n"
8195 		"%idval     = OpLoad %v3u32 %id\n"
8196 		"%x         = OpCompositeExtract %u32 %idval 0\n"
8197 		"%inloc     = OpAccessChain %${base16}ptr %ssbo16 %zero %${arrayindex} ${index0:opt}\n"
8198 		"%val16     = OpLoad %${base16} %inloc\n"
8199 		"%val64     = ${convert} %${base64} %val16\n"
8200 		"%outloc    = OpAccessChain %${base64}ptr %ssbo64 %zero %x ${index0:opt}\n"
8201 		"             OpStore %outloc %val64\n"
8202 		"${matrix_store:opt}\n"
8203 		"             OpReturn\n"
8204 		"             OpFunctionEnd\n");
8205 
8206 	{  // floats
8207 		const char										floatTypes[]	=
8208 			"%f16       = OpTypeFloat 16\n"
8209 			"%f16ptr    = OpTypePointer Uniform %f16\n"
8210 			"%f16arr    = OpTypeArray %f16 %c_i32_128\n"
8211 			"%v2f16     = OpTypeVector %f16 2\n"
8212 			"%v2f64     = OpTypeVector %f64 2\n"
8213 			"%v2f16ptr  = OpTypePointer Uniform %v2f16\n"
8214 			"%v2f64ptr  = OpTypePointer Uniform %v2f64\n"
8215 			"%v2f16arr  = OpTypeArray %v2f16 %c_i32_64\n"
8216 			"%v2f64arr  = OpTypeArray %v2f64 %c_i32_64\n";
8217 
8218 		enum DataType
8219 		{
8220 			SCALAR,
8221 			VEC2,
8222 			MAT2X2,
8223 		};
8224 
8225 
8226 		struct CompositeType
8227 		{
8228 			const char*	name;
8229 			const char*	base64;
8230 			const char*	base16;
8231 			const char*	strideStr;
8232 			const char*	stride16UBO;
8233 			unsigned	padding16UBO;
8234 			const char*	stride16SSBO;
8235 			unsigned	padding16SSBO;
8236 			bool		useConstantIndex;
8237 			unsigned	constantIndex;
8238 			unsigned	count;
8239 			DataType	dataType;
8240 		};
8241 
8242 		const CompositeType	cTypes[] =
8243 		{
8244 			{"scalar",				"f64",		"f16",		"OpDecorate %f64arr ArrayStride 8\nOpDecorate %f16arr ArrayStride ",			"16",	14,	"2",	0,	false,	0,	numElements		, SCALAR },
8245 			{"scalar_const_idx_5",	"f64",		"f16",		"OpDecorate %f64arr ArrayStride 8\nOpDecorate %f16arr ArrayStride ",			"16",	14,	"2",	0,	true,	5,	numElements		, SCALAR },
8246 			{"scalar_const_idx_8",	"f64",		"f16",		"OpDecorate %f64arr ArrayStride 8\nOpDecorate %f16arr ArrayStride ",			"16",	14,	"2",	0,	true,	8,	numElements		, SCALAR },
8247 			{"vector",				"v2f64",	"v2f16",	"OpDecorate %v2f64arr ArrayStride 16\nOpDecorate %v2f16arr ArrayStride ",		"16",	12,	"4",	0,	false,	0,	numElements / 2	, VEC2 },
8248 			{"matrix",				"v2f64",	"v2f16",	"OpDecorate %m4v2f64arr ArrayStride 64\nOpDecorate %m4v2f16arr ArrayStride ",	"16",	0, "16",	0,	false,	0,	numElements / 8	, MAT2X2 }
8249 		};
8250 
8251 		vector<deFloat16>	float16Data			= getFloat16s(rnd, numElements);
8252 		vector<double>		float64Data;
8253 
8254 		float64Data.reserve(numElements);
8255 		for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
8256 			float64Data.push_back(deFloat16To64(float16Data[numIdx]));
8257 
8258 		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
8259 			for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
8260 			{
8261 				ComputeShaderSpec		spec;
8262 				map<string, string>		specs;
8263 				string					testName	= string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name + "_float";
8264 				const bool				isUBO		= CAPABILITIES[capIdx].dtype == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
8265 
8266 				specs["capability"]		= CAPABILITIES[capIdx].cap;
8267 				specs["storage"]		= CAPABILITIES[capIdx].decor;
8268 				specs["stride"]			= cTypes[tyIdx].strideStr;
8269 				specs["base64"]			= cTypes[tyIdx].base64;
8270 				specs["base16"]			= cTypes[tyIdx].base16;
8271 				specs["types"]			= floatTypes;
8272 				specs["convert"]		= "OpFConvert";
8273 				specs["constarrayidx"]	= de::toString(cTypes[tyIdx].constantIndex);
8274 
8275 				if (isUBO)
8276 					specs["stride"] += cTypes[tyIdx].stride16UBO;
8277 				else
8278 					specs["stride"] += cTypes[tyIdx].stride16SSBO;
8279 
8280 				if (cTypes[tyIdx].useConstantIndex)
8281 					specs["arrayindex"] = "c_i32_ci";
8282 				else
8283 					specs["arrayindex"] = "x";
8284 
8285 				vector<double>			float64DataConstIdx;
8286 				if (cTypes[tyIdx].useConstantIndex)
8287 				{
8288 					const deUint32 numFloats = numElements / cTypes[tyIdx].count;
8289 					for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
8290 						float64DataConstIdx.push_back(float64Data[cTypes[tyIdx].constantIndex * numFloats + numIdx % numFloats]);
8291 				}
8292 
8293 				if (deStringEqual(cTypes[tyIdx].name, "matrix"))
8294 				{
8295 					specs["index0"]			= "%zero";
8296 					specs["matrix_prefix"]	= "m4";
8297 					specs["matrix_types"]	=
8298 						"%m4v2f16 = OpTypeMatrix %v2f16 4\n"
8299 						"%m4v2f64 = OpTypeMatrix %v2f64 4\n"
8300 						"%m4v2f16arr = OpTypeArray %m4v2f16 %c_i32_16\n"
8301 						"%m4v2f64arr = OpTypeArray %m4v2f64 %c_i32_16\n";
8302 					specs["matrix_decor"]	=
8303 						"OpMemberDecorate %SSBO64 0 ColMajor\n"
8304 						"OpMemberDecorate %SSBO64 0 MatrixStride 16\n"
8305 						"OpMemberDecorate %SSBO16 0 ColMajor\n"
8306 						"OpMemberDecorate %SSBO16 0 MatrixStride 4\n";
8307 					specs["matrix_store"]	=
8308 						"%inloc_1  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_1\n"
8309 						"%val16_1  = OpLoad %v2f16 %inloc_1\n"
8310 						"%val64_1  = OpFConvert %v2f64 %val16_1\n"
8311 						"%outloc_1 = OpAccessChain %v2f64ptr %ssbo64 %zero %x %c_i32_1\n"
8312 						"            OpStore %outloc_1 %val64_1\n"
8313 
8314 						"%inloc_2  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_2\n"
8315 						"%val16_2  = OpLoad %v2f16 %inloc_2\n"
8316 						"%val64_2  = OpFConvert %v2f64 %val16_2\n"
8317 						"%outloc_2 = OpAccessChain %v2f64ptr %ssbo64 %zero %x %c_i32_2\n"
8318 						"            OpStore %outloc_2 %val64_2\n"
8319 
8320 						"%inloc_3  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_3\n"
8321 						"%val16_3  = OpLoad %v2f16 %inloc_3\n"
8322 						"%val64_3  = OpFConvert %v2f64 %val16_3\n"
8323 						"%outloc_3 = OpAccessChain %v2f64ptr %ssbo64 %zero %x %c_i32_3\n"
8324 						"            OpStore %outloc_3 %val64_3\n";
8325 				}
8326 
8327 				spec.assembly			= shaderTemplate.specialize(specs);
8328 				spec.numWorkGroups		= IVec3(cTypes[tyIdx].count, 1, 1);
8329 				spec.verifyIO			= check64BitFloats;
8330 				const unsigned padding	= isUBO ? cTypes[tyIdx].padding16UBO : cTypes[tyIdx].padding16SSBO;
8331 
8332 				if (cTypes[tyIdx].dataType == SCALAR || cTypes[tyIdx].dataType == MAT2X2)
8333 				{
8334 					DE_ASSERT(cTypes[tyIdx].dataType != MAT2X2 || padding == 0);
8335 					spec.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data, padding)), CAPABILITIES[capIdx].dtype));
8336 				}
8337 				else if (cTypes[tyIdx].dataType == VEC2)
8338 				{
8339 					vector<tcu::Vector<deFloat16, 2> >	float16Vec2Data(numElements / 2);
8340 					for (size_t elemIdx = 0; elemIdx < numElements; elemIdx++)
8341 					{
8342 						float16Vec2Data[elemIdx / 2][elemIdx % 2] = float16Data[elemIdx];
8343 					}
8344 
8345 					typedef Buffer<tcu::Vector<deFloat16, 2> > Float16Vec2Buffer;
8346 					spec.inputs.push_back(Resource(BufferSp(new Float16Vec2Buffer(float16Vec2Data, padding)), CAPABILITIES[capIdx].dtype));
8347 				}
8348 
8349 				spec.outputs.push_back(Resource(BufferSp(new Float64Buffer(cTypes[tyIdx].useConstantIndex ? float64DataConstIdx : float64Data))));
8350 				spec.extensions.push_back("VK_KHR_16bit_storage");
8351 
8352 				spec.requestedVulkanFeatures							= get16BitStorageFeatures(CAPABILITIES[capIdx].name);
8353 				spec.requestedVulkanFeatures.coreFeatures.shaderFloat64	= VK_TRUE;
8354 
8355 				group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
8356 			}
8357 	}
8358 }
8359 
addCompute16bitStoragePushConstant16To64Group(tcu::TestCaseGroup * group)8360 void addCompute16bitStoragePushConstant16To64Group (tcu::TestCaseGroup* group)
8361 {
8362 	tcu::TestContext&				testCtx			= group->getTestContext();
8363 	de::Random						rnd				(deStringHash(group->getName()));
8364 	const int						numElements		= 64;
8365 
8366 	const StringTemplate			shaderTemplate	(
8367 		"OpCapability Shader\n"
8368 		"OpCapability StoragePushConstant16\n"
8369 		"OpCapability Float64\n"
8370 		"OpExtension \"SPV_KHR_16bit_storage\"\n"
8371 		"OpMemoryModel Logical GLSL450\n"
8372 		"OpEntryPoint GLCompute %main \"main\" %id\n"
8373 		"OpExecutionMode %main LocalSize 1 1 1\n"
8374 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
8375 
8376 		"${stride}"
8377 
8378 		"OpDecorate %PC16 Block\n"
8379 		"OpMemberDecorate %PC16 0 Offset 0\n"
8380 		"OpMemberDecorate %SSBO64 0 Offset 0\n"
8381 		"OpDecorate %SSBO64 BufferBlock\n"
8382 		"OpDecorate %ssbo64 DescriptorSet 0\n"
8383 		"OpDecorate %ssbo64 Binding 0\n"
8384 
8385 		"${matrix_decor:opt}\n"
8386 
8387 		"%bool      = OpTypeBool\n"
8388 		"%void      = OpTypeVoid\n"
8389 		"%voidf     = OpTypeFunction %void\n"
8390 		"%u32       = OpTypeInt 32 0\n"
8391 		"%i32       = OpTypeInt 32 1\n"
8392 		"%f32       = OpTypeFloat 32\n"
8393 		"%uvec3     = OpTypeVector %u32 3\n"
8394 		"%fvec3     = OpTypeVector %f32 3\n"
8395 		"%uvec3ptr  = OpTypePointer Input %uvec3\n"
8396 		"%i32ptr    = OpTypePointer Uniform %i32\n"
8397 		"%f32ptr    = OpTypePointer Uniform %f32\n"
8398 
8399 		"%zero      = OpConstant %i32 0\n"
8400 		"%c_i32_1   = OpConstant %i32 1\n"
8401 		"%c_i32_8   = OpConstant %i32 8\n"
8402 		"%c_i32_16  = OpConstant %i32 16\n"
8403 		"%c_i32_32  = OpConstant %i32 32\n"
8404 		"%c_i32_64  = OpConstant %i32 64\n"
8405 
8406 		"%i32arr    = OpTypeArray %i32 %c_i32_64\n"
8407 		"%f32arr    = OpTypeArray %f32 %c_i32_64\n"
8408 
8409 		"${types}\n"
8410 		"${matrix_types:opt}\n"
8411 
8412 		"%PC16      = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
8413 		"%pp_PC16   = OpTypePointer PushConstant %PC16\n"
8414 		"%pc16      = OpVariable %pp_PC16 PushConstant\n"
8415 		"%SSBO64    = OpTypeStruct %${matrix_prefix:opt}${base64}arr\n"
8416 		"%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
8417 		"%ssbo64    = OpVariable %up_SSBO64 Uniform\n"
8418 
8419 		"%id        = OpVariable %uvec3ptr Input\n"
8420 
8421 		"%main      = OpFunction %void None %voidf\n"
8422 		"%label     = OpLabel\n"
8423 		"%idval     = OpLoad %uvec3 %id\n"
8424 		"%x         = OpCompositeExtract %u32 %idval 0\n"
8425 		"%inloc     = OpAccessChain %${base16}ptr %pc16 %zero %x ${index0:opt}\n"
8426 		"%val16     = OpLoad %${base16} %inloc\n"
8427 		"%val64     = ${convert} %${base64} %val16\n"
8428 		"%outloc    = OpAccessChain %${base64}ptr %ssbo64 %zero %x ${index0:opt}\n"
8429 		"             OpStore %outloc %val64\n"
8430 		"${matrix_store:opt}\n"
8431 		"             OpReturn\n"
8432 		"             OpFunctionEnd\n");
8433 
8434 	{  // floats
8435 		const char										floatTypes[]	=
8436 			"%f16       = OpTypeFloat 16\n"
8437 			"%f16ptr    = OpTypePointer PushConstant %f16\n"
8438 			"%f16arr    = OpTypeArray %f16 %c_i32_64\n"
8439 			"%f64       = OpTypeFloat 64\n"
8440 			"%f64ptr    = OpTypePointer Uniform %f64\n"
8441 			"%f64arr    = OpTypeArray %f64 %c_i32_64\n"
8442 			"%v4f16     = OpTypeVector %f16 4\n"
8443 			"%v4f32     = OpTypeVector %f32 4\n"
8444 			"%v4f64     = OpTypeVector %f64 4\n"
8445 			"%v4f16ptr  = OpTypePointer PushConstant %v4f16\n"
8446 			"%v4f32ptr  = OpTypePointer Uniform %v4f32\n"
8447 			"%v4f64ptr  = OpTypePointer Uniform %v4f64\n"
8448 			"%v4f16arr  = OpTypeArray %v4f16 %c_i32_16\n"
8449 			"%v4f32arr  = OpTypeArray %v4f32 %c_i32_16\n"
8450 			"%v4f64arr  = OpTypeArray %v4f64 %c_i32_16\n";
8451 
8452 		struct CompositeType
8453 		{
8454 			const char*	name;
8455 			const char*	base64;
8456 			const char*	base16;
8457 			const char*	stride;
8458 			unsigned	count;
8459 		};
8460 
8461 		const CompositeType	cTypes[]	=
8462 		{
8463 			{"scalar",	"f64",		"f16",		"OpDecorate %f64arr ArrayStride 8\nOpDecorate %f16arr ArrayStride 2\n",				numElements},
8464 			{"vector",	"v4f64",	"v4f16",	"OpDecorate %v4f64arr ArrayStride 32\nOpDecorate %v4f16arr ArrayStride 8\n",		numElements / 4},
8465 			{"matrix",	"v4f64",	"v4f16",	"OpDecorate %m2v4f64arr ArrayStride 64\nOpDecorate %m2v4f16arr ArrayStride 16\n",	numElements / 8},
8466 		};
8467 
8468 		vector<deFloat16>	float16Data			= getFloat16s(rnd, numElements);
8469 		vector<double>		float64Data;
8470 
8471 		float64Data.reserve(numElements);
8472 		for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
8473 			float64Data.push_back(deFloat16To64(float16Data[numIdx]));
8474 
8475 		for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
8476 		{
8477 			ComputeShaderSpec		spec;
8478 			map<string, string>		specs;
8479 			string					testName	= string(cTypes[tyIdx].name) + "_float";
8480 
8481 			specs["stride"]			= cTypes[tyIdx].stride;
8482 			specs["base64"]			= cTypes[tyIdx].base64;
8483 			specs["base16"]			= cTypes[tyIdx].base16;
8484 			specs["types"]			= floatTypes;
8485 			specs["convert"]		= "OpFConvert";
8486 
8487 			if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
8488 			{
8489 				specs["index0"]			= "%zero";
8490 				specs["matrix_prefix"]	= "m2";
8491 				specs["matrix_types"]	=
8492 					"%m2v4f16 = OpTypeMatrix %v4f16 2\n"
8493 					"%m2v4f64 = OpTypeMatrix %v4f64 2\n"
8494 					"%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_8\n"
8495 					"%m2v4f64arr = OpTypeArray %m2v4f64 %c_i32_8\n";
8496 				specs["matrix_decor"]	=
8497 					"OpMemberDecorate %SSBO64 0 ColMajor\n"
8498 					"OpMemberDecorate %SSBO64 0 MatrixStride 32\n"
8499 					"OpMemberDecorate %PC16 0 ColMajor\n"
8500 					"OpMemberDecorate %PC16 0 MatrixStride 8\n";
8501 				specs["matrix_store"]	=
8502 					"%inloc_1  = OpAccessChain %v4f16ptr %pc16 %zero %x %c_i32_1\n"
8503 					"%val16_1  = OpLoad %v4f16 %inloc_1\n"
8504 					"%val64_1  = OpFConvert %v4f64 %val16_1\n"
8505 					"%outloc_1 = OpAccessChain %v4f64ptr %ssbo64 %zero %x %c_i32_1\n"
8506 					"            OpStore %outloc_1 %val64_1\n";
8507 			}
8508 
8509 			spec.assembly			= shaderTemplate.specialize(specs);
8510 			spec.numWorkGroups		= IVec3(cTypes[tyIdx].count, 1, 1);
8511 			spec.verifyIO			= check64BitFloats;
8512 			spec.pushConstants		= BufferSp(new Float16Buffer(float16Data));
8513 
8514 			spec.outputs.push_back(BufferSp(new Float64Buffer(float64Data)));
8515 
8516 			spec.extensions.push_back("VK_KHR_16bit_storage");
8517 
8518 			spec.requestedVulkanFeatures.coreFeatures.shaderFloat64	= VK_TRUE;
8519 			spec.requestedVulkanFeatures.ext16BitStorage			= EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
8520 
8521 			group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
8522 		}
8523 	}
8524 }
8525 
8526 } // anonymous
8527 
create16BitStorageComputeGroup(tcu::TestContext & testCtx)8528 tcu::TestCaseGroup* create16BitStorageComputeGroup (tcu::TestContext& testCtx)
8529 {
8530 	de::MovePtr<tcu::TestCaseGroup> group		(new tcu::TestCaseGroup(testCtx, "16bit_storage", "Compute tests for VK_KHR_16bit_storage extension"));
8531 	addTestGroup(group.get(), "uniform_64_to_16", "64bit floats to 16bit tests under capability StorageUniform{|BufferBlock}", addCompute16bitStorageUniform64To16Group);
8532 	addTestGroup(group.get(), "uniform_32_to_16", "32bit floats/ints to 16bit tests under capability StorageUniform{|BufferBlock}", addCompute16bitStorageUniform32To16Group);
8533 	addTestGroup(group.get(), "uniform_16_to_32", "16bit floats/ints to 32bit tests under capability StorageUniform{|BufferBlock}", addCompute16bitStorageUniform16To32Group);
8534 	addTestGroup(group.get(), "uniform_16_to_64", "16bit floats to 64bit tests under capability StorageUniform{|BufferBlock}", addCompute16bitStorageUniform16To64Group);
8535 	addTestGroup(group.get(), "push_constant_16_to_32", "16bit floats/ints to 32bit tests under capability StoragePushConstant16", addCompute16bitStoragePushConstant16To32Group);
8536 	addTestGroup(group.get(), "push_constant_16_to_64", "16bit floats to 64bit tests under capability StoragePushConstant16", addCompute16bitStoragePushConstant16To64Group);
8537 	addTestGroup(group.get(), "uniform_16struct_to_32struct", "16bit floats struct to 32bit tests under capability StorageUniform{|BufferBlock}", addCompute16bitStorageUniform16StructTo32StructGroup);
8538 	addTestGroup(group.get(), "uniform_32struct_to_16struct", "32bit floats struct to 16bit tests under capability StorageUniform{|BufferBlock}", addCompute16bitStorageUniform32StructTo16StructGroup);
8539 	addTestGroup(group.get(), "struct_mixed_types", "mixed type of 8bit and 32bit struct", addCompute16bitStructMixedTypesGroup);
8540 	addTestGroup(group.get(), "uniform_16_to_16", "16bit floats/ints to 16bit tests under capability StorageUniformBufferBlock16", addCompute16bitStorageUniform16To16Group);
8541 	addTestGroup(group.get(), "uniform_16_to_32_chainaccess", "chain access 16bit floats/ints to 32bit tests under capability StorageUniform{|BufferBlock}", addCompute16bitStorageUniform16To32ChainAccessGroup);
8542 
8543 	return group.release();
8544 }
8545 
create16BitStorageGraphicsGroup(tcu::TestContext & testCtx)8546 tcu::TestCaseGroup* create16BitStorageGraphicsGroup (tcu::TestContext& testCtx)
8547 {
8548 	de::MovePtr<tcu::TestCaseGroup> group		(new tcu::TestCaseGroup(testCtx, "16bit_storage", "Graphics tests for VK_KHR_16bit_storage extension"));
8549 
8550 	addTestGroup(group.get(), "uniform_float_64_to_16", "64-bit floats into 16-bit tests under capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformFloat64To16Group);
8551 	addTestGroup(group.get(), "uniform_float_32_to_16", "32-bit floats into 16-bit tests under capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformFloat32To16Group);
8552 	addTestGroup(group.get(), "uniform_float_16_to_32", "16-bit floats into 32-bit testsunder capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformFloat16To32Group);
8553 	addTestGroup(group.get(), "uniform_float_16_to_64", "16-bit floats into 64-bit testsunder capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformFloat16To64Group);
8554 	addTestGroup(group.get(), "uniform_int_32_to_16", "32-bit int into 16-bit tests under capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformInt32To16Group);
8555 	addTestGroup(group.get(), "uniform_int_16_to_32", "16-bit int into 32-bit tests under capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformInt16To32Group);
8556 	addTestGroup(group.get(), "input_output_float_64_to_16", "64-bit floats into 16-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputFloat64To16Group);
8557 	addTestGroup(group.get(), "input_output_float_32_to_16", "32-bit floats into 16-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputFloat32To16Group);
8558 	addTestGroup(group.get(), "input_output_float_16_to_32", "16-bit floats into 32-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputFloat16To32Group);
8559 	addTestGroup(group.get(), "input_output_float_16_to_16", "16-bit floats pass-through tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputFloat16To16Group);
8560 	addTestGroup(group.get(), "input_output_float_16_to_64", "16-bit floats into 64-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputFloat16To64Group);
8561 	addTestGroup(group.get(), "input_output_float_16_to_16x2", "16-bit floats pass-through to two outputs tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputFloat16To16x2Group);
8562 	addTestGroup(group.get(), "input_output_int_16_to_16x2", "16-bit ints pass-through to two outputs tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputInt16To16x2Group);
8563 	addTestGroup(group.get(), "input_output_int_32_to_16", "32-bit int into 16-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputInt32To16Group);
8564 	addTestGroup(group.get(), "input_output_int_16_to_32", "16-bit int into 32-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputInt16To32Group);
8565 	addTestGroup(group.get(), "input_output_int_16_to_16", "16-bit int into 16-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputInt16To16Group);
8566 	addTestGroup(group.get(), "push_constant_float_16_to_32", "16-bit floats into 32-bit tests under capability StoragePushConstant16", addGraphics16BitStoragePushConstantFloat16To32Group);
8567 	addTestGroup(group.get(), "push_constant_float_16_to_64", "16-bit floats into 64-bit tests under capability StoragePushConstant16", addGraphics16BitStoragePushConstantFloat16To64Group);
8568 	addTestGroup(group.get(), "push_constant_int_16_to_32", "16-bit int into 32-bit tests under capability StoragePushConstant16", addGraphics16BitStoragePushConstantInt16To32Group);
8569 	addTestGroup(group.get(), "uniform_16struct_to_32struct", "16-bit float struct into 32-bit tests under capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformStructFloat16To32Group);
8570 	addTestGroup(group.get(), "uniform_32struct_to_16struct", "32-bit float struct into 16-bit tests under capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformStructFloat32To16Group);
8571 	addTestGroup(group.get(), "struct_mixed_types", "mixed type of 8bit and 32bit struct", addGraphics16bitStructMixedTypesGroup);
8572 
8573 	return group.release();
8574 }
8575 
8576 } // SpirVAssembly
8577 } // vkt
8578