1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2018 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief VK_KHR_shader_float_controls tests.
22  *//*--------------------------------------------------------------------*/
23 
24 
#include "vktSpvAsmFloatControlsTests.hpp"
#include "vktSpvAsmComputeShaderCase.hpp"
#include "vktSpvAsmGraphicsShaderTestUtil.hpp"
#include "vktTestGroupUtil.hpp"
#include "tcuFloat.hpp"
#include "tcuFloatFormat.hpp"
#include "tcuStringTemplate.hpp"
#include "deUniquePtr.hpp"
#include "deFloat16.h"
#include "vkRefUtil.hpp"
#include <vector>
#include <limits>
#include <cstring>
#include <fenv.h>
38 
39 namespace vkt
40 {
41 namespace SpirVAssembly
42 {
43 
44 namespace
45 {
46 
47 using namespace std;
48 using namespace tcu;
49 
// Floating-point widths handled by these tests (16, 32 and 64 bit).
enum FloatType
{
	FP16 = 0,
	FP32 = 1,
	FP64 = 2
};
56 
// Bit flags naming the float behaviors that it is possible to test.
// The comment next to each flag gives the name of the matching float-controls mode.
enum BehaviorFlagBits
{
	B_DENORM_PERSERVE	= 1 << 0,	// DenormPreserve
	B_DENORM_FLUSH		= 1 << 1,	// DenormFlushToZero
	B_ZIN_PERSERVE		= 1 << 2,	// SignedZeroInfNanPreserve
	B_RTE_ROUNDING		= 1 << 3,	// RoundingModeRTE
	B_RTZ_ROUNDING		= 1 << 4	// RoundingModeRTZ
};
66 
// Integer type for holding BehaviorFlagBits values (presumably OR-ed together;
// the usage is outside this part of the file - TODO confirm).
typedef deUint32 BehaviorFlags;
68 
// Symbolic codes for every float value that the tests use as an operation
// argument or as an operation result. Using codes instead of concrete numbers
// allows the same value to be substituted for each float type, which reduces
// the complexity of the tests implementation.
enum ValueId
{
	// ---- common values, used both as arguments and as results ----
	V_UNUSED = 0,		// marks arguments that are not used in an operation
						// or results of test cases that should be skipped
	V_MINUS_INF,
	V_MINUS_ONE,		// -1.0
	V_MINUS_ZERO,		// -0.0
	V_ZERO,				//  0.0
	V_HALF,				//  0.5
	V_ONE,				//  1.0
	V_INF,
	V_DENORM,
	V_NAN,

	// ---- arguments for rounding mode tests ----
	// (used only when arguments are passed from input)
	V_ADD_ARG_A,
	V_ADD_ARG_B,
	V_SUB_ARG_A,
	V_SUB_ARG_B,
	V_MUL_ARG_A,
	V_MUL_ARG_B,
	V_DOT_ARG_A,
	V_DOT_ARG_B,

	// ---- arguments of conversion operations ----
	// (used only when arguments are passed from input)
	V_CONV_FROM_FP32_ARG,
	V_CONV_FROM_FP64_ARG,

	// ---- results of the rounding mode tests ----
	V_ADD_RTZ_RESULT,
	V_ADD_RTE_RESULT,
	V_SUB_RTZ_RESULT,
	V_SUB_RTE_RESULT,
	V_MUL_RTZ_RESULT,
	V_MUL_RTE_RESULT,
	V_DOT_RTZ_RESULT,
	V_DOT_RTE_RESULT,

	// ---- non-common results of some operations (corner cases) ----
	V_MINUS_ONE_OR_CLOSE,			// used only for the fp16 subtraction result of preserved denorm and one
	V_PI_DIV_2,
	V_ZERO_OR_MINUS_ZERO,			// both +0 and -0 are accepted
	V_ZERO_OR_FP16_DENORM_TO_FP32,	// both 0 and the fp32 representation of an fp16 denorm are accepted
	V_ZERO_OR_FP16_DENORM_TO_FP64,
	V_ZERO_OR_FP32_DENORM_TO_FP64,
	V_DENORM_TIMES_TWO,
	V_DEGREES_DENORM,
	V_TRIG_ONE,						// 1.0 for trigonometric operations, including precision margin

	// ---- results of conversion operations ----
	V_CONV_TO_FP16_RTZ_RESULT,
	V_CONV_TO_FP16_RTE_RESULT,
	V_CONV_TO_FP32_RTZ_RESULT,
	V_CONV_TO_FP32_RTE_RESULT,
	V_CONV_DENORM_SMALLER,			// used e.g. when converting an fp16 denorm to fp32
	V_CONV_DENORM_BIGGER
};
128 
// Identifiers of all tested operations. The operations are described in a
// generic way so that they can be used to generate tests working on arguments
// with different values of the specified float type.
enum OperationId
{
	// ---- SPIR-V unary operations ----
	O_NEGATE = 0,
	O_COMPOSITE,
	O_COMPOSITE_INS,
	O_COPY,
	O_D_EXTRACT,
	O_D_INSERT,
	O_SHUFFLE,
	O_TRANSPOSE,
	O_CONV_FROM_FP16,
	O_CONV_FROM_FP32,
	O_CONV_FROM_FP64,
	O_SCONST_CONV_FROM_FP32_TO_FP16,
	O_SCONST_CONV_FROM_FP64_TO_FP32,
	O_SCONST_CONV_FROM_FP64_TO_FP16,
	O_RETURN_VAL,

	// ---- SPIR-V binary operations ----
	O_ADD,
	O_SUB,
	O_MUL,
	O_DIV,
	O_REM,
	O_MOD,
	O_PHI,
	O_SELECT,
	O_DOT,
	O_VEC_MUL_S,
	O_VEC_MUL_M,
	O_MAT_MUL_S,
	O_MAT_MUL_V,
	O_MAT_MUL_M,
	O_OUT_PROD,
	O_ORD_EQ,
	O_UORD_EQ,
	O_ORD_NEQ,
	O_UORD_NEQ,
	O_ORD_LS,
	O_UORD_LS,
	O_ORD_GT,
	O_UORD_GT,
	O_ORD_LE,
	O_UORD_LE,
	O_ORD_GE,
	O_UORD_GE,

	// ---- GLSL unary operations ----
	O_ROUND,
	O_ROUND_EV,
	O_TRUNC,
	O_ABS,
	O_SIGN,
	O_FLOOR,
	O_CEIL,
	O_FRACT,
	O_RADIANS,
	O_DEGREES,
	O_SIN,
	O_COS,
	O_TAN,
	O_ASIN,
	O_ACOS,
	O_ATAN,
	O_SINH,
	O_COSH,
	O_TANH,
	O_ASINH,
	O_ACOSH,
	O_ATANH,
	O_EXP,
	O_LOG,
	O_EXP2,
	O_LOG2,
	O_SQRT,
	O_INV_SQRT,
	O_MODF,
	O_MODF_ST,
	O_FREXP,
	O_FREXP_ST,
	O_LENGHT,
	O_NORMALIZE,
	O_REFLECT,
	O_REFRACT,
	O_MAT_DET,
	O_MAT_INV,
	O_PH_DENORM,	// PackHalf2x16
	O_UPH_DENORM,
	O_PD_DENORM,	// PackDouble2x32
	O_UPD_DENORM_FLUSH,
	O_UPD_DENORM_PRESERVE,

	// ---- GLSL binary operations ----
	O_ATAN2,
	O_POW,
	O_MIX,
	O_FMA,
	O_MIN,
	O_MAX,
	O_CLAMP,
	O_STEP,
	O_SSTEP,
	O_DIST,
	O_CROSS,
	O_FACE_FWD,
	O_NMIN,
	O_NMAX,
	O_NCLAMP,

	O_ORTE_ROUND,
	O_ORTZ_ROUND
};
245 
246 // Structures storing data required to test DenormPreserve and DenormFlushToZero modes.
247 // Operations are separated into binary and unary lists because binary operations can be tested with
248 // two attributes and thus denorms can be tested in combination with value, denorm, inf and nan.
249 // Unary operations are only tested with denorms.
250 struct BinaryCase
251 {
252 	OperationId	operationId;
253 	ValueId		opVarResult;
254 	ValueId		opDenormResult;
255 	ValueId		opInfResult;
256 	ValueId		opNanResult;
257 };
258 struct UnaryCase
259 {
260 	OperationId	operationId;
261 	ValueId		result;
262 };
263 
// Returns a copy of str in which every non-overlapping occurrence of the
// substring 'from' has been replaced with 'to'. The search continues after the
// inserted text, so occurrences of 'from' inside 'to' are not replaced again.
// When 'from' is empty the string is returned unchanged.
std::string replace(std::string str, const std::string& from, const std::string& to)
{
	// to keep spir-v code clean and easier to read parts of it are processed
	// with this method instead of StringTemplate; main usage of this method is the
	// replacement of "float_" with "f16_", "f32_" or "f64_" depending on test case

	// an empty pattern is found at every position, so without this guard
	// the loop below would never terminate
	if (from.empty())
		return str;

	size_t start_pos = 0;
	while ((start_pos = str.find(from, start_pos)) != std::string::npos)
	{
		str.replace(start_pos, from.length(), to);
		start_pos += to.length();
	}
	return str;
}
279 
// Helper for converting bits between an integer type and a float type:
// the nested union overlays one FLOAT_TYPE and one UINT_TYPE object.
template <class FLOAT_TYPE, class UINT_TYPE>
struct RawConvert
{
	union Value
	{
		FLOAT_TYPE	fp;		// float view
		UINT_TYPE	ui;		// integer view
	};
};
290 
291 // Traits used to get int type that can store equivalent float type.
292 template<typename FLOAT_TYPE>
293 struct GetCoresponding
294 {
295 	typedef deUint16 uint_type;
296 };
297 template<>
298 struct GetCoresponding<float>
299 {
300 	typedef deUint32 uint_type;
301 };
302 template<>
303 struct GetCoresponding<double>
304 {
305 	typedef deUint64 uint_type;
306 };
307 
308 // All values used for arguments and operation results are stored in single map.
309 // Each float type (fp16, fp32, fp64) has its own map that is used during
310 // test setup and during verification. TypeValuesBase is interface to that map.
311 class TypeValuesBase
312 {
313 public:
314 	TypeValuesBase();
~TypeValuesBase()315 	virtual ~TypeValuesBase()	{}
316 
317 	virtual BufferSp	constructInputBuffer(const ValueId* twoArguments) const = 0;
318 	virtual BufferSp	constructOutputBuffer(ValueId result) const = 0;
319 
320 protected:
321 	const double	pi;
322 };
323 
TypeValuesBase()324 TypeValuesBase::TypeValuesBase()
325 	: pi(3.14159265358979323846)
326 {
327 }
328 
329 typedef de::SharedPtr<TypeValuesBase> TypeValuesSP;
330 
331 template <typename FLOAT_TYPE>
332 class TypeValues: public TypeValuesBase
333 {
334 public:
335 	TypeValues();
336 
337 	BufferSp constructInputBuffer(const ValueId* twoArguments) const;
338 	BufferSp constructOutputBuffer(ValueId result) const;
339 
340 	FLOAT_TYPE getValue(ValueId id) const;
341 
342 	template <typename UINT_TYPE>
343 	FLOAT_TYPE exactByteEquivalent(UINT_TYPE byteValue) const;
344 
345 private:
346 	typedef map<ValueId, FLOAT_TYPE> ValueMap;
347 	ValueMap m_valueIdToFloatType;
348 };
349 
350 template <typename FLOAT_TYPE>
constructInputBuffer(const ValueId * twoArguments) const351 BufferSp TypeValues<FLOAT_TYPE>::constructInputBuffer(const ValueId* twoArguments) const
352 {
353 	std::vector<FLOAT_TYPE> inputData(2);
354 	inputData[0] = m_valueIdToFloatType.at(twoArguments[0]);
355 	inputData[1] = m_valueIdToFloatType.at(twoArguments[1]);
356 	return BufferSp(new Buffer<FLOAT_TYPE>(inputData));
357 }
358 
359 template <typename FLOAT_TYPE>
constructOutputBuffer(ValueId result) const360 BufferSp TypeValues<FLOAT_TYPE>::constructOutputBuffer(ValueId result) const
361 {
362 	// note: we are not doing maping here, ValueId is directly saved in
363 	// float type in order to be able to retireve it during verification
364 
365 	typedef typename GetCoresponding<FLOAT_TYPE>::uint_type uint_t;
366 	uint_t value = static_cast<uint_t>(result);
367 
368 	std::vector<FLOAT_TYPE> outputData(1, exactByteEquivalent<uint_t>(value));
369 	return BufferSp(new Buffer<FLOAT_TYPE>(outputData));
370 }
371 
372 template <typename FLOAT_TYPE>
getValue(ValueId id) const373 FLOAT_TYPE TypeValues<FLOAT_TYPE>::getValue(ValueId id) const
374 {
375 	return m_valueIdToFloatType.at(id);
376 }
377 
378 template <typename FLOAT_TYPE>
379 template <typename UINT_TYPE>
exactByteEquivalent(UINT_TYPE byteValue) const380 FLOAT_TYPE TypeValues<FLOAT_TYPE>::exactByteEquivalent(UINT_TYPE byteValue) const
381 {
382 	typename RawConvert<FLOAT_TYPE, UINT_TYPE>::Value value;
383 	value.ui = byteValue;
384 	return value.fp;
385 }
386 
387 template <>
TypeValues()388 TypeValues<deFloat16>::TypeValues()
389 	: TypeValuesBase()
390 {
391 	// NOTE: when updating entries in m_valueIdToFloatType make sure to
392 	// update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
393 	ValueMap& vm = m_valueIdToFloatType;
394 	vm[V_UNUSED]			= deFloat32To16(0.0f);
395 	vm[V_MINUS_INF]			= 0xfc00;
396 	vm[V_MINUS_ONE]			= deFloat32To16(-1.0f);
397 	vm[V_MINUS_ZERO]		= 0x8000;
398 	vm[V_ZERO]				= 0x0000;
399 	vm[V_HALF]				= deFloat32To16(0.5f);
400 	vm[V_ONE]				= deFloat32To16(1.0f);
401 	vm[V_INF]				= 0x7c00;
402 	vm[V_DENORM]			= 0x03f0; // this value should be the same as the result of denormBase - epsilon
403 	vm[V_NAN]				= 0x7cf0;
404 
405 	vm[V_PI_DIV_2]			= 0x3e48;
406 	vm[V_DENORM_TIMES_TWO]	= 0x07e0;
407 	vm[V_DEGREES_DENORM]	= 0x1b0c;
408 
409 	vm[V_ADD_ARG_A]					= 0x3c03;
410 	vm[V_ADD_ARG_B]					= vm[V_ONE];
411 	vm[V_SUB_ARG_A]					= vm[V_ADD_ARG_A];
412 	vm[V_SUB_ARG_B]					= 0x4203;
413 	vm[V_MUL_ARG_A]					= vm[V_ADD_ARG_A];
414 	vm[V_MUL_ARG_B]					= 0x1900;
415 	vm[V_DOT_ARG_A]					= vm[V_ADD_ARG_A];
416 	vm[V_DOT_ARG_B]					= vm[V_MUL_ARG_B];
417 	vm[V_CONV_FROM_FP32_ARG]		= vm[V_UNUSED];
418 	vm[V_CONV_FROM_FP64_ARG]		= vm[V_UNUSED];
419 
420 	vm[V_ADD_RTZ_RESULT]			= 0x4001;	// deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rtz)
421 	vm[V_SUB_RTZ_RESULT]			= 0xc001;	// deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rtz)
422 	vm[V_MUL_RTZ_RESULT]			= 0x1903;	// deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rtz)
423 	vm[V_DOT_RTZ_RESULT]			= 0x1d03;
424 	vm[V_CONV_TO_FP16_RTZ_RESULT]	= deFloat32To16Round(1.22334445f, DE_ROUNDINGMODE_TO_ZERO);
425 	vm[V_CONV_TO_FP32_RTZ_RESULT]	= vm[V_UNUSED];
426 
427 	vm[V_ADD_RTE_RESULT]			= 0x4002;	// deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rte)
428 	vm[V_SUB_RTE_RESULT]			= 0xc002;	// deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rte)
429 	vm[V_MUL_RTE_RESULT]			= 0x1904;	// deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rte)
430 	vm[V_DOT_RTE_RESULT]			= 0x1d04;
431 	vm[V_CONV_TO_FP16_RTE_RESULT]	= deFloat32To16Round(1.22334445f, DE_ROUNDINGMODE_TO_NEAREST_EVEN);
432 	vm[V_CONV_TO_FP32_RTE_RESULT]	= vm[V_UNUSED];
433 
434 	// there is no precision to store fp32 denorm nor fp64 denorm
435 	vm[V_CONV_DENORM_SMALLER]		= vm[V_ZERO];
436 	vm[V_CONV_DENORM_BIGGER]		= vm[V_ZERO];
437 }
438 
439 template <>
TypeValues()440 TypeValues<float>::TypeValues()
441 	: TypeValuesBase()
442 {
443 	// NOTE: when updating entries in m_valueIdToFloatType make sure to
444 	// update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
445 	ValueMap& vm = m_valueIdToFloatType;
446 	vm[V_UNUSED]			=  0.0f;
447 	vm[V_MINUS_INF]			= -std::numeric_limits<float>::infinity();
448 	vm[V_MINUS_ONE]			= -1.0f;
449 	vm[V_MINUS_ZERO]		= -0.0f;
450 	vm[V_ZERO]				=  0.0f;
451 	vm[V_HALF]				=  0.5f;
452 	vm[V_ONE]				=  1.0f;
453 	vm[V_INF]				=  std::numeric_limits<float>::infinity();
454 	vm[V_DENORM]			=  static_cast<float>(1.413e-42); // 0x000003f0
455 	vm[V_NAN]				=  std::numeric_limits<float>::quiet_NaN();
456 
457 	vm[V_PI_DIV_2]			=  static_cast<float>(pi / 2);
458 	vm[V_DENORM_TIMES_TWO]	=  vm[V_DENORM] + vm[V_DENORM];
459 	vm[V_DEGREES_DENORM]	=  deFloatDegrees(vm[V_DENORM]);
460 
461 	float e = std::numeric_limits<float>::epsilon();
462 	vm[V_ADD_ARG_A]					= 1.0f + 3 * e;
463 	vm[V_ADD_ARG_B]					= 1.0f;
464 	vm[V_SUB_ARG_A]					= vm[V_ADD_ARG_A];
465 	vm[V_SUB_ARG_B]					= 3.0f + 6 * e;
466 	vm[V_MUL_ARG_A]					= vm[V_ADD_ARG_A];
467 	vm[V_MUL_ARG_B]					= 5 * e;
468 	vm[V_DOT_ARG_A]					= vm[V_ADD_ARG_A];
469 	vm[V_DOT_ARG_B]					= 5 * e;
470 	vm[V_CONV_FROM_FP32_ARG]		= 1.22334445f;
471 	vm[V_CONV_FROM_FP64_ARG]		= vm[V_UNUSED];
472 
473 	int prevRound = fegetround();
474 	fesetround(FE_TOWARDZERO);
475 	vm[V_ADD_RTZ_RESULT]			= vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
476 	vm[V_SUB_RTZ_RESULT]			= vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
477 	vm[V_MUL_RTZ_RESULT]			= vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
478 	vm[V_DOT_RTZ_RESULT]			= vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
479 	vm[V_CONV_TO_FP16_RTZ_RESULT]	= vm[V_UNUSED];
480 	vm[V_CONV_TO_FP32_RTZ_RESULT]	= exactByteEquivalent<deUint32>(0x3f9c968d); // result of conversion from double(1.22334455)
481 
482 	fesetround(FE_TONEAREST);
483 	vm[V_ADD_RTE_RESULT]			= vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
484 	vm[V_SUB_RTE_RESULT]			= vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
485 	vm[V_MUL_RTE_RESULT]			= vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
486 	vm[V_DOT_RTE_RESULT]			= vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
487 	vm[V_CONV_TO_FP16_RTE_RESULT]	= vm[V_UNUSED];
488 	vm[V_CONV_TO_FP32_RTE_RESULT]	= exactByteEquivalent<deUint32>(0x3f9c968e); // result of conversion from double(1.22334455)
489 	fesetround(prevRound);
490 
491 	// there is no precision to store fp64 denorm
492 	vm[V_CONV_DENORM_SMALLER]		= exactByteEquivalent<deUint32>(0x387c0000); // fp16 denorm
493 	vm[V_CONV_DENORM_BIGGER]		= vm[V_ZERO];
494 }
495 
496 template <>
TypeValues()497 TypeValues<double>::TypeValues()
498 	: TypeValuesBase()
499 {
500 	// NOTE: when updating entries in m_valueIdToFloatType make sure to
501 	// update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
502 	ValueMap& vm = m_valueIdToFloatType;
503 	vm[V_UNUSED]			=  0.0;
504 	vm[V_MINUS_INF]			= -std::numeric_limits<double>::infinity();
505 	vm[V_MINUS_ONE]			= -1.0;
506 	vm[V_MINUS_ZERO]		= -0.0;
507 	vm[V_ZERO]				=  0.0;
508 	vm[V_HALF]				=  0.5;
509 	vm[V_ONE]				=  1.0;
510 	vm[V_INF]				=  std::numeric_limits<double>::infinity();
511 	vm[V_DENORM]			=  4.98e-321; // 0x00000000000003F0
512 	vm[V_NAN]				=  std::numeric_limits<double>::quiet_NaN();
513 
514 	vm[V_PI_DIV_2]			=  pi / 2;
515 	vm[V_DENORM_TIMES_TWO]	=  vm[V_DENORM] + vm[V_DENORM];
516 	vm[V_DEGREES_DENORM]	=  vm[V_UNUSED];
517 
518 	double e = std::numeric_limits<double>::epsilon();
519 	vm[V_ADD_ARG_A]				= 1.0 + 3 * e;
520 	vm[V_ADD_ARG_B]				= 1.0;
521 	vm[V_SUB_ARG_A]				= vm[V_ADD_ARG_A];
522 	vm[V_SUB_ARG_B]				= 3.0 + 6 * e;
523 	vm[V_MUL_ARG_A]				= vm[V_ADD_ARG_A];
524 	vm[V_MUL_ARG_B]				= 5 * e;
525 	vm[V_DOT_ARG_A]				= vm[V_ADD_ARG_A];
526 	vm[V_DOT_ARG_B]				= 5 * e;
527 	vm[V_CONV_FROM_FP32_ARG]	= vm[V_UNUSED];
528 	vm[V_CONV_FROM_FP64_ARG]	= 1.22334455;
529 
530 	int prevRound = fegetround();
531 	fesetround(FE_TOWARDZERO);
532 	vm[V_ADD_RTZ_RESULT]			= vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
533 	vm[V_SUB_RTZ_RESULT]			= vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
534 	vm[V_MUL_RTZ_RESULT]			= vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
535 	vm[V_DOT_RTZ_RESULT]			= vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
536 	vm[V_CONV_TO_FP16_RTZ_RESULT]	= vm[V_UNUSED];
537 	vm[V_CONV_TO_FP32_RTZ_RESULT]	= vm[V_UNUSED];
538 
539 	fesetround(FE_TONEAREST);
540 	vm[V_ADD_RTE_RESULT]			= vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
541 	vm[V_SUB_RTE_RESULT]			= vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
542 	vm[V_MUL_RTE_RESULT]			= vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
543 	vm[V_DOT_RTE_RESULT]			= vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
544 	vm[V_CONV_TO_FP16_RTE_RESULT]	= vm[V_UNUSED];
545 	vm[V_CONV_TO_FP32_RTE_RESULT]	= vm[V_UNUSED];
546 	fesetround(prevRound);
547 
548 	vm[V_CONV_DENORM_SMALLER]		= exactByteEquivalent<deUint64>(0x3f0f800000000000); // 0x03f0 is fp16 denorm
549 	vm[V_CONV_DENORM_BIGGER]		= exactByteEquivalent<deUint64>(0x373f800000000000); // 0x000003f0 is fp32 denorm
550 }
551 
552 // Each float type (fp16, fp32, fp64) has specific set of SPIR-V snippets
553 // that was extracted to separate template specialization. Those snippets
554 // are used to compose final test shaders. With this approach
555 // parameterization can be done just once per type and reused for many tests.
556 class TypeSnippetsBase
557 {
558 public:
~TypeSnippetsBase()559 	virtual ~TypeSnippetsBase() {}
560 
561 protected:
562 	void updateSpirvSnippets();
563 
564 public: // Type specific data:
565 
566 	// Number of bits consumed by float type
567 	string bitWidth;
568 
569 	// Minimum positive normal
570 	string epsilon;
571 
572 	// denormBase is a normal value (found empirically) used to generate denorm value.
573 	// Denorm is generated by substracting epsilon from denormBase.
574 	// denormBase is not a denorm - it is used to create denorm.
575 	// This value is needed when operations are tested with arguments that were
576 	// generated in the code. Generated denorm should be the same as denorm
577 	// used when arguments are passed via input (m_valueIdToFloatType[V_DENORM]).
578 	// This is required as result of some operations depends on actual denorm value
579 	// e.g. OpRadians(0x0001) is 0 but OpRadians(0x03f0) is denorm.
580 	string denormBase;
581 
582 	string capabilities;
583 	string extensions;
584 	string arrayStride;
585 
586 public: // Type specific spir-v snippets:
587 
588 	// Common annotations
589 	string typeAnnotationsSnippet;
590 
591 	// Definitions of all types commonly used by tests
592 	string typeDefinitionsSnippet;
593 
594 	// Definitions of all constants commonly used by tests
595 	string constantsDefinitionsSnippet;
596 
597 	// Map that stores instructions that generate arguments of specified value.
598 	// Every test that uses generated inputod will select up to two items from this map
599 	typedef map<ValueId, string> SnippetMap;
600 	SnippetMap valueIdToSnippetArgMap;
601 
602 	// Spir-v snippet that reads argument from SSBO
603 	string argumentsFromInputSnippet;
604 
605 	// SSBO with stage input/output definitions
606 	string inputAnnotationsSnippet;
607 	string inputDefinitionsSnippet;
608 	string outputAnnotationsSnippet;
609 	string outputDefinitionsSnippet;
610 
611 	// Varying is required to pass result from vertex stage to fragment stage,
612 	// one of requirements was to not use SSBO writes in vertex stage so we
613 	// need to do that in fragment stage; we also cant pass operation result
614 	// directly because of interpolation, to avoid it we do a bitcast to uint
615 	string varyingsTypesSnippet;
616 	string inputVaryingsSnippet;
617 	string outputVaryingsSnippet;
618 	string storeVertexResultSnippet;
619 	string loadVertexResultSnippet;
620 
621 	string storeResultsSnippet;
622 };
623 
updateSpirvSnippets()624 void TypeSnippetsBase::updateSpirvSnippets()
625 {
626 	// annotations to types that are commonly used by tests
627 	const string typeAnnotationsTemplate =
628 		"OpDecorate %type_float_arr_1 ArrayStride " + arrayStride + "\n"
629 		"OpDecorate %type_float_arr_2 ArrayStride " + arrayStride + "\n";
630 
631 	// definition off all types that are commonly used by tests
632 	const string typeDefinitionsTemplate =
633 		"%type_float             = OpTypeFloat " + bitWidth + "\n"
634 		"%type_float_uptr        = OpTypePointer Uniform %type_float\n"
635 		"%type_float_fptr        = OpTypePointer Function %type_float\n"
636 		"%type_float_vec2        = OpTypeVector %type_float 2\n"
637 		"%type_float_vec3        = OpTypeVector %type_float 3\n"
638 		"%type_float_vec4        = OpTypeVector %type_float 4\n"
639 		"%type_float_vec4_iptr   = OpTypePointer Input %type_float_vec4\n"
640 		"%type_float_vec4_optr   = OpTypePointer Output %type_float_vec4\n"
641 		"%type_float_mat2x2      = OpTypeMatrix %type_float_vec2 2\n"
642 		"%type_float_arr_1       = OpTypeArray %type_float %c_i32_1\n"
643 		"%type_float_arr_2       = OpTypeArray %type_float %c_i32_2\n";
644 
645 	// definition off all constans that are used by tests
646 	const string constantsDefinitionsTemplate =
647 		"%c_float_n1             = OpConstant %type_float -1\n"
648 		"%c_float_0              = OpConstant %type_float 0.0\n"
649 		"%c_float_0_5            = OpConstant %type_float 0.5\n"
650 		"%c_float_1              = OpConstant %type_float 1\n"
651 		"%c_float_2              = OpConstant %type_float 2\n"
652 		"%c_float_3              = OpConstant %type_float 3\n"
653 		"%c_float_4              = OpConstant %type_float 4\n"
654 		"%c_float_5              = OpConstant %type_float 5\n"
655 		"%c_float_6              = OpConstant %type_float 6\n"
656 		"%c_float_eps            = OpConstant %type_float " + epsilon + "\n"
657 		"%c_float_denorm_base    = OpConstant %type_float " + denormBase + "\n";
658 
659 	// when arguments are read from SSBO this snipped is placed in main function
660 	const string argumentsFromInputTemplate =
661 		"%arg1loc                = OpAccessChain %type_float_uptr %ssbo_in %c_i32_0 %c_i32_0\n"
662 		"%arg1                   = OpLoad %type_float %arg1loc\n"
663 		"%arg2loc                = OpAccessChain %type_float_uptr %ssbo_in %c_i32_0 %c_i32_1\n"
664 		"%arg2                   = OpLoad %type_float %arg2loc\n";
665 
666 	// when tested shader stage reads from SSBO it has to have this snippet
667 	inputAnnotationsSnippet =
668 		"OpMemberDecorate %SSBO_in 0 Offset 0\n"
669 		"OpDecorate %SSBO_in BufferBlock\n"
670 		"OpDecorate %ssbo_in DescriptorSet 0\n"
671 		"OpDecorate %ssbo_in Binding 0\n"
672 		"OpDecorate %ssbo_in NonWritable\n";
673 
674 	const string inputDefinitionsTemplate =
675 		"%SSBO_in              = OpTypeStruct %type_float_arr_2\n"
676 		"%up_SSBO_in           = OpTypePointer Uniform %SSBO_in\n"
677 		"%ssbo_in              = OpVariable %up_SSBO_in Uniform\n";
678 
679 	outputAnnotationsSnippet =
680 		"OpMemberDecorate %SSBO_out 0 Offset 0\n"
681 		"OpDecorate %SSBO_out BufferBlock\n"
682 		"OpDecorate %ssbo_out DescriptorSet 0\n"
683 		"OpDecorate %ssbo_out Binding 1\n";
684 
685 	const string outputDefinitionsTemplate =
686 		"%SSBO_out             = OpTypeStruct %type_float_arr_1\n"
687 		"%up_SSBO_out          = OpTypePointer Uniform %SSBO_out\n"
688 		"%ssbo_out             = OpVariable %up_SSBO_out Uniform\n";
689 
690 	// this snippet is used by compute and fragment stage but not by vertex stage
691 	const string storeResultsTemplate =
692 		"%outloc               = OpAccessChain %type_float_uptr %ssbo_out %c_i32_0 %c_i32_0\n"
693 		"OpStore %outloc %result\n";
694 
695 	const string typeToken	= "_float";
696 	const string typeName	= "_f" + bitWidth;
697 
698 	typeAnnotationsSnippet		= replace(typeAnnotationsTemplate, typeToken, typeName);
699 	typeDefinitionsSnippet		= replace(typeDefinitionsTemplate, typeToken, typeName);
700 	constantsDefinitionsSnippet	= replace(constantsDefinitionsTemplate, typeToken, typeName);
701 	argumentsFromInputSnippet	= replace(argumentsFromInputTemplate, typeToken, typeName);
702 	inputDefinitionsSnippet		= replace(inputDefinitionsTemplate, typeToken, typeName);
703 	outputDefinitionsSnippet	= replace(outputDefinitionsTemplate, typeToken, typeName);
704 	storeResultsSnippet			= replace(storeResultsTemplate, typeToken, typeName);
705 
706 	// NOTE: only values used as _generated_ arguments in test operations
707 	// need to be in this map, arguments that are only used by tests,
708 	// that grab arguments from input, do need to be in this map
709 	// NOTE: when updating entries in valueIdToSnippetArgMap make
710 	// sure to update also m_valueIdToFloatType for all float width
711 	SnippetMap& sm = valueIdToSnippetArgMap;
712 	sm[V_UNUSED]		= "OpFSub %type_float %c_float_0 %c_float_0\n";
713 	sm[V_MINUS_INF]		= "OpFDiv %type_float %c_float_n1 %c_float_0\n";
714 	sm[V_MINUS_ONE]		= "OpFAdd %type_float %c_float_n1 %c_float_0\n";
715 	sm[V_MINUS_ZERO]	= "OpFMul %type_float %c_float_n1 %c_float_0\n";
716 	sm[V_ZERO]			= "OpFMul %type_float %c_float_0 %c_float_0\n";
717 	sm[V_HALF]			= "OpFAdd %type_float %c_float_0_5 %c_float_0\n";
718 	sm[V_ONE]			= "OpFAdd %type_float %c_float_1 %c_float_0\n";
719 	sm[V_INF]			= "OpFDiv %type_float %c_float_1 %c_float_0\n";					// x / 0		== Inf
720 	sm[V_DENORM]		= "OpFSub %type_float %c_float_denorm_base %c_float_eps\n";
721 	sm[V_NAN]			= "OpFDiv %type_float %c_float_0 %c_float_0\n";					// 0 / 0		== Nan
722 
723 	map<ValueId, string>::iterator it;
724 	for ( it = sm.begin(); it != sm.end(); it++ )
725 		sm[it->first] = replace(it->second, typeToken, typeName);
726 }
727 
728 typedef de::SharedPtr<TypeSnippetsBase> TypeSnippetsSP;
729 
730 template<typename FLOAT_TYPE>
731 class TypeSnippets: public TypeSnippetsBase
732 {
733 public:
734 	TypeSnippets();
735 };
736 
737 template<>
TypeSnippets()738 TypeSnippets<deFloat16>::TypeSnippets()
739 {
740 	bitWidth		= "16";
741 	epsilon			= "6.104e-5";	// 2^-14 = 0x0400
742 
743 	// 1.2113e-4 is 0x07f0 which after substracting epsilon will give 0x03f0 (same as vm[V_DENORM])
744 	// NOTE: constants in SPIR-V cant be specified as exact fp16 - there is conversion from double to fp16
745 	denormBase		= "1.2113e-4";
746 
747 	capabilities	= "OpCapability StorageUniform16\n";
748 	extensions		= "OpExtension \"SPV_KHR_16bit_storage\"\n";
749 	arrayStride		= "2";
750 
751 	varyingsTypesSnippet =
752 					"%type_u32_iptr        = OpTypePointer Input %type_u32\n"
753 					"%type_u32_optr        = OpTypePointer Output %type_u32\n";
754 	inputVaryingsSnippet =
755 					"%BP_vertex_result    = OpVariable %type_u32_iptr Input\n";
756 	outputVaryingsSnippet =
757 					"%BP_vertex_result    = OpVariable %type_u32_optr Output\n";
758 	storeVertexResultSnippet =
759 					"%tmp_vec2            = OpCompositeConstruct %type_f16_vec2 %result %c_f16_0\n"
760 					"%packed_result       = OpBitcast %type_u32 %tmp_vec2\n"
761 					"OpStore %BP_vertex_result %packed_result\n";
762 	loadVertexResultSnippet =
763 					"%packed_result       = OpLoad %type_u32 %BP_vertex_result\n"
764 					"%tmp_vec2            = OpBitcast %type_f16_vec2 %packed_result\n"
765 					"%result              = OpCompositeExtract %type_f16 %tmp_vec2 0\n";
766 
767 	updateSpirvSnippets();
768 }
769 
770 template<>
TypeSnippets()771 TypeSnippets<float>::TypeSnippets()
772 {
773 	bitWidth		= "32";
774 	epsilon			= "1.175494351e-38";
775 	denormBase		= "1.1756356e-38";
776 	capabilities	= "";
777 	extensions		= "";
778 	arrayStride		= "4";
779 
780 	varyingsTypesSnippet =
781 					"%type_u32_iptr        = OpTypePointer Input %type_u32\n"
782 					"%type_u32_optr        = OpTypePointer Output %type_u32\n";
783 	inputVaryingsSnippet =
784 					"%BP_vertex_result    = OpVariable %type_u32_iptr Input\n";
785 	outputVaryingsSnippet =
786 					"%BP_vertex_result    = OpVariable %type_u32_optr Output\n";
787 	storeVertexResultSnippet =
788 					"%packed_result       = OpBitcast %type_u32 %result\n"
789 					"OpStore %BP_vertex_result %packed_result\n";
790 	loadVertexResultSnippet =
791 					"%packed_result       = OpLoad %type_u32 %BP_vertex_result\n"
792 					"%result              = OpBitcast %type_f32 %packed_result\n";
793 
794 	updateSpirvSnippets();
795 }
796 
797 template<>
TypeSnippets()798 TypeSnippets<double>::TypeSnippets()
799 {
800 	bitWidth		= "64";
801 	epsilon			= "2.2250738585072014e-308"; // 0x0010000000000000
802 	denormBase		= "2.2250738585076994e-308"; // 0x00100000000003F0
803 	capabilities	= "OpCapability Float64\n";
804 	extensions		= "";
805 	arrayStride		= "8";
806 
807 	varyingsTypesSnippet =
808 					"%type_u32_vec2_iptr   = OpTypePointer Input %type_u32_vec2\n"
809 					"%type_u32_vec2_optr   = OpTypePointer Output %type_u32_vec2\n";
810 	inputVaryingsSnippet =
811 					"%BP_vertex_result     = OpVariable %type_u32_vec2_iptr Input\n";
812 	outputVaryingsSnippet =
813 					"%BP_vertex_result     = OpVariable %type_u32_vec2_optr Output\n";
814 	storeVertexResultSnippet =
815 					"%packed_result        = OpBitcast %type_u32_vec2 %result\n"
816 					"OpStore %BP_vertex_result %packed_result\n";
817 	loadVertexResultSnippet =
818 					"%packed_result        = OpLoad %type_u32_vec2 %BP_vertex_result\n"
819 					"%result               = OpBitcast %type_f64 %packed_result\n";
820 
821 	updateSpirvSnippets();
822 }
823 
824 class TypeTestResultsBase
825 {
826 public:
~TypeTestResultsBase()827 	virtual ~TypeTestResultsBase() {}
828 	FloatType floatType() const;
829 
830 protected:
831 	FloatType m_floatType;
832 
833 public:
834 	// Vectors containing test data for float controls
835 	vector<BinaryCase>	binaryOpFTZ;
836 	vector<UnaryCase>	unaryOpFTZ;
837 	vector<BinaryCase>	binaryOpDenormPreserve;
838 	vector<UnaryCase>	unaryOpDenormPreserve;
839 };
840 
floatType() const841 FloatType TypeTestResultsBase::floatType() const
842 {
843 	return m_floatType;
844 }
845 
846 typedef de::SharedPtr<TypeTestResultsBase> TypeTestResultsSP;
847 
848 template<typename FLOAT_TYPE>
849 class TypeTestResults: public TypeTestResultsBase
850 {
851 public:
852 	TypeTestResults();
853 };
854 
855 template<>
TypeTestResults()856 TypeTestResults<deFloat16>::TypeTestResults()
857 {
858 	m_floatType = FP16;
859 
860 	// note: there are many FTZ test cases that can produce diferent result depending
861 	// on input denorm being flushed or not; because of that FTZ tests can be limited
862 	// to those that return denorm as those are the ones affected by tested extension
863 	const BinaryCase binaryOpFTZArr[] = {
864 		//operation		den op one		den op den		den op inf		den op nan
865 		{ O_ADD,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
866 		{ O_SUB,		V_MINUS_ONE,	V_ZERO,			V_MINUS_INF,	V_UNUSED },
867 		{ O_MUL,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
868 		{ O_DIV,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
869 		{ O_REM,		V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
870 		{ O_MOD,		V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
871 		{ O_VEC_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
872 		{ O_VEC_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
873 		{ O_MAT_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
874 		{ O_MAT_MUL_V,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
875 		{ O_MAT_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
876 		{ O_OUT_PROD,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
877 		{ O_DOT,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
878 		{ O_ATAN2,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
879 		{ O_POW,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
880 		{ O_MIX,		V_HALF,			V_ZERO,			V_INF,			V_UNUSED },
881 		{ O_MIN,		V_ZERO,			V_ZERO,			V_ZERO,			V_UNUSED },
882 		{ O_MAX,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
883 		{ O_CLAMP,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
884 		{ O_STEP,		V_ONE,			V_ONE,			V_ONE,			V_UNUSED },
885 		{ O_SSTEP,		V_HALF,			V_ONE,			V_ZERO,			V_UNUSED },
886 		{ O_FMA,		V_HALF,			V_HALF,			V_UNUSED,		V_UNUSED },
887 		{ O_FACE_FWD,	V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE },
888 		{ O_NMIN,		V_ZERO,			V_ZERO,			V_ZERO,			V_ZERO },
889 		{ O_NMAX,		V_ONE,			V_ZERO,			V_INF,			V_ZERO },
890 		{ O_NCLAMP,		V_ONE,			V_ZERO,			V_INF,			V_ZERO },
891 		{ O_DIST,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
892 		{ O_CROSS,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
893 	};
894 
895 	const UnaryCase unaryOpFTZArr[] = {
896 		//operation			op den
897 		{ O_NEGATE,			V_MINUS_ZERO },
898 		{ O_ROUND,			V_ZERO },
899 		{ O_ROUND_EV,		V_ZERO },
900 		{ O_TRUNC,			V_ZERO },
901 		{ O_ABS,			V_ZERO },
902 		{ O_FLOOR,			V_ZERO },
903 		{ O_CEIL,			V_ZERO },
904 		{ O_FRACT,			V_ZERO },
905 		{ O_RADIANS,		V_ZERO },
906 		{ O_DEGREES,		V_ZERO },
907 		{ O_SIN,			V_ZERO },
908 		{ O_COS,			V_TRIG_ONE },
909 		{ O_TAN,			V_ZERO },
910 		{ O_ASIN,			V_ZERO },
911 		{ O_ACOS,			V_PI_DIV_2 },
912 		{ O_ATAN,			V_ZERO },
913 		{ O_SINH,			V_ZERO },
914 		{ O_COSH,			V_ONE },
915 		{ O_TANH,			V_ZERO },
916 		{ O_ASINH,			V_ZERO },
917 		{ O_ACOSH,			V_UNUSED },
918 		{ O_ATANH,			V_ZERO },
919 		{ O_EXP,			V_ONE },
920 		{ O_LOG,			V_MINUS_INF },
921 		{ O_EXP2,			V_ONE },
922 		{ O_LOG2,			V_MINUS_INF },
923 		{ O_SQRT,			V_ZERO },
924 		{ O_INV_SQRT,		V_INF },
925 		{ O_MAT_DET,		V_ZERO },
926 		{ O_MAT_INV,		V_ZERO_OR_MINUS_ZERO },
927 		{ O_MODF,			V_ZERO },
928 		{ O_MODF_ST,		V_ZERO },
929 		{ O_NORMALIZE,		V_ZERO },
930 		{ O_REFLECT,		V_ZERO },
931 		{ O_REFRACT,		V_ZERO },
932 		{ O_LENGHT,			V_ZERO },
933 	};
934 
935 	const BinaryCase binaryOpDenormPreserveArr[] = {
936 		//operation			den op one				den op den				den op inf		den op nan
937 		{ O_PHI,			V_DENORM,				V_DENORM,				V_DENORM,		V_DENORM },
938 		{ O_SELECT,			V_DENORM,				V_DENORM,				V_DENORM,		V_DENORM },
939 		{ O_ADD,			V_ONE,					V_DENORM_TIMES_TWO,		V_INF,			V_NAN },
940 		{ O_SUB,			V_MINUS_ONE_OR_CLOSE,	V_ZERO,					V_MINUS_INF,	V_NAN },
941 		{ O_MUL,			V_DENORM,				V_ZERO,					V_INF,			V_NAN },
942 		{ O_VEC_MUL_S,		V_DENORM,				V_ZERO,					V_INF,			V_NAN },
943 		{ O_VEC_MUL_M,		V_DENORM_TIMES_TWO,		V_ZERO,					V_INF,			V_NAN },
944 		{ O_MAT_MUL_S,		V_DENORM,				V_ZERO,					V_INF,			V_NAN },
945 		{ O_MAT_MUL_V,		V_DENORM_TIMES_TWO,		V_ZERO,					V_INF,			V_NAN },
946 		{ O_MAT_MUL_M,		V_DENORM_TIMES_TWO,		V_ZERO,					V_INF,			V_NAN },
947 		{ O_OUT_PROD,		V_DENORM,				V_ZERO,					V_INF,			V_NAN },
948 		{ O_DOT,			V_DENORM_TIMES_TWO,		V_ZERO,					V_INF,			V_NAN },
949 		{ O_MIX,			V_HALF,					V_DENORM,				V_INF,			V_NAN },
950 		{ O_FMA,			V_HALF,					V_HALF,					V_INF,			V_NAN },
951 		{ O_MIN,			V_DENORM,				V_DENORM,				V_DENORM,		V_UNUSED },
952 		{ O_MAX,			V_ONE,					V_DENORM,				V_INF,			V_UNUSED },
953 		{ O_CLAMP,			V_ONE,					V_DENORM,				V_INF,			V_UNUSED },
954 		{ O_NMIN,			V_DENORM,				V_DENORM,				V_DENORM,		V_DENORM },
955 		{ O_NMAX,			V_ONE,					V_DENORM,				V_INF,			V_DENORM },
956 		{ O_NCLAMP,			V_ONE,					V_DENORM,				V_INF,			V_DENORM },
957 	};
958 
959 	const UnaryCase unaryOpDenormPreserveArr[] = {
960 		//operation			op den
961 		{ O_RETURN_VAL,		V_DENORM },
962 		{ O_D_EXTRACT,		V_DENORM },
963 		{ O_D_INSERT,		V_DENORM },
964 		{ O_SHUFFLE,		V_DENORM },
965 		{ O_COMPOSITE,		V_DENORM },
966 		{ O_COMPOSITE_INS,	V_DENORM },
967 		{ O_COPY,			V_DENORM },
968 		{ O_TRANSPOSE,		V_DENORM },
969 		{ O_NEGATE,			V_DENORM },
970 		{ O_ABS,			V_DENORM },
971 		{ O_SIGN,			V_ONE },
972 		{ O_RADIANS,		V_DENORM },
973 		{ O_DEGREES,		V_DEGREES_DENORM },
974 	};
975 
976 	binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
977 					   binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
978 	unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
979 					  unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
980 	binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
981 								  binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
982 	unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
983 								 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
984 }
985 
986 template<>
TypeTestResults()987 TypeTestResults<float>::TypeTestResults()
988 {
989 	m_floatType = FP32;
990 
991 	const BinaryCase binaryOpFTZArr[] = {
992 		//operation		den op one		den op den		den op inf		den op nan
993 		{ O_ADD,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
994 		{ O_SUB,		V_MINUS_ONE,	V_ZERO,			V_MINUS_INF,	V_UNUSED },
995 		{ O_MUL,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
996 		{ O_DIV,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
997 		{ O_REM,		V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
998 		{ O_MOD,		V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
999 		{ O_VEC_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1000 		{ O_VEC_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1001 		{ O_MAT_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1002 		{ O_MAT_MUL_V,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1003 		{ O_MAT_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1004 		{ O_OUT_PROD,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1005 		{ O_DOT,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1006 		{ O_ATAN2,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
1007 		{ O_POW,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
1008 		{ O_MIX,		V_HALF,			V_ZERO,			V_INF,			V_UNUSED },
1009 		{ O_MIN,		V_ZERO,			V_ZERO,			V_ZERO,			V_UNUSED },
1010 		{ O_MAX,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1011 		{ O_CLAMP,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1012 		{ O_STEP,		V_ONE,			V_ONE,			V_ONE,			V_UNUSED },
1013 		{ O_SSTEP,		V_HALF,			V_ONE,			V_ZERO,			V_UNUSED },
1014 		{ O_FMA,		V_HALF,			V_HALF,			V_UNUSED,		V_UNUSED },
1015 		{ O_FACE_FWD,	V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE },
1016 		{ O_NMIN,		V_ZERO,			V_ZERO,			V_ZERO,			V_ZERO },
1017 		{ O_NMAX,		V_ONE,			V_ZERO,			V_INF,			V_ZERO },
1018 		{ O_NCLAMP,		V_ONE,			V_ZERO,			V_INF,			V_ZERO },
1019 		{ O_DIST,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1020 		{ O_CROSS,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1021 	};
1022 
1023 	const UnaryCase unaryOpFTZArr[] = {
1024 		//operation			op den
1025 		{ O_NEGATE,			V_MINUS_ZERO },
1026 		{ O_ROUND,			V_ZERO },
1027 		{ O_ROUND_EV,		V_ZERO },
1028 		{ O_TRUNC,			V_ZERO },
1029 		{ O_ABS,			V_ZERO },
1030 		{ O_FLOOR,			V_ZERO },
1031 		{ O_CEIL,			V_ZERO },
1032 		{ O_FRACT,			V_ZERO },
1033 		{ O_RADIANS,		V_ZERO },
1034 		{ O_DEGREES,		V_ZERO },
1035 		{ O_SIN,			V_ZERO },
1036 		{ O_COS,			V_TRIG_ONE },
1037 		{ O_TAN,			V_ZERO },
1038 		{ O_ASIN,			V_ZERO },
1039 		{ O_ACOS,			V_PI_DIV_2 },
1040 		{ O_ATAN,			V_ZERO },
1041 		{ O_SINH,			V_ZERO },
1042 		{ O_COSH,			V_ONE },
1043 		{ O_TANH,			V_ZERO },
1044 		{ O_ASINH,			V_ZERO },
1045 		{ O_ACOSH,			V_UNUSED },
1046 		{ O_ATANH,			V_ZERO },
1047 		{ O_EXP,			V_ONE },
1048 		{ O_LOG,			V_MINUS_INF },
1049 		{ O_EXP2,			V_ONE },
1050 		{ O_LOG2,			V_MINUS_INF },
1051 		{ O_SQRT,			V_ZERO },
1052 		{ O_INV_SQRT,		V_INF },
1053 		{ O_MAT_DET,		V_ZERO },
1054 		{ O_MAT_INV,		V_ZERO_OR_MINUS_ZERO },
1055 		{ O_MODF,			V_ZERO },
1056 		{ O_MODF_ST,		V_ZERO },
1057 		{ O_NORMALIZE,		V_ZERO },
1058 		{ O_REFLECT,		V_ZERO },
1059 		{ O_REFRACT,		V_ZERO },
1060 		{ O_LENGHT,			V_ZERO },
1061 	};
1062 
1063 	const BinaryCase binaryOpDenormPreserveArr[] = {
1064 		//operation			den op one			den op den				den op inf		den op nan
1065 		{ O_PHI,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1066 		{ O_SELECT,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1067 		{ O_ADD,			V_ONE,				V_DENORM_TIMES_TWO,		V_INF,			V_NAN },
1068 		{ O_SUB,			V_MINUS_ONE,		V_ZERO,					V_MINUS_INF,	V_NAN },
1069 		{ O_MUL,			V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1070 		{ O_VEC_MUL_S,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1071 		{ O_VEC_MUL_M,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1072 		{ O_MAT_MUL_S,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1073 		{ O_MAT_MUL_V,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1074 		{ O_MAT_MUL_M,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1075 		{ O_OUT_PROD,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1076 		{ O_DOT,			V_DENORM_TIMES_TWO,	V_ZERO,					V_INF,			V_NAN },
1077 		{ O_MIX,			V_HALF,				V_DENORM,				V_INF,			V_NAN },
1078 		{ O_FMA,			V_HALF,				V_HALF,					V_INF,			V_NAN },
1079 		{ O_MIN,			V_DENORM,			V_DENORM,				V_DENORM,		V_UNUSED },
1080 		{ O_MAX,			V_ONE,				V_DENORM,				V_INF,			V_UNUSED },
1081 		{ O_CLAMP,			V_ONE,				V_DENORM,				V_INF,			V_UNUSED },
1082 		{ O_NMIN,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1083 		{ O_NMAX,			V_ONE,				V_DENORM,				V_INF,			V_DENORM },
1084 		{ O_NCLAMP,			V_ONE,				V_DENORM,				V_INF,			V_DENORM },
1085 	};
1086 
1087 	const UnaryCase unaryOpDenormPreserveArr[] = {
1088 		//operation			op den
1089 		{ O_RETURN_VAL,		V_DENORM },
1090 		{ O_D_EXTRACT,		V_DENORM },
1091 		{ O_D_INSERT,		V_DENORM },
1092 		{ O_SHUFFLE,		V_DENORM },
1093 		{ O_COMPOSITE,		V_DENORM },
1094 		{ O_COMPOSITE_INS,	V_DENORM },
1095 		{ O_COPY,			V_DENORM },
1096 		{ O_TRANSPOSE,		V_DENORM },
1097 		{ O_NEGATE,			V_DENORM },
1098 		{ O_ABS,			V_DENORM },
1099 		{ O_SIGN,			V_ONE },
1100 		{ O_RADIANS,		V_DENORM },
1101 		{ O_DEGREES,		V_DEGREES_DENORM },
1102 	};
1103 
1104 	binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1105 					   binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1106 	unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1107 					  unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1108 	binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1109 								  binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1110 	unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1111 								 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1112 }
1113 
1114 template<>
TypeTestResults()1115 TypeTestResults<double>::TypeTestResults()
1116 {
1117 	m_floatType = FP64;
1118 
1119 	// fp64 is supported by fewer operations then fp16 and fp32
1120 	// e.g. Radians and Degrees functions are not supported
1121 	const BinaryCase binaryOpFTZArr[] = {
1122 		//operation		den op one		den op den		den op inf		den op nan
1123 		{ O_ADD,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1124 		{ O_SUB,		V_MINUS_ONE,	V_ZERO,			V_MINUS_INF,	V_UNUSED },
1125 		{ O_MUL,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1126 		{ O_DIV,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
1127 		{ O_REM,		V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
1128 		{ O_MOD,		V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
1129 		{ O_VEC_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1130 		{ O_VEC_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1131 		{ O_MAT_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1132 		{ O_MAT_MUL_V,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1133 		{ O_MAT_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1134 		{ O_OUT_PROD,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1135 		{ O_DOT,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1136 		{ O_MIX,		V_HALF,			V_ZERO,			V_INF,			V_UNUSED },
1137 		{ O_MIN,		V_ZERO,			V_ZERO,			V_ZERO,			V_UNUSED },
1138 		{ O_MAX,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1139 		{ O_CLAMP,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1140 		{ O_STEP,		V_ONE,			V_ONE,			V_ONE,			V_UNUSED },
1141 		{ O_SSTEP,		V_HALF,			V_ONE,			V_ZERO,			V_UNUSED },
1142 		{ O_FMA,		V_HALF,			V_HALF,			V_UNUSED,		V_UNUSED },
1143 		{ O_FACE_FWD,	V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE },
1144 		{ O_NMIN,		V_ZERO,			V_ZERO,			V_ZERO,			V_ZERO },
1145 		{ O_NMAX,		V_ONE,			V_ZERO,			V_INF,			V_ZERO },
1146 		{ O_NCLAMP,		V_ONE,			V_ZERO,			V_INF,			V_ZERO },
1147 		{ O_DIST,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1148 		{ O_CROSS,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1149 	};
1150 
1151 	const UnaryCase unaryOpFTZArr[] = {
1152 		//operation			op den
1153 		{ O_NEGATE,			V_MINUS_ZERO },
1154 		{ O_ROUND,			V_ZERO },
1155 		{ O_ROUND_EV,		V_ZERO },
1156 		{ O_TRUNC,			V_ZERO },
1157 		{ O_ABS,			V_ZERO },
1158 		{ O_FLOOR,			V_ZERO },
1159 		{ O_CEIL,			V_ZERO },
1160 		{ O_FRACT,			V_ZERO },
1161 		{ O_SQRT,			V_ZERO },
1162 		{ O_INV_SQRT,		V_INF },
1163 		{ O_MAT_DET,		V_ZERO },
1164 		{ O_MAT_INV,		V_ZERO_OR_MINUS_ZERO },
1165 		{ O_MODF,			V_ZERO },
1166 		{ O_MODF_ST,		V_ZERO },
1167 		{ O_NORMALIZE,		V_ZERO },
1168 		{ O_REFLECT,		V_ZERO },
1169 		{ O_LENGHT,			V_ZERO },
1170 	};
1171 
1172 	const BinaryCase binaryOpDenormPreserveArr[] = {
1173 		//operation			den op one			den op den				den op inf		den op nan
1174 		{ O_PHI,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1175 		{ O_SELECT,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1176 		{ O_ADD,			V_ONE,				V_DENORM_TIMES_TWO,		V_INF,			V_NAN },
1177 		{ O_SUB,			V_MINUS_ONE,		V_ZERO,					V_MINUS_INF,	V_NAN },
1178 		{ O_MUL,			V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1179 		{ O_VEC_MUL_S,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1180 		{ O_VEC_MUL_M,		V_DENORM_TIMES_TWO,	V_ZERO,					V_INF,			V_NAN },
1181 		{ O_MAT_MUL_S,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1182 		{ O_MAT_MUL_V,		V_DENORM_TIMES_TWO,	V_ZERO,					V_INF,			V_NAN },
1183 		{ O_MAT_MUL_M,		V_DENORM_TIMES_TWO,	V_ZERO,					V_INF,			V_NAN },
1184 		{ O_OUT_PROD,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1185 		{ O_DOT,			V_DENORM_TIMES_TWO,	V_ZERO,					V_INF,			V_NAN },
1186 		{ O_MIX,			V_HALF,				V_DENORM,				V_INF,			V_NAN },
1187 		{ O_FMA,			V_HALF,				V_HALF,					V_INF,			V_NAN },
1188 		{ O_MIN,			V_DENORM,			V_DENORM,				V_DENORM,		V_UNUSED },
1189 		{ O_MAX,			V_ONE,				V_DENORM,				V_INF,			V_UNUSED },
1190 		{ O_CLAMP,			V_ONE,				V_DENORM,				V_INF,			V_UNUSED },
1191 		{ O_NMIN,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1192 		{ O_NMAX,			V_ONE,				V_DENORM,				V_INF,			V_DENORM },
1193 		{ O_NCLAMP,			V_ONE,				V_DENORM,				V_INF,			V_DENORM },
1194 	};
1195 
1196 	const UnaryCase unaryOpDenormPreserveArr[] = {
1197 		//operation			op den
1198 		{ O_RETURN_VAL,		V_DENORM },
1199 		{ O_D_EXTRACT,		V_DENORM },
1200 		{ O_D_INSERT,		V_DENORM },
1201 		{ O_SHUFFLE,		V_DENORM },
1202 		{ O_COMPOSITE,		V_DENORM },
1203 		{ O_COMPOSITE_INS,	V_DENORM },
1204 		{ O_COPY,			V_DENORM },
1205 		{ O_TRANSPOSE,		V_DENORM },
1206 		{ O_NEGATE,			V_DENORM },
1207 		{ O_ABS,			V_DENORM },
1208 		{ O_SIGN,			V_ONE },
1209 	};
1210 
1211 	binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1212 					   binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1213 	unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1214 					  unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1215 	binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1216 								  binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1217 	unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1218 								 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1219 }
1220 
1221 // Operation structure holds data needed to test specified SPIR-V operation. This class contains
1222 // additional annotations, additional types and aditional constants that should be properly included
1223 // in SPIR-V code. Commands attribute in this structure contains code that performs tested operation
1224 // on given arguments, in some cases verification is also performed there.
1225 // All snipets stroed in this structure are generic and can be specialized for fp16, fp32 or fp64,
1226 // thanks to that this data can be shared by many OperationTestCase instances (testing diferent
1227 // float behaviours on diferent float widths).
1228 struct Operation
1229 {
1230 	// operation name is included in test case name
1231 	const char*	name;
1232 
1233 	// operation specific spir-v snippets that will be
1234 	// placed in proper places in final test shader
1235 	const char*	annotations;
1236 	const char*	types;
1237 	const char*	constants;
1238 	const char*	variables;
1239 	const char*	commands;
1240 
1241 	// conversion operations operate on one float type and produce float
1242 	// type with different bit width; restrictedInputType is used only when
1243 	// isInputTypeRestricted is set to true and it restricts usega of this
1244 	// operation to specified input type
1245 	bool		isInputTypeRestricted;
1246 	FloatType	restrictedInputType;
1247 
1248 	// arguments for OpSpecConstant need to be specified also as constant
1249 	bool		isSpecConstant;
1250 
Operationvkt::SpirVAssembly::__anone328874d0111::Operation1251 	Operation()		{}
1252 
1253 	// Minimal constructor - used by most of operations
Operationvkt::SpirVAssembly::__anone328874d0111::Operation1254 	Operation(const char* _name, const char* _commands)
1255 		: name(_name)
1256 		, annotations("")
1257 		, types("")
1258 		, constants("")
1259 		, variables("")
1260 		, commands(_commands)
1261 		, isInputTypeRestricted(false)
1262 		, restrictedInputType(FP16)		// not used as isInputTypeRestricted is false
1263 		, isSpecConstant(false)
1264 	{}
1265 
1266 	// Conversion operations constructor (used also by conversions done in SpecConstantOp)
Operationvkt::SpirVAssembly::__anone328874d0111::Operation1267 	Operation(const char* _name,
1268 			  bool specConstant,
1269 			  FloatType _inputType,
1270 			  const char* _constants,
1271 			  const char* _commands)
1272 		: name(_name)
1273 		, annotations("")
1274 		, types("")
1275 		, constants(_constants)
1276 		, variables("")
1277 		, commands(_commands)
1278 		, isInputTypeRestricted(true)
1279 		, restrictedInputType(_inputType)
1280 		, isSpecConstant(specConstant)
1281 	{}
1282 
1283 	// Full constructor - used by few operations, that are more complex to test
Operationvkt::SpirVAssembly::__anone328874d0111::Operation1284 	Operation(const char* _name,
1285 			  const char* _annotations,
1286 			  const char* _types,
1287 			  const char* _constants,
1288 			  const char* _variables,
1289 			  const char* _commands)
1290 		: name(_name)
1291 		, annotations(_annotations)
1292 		, types(_types)
1293 		, constants(_constants)
1294 		, variables(_variables)
1295 		, commands(_commands)
1296 		, isInputTypeRestricted(false)
1297 		, restrictedInputType(FP16)		// not used as isInputTypeRestricted is false
1298 		, isSpecConstant(false)
1299 	{}
1300 
1301 	// Full constructor - used by rounding override cases
Operationvkt::SpirVAssembly::__anone328874d0111::Operation1302 	Operation(const char* _name,
1303 			  FloatType _inputType,
1304 			  const char* _annotations,
1305 			  const char* _types,
1306 			  const char* _constants,
1307 			  const char* _commands)
1308 		: name(_name)
1309 		, annotations(_annotations)
1310 		, types(_types)
1311 		, constants(_constants)
1312 		, variables("")
1313 		, commands(_commands)
1314 		, isInputTypeRestricted(true)
1315 		, restrictedInputType(_inputType)
1316 		, isSpecConstant(false)
1317 	{}
1318 };
1319 
1320 // Class storing input that will be passed to operation and expected
1321 // output that should be generated for specified behaviour.
1322 class OperationTestCase
1323 {
1324 public:
1325 
OperationTestCase()1326 	OperationTestCase()		{}
1327 
OperationTestCase(const char * _baseName,BehaviorFlags _behaviorFlags,OperationId _operatinId,ValueId _input1,ValueId _input2,ValueId _expectedOutput)1328 	OperationTestCase(const char*	_baseName,
1329 					  BehaviorFlags	_behaviorFlags,
1330 					  OperationId	_operatinId,
1331 					  ValueId		_input1,
1332 					  ValueId		_input2,
1333 					  ValueId		_expectedOutput)
1334 		: baseName(_baseName)
1335 		, behaviorFlags(_behaviorFlags)
1336 		, operationId(_operatinId)
1337 		, expectedOutput(_expectedOutput)
1338 	{
1339 		input[0] = _input1;
1340 		input[1] = _input2;
1341 	}
1342 
1343 public:
1344 
1345 	string					baseName;
1346 	BehaviorFlags			behaviorFlags;
1347 	OperationId				operationId;
1348 	ValueId					input[2];
1349 	ValueId					expectedOutput;
1350 };
1351 
1352 // Helper structure used to store specialized operation
1353 // data. This data is ready to be used during shader assembly.
1354 struct SpecializedOperation
1355 {
1356 	string constans;
1357 	string annotations;
1358 	string types;
1359 	string arguments;
1360 	string variables;
1361 	string commands;
1362 
1363 	FloatType		inFloatType;
1364 	TypeSnippetsSP	inTypeSnippets;
1365 	TypeSnippetsSP	outTypeSnippets;
1366 };
1367 
1368 // Class responsible for constructing list of test cases for specified
1369 // float type and specified way of preparation of arguments.
1370 // Arguments can be either read from input SSBO or generated via math
1371 // operations in spir-v code.
1372 class TestCasesBuilder
1373 {
1374 public:
1375 
1376 	void init();
1377 	void build(vector<OperationTestCase>& testCases, TypeTestResultsSP typeTestResults, bool argumentsFromInput);
1378 	const Operation& getOperation(OperationId id) const;
1379 
1380 private:
1381 
1382 	void createUnaryTestCases(vector<OperationTestCase>& testCases,
1383 							  OperationId operationId,
1384 							  ValueId denormPreserveResult,
1385 							  ValueId denormFTZResult) const;
1386 
1387 private:
1388 
1389 	// Operations are shared betwean test cases so they are
1390 	// passed to them as pointers to data stored in TestCasesBuilder.
1391 	typedef OperationTestCase OTC;
1392 	typedef Operation Op;
1393 	map<int, Op> m_operations;
1394 };
1395 
init()1396 void TestCasesBuilder::init()
1397 {
1398 	map<int, Op>& mo = m_operations;
1399 
1400 	// predefine operations repeatedly used in tests; note that "_float"
1401 	// in every operation command will be replaced with either "_f16",
1402 	// "_f32" or "_f64" - StringTemplate is not used here because it
1403 	// would make code less readable
1404 	// m_operations contains generic operation definitions that can be
1405 	// used for all float types
1406 
1407 	mo[O_NEGATE]		= Op("negate",		"%result             = OpFNegate %type_float %arg1\n");
1408 	mo[O_COMPOSITE]		= Op("composite",	"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1409 											"%result             = OpCompositeExtract %type_float %vec1 0\n");
1410 	mo[O_COMPOSITE_INS]	= Op("comp_ins",	"%vec1               = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_0\n"
1411 											"%vec2               = OpCompositeInsert %type_float_vec2 %arg1 %vec1 0\n"
1412 											"%result             = OpCompositeExtract %type_float %vec2 0\n");
1413 	mo[O_COPY]			= Op("copy",		"%result             = OpCopyObject %type_float %arg1\n");
1414 	mo[O_D_EXTRACT]		= Op("extract",		"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1415 											"%result             = OpVectorExtractDynamic %type_float %vec1 %c_i32_0\n");
1416 	mo[O_D_INSERT]		= Op("insert",		"%tmpVec             = OpCompositeConstruct %type_float_vec2 %c_float_2 %c_float_2\n"
1417 											"%vec1               = OpVectorInsertDynamic %type_float_vec2 %tmpVec %arg1 %c_i32_0\n"
1418 											"%result             = OpCompositeExtract %type_float %vec1 0\n");
1419 	mo[O_SHUFFLE]		= Op("shuffle",		"%tmpVec1            = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1420 											"%tmpVec2            = OpCompositeConstruct %type_float_vec2 %c_float_2 %c_float_2\n"	// NOTE: its impossible to test shuffle with denorms flushed
1421 											"%vec1               = OpVectorShuffle %type_float_vec2 %tmpVec1 %tmpVec2 0 2\n"		//       to zero as this will be done by earlier operation
1422 											"%result             = OpCompositeExtract %type_float %vec1 0\n");						//       (this also applies to few other operations)
1423 	mo[O_TRANSPOSE]		= Op("transpose",	"%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1424 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1425 											"%tmat               = OpTranspose %type_float_mat2x2 %mat\n"
1426 											"%tcol               = OpCompositeExtract %type_float_vec2 %tmat 0\n"
1427 											"%result             = OpCompositeExtract %type_float %tcol 0\n");
1428 	mo[O_RETURN_VAL]	= Op("ret_val",		"",
1429 											"%type_test_fun      = OpTypeFunction %type_float %type_float\n",
1430 											"%test_fun = OpFunction %type_float None %type_test_fun\n"
1431 											"%param = OpFunctionParameter %type_float\n"
1432 											"%entry = OpLabel\n"
1433 											"OpReturnValue %param\n"
1434 											"OpFunctionEnd\n",
1435 											"",
1436 											"%result             = OpFunctionCall %type_float %test_fun %arg1\n");
1437 
1438 	// conversion operations that are meant to be used only for single output type (defined by the second number in name)
1439 	const char* convertSource =				"%result             = OpFConvert %type_float %arg1\n";
1440 	mo[O_CONV_FROM_FP16]	= Op("conv_from_fp16", false, FP16, "", convertSource);
1441 	mo[O_CONV_FROM_FP32]	= Op("conv_from_fp32", false, FP32, "", convertSource);
1442 	mo[O_CONV_FROM_FP64]	= Op("conv_from_fp64", false, FP64, "", convertSource);
1443 
	// from all operands supported by OpSpecConstantOp we can only test FConvert opcode with literals as everything
	// else requires Kernel capability (OpenCL); values of literals used in SPIR-V code must be equivalent to
	// V_CONV_FROM_FP32_ARG and V_CONV_FROM_FP64_ARG so we can use same expected rounded values as for regular OpFConvert
1447 	mo[O_SCONST_CONV_FROM_FP32_TO_FP16]
1448 						= Op("sconst_conv_from_fp32", true, FP32,
1449 											"%c_arg              = OpConstant %type_f32 1.22334445\n"
1450 											"%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n",
1451 											"");
1452 	mo[O_SCONST_CONV_FROM_FP64_TO_FP32]
1453 						= Op("sconst_conv_from_fp64", true, FP64,
1454 											"%c_arg              = OpConstant %type_f64 1.22334455\n"
1455 											"%result             = OpSpecConstantOp %type_f32 FConvert %c_arg\n",
1456 											"");
1457 	mo[O_SCONST_CONV_FROM_FP64_TO_FP16]
1458 						= Op("sconst_conv_from_fp64", true, FP64,
1459 											"%c_arg              = OpConstant %type_f64 1.22334445\n"
1460 											"%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n",
1461 											"");
1462 
1463 	mo[O_ADD]			= Op("add",			"%result             = OpFAdd %type_float %arg1 %arg2\n");
1464 	mo[O_SUB]			= Op("sub",			"%result             = OpFSub %type_float %arg1 %arg2\n");
1465 	mo[O_MUL]			= Op("mul",			"%result             = OpFMul %type_float %arg1 %arg2\n");
1466 	mo[O_DIV]			= Op("div",			"%result             = OpFDiv %type_float %arg1 %arg2\n");
1467 	mo[O_REM]			= Op("rem",			"%result             = OpFRem %type_float %arg1 %arg2\n");
1468 	mo[O_MOD]			= Op("mod",			"%result             = OpFMod %type_float %arg1 %arg2\n");
1469 	mo[O_PHI]			= Op("phi",			"%comp               = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
1470 											"                      OpSelectionMerge %comp_merge None\n"
1471 											"                      OpBranchConditional %comp %true_branch %false_branch\n"
1472 											"%true_branch        = OpLabel\n"
1473 											"                      OpBranch %comp_merge\n"
1474 											"%false_branch       = OpLabel\n"
1475 											"                      OpBranch %comp_merge\n"
1476 											"%comp_merge         = OpLabel\n"
1477 											"%result             = OpPhi %type_float %arg2 %true_branch %arg1 %false_branch\n");
1478 	mo[O_SELECT]		= Op("select",		"%always_true        = OpFOrdGreaterThan %type_bool %c_float_1 %c_float_0\n"
1479 											"%result             = OpSelect %type_float %always_true %arg1 %arg2\n");
1480 	mo[O_DOT]			= Op("dot",			"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1481 											"%vec2               = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1482 											"%result             = OpDot %type_float %vec1 %vec2\n");
1483 	mo[O_VEC_MUL_S]		= Op("vmuls",		"%vec                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1484 											"%tmpVec             = OpVectorTimesScalar %type_float_vec2 %vec %arg2\n"
1485 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n");
1486 	mo[O_VEC_MUL_M]		= Op("vmulm",		"%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1487 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1488 											"%vec                = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1489 											"%tmpVec             = OpVectorTimesMatrix %type_float_vec2 %vec %mat\n"
1490 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n");
1491 	mo[O_MAT_MUL_S]		= Op("mmuls",		"%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1492 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1493 											"%mulMat             = OpMatrixTimesScalar %type_float_mat2x2 %mat %arg2\n"
1494 											"%extCol             = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1495 											"%result             = OpCompositeExtract %type_float %extCol 0\n");
1496 	mo[O_MAT_MUL_V]		= Op("mmulv",		"%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1497 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1498 											"%vec                = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1499 											"%mulVec             = OpMatrixTimesVector %type_float_vec2 %mat %vec\n"
1500 											"%result             = OpCompositeExtract %type_float %mulVec 0\n");
1501 	mo[O_MAT_MUL_M]		= Op("mmulm",		"%col1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1502 											"%mat1               = OpCompositeConstruct %type_float_mat2x2 %col1 %col1\n"
1503 											"%col2               = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1504 											"%mat2               = OpCompositeConstruct %type_float_mat2x2 %col2 %col2\n"
1505 											"%mulMat             = OpMatrixTimesMatrix %type_float_mat2x2 %mat1 %mat2\n"
1506 											"%extCol             = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1507 											"%result             = OpCompositeExtract %type_float %extCol 0\n");
1508 	mo[O_OUT_PROD]		= Op("out_prod",	"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1509 											"%vec2               = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1510 											"%mulMat             = OpOuterProduct %type_float_mat2x2 %vec1 %vec2\n"
1511 											"%extCol             = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1512 											"%result             = OpCompositeExtract %type_float %extCol 0\n");
1513 
1514 	// comparison operations
1515 	mo[O_ORD_EQ]		= Op("ord_eq",		"%boolVal           = OpFOrdEqual %type_bool %arg1 %arg2\n"
1516 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n");
1517 	mo[O_UORD_EQ]		= Op("uord_eq",		"%boolVal           = OpFUnordEqual %type_bool %arg1 %arg2\n"
1518 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n");
1519 	mo[O_ORD_NEQ]		= Op("ord_neq",		"%boolVal           = OpFOrdNotEqual %type_bool %arg1 %arg2\n"
1520 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n");
1521 	mo[O_UORD_NEQ]		= Op("uord_neq",	"%boolVal           = OpFUnordNotEqual %type_bool %arg1 %arg2\n"
1522 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n");
1523 	mo[O_ORD_LS]		= Op("ord_ls",		"%boolVal           = OpFOrdLessThan %type_bool %arg1 %arg2\n"
1524 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n");
1525 	mo[O_UORD_LS]		= Op("uord_ls",		"%boolVal           = OpFUnordLessThan %type_bool %arg1 %arg2\n"
1526 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n");
1527 	mo[O_ORD_GT]		= Op("ord_gt",		"%boolVal           = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
1528 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n");
1529 	mo[O_UORD_GT]		= Op("uord_gt",		"%boolVal           = OpFUnordGreaterThan %type_bool %arg1 %arg2\n"
1530 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n");
1531 	mo[O_ORD_LE]		= Op("ord_le",		"%boolVal           = OpFOrdLessThanEqual %type_bool %arg1 %arg2\n"
1532 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n");
1533 	mo[O_UORD_LE]		= Op("uord_le",		"%boolVal           = OpFUnordLessThanEqual %type_bool %arg1 %arg2\n"
1534 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n");
1535 	mo[O_ORD_GE]		= Op("ord_ge",		"%boolVal           = OpFOrdGreaterThanEqual %type_bool %arg1 %arg2\n"
1536 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n");
1537 	mo[O_UORD_GE]		= Op("uord_ge",		"%boolVal           = OpFUnordGreaterThanEqual %type_bool %arg1 %arg2\n"
1538 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n");
1539 
1540 	mo[O_ATAN2]			= Op("atan2",		"%result             = OpExtInst %type_float %std450 Atan2 %arg1 %arg2\n");
1541 	mo[O_POW]			= Op("pow",			"%result             = OpExtInst %type_float %std450 Pow %arg1 %arg2\n");
1542 	mo[O_MIX]			= Op("mix",			"%result             = OpExtInst %type_float %std450 FMix %arg1 %arg2 %c_float_0_5\n");
1543 	mo[O_FMA]			= Op("fma",			"%result             = OpExtInst %type_float %std450 Fma %arg1 %arg2 %c_float_0_5\n");
1544 	mo[O_MIN]			= Op("min",			"%result             = OpExtInst %type_float %std450 FMin %arg1 %arg2\n");
1545 	mo[O_MAX]			= Op("max",			"%result             = OpExtInst %type_float %std450 FMax %arg1 %arg2\n");
1546 	mo[O_CLAMP]			= Op("clamp",		"%result             = OpExtInst %type_float %std450 FClamp %arg1 %arg2 %arg2\n");
1547 	mo[O_STEP]			= Op("step",		"%result             = OpExtInst %type_float %std450 Step %arg1 %arg2\n");
1548 	mo[O_SSTEP]			= Op("sstep",		"%result             = OpExtInst %type_float %std450 SmoothStep %arg1 %arg2 %c_float_0_5\n");
1549 	mo[O_DIST]			= Op("distance",	"%result             = OpExtInst %type_float %std450 Distance %arg1 %arg2\n");
1550 	mo[O_CROSS]			= Op("cross",		"%vec1               = OpCompositeConstruct %type_float_vec3 %arg1 %arg1 %arg1\n"
1551 											"%vec2               = OpCompositeConstruct %type_float_vec3 %arg2 %arg2 %arg2\n"
1552 											"%tmpVec             = OpExtInst %type_float_vec3 %std450 Cross %vec1 %vec2\n"
1553 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n");
1554 	mo[O_FACE_FWD]		= Op("face_fwd",	"%result             = OpExtInst %type_float %std450 FaceForward %c_float_1 %arg1 %arg2\n");
1555 	mo[O_NMIN]			= Op("nmin",		"%result             = OpExtInst %type_float %std450 NMin %arg1 %arg2\n");
1556 	mo[O_NMAX]			= Op("nmax",		"%result             = OpExtInst %type_float %std450 NMax %arg1 %arg2\n");
1557 	mo[O_NCLAMP]		= Op("nclamp",		"%result             = OpExtInst %type_float %std450 NClamp %arg2 %arg1 %arg2\n");
1558 
1559 	mo[O_ROUND]			= Op("round",		"%result             = OpExtInst %type_float %std450 Round %arg1\n");
1560 	mo[O_ROUND_EV]		= Op("round_ev",	"%result             = OpExtInst %type_float %std450 RoundEven %arg1\n");
1561 	mo[O_TRUNC]			= Op("trunc",		"%result             = OpExtInst %type_float %std450 Trunc %arg1\n");
1562 	mo[O_ABS]			= Op("abs",			"%result             = OpExtInst %type_float %std450 FAbs %arg1\n");
1563 	mo[O_SIGN]			= Op("sign",		"%result             = OpExtInst %type_float %std450 FSign %arg1\n");
1564 	mo[O_FLOOR]			= Op("floor",		"%result             = OpExtInst %type_float %std450 Floor %arg1\n");
1565 	mo[O_CEIL]			= Op("ceil",		"%result             = OpExtInst %type_float %std450 Ceil %arg1\n");
1566 	mo[O_FRACT]			= Op("fract",		"%result             = OpExtInst %type_float %std450 Fract %arg1\n");
1567 	mo[O_RADIANS]		= Op("radians",		"%result             = OpExtInst %type_float %std450 Radians %arg1\n");
1568 	mo[O_DEGREES]		= Op("degrees",		"%result             = OpExtInst %type_float %std450 Degrees %arg1\n");
1569 	mo[O_SIN]			= Op("sin",			"%result             = OpExtInst %type_float %std450 Sin %arg1\n");
1570 	mo[O_COS]			= Op("cos",			"%result             = OpExtInst %type_float %std450 Cos %arg1\n");
1571 	mo[O_TAN]			= Op("tan",			"%result             = OpExtInst %type_float %std450 Tan %arg1\n");
1572 	mo[O_ASIN]			= Op("asin",		"%result             = OpExtInst %type_float %std450 Asin %arg1\n");
1573 	mo[O_ACOS]			= Op("acos",		"%result             = OpExtInst %type_float %std450 Acos %arg1\n");
1574 	mo[O_ATAN]			= Op("atan",		"%result             = OpExtInst %type_float %std450 Atan %arg1\n");
1575 	mo[O_SINH]			= Op("sinh",		"%result             = OpExtInst %type_float %std450 Sinh %arg1\n");
1576 	mo[O_COSH]			= Op("cosh",		"%result             = OpExtInst %type_float %std450 Cosh %arg1\n");
1577 	mo[O_TANH]			= Op("tanh",		"%result             = OpExtInst %type_float %std450 Tanh %arg1\n");
1578 	mo[O_ASINH]			= Op("asinh",		"%result             = OpExtInst %type_float %std450 Asinh %arg1\n");
1579 	mo[O_ACOSH]			= Op("acosh",		"%result             = OpExtInst %type_float %std450 Acosh %arg1\n");
1580 	mo[O_ATANH]			= Op("atanh",		"%result             = OpExtInst %type_float %std450 Atanh %arg1\n");
1581 	mo[O_EXP]			= Op("exp",			"%result             = OpExtInst %type_float %std450 Exp %arg1\n");
1582 	mo[O_LOG]			= Op("log",			"%result             = OpExtInst %type_float %std450 Log %arg1\n");
1583 	mo[O_EXP2]			= Op("exp2",		"%result             = OpExtInst %type_float %std450 Exp2 %arg1\n");
1584 	mo[O_LOG2]			= Op("log2",		"%result             = OpExtInst %type_float %std450 Log2 %arg1\n");
1585 	mo[O_SQRT]			= Op("sqrt",		"%result             = OpExtInst %type_float %std450 Sqrt %arg1\n");
1586 	mo[O_INV_SQRT]		= Op("inv_sqrt",	"%result             = OpExtInst %type_float %std450 InverseSqrt %arg1\n");
1587 	mo[O_MODF]			= Op("modf",		"",
1588 											"",
1589 											"",
1590 											"%tmpVarPtr          = OpVariable %type_float_fptr Function\n",
1591 											"%result             = OpExtInst %type_float %std450 Modf %arg1 %tmpVarPtr\n");
1592 	mo[O_MODF_ST]		= Op("modf_st",		"OpMemberDecorate %struct_ff 0 Offset ${float_width}\n"
1593 											"OpMemberDecorate %struct_ff 1 Offset ${float_width}\n",
1594 											"%struct_ff          = OpTypeStruct %type_float %type_float\n"
1595 											"%struct_ff_fptr     = OpTypePointer Function %struct_ff\n",
1596 											"",
1597 											"%tmpStructPtr       = OpVariable %struct_ff_fptr Function\n",
1598 											"%tmpStruct          = OpExtInst %struct_ff %std450 ModfStruct %arg1\n"
1599 											"                      OpStore %tmpStructPtr %tmpStruct\n"
1600 											"%tmpLoc             = OpAccessChain %type_float_fptr %tmpStructPtr %c_i32_0\n"
1601 											"%result             = OpLoad %type_float %tmpLoc\n");
1602 	mo[O_FREXP]			= Op("frexp",		"",
1603 											"",
1604 											"",
1605 											"%tmpVarPtr          = OpVariable %type_i32_fptr Function\n",
1606 											"%result             = OpExtInst %type_float %std450 Frexp %arg1 %tmpVarPtr\n");
1607 	mo[O_FREXP_ST]		= Op("frexp_st",	"OpMemberDecorate %struct_fi 0 Offset ${float_width}\n"
1608 											"OpMemberDecorate %struct_fi 1 Offset 32\n",
1609 											"%struct_fi          = OpTypeStruct %type_float %type_i32\n"
1610 											"%struct_fi_fptr     = OpTypePointer Function %struct_fi\n",
1611 											"",
1612 											"%tmpStructPtr       = OpVariable %struct_fi_fptr Function\n",
1613 											"%tmpStruct          = OpExtInst %struct_fi %std450 FrexpStruct %arg1\n"
1614 											"                      OpStore %tmpStructPtr %tmpStruct\n"
1615 											"%tmpLoc             = OpAccessChain %type_float_fptr %tmpStructPtr %c_i32_0\n"
1616 											"%result             = OpLoad %type_float %tmpLoc\n");
1617 	mo[O_LENGHT]		= Op("length",		"%result             = OpExtInst %type_float %std450 Length %arg1\n");
1618 	mo[O_NORMALIZE]		= Op("normalize",	"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %c_float_2\n"
1619 											"%tmpVec             = OpExtInst %type_float_vec2 %std450 Normalize %vec1\n"
1620 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n");
1621 	mo[O_REFLECT]		= Op("reflect",		"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1622 											"%vecN               = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_n1\n"
1623 											"%tmpVec             = OpExtInst %type_float_vec2 %std450 Reflect %vec1 %vecN\n"
1624 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n");
1625 	mo[O_REFRACT]		= Op("refract",		"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1626 											"%vecN               = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_n1\n"
1627 											"%tmpVec             = OpExtInst %type_float_vec2 %std450 Refract %vec1 %vecN %c_float_0_5\n"
1628 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n");
1629 	mo[O_MAT_DET]		= Op("mat_det",		"%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1630 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1631 											"%result             = OpExtInst %type_float %std450 Determinant %mat\n");
1632 	mo[O_MAT_INV]		= Op("mat_inv",		"%col1               = OpCompositeConstruct %type_float_vec2 %arg1 %c_float_1\n"
1633 											"%col2               = OpCompositeConstruct %type_float_vec2 %c_float_1 %c_float_1\n"
1634 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col1 %col2\n"
1635 											"%invMat             = OpExtInst %type_float_mat2x2 %std450 MatrixInverse %mat\n"
1636 											"%extCol             = OpCompositeExtract %type_float_vec2 %invMat 1\n"
1637 											"%result             = OpCompositeExtract %type_float %extCol 1\n");
1638 
1639 	// PackHalf2x16 is a special case as it operates on fp32 vec2 and returns unsigned int,
1640 	// the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
1641 	mo[O_PH_DENORM]		= Op("ph_denorm",	"",
1642 											"",
1643 											"%c_fp32_denorm_fp16 = OpConstant %type_f32 6.01e-5\n"		// fp32 representation of fp16 denorm value
1644 											"%c_ref              = OpConstant %type_u32 66061296\n",
1645 											"",
1646 											"%srcVec             = OpCompositeConstruct %type_f32_vec2 %c_fp32_denorm_fp16 %c_fp32_denorm_fp16\n"
1647 											"%packedInt          = OpExtInst %type_u32 %std450 PackHalf2x16 %srcVec\n"
1648 											"%boolVal            = OpIEqual %type_bool %c_ref %packedInt\n"
1649 											"%result             = OpSelect %type_f32 %boolVal %c_f32_1 %c_f32_0\n");
1650 
1651 	// UnpackHalf2x16 is a special case that operates on uint32 and returns two 32-bit floats,
1652 	// this function is tested using constants
1653 	mo[O_UPH_DENORM]	= Op("uph_denorm",	"",
1654 											"",
1655 											"%c_u32_2_16_pack    = OpConstant %type_u32 66061296\n", // == packHalf2x16(vec2(denorm))
1656 											"",
1657 											"%tmpVec             = OpExtInst %type_f32_vec2 %std450 UnpackHalf2x16 %c_u32_2_16_pack\n"
1658 											"%result             = OpCompositeExtract %type_f32 %tmpVec 0\n");
1659 
1660 	// PackDouble2x32 is a special case that operates on two uint32 and returns
1661 	// double, this function is tested using constants
1662 	mo[O_PD_DENORM]		= Op("pd_denorm",	"",
1663 											"",
1664 											"%c_p1               = OpConstant %type_u32 0\n"
1665 											"%c_p2               = OpConstant %type_u32 262144\n",		// == UnpackDouble2x32(denorm)
1666 											"",
1667 											"%srcVec             = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
1668 											"%result             = OpExtInst %type_f64 %std450 PackDouble2x32 %srcVec\n");
1669 
1670 	// UnpackDouble2x32 is a special case as it operates only on FP64 and returns two ints,
1671 	// the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
1672 	const char* unpackDouble2x32Types	=	"%type_bool_vec2     = OpTypeVector %type_bool 2\n";
1673 	const char* unpackDouble2x32Source	=	"%refVec2            = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
1674 											"%resVec2            = OpExtInst %type_u32_vec2 %std450 UnpackDouble2x32 %arg1\n"
1675 											"%boolVec2           = OpIEqual %type_bool_vec2 %refVec2 %resVec2\n"
1676 											"%boolVal            = OpAll %type_bool %boolVec2\n"
1677 											"%result             = OpSelect %type_f64 %boolVal %c_f64_1 %c_f64_0\n";
1678 	mo[O_UPD_DENORM_FLUSH]		= Op("upd_denorm",	"",
1679 											unpackDouble2x32Types,
1680 											"%c_p1               = OpConstant %type_u32 0\n"
1681 											"%c_p2               = OpConstant %type_u32 0\n",
1682 											"",
1683 											unpackDouble2x32Source);
1684 	mo[O_UPD_DENORM_PRESERVE]	= Op("upd_denorm",	"",
1685 											unpackDouble2x32Types,
1686 											"%c_p1               = OpConstant %type_u32 1008\n"
1687 											"%c_p2               = OpConstant %type_u32 0\n",
1688 											"",
1689 											unpackDouble2x32Source);
1690 
1691 	mo[O_ORTE_ROUND]	= Op("orte_round",	FP32,
1692 											"OpDecorate %result FPRoundingMode RTE\n",
1693 											"",
1694 											"",
1695 											"%result             = OpFConvert %type_f16 %arg1\n");
1696 	mo[O_ORTZ_ROUND]	= Op("ortz_round",	FP32,
1697 											"OpDecorate %result FPRoundingMode RTZ\n",
1698 											"",
1699 											"",
1700 											"%result             = OpFConvert %type_f16 %arg1\n");
1701 }
1702 
build(vector<OperationTestCase> & testCases,TypeTestResultsSP typeTestResults,bool argumentsFromInput)1703 void TestCasesBuilder::build(vector<OperationTestCase>& testCases, TypeTestResultsSP typeTestResults, bool argumentsFromInput)
1704 {
1705 	// this method constructs a list of test cases; this list is a bit different
1706 	// for every combination of float type, arguments preparation method and tested float control
1707 
1708 	testCases.reserve(750);
1709 
1710 	// Denorm - FlushToZero - binary operations
1711 	for (size_t i = 0 ; i < typeTestResults->binaryOpFTZ.size() ; ++i)
1712 	{
1713 		const BinaryCase&	binaryCase	= typeTestResults->binaryOpFTZ[i];
1714 		OperationId			operation	= binaryCase.operationId;
1715 		testCases.push_back(OTC("denorm_op_var_flush_to_zero",		B_DENORM_FLUSH,					 operation, V_DENORM, V_ONE,		binaryCase.opVarResult));
1716 		testCases.push_back(OTC("denorm_op_denorm_flush_to_zero",	B_DENORM_FLUSH,					 operation, V_DENORM, V_DENORM,		binaryCase.opDenormResult));
1717 		testCases.push_back(OTC("denorm_op_inf_flush_to_zero",		B_DENORM_FLUSH | B_ZIN_PERSERVE, operation, V_DENORM, V_INF,		binaryCase.opInfResult));
1718 		testCases.push_back(OTC("denorm_op_nan_flush_to_zero",		B_DENORM_FLUSH | B_ZIN_PERSERVE, operation, V_DENORM, V_NAN,		binaryCase.opNanResult));
1719 	}
1720 
1721 	// Denorm - FlushToZero - unary operations
1722 	for (size_t i = 0 ; i < typeTestResults->unaryOpFTZ.size() ; ++i)
1723 	{
1724 		const UnaryCase&	unaryCase = typeTestResults->unaryOpFTZ[i];
1725 		OperationId			operation = unaryCase.operationId;
1726 		testCases.push_back(OTC("op_denorm_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_UNUSED, unaryCase.result));
1727 	}
1728 
1729 	// Denom - Preserve - binary operations
1730 	for (size_t i = 0 ; i < typeTestResults->binaryOpDenormPreserve.size() ; ++i)
1731 	{
1732 		const BinaryCase&	binaryCase	= typeTestResults->binaryOpDenormPreserve[i];
1733 		OperationId			operation	= binaryCase.operationId;
1734 		testCases.push_back(OTC("denorm_op_var_preserve",			B_DENORM_PERSERVE,					operation, V_DENORM,	V_ONE,		binaryCase.opVarResult));
1735 		testCases.push_back(OTC("denorm_op_denorm_preserve",		B_DENORM_PERSERVE,					operation, V_DENORM,	V_DENORM,	binaryCase.opDenormResult));
1736 		testCases.push_back(OTC("denorm_op_inf_preserve",			B_DENORM_PERSERVE | B_ZIN_PERSERVE, operation, V_DENORM,	V_INF,		binaryCase.opInfResult));
1737 		testCases.push_back(OTC("denorm_op_nan_preserve",			B_DENORM_PERSERVE | B_ZIN_PERSERVE, operation, V_DENORM,	V_NAN,		binaryCase.opNanResult));
1738 	}
1739 
1740 	// Denom - Preserve - unary operations
1741 	for (size_t i = 0 ; i < typeTestResults->unaryOpDenormPreserve.size() ; ++i)
1742 	{
1743 		const UnaryCase&	unaryCase	= typeTestResults->unaryOpDenormPreserve[i];
1744 		OperationId			operation	= unaryCase.operationId;
1745 		testCases.push_back(OTC("op_denorm_preserve", B_DENORM_PERSERVE, operation, V_DENORM, V_UNUSED, unaryCase.result));
1746 	}
1747 
1748 	struct ZINCase
1749 	{
1750 		OperationId	operationId;
1751 		bool		supportedByFP64;
1752 		ValueId		secondArgument;
1753 		ValueId		preserveZeroResult;
1754 		ValueId		preserveSZeroResult;
1755 		ValueId		preserveInfResult;
1756 		ValueId		preserveSInfResult;
1757 		ValueId		preserveNanResult;
1758 	};
1759 
1760 	const ZINCase binaryOpZINPreserve[] = {
1761 		// operation		fp64	second arg		preserve zero	preserve szero		preserve inf	preserve sinf		preserve nan
1762 		{ O_PHI,			true,	V_INF,			V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
1763 		{ O_SELECT,			true,	V_ONE,			V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
1764 		{ O_ADD,			true,	V_ZERO,			V_ZERO,			V_ZERO,				V_INF,			V_MINUS_INF,		V_NAN },
1765 		{ O_SUB,			true,	V_ZERO,			V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
1766 		{ O_MUL,			true,	V_ONE,			V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
1767 	};
1768 
1769 	const ZINCase unaryOpZINPreserve[] = {
1770 		// operation				fp64	second arg		preserve zero	preserve szero		preserve inf	preserve sinf		preserve nan
1771 		{ O_RETURN_VAL,				true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
1772 		{ O_D_EXTRACT,				true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
1773 		{ O_D_INSERT,				true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
1774 		{ O_SHUFFLE,				true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
1775 		{ O_COMPOSITE,				true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
1776 		{ O_COMPOSITE_INS,			true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
1777 		{ O_COPY,					true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
1778 		{ O_TRANSPOSE,				true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
1779 		{ O_NEGATE,					true,	V_UNUSED,		V_MINUS_ZERO,	V_ZERO,				V_MINUS_INF,	V_INF,				V_NAN },
1780 	};
1781 
1782 	bool isFP64 = typeTestResults->floatType() == FP64;
1783 
1784 	// Signed Zero Inf Nan - Preserve - binary operations
1785 	for (size_t i = 0 ; i < DE_LENGTH_OF_ARRAY(binaryOpZINPreserve) ; ++i)
1786 	{
1787 		const ZINCase& zc = binaryOpZINPreserve[i];
1788 		if (isFP64 && !zc.supportedByFP64)
1789 			continue;
1790 
1791 		testCases.push_back(OTC("zero_op_var_preserve",				B_ZIN_PERSERVE, zc.operationId, V_ZERO,			zc.secondArgument,	zc.preserveZeroResult));
1792 		testCases.push_back(OTC("signed_zero_op_var_preserve",		B_ZIN_PERSERVE, zc.operationId, V_MINUS_ZERO,	zc.secondArgument,	zc.preserveSZeroResult));
1793 		testCases.push_back(OTC("inf_op_var_preserve",				B_ZIN_PERSERVE, zc.operationId, V_INF,			zc.secondArgument,	zc.preserveInfResult));
1794 		testCases.push_back(OTC("signed_inf_op_var_preserve",		B_ZIN_PERSERVE, zc.operationId, V_MINUS_INF,	zc.secondArgument,	zc.preserveSInfResult));
1795 		testCases.push_back(OTC("nan_op_var_preserve",				B_ZIN_PERSERVE, zc.operationId, V_NAN,			zc.secondArgument,	zc.preserveNanResult));
1796 	}
1797 
1798 	// Signed Zero Inf Nan - Preserve - unary operations
1799 	for (size_t i = 0 ; i < DE_LENGTH_OF_ARRAY(unaryOpZINPreserve) ; ++i)
1800 	{
1801 		const ZINCase& zc = unaryOpZINPreserve[i];
1802 		if (isFP64 && !zc.supportedByFP64)
1803 			continue;
1804 
1805 		testCases.push_back(OTC("op_zero_preserve",			B_ZIN_PERSERVE,zc.operationId, V_ZERO,			V_UNUSED,	zc.preserveZeroResult));
1806 		testCases.push_back(OTC("op_signed_zero_preserve",	B_ZIN_PERSERVE,zc.operationId, V_MINUS_ZERO,	V_UNUSED,	zc.preserveSZeroResult));
1807 		testCases.push_back(OTC("op_inf_preserve",			B_ZIN_PERSERVE,zc.operationId, V_INF,			V_UNUSED,	zc.preserveInfResult));
1808 		testCases.push_back(OTC("op_signed_inf_preserve",	B_ZIN_PERSERVE,zc.operationId, V_MINUS_INF,		V_UNUSED,	zc.preserveSInfResult));
1809 		testCases.push_back(OTC("op_nan_preserve",			B_ZIN_PERSERVE,zc.operationId, V_NAN,			V_UNUSED,	zc.preserveNanResult));
1810 	}
1811 
1812 	// comparison operations - tested differently because they return true/false
1813 	struct ComparisonCase
1814 	{
1815 		OperationId	operationId;
1816 		ValueId		denormPreserveResult;
1817 	};
1818 	const ComparisonCase comparisonCases[] =
1819 	{
1820 		// operation	denorm
1821 		{ O_ORD_EQ,		V_ZERO },
1822 		{ O_UORD_EQ,	V_ZERO },
1823 		{ O_ORD_NEQ,	V_ONE  },
1824 		{ O_UORD_NEQ,	V_ONE  },
1825 		{ O_ORD_LS,		V_ONE  },
1826 		{ O_UORD_LS,	V_ONE  },
1827 		{ O_ORD_GT,		V_ZERO },
1828 		{ O_UORD_GT,	V_ZERO },
1829 		{ O_ORD_LE,		V_ONE  },
1830 		{ O_UORD_LE,	V_ONE  },
1831 		{ O_ORD_GE,		V_ZERO },
1832 		{ O_UORD_GE,	V_ZERO }
1833 	};
1834 	for (int op = 0 ; op < DE_LENGTH_OF_ARRAY(comparisonCases) ; ++op)
1835 	{
1836 		const ComparisonCase& cc = comparisonCases[op];
1837 		testCases.push_back(OTC("denorm_op_var_preserve", B_DENORM_PERSERVE, cc.operationId, V_DENORM, V_ONE, cc.denormPreserveResult));
1838 	}
1839 
1840 	if (argumentsFromInput)
1841 	{
1842 		struct RoundingModeCase
1843 		{
1844 			OperationId	operationId;
1845 			ValueId		arg1;
1846 			ValueId		arg2;
1847 			ValueId		expectedRTEResult;
1848 			ValueId		expectedRTZResult;
1849 		};
1850 
1851 		const RoundingModeCase roundingCases[] =
1852 		{
1853 			{ O_ADD,			V_ADD_ARG_A,	V_ADD_ARG_B,	V_ADD_RTE_RESULT,	V_ADD_RTZ_RESULT },
1854 			{ O_SUB,			V_SUB_ARG_A,	V_SUB_ARG_B,	V_SUB_RTE_RESULT,	V_SUB_RTZ_RESULT },
1855 			{ O_MUL,			V_MUL_ARG_A,	V_MUL_ARG_B,	V_MUL_RTE_RESULT,	V_MUL_RTZ_RESULT },
1856 			{ O_DOT,			V_DOT_ARG_A,	V_DOT_ARG_B,	V_DOT_RTE_RESULT,	V_DOT_RTZ_RESULT },
1857 
1858 			// in vect/mat multiplication by scalar operations only first element of result is checked
1859 			// so argument and result values prepared for multiplication can be reused for those cases
1860 			{ O_VEC_MUL_S,		V_MUL_ARG_A,	V_MUL_ARG_B,	V_MUL_RTE_RESULT,	V_MUL_RTZ_RESULT },
1861 			{ O_MAT_MUL_S,		V_MUL_ARG_A,	V_MUL_ARG_B,	V_MUL_RTE_RESULT,	V_MUL_RTZ_RESULT },
1862 			{ O_OUT_PROD,		V_MUL_ARG_A,	V_MUL_ARG_B,	V_MUL_RTE_RESULT,	V_MUL_RTZ_RESULT },
1863 
1864 			// in SPIR-V code we return first element of operation result so for following
1865 			// cases argument and result values prepared for dot product can be reused
1866 			{ O_VEC_MUL_M,		V_DOT_ARG_A,	V_DOT_ARG_B,	V_DOT_RTE_RESULT,	V_DOT_RTZ_RESULT },
1867 			{ O_MAT_MUL_V,		V_DOT_ARG_A,	V_DOT_ARG_B,	V_DOT_RTE_RESULT,	V_DOT_RTZ_RESULT },
1868 			{ O_MAT_MUL_M,		V_DOT_ARG_A,	V_DOT_ARG_B,	V_DOT_RTE_RESULT,	V_DOT_RTZ_RESULT },
1869 
1870 			// conversion operations are added separately - depending on float type width
1871 		};
1872 
1873 		for (int c = 0 ; c < DE_LENGTH_OF_ARRAY(roundingCases) ; ++c)
1874 		{
1875 			const RoundingModeCase& rmc = roundingCases[c];
1876 			testCases.push_back(OTC("rounding_rte_op", B_RTE_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTEResult));
1877 			testCases.push_back(OTC("rounding_rtz_op", B_RTZ_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTZResult));
1878 		}
1879 	}
1880 
1881 	// special cases
1882 	if (typeTestResults->floatType() == FP16)
1883 	{
1884 		if (argumentsFromInput)
1885 		{
1886 			testCases.push_back(OTC("rounding_rte_conv_from_fp32", B_RTE_ROUNDING, O_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
1887 			testCases.push_back(OTC("rounding_rtz_conv_from_fp32", B_RTZ_ROUNDING, O_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
1888 			testCases.push_back(OTC("rounding_rte_conv_from_fp64", B_RTE_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
1889 			testCases.push_back(OTC("rounding_rtz_conv_from_fp64", B_RTZ_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
1890 
1891 			testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
1892 			testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
1893 			testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
1894 			testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
1895 
1896 			// verify that VkShaderFloatingPointRoundingModeKHR can be overridden for a given instruction by the FPRoundingMode decoration
1897 			testCases.push_back(OTC("rounding_rte_override", B_RTE_ROUNDING, O_ORTZ_ROUND, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
1898 			testCases.push_back(OTC("rounding_rtz_override", B_RTZ_ROUNDING, O_ORTE_ROUND, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
1899 		}
1900 
1901 		createUnaryTestCases(testCases, O_CONV_FROM_FP32, V_CONV_DENORM_SMALLER, V_ZERO);
1902 		createUnaryTestCases(testCases, O_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
1903 	}
1904 	else if (typeTestResults->floatType() == FP32)
1905 	{
1906 		if (argumentsFromInput)
1907 		{
1908 			// convert from fp64 to fp32
1909 			testCases.push_back(OTC("rounding_rte_conv_from_fp64", B_RTE_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP32_RTE_RESULT));
1910 			testCases.push_back(OTC("rounding_rtz_conv_from_fp64", B_RTZ_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP32_RTZ_RESULT));
1911 
1912 			testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP32, V_UNUSED, V_UNUSED, V_CONV_TO_FP32_RTE_RESULT));
1913 			testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP32, V_UNUSED, V_UNUSED, V_CONV_TO_FP32_RTZ_RESULT));
1914 		}
1915 		else
1916 		{
1917 			// PackHalf2x16 - verification done in SPIR-V
1918 			testCases.push_back(OTC("pack_half_denorm_preserve",		B_DENORM_PERSERVE,	O_PH_DENORM,	V_UNUSED, V_UNUSED, V_ONE));
1919 
1920 			// UnpackHalf2x16 - custom arguments defined as constants
1921 			testCases.push_back(OTC("upack_half_denorm_flush_to_zero",	B_DENORM_FLUSH,		O_UPH_DENORM,	V_UNUSED, V_UNUSED, V_ZERO));
1922 			testCases.push_back(OTC("upack_half_denorm_preserve",		B_DENORM_PERSERVE,	O_UPH_DENORM,	V_UNUSED, V_UNUSED, V_CONV_DENORM_SMALLER));
1923 		}
1924 
1925 		createUnaryTestCases(testCases, O_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP32);
1926 		createUnaryTestCases(testCases, O_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
1927 	}
1928 	else // FP64
1929 	{
1930 		if (!argumentsFromInput)
1931 		{
1932 			// PackDouble2x32 - custom arguments defined as constants
1933 			testCases.push_back(OTC("pack_double_denorm_preserve",			B_DENORM_PERSERVE,	O_PD_DENORM,			V_UNUSED, V_UNUSED, V_DENORM));
1934 
1935 			// UnpackDouble2x32 - verification done in SPIR-V
1936 			testCases.push_back(OTC("upack_double_denorm_flush_to_zero",	B_DENORM_FLUSH,		O_UPD_DENORM_FLUSH,		V_DENORM, V_UNUSED, V_ONE));
1937 			testCases.push_back(OTC("upack_double_denorm_preserve",			B_DENORM_PERSERVE,	O_UPD_DENORM_PRESERVE,	V_DENORM, V_UNUSED, V_ONE));
1938 		}
1939 
1940 		createUnaryTestCases(testCases, O_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP64);
1941 		createUnaryTestCases(testCases, O_CONV_FROM_FP32, V_CONV_DENORM_BIGGER, V_ZERO_OR_FP32_DENORM_TO_FP64);
1942 	}
1943 }
1944 
getOperation(OperationId id) const1945 const Operation& TestCasesBuilder::getOperation(OperationId id) const
1946 {
1947 	return m_operations.at(id);
1948 }
1949 
createUnaryTestCases(vector<OperationTestCase> & testCases,OperationId operationId,ValueId denormPreserveResult,ValueId denormFTZResult) const1950 void TestCasesBuilder::createUnaryTestCases(vector<OperationTestCase>& testCases, OperationId operationId, ValueId denormPreserveResult, ValueId denormFTZResult) const
1951 {
1952 	// Denom - Preserve
1953 	testCases.push_back(OTC("op_denorm_preserve",		B_DENORM_PERSERVE,	operationId, V_DENORM,	V_UNUSED, denormPreserveResult));
1954 
1955 	// Denorm - FlushToZero
1956 	testCases.push_back(OTC("op_denorm_flush_to_zero",	B_DENORM_FLUSH,		operationId, V_DENORM,	V_UNUSED, denormFTZResult));
1957 
1958 	// Signed Zero Inf Nan - Preserve
1959 	testCases.push_back(OTC("op_zero_preserve",			B_ZIN_PERSERVE,		operationId, V_ZERO,		V_UNUSED, V_ZERO));
1960 	testCases.push_back(OTC("op_signed_zero_preserve",	B_ZIN_PERSERVE,		operationId, V_MINUS_ZERO,	V_UNUSED, V_MINUS_ZERO));
1961 	testCases.push_back(OTC("op_inf_preserve",			B_ZIN_PERSERVE,		operationId, V_INF,			V_UNUSED, V_INF));
1962 	testCases.push_back(OTC("op_nan_preserve",			B_ZIN_PERSERVE,		operationId, V_NAN,			V_UNUSED, V_NAN));
1963 }
1964 
1965 template <typename TYPE, typename FLOAT_TYPE>
isZeroOrOtherValue(const TYPE & returnedFloat,ValueId secondAcceptableResult,TestLog & log)1966 bool isZeroOrOtherValue(const TYPE& returnedFloat, ValueId secondAcceptableResult, TestLog& log)
1967 {
1968 	if (returnedFloat.isZero() && !returnedFloat.signBit())
1969 		return true;
1970 
1971 	TypeValues<FLOAT_TYPE> typeValues;
1972 	typedef typename TYPE::StorageType SType;
1973 	typename RawConvert<FLOAT_TYPE, SType>::Value value;
1974 	value.fp = typeValues.getValue(secondAcceptableResult);
1975 
1976 	if (returnedFloat.bits() == value.ui)
1977 		return true;
1978 
1979 	log << TestLog::Message << "Expected 0 or " << toHex(value.ui)
1980 		<< " (" << value.fp << ")" << TestLog::EndMessage;
1981 	return false;
1982 }
1983 
1984 template <typename TYPE>
isAcosResultCorrect(const TYPE & returnedFloat,TestLog & log)1985 bool isAcosResultCorrect(const TYPE& returnedFloat, TestLog& log)
1986 {
1987 	// pi/2 is result of acos(0) which in the specs is defined as equivalent to
1988 	// atan2(sqrt(1.0 - x^2), x), where atan2 has 4096 ULP, sqrt is equivalent to
1989 	// 1.0 /inversesqrt(), inversesqrt() is 2 ULP and rcp is another 2.5 ULP
1990 
1991 	double precision = 0;
1992 	const double piDiv2 = 3.14159265358979323846 / 2;
1993 	if (returnedFloat.MANTISSA_BITS == 23)
1994 	{
1995 		FloatFormat fp32Format(-126, 127, 23, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
1996 		precision = fp32Format.ulp(piDiv2, 4096.0);
1997 	}
1998 	else
1999 	{
2000 		FloatFormat fp16Format(-14, 15, 10, true, tcu::MAYBE);
2001 		precision = fp16Format.ulp(piDiv2, 5.0);
2002 	}
2003 
2004 	if (deAbs(returnedFloat.asDouble() - piDiv2) < precision)
2005 		return true;
2006 
2007 	log << TestLog::Message << "Expected result to be in range"
2008 		<< " (" << piDiv2 - precision << ", " << piDiv2 + precision << "), got "
2009 		<< returnedFloat.asDouble() << TestLog::EndMessage;
2010 	return false;
2011 }
2012 
2013 template <typename TYPE>
isCosResultCorrect(const TYPE & returnedFloat,TestLog & log)2014 bool isCosResultCorrect(const TYPE& returnedFloat, TestLog& log)
2015 {
2016 	// for cos(x) with x between -pi and pi, the precision error is 2^-11 for fp32 and 2^-7 for fp16.
2017 	double precision = returnedFloat.MANTISSA_BITS == 23 ? dePow(2, -11) : dePow(2, -7);
2018 	const double expected = 1.0;
2019 
2020 	if (deAbs(returnedFloat.asDouble() - expected) < precision)
2021 		return true;
2022 
2023 	log << TestLog::Message << "Expected result to be in range"
2024 		<< " (" << expected - precision << ", " << expected + precision << "), got "
2025 		<< returnedFloat.asDouble() << TestLog::EndMessage;
2026 	return false;
2027 }
2028 
2029 // Function used to compare test result with expected output.
2030 // TYPE can be Float16, Float32 or Float64.
2031 // FLOAT_TYPE can be deFloat16, float, double.
2032 template <typename TYPE, typename FLOAT_TYPE>
compareBytes(vector<deUint8> & expectedBytes,AllocationSp outputAlloc,TestLog & log)2033 bool compareBytes(vector<deUint8>& expectedBytes, AllocationSp outputAlloc, TestLog& log)
2034 {
2035 	const TYPE* returned	= static_cast<const TYPE*>(outputAlloc->getHostPtr());
2036 	const TYPE* fValueId	= reinterpret_cast<const TYPE*>(&expectedBytes.front());
2037 
2038 	// all test return single value
2039 	DE_ASSERT((expectedBytes.size() / sizeof(TYPE)) == 1);
2040 
2041 	// during test setup we do not store expected value but id that can be used to
2042 	// retrieve actual value - this is done to handle special cases like multiple
2043 	// allowed results or epsilon checks for some cases
2044 	// note that this is workaround - this should be done by changing
2045 	// ComputerShaderCase and GraphicsShaderCase so that additional arguments can
2046 	// be passed to this verification callback
2047 	typedef typename TYPE::StorageType SType;
2048 	SType		expectedInt		= fValueId[0].bits();
2049 	ValueId		expectedValueId	= static_cast<ValueId>(expectedInt);
2050 
2051 	// something went wrong, expected value cant be V_UNUSED,
2052 	// if this is the case then test shouldn't be created at all
2053 	DE_ASSERT(expectedValueId != V_UNUSED);
2054 
2055 	TYPE returnedFloat = returned[0];
2056 
2057 	log << TestLog::Message << "Calculated result: " << toHex(returnedFloat.bits())
2058 		<< " (" << returnedFloat.asFloat() << ")" << TestLog::EndMessage;
2059 
2060 	if (expectedValueId == V_NAN)
2061 	{
2062 		if (returnedFloat.isNaN())
2063 			return true;
2064 
2065 		log << TestLog::Message << "Expected NaN" << TestLog::EndMessage;
2066 		return false;
2067 	}
2068 
2069 	if (expectedValueId == V_DENORM)
2070 	{
2071 		if (returnedFloat.isDenorm())
2072 			return true;
2073 
2074 		log << TestLog::Message << "Expected Denorm" << TestLog::EndMessage;
2075 		return false;
2076 	}
2077 
2078 	// handle multiple acceptable results cases
2079 	if (expectedValueId == V_ZERO_OR_MINUS_ZERO)
2080 	{
2081 		if (returnedFloat.isZero())
2082 			return true;
2083 
2084 		log << TestLog::Message << "Expected 0 or -0" << TestLog::EndMessage;
2085 		return false;
2086 	}
2087 	if ((expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP32) || (expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP64))
2088 		return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_SMALLER, log);
2089 	if (expectedValueId == V_ZERO_OR_FP32_DENORM_TO_FP64)
2090 		return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_BIGGER, log);
2091 	if (expectedValueId == V_MINUS_ONE_OR_CLOSE)
2092 	{
2093 		// this expected value is only needed for fp16
2094 		DE_ASSERT(returnedFloat.EXPONENT_BIAS == 15);
2095 		typename TYPE::StorageType returnedValue = returnedFloat.bits();
2096 		return (returnedValue == 0xbc00) || (returnedValue == 0xbbff);
2097 	}
2098 
2099 	// handle trigonometric operations precision errors
2100 	if (expectedValueId == V_TRIG_ONE)
2101 		return isCosResultCorrect<TYPE>(returnedFloat, log);
2102 
2103 	// handle acos(0) case
2104 	if (expectedValueId == V_PI_DIV_2)
2105 		return isAcosResultCorrect<TYPE>(returnedFloat, log);
2106 
2107 	TypeValues<FLOAT_TYPE> typeValues;
2108 	typename RawConvert<FLOAT_TYPE, SType>::Value value;
2109 	value.fp = typeValues.getValue(expectedValueId);
2110 
2111 	if (returnedFloat.bits() == value.ui)
2112 		return true;
2113 
2114 	log << TestLog::Message << "Expected " << toHex(value.ui)
2115 		<< " (" << value.fp << ")" << TestLog::EndMessage;
2116 	return false;
2117 }
2118 
2119 template <typename TYPE, typename FLOAT_TYPE>
checkFloats(const vector<Resource> &,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,TestLog & log)2120 bool checkFloats (const vector<Resource>&		,
2121 						  const vector<AllocationSp>&	outputAllocs,
2122 						  const vector<Resource>&		expectedOutputs,
2123 						  TestLog&						log)
2124 {
2125 	if (outputAllocs.size() != expectedOutputs.size())
2126 		return false;
2127 
2128 	for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
2129 	{
2130 		vector<deUint8> expectedBytes;
2131 		expectedOutputs[outputNdx].getBytes(expectedBytes);
2132 
2133 		if (!compareBytes<TYPE, FLOAT_TYPE>(expectedBytes, outputAllocs[outputNdx], log))
2134 			return false;
2135 	}
2136 
2137 	return true;
2138 }
2139 
2140 // Base class for ComputeTestGroupBuilder and GrephicstestGroupBuilder classes.
2141 // It contains all functionalities that are used by both child classes.
2142 class TestGroupBuilderBase
2143 {
2144 public:
2145 
2146 	TestGroupBuilderBase();
~TestGroupBuilderBase()2147 	virtual ~TestGroupBuilderBase() {}
2148 
2149 	void init();
2150 
2151 	virtual void createTests(TestCaseGroup* group,
2152 							 FloatType floatType,
2153 							 bool argumentsFromInput) = 0;
2154 
2155 protected:
2156 
2157 	typedef vector<OperationTestCase> TestCaseVect;
2158 
2159 	// Structure containing all data required to create single test.
2160 	struct TestCaseInfo
2161 	{
2162 		FloatType					outFloatType;
2163 		bool						argumentsFromInput;
2164 		VkShaderStageFlagBits		testedStage;
2165 		const Operation&			operation;
2166 		const OperationTestCase&	testCase;
2167 	};
2168 
2169 	void specializeOperation(const TestCaseInfo&	testCaseInfo,
2170 							 SpecializedOperation&	specializedOperation) const;
2171 
2172 	void getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,
2173 											   const string inBitWidth,
2174 											   const string outBitWidth,
2175 											   string& capability,
2176 											   string& executionMode) const;
2177 
2178 	void setupVulkanFeatures(FloatType			inFloatType,
2179 							 FloatType			outFloatType,
2180 							 BehaviorFlags		behaviorFlags,
2181 							 bool				float64FeatureRequired,
2182 							 VulkanFeatures&	features) const;
2183 
2184 protected:
2185 
2186 	struct TypeData
2187 	{
2188 		TypeValuesSP		values;
2189 		TypeSnippetsSP		snippets;
2190 		TypeTestResultsSP	testResults;
2191 	};
2192 
2193 	// Type specific parameters are stored in this map.
2194 	map<FloatType, TypeData> m_typeData;
2195 
2196 	// Map converting behaviuor id to OpCapability instruction
2197 	typedef map<BehaviorFlagBits, string> BehaviorNameMap;
2198 	BehaviorNameMap m_behaviorToName;
2199 };
2200 
TestGroupBuilderBase()2201 TestGroupBuilderBase::TestGroupBuilderBase()
2202 {
2203 	m_typeData[FP16] = TypeData();
2204 	m_typeData[FP16].values			= TypeValuesSP(new TypeValues<deFloat16>);
2205 	m_typeData[FP16].snippets		= TypeSnippetsSP(new TypeSnippets<deFloat16>);
2206 	m_typeData[FP16].testResults	= TypeTestResultsSP(new TypeTestResults<deFloat16>);
2207 	m_typeData[FP32] = TypeData();
2208 	m_typeData[FP32].values			= TypeValuesSP(new TypeValues<float>);
2209 	m_typeData[FP32].snippets		= TypeSnippetsSP(new TypeSnippets<float>);
2210 	m_typeData[FP32].testResults	= TypeTestResultsSP(new TypeTestResults<float>);
2211 	m_typeData[FP64] = TypeData();
2212 	m_typeData[FP64].values			= TypeValuesSP(new TypeValues<double>);
2213 	m_typeData[FP64].snippets		= TypeSnippetsSP(new TypeSnippets<double>);
2214 	m_typeData[FP64].testResults	= TypeTestResultsSP(new TypeTestResults<double>);
2215 
2216 	m_behaviorToName[B_DENORM_PERSERVE]	= "DenormPreserve";
2217 	m_behaviorToName[B_DENORM_FLUSH]	= "DenormFlushToZero";
2218 	m_behaviorToName[B_ZIN_PERSERVE]	= "SignedZeroInfNanPreserve";
2219 	m_behaviorToName[B_RTE_ROUNDING]	= "RoundingModeRTE";
2220 	m_behaviorToName[B_RTZ_ROUNDING]	= "RoundingModeRTZ";
2221 }
2222 
specializeOperation(const TestCaseInfo & testCaseInfo,SpecializedOperation & specializedOperation) const2223 void TestGroupBuilderBase::specializeOperation(const TestCaseInfo&		testCaseInfo,
2224 											   SpecializedOperation&	specializedOperation) const
2225 {
2226 	const string		typeToken		= "_float";
2227 	const string		widthToken		= "${float_width}";
2228 
2229 	FloatType				outFloatType	= testCaseInfo.outFloatType;
2230 	const Operation&		operation		= testCaseInfo.operation;
2231 	const TypeSnippetsSP	outTypeSnippets	= m_typeData.at(outFloatType).snippets;
2232 	const bool				inputRestricted	= operation.isInputTypeRestricted;
2233 	FloatType				inFloatType		= operation.restrictedInputType;
2234 
2235 	// usually input type is same as output but this is not the case for conversion
2236 	// operations; in those cases operation definitions have restricted input type
2237 	inFloatType = inputRestricted ? inFloatType : outFloatType;
2238 
2239 	TypeSnippetsSP inTypeSnippets = m_typeData.at(inFloatType).snippets;
2240 
2241 	const string inTypePrefix	= string("_f") + inTypeSnippets->bitWidth;
2242 	const string outTypePrefix	= string("_f") + outTypeSnippets->bitWidth;
2243 
2244 	specializedOperation.constans		= replace(operation.constants, typeToken, inTypePrefix);
2245 	specializedOperation.annotations	= replace(operation.annotations, widthToken, outTypeSnippets->bitWidth);
2246 	specializedOperation.types			= replace(operation.types, typeToken, outTypePrefix);
2247 	specializedOperation.variables		= replace(operation.variables, typeToken, outTypePrefix);
2248 	specializedOperation.commands		= replace(operation.commands, typeToken, outTypePrefix);
2249 
2250 	specializedOperation.inFloatType		= inFloatType;
2251 	specializedOperation.inTypeSnippets		= inTypeSnippets;
2252 	specializedOperation.outTypeSnippets	= outTypeSnippets;
2253 
2254 	if (operation.isSpecConstant)
2255 		return;
2256 
2257 	// select way arguments are prepared
2258 	if (testCaseInfo.argumentsFromInput)
2259 	{
2260 		// read arguments from input SSBO in main function
2261 		specializedOperation.arguments = inTypeSnippets->argumentsFromInputSnippet;
2262 	}
2263 	else
2264 	{
2265 		// generate proper values in main function
2266 		const string arg1 = "%arg1                 = ";
2267 		const string arg2 = "%arg2                 = ";
2268 
2269 		const ValueId* inputArguments = testCaseInfo.testCase.input;
2270 		if (inputArguments[0] != V_UNUSED)
2271 			specializedOperation.arguments  = arg1 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[0]);
2272 		if (inputArguments[1] != V_UNUSED)
2273 			specializedOperation.arguments += arg2 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[1]);
2274 	}
2275 }
2276 
2277 
getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,const string inBitWidth,const string outBitWidth,string & capability,string & executionMode) const2278 void TestGroupBuilderBase::getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,
2279 																 const string inBitWidth,
2280 																 const string outBitWidth,
2281 																 string& capability,
2282 																 string& executionMode) const
2283 {
2284 	// iterate over all behaviours and request those that are needed
2285 	BehaviorNameMap::const_iterator it = m_behaviorToName.begin();
2286 	while (it != m_behaviorToName.end())
2287 	{
2288 		BehaviorFlagBits	behaviorId		= it->first;
2289 		string				behaviorName	= it->second;
2290 
2291 		if (behaviorFlags & behaviorId)
2292 		{
2293 			capability += "OpCapability " + behaviorName + "\n";
2294 
2295 			// rounding mode should be obeyed for destination type
2296 			bool rounding = (behaviorId == B_RTE_ROUNDING) || (behaviorId == B_RTZ_ROUNDING);
2297 			executionMode += "OpExecutionMode %main " + behaviorName + " " +
2298 							 (rounding ? outBitWidth : inBitWidth) + "\n";
2299 		}
2300 
2301 		++it;
2302 	}
2303 
2304 	DE_ASSERT(!capability.empty() && !executionMode.empty());
2305 }
2306 
setupVulkanFeatures(FloatType inFloatType,FloatType outFloatType,BehaviorFlags behaviorFlags,bool float64FeatureRequired,VulkanFeatures & features) const2307 void TestGroupBuilderBase::setupVulkanFeatures(FloatType		inFloatType,
2308 											   FloatType		outFloatType,
2309 											   BehaviorFlags	behaviorFlags,
2310 											   bool				float64FeatureRequired,
2311 											   VulkanFeatures&	features) const
2312 {
2313 	features.coreFeatures.shaderFloat64 = float64FeatureRequired;
2314 
2315 	// request proper float controls features
2316 	ExtensionFloatControlsFeatures& floatControls = features.floatControlsProperties;
2317 
2318 	// rounding mode should obey the destination type
2319 	bool rteRounding = (behaviorFlags & B_RTE_ROUNDING) != 0;
2320 	bool rtzRounding = (behaviorFlags & B_RTZ_ROUNDING) != 0;
2321 	if (rteRounding || rtzRounding)
2322 	{
2323 		switch(outFloatType)
2324 		{
2325 		case FP16:
2326 			floatControls.shaderRoundingModeRTEFloat16 = rteRounding;
2327 			floatControls.shaderRoundingModeRTZFloat16 = rtzRounding;
2328 			return;
2329 		case FP32:
2330 			floatControls.shaderRoundingModeRTEFloat32 = rteRounding;
2331 			floatControls.shaderRoundingModeRTZFloat32 = rtzRounding;
2332 			return;
2333 		case FP64:
2334 			floatControls.shaderRoundingModeRTEFloat64 = rteRounding;
2335 			floatControls.shaderRoundingModeRTZFloat64 = rtzRounding;
2336 			return;
2337 		}
2338 	}
2339 
2340 	switch(inFloatType)
2341 	{
2342 	case FP16:
2343 		floatControls.shaderDenormPreserveFloat16			= behaviorFlags & B_DENORM_PERSERVE;
2344 		floatControls.shaderDenormFlushToZeroFloat16		= behaviorFlags & B_DENORM_FLUSH;
2345 		floatControls.shaderSignedZeroInfNanPreserveFloat16	= behaviorFlags & B_ZIN_PERSERVE;
2346 		return;
2347 	case FP32:
2348 		floatControls.shaderDenormPreserveFloat32			= behaviorFlags & B_DENORM_PERSERVE;
2349 		floatControls.shaderDenormFlushToZeroFloat32		= behaviorFlags & B_DENORM_FLUSH;
2350 		floatControls.shaderSignedZeroInfNanPreserveFloat32	= behaviorFlags & B_ZIN_PERSERVE;
2351 		return;
2352 	case FP64:
2353 		floatControls.shaderDenormPreserveFloat64			= behaviorFlags & B_DENORM_PERSERVE;
2354 		floatControls.shaderDenormFlushToZeroFloat64		= behaviorFlags & B_DENORM_FLUSH;
2355 		floatControls.shaderSignedZeroInfNanPreserveFloat64	= behaviorFlags & B_ZIN_PERSERVE;
2356 		return;
2357 	}
2358 }
2359 
2360 // ComputeTestGroupBuilder contains logic that creates compute shaders
2361 // for all test cases. As most tests in spirv-assembly it uses functionality
2362 // implemented in vktSpvAsmComputeShaderTestUtil.cpp.
2363 class ComputeTestGroupBuilder: public TestGroupBuilderBase
2364 {
2365 public:
2366 
2367 	void init();
2368 
2369 	void createTests(TestCaseGroup* group, FloatType floatType, bool argumentsFromInput);
2370 
2371 protected:
2372 
2373 	void fillShaderSpec(const TestCaseInfo&		testCaseInfo,
2374 						ComputeShaderSpec&		csSpec) const;
2375 
2376 private:
2377 
2378 
2379 	StringTemplate		m_shaderTemplate;
2380 	TestCasesBuilder	m_testCaseBuilder;
2381 };
2382 
init()2383 void ComputeTestGroupBuilder::init()
2384 {
2385 	m_testCaseBuilder.init();
2386 
2387 	// geenric compute shader template that has code common for all
2388 	// float types and all possible operations listed in OperationId enum
2389 	m_shaderTemplate.setString(
2390 		"OpCapability Shader\n"
2391 		"${capabilities}"
2392 
2393 		"OpExtension \"SPV_KHR_float_controls\"\n"
2394 		"${extensions}"
2395 
2396 		"%std450            = OpExtInstImport \"GLSL.std.450\"\n"
2397 		"OpMemoryModel Logical GLSL450\n"
2398 		"OpEntryPoint GLCompute %main \"main\" %id\n"
2399 		"OpExecutionMode %main LocalSize 1 1 1\n"
2400 		"${execution_mode}"
2401 
2402 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2403 
2404 		// some tests require additional annotations
2405 		"${annotations}"
2406 
2407 		"%type_void            = OpTypeVoid\n"
2408 		"%type_voidf           = OpTypeFunction %type_void\n"
2409 		"%type_bool            = OpTypeBool\n"
2410 		"%type_u32             = OpTypeInt 32 0\n"
2411 		"%type_i32             = OpTypeInt 32 1\n"
2412 		"%type_i32_fptr        = OpTypePointer Function %type_i32\n"
2413 		"%type_u32_vec2        = OpTypeVector %type_u32 2\n"
2414 		"%type_u32_vec3        = OpTypeVector %type_u32 3\n"
2415 		"%type_u32_vec3_ptr    = OpTypePointer Input %type_u32_vec3\n"
2416 
2417 		"%c_i32_0              = OpConstant %type_i32 0\n"
2418 		"%c_i32_1              = OpConstant %type_i32 1\n"
2419 		"%c_i32_2              = OpConstant %type_i32 2\n"
2420 		"%c_u32_1              = OpConstant %type_u32 1\n"
2421 
2422 		// if input float type has different width then output then
2423 		// both types are defined here along with all types derived from
2424 		// them that are commonly used by tests; some tests also define
2425 		// their own types (those that are needed just by this single test)
2426 		"${types}"
2427 
2428 		// SSBO definitions
2429 		"${io_definitions}"
2430 
2431 		"%id                   = OpVariable %type_u32_vec3_ptr Input\n"
2432 
2433 		// set of default constants per float type is placed here,
2434 		// operation tests can also define additional constants;
2435 		// note that O_RETURN_VAL defines function here and becouse
2436 		// of that this token needs to be directly before main function
2437 		"${constants}"
2438 
2439 		"%main                 = OpFunction %type_void None %type_voidf\n"
2440 		"%label                = OpLabel\n"
2441 
2442 		"${variables}"
2443 
2444 		// depending on test case arguments are either read from input ssbo
2445 		// or generated in spir-v code - in later case shader input is not used
2446 		"${arguments}"
2447 
2448 		// perform test commands
2449 		"${commands}"
2450 
2451 		// save result to SSBO
2452 		"${save_result}"
2453 
2454 		"OpReturn\n"
2455 		"OpFunctionEnd\n");
2456 }
2457 
createTests(TestCaseGroup * group,FloatType floatType,bool argumentsFromInput)2458 void ComputeTestGroupBuilder::createTests(TestCaseGroup* group, FloatType floatType, bool argumentsFromInput)
2459 {
2460 	TestContext& testCtx = group->getTestContext();
2461 	TestCaseVect testCases;
2462 	m_testCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
2463 
2464 	TestCaseVect::const_iterator currTestCase = testCases.begin();
2465 	TestCaseVect::const_iterator lastTestCase = testCases.end();
2466 	while(currTestCase != lastTestCase)
2467 	{
2468 		const OperationTestCase& testCase = *currTestCase;
2469 		++currTestCase;
2470 
2471 		// skip cases with undefined output
2472 		if (testCase.expectedOutput == V_UNUSED)
2473 			continue;
2474 
2475 		TestCaseInfo testCaseInfo =
2476 		{
2477 			floatType,
2478 			argumentsFromInput,
2479 			VK_SHADER_STAGE_COMPUTE_BIT,
2480 			m_testCaseBuilder.getOperation(testCase.operationId),
2481 			testCase
2482 		};
2483 
2484 		ComputeShaderSpec	csSpec;
2485 
2486 		fillShaderSpec(testCaseInfo, csSpec);
2487 
2488 		string testName = replace(testCase.baseName, "op", testCaseInfo.operation.name);
2489 		group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", csSpec));
2490 	}
2491 }
2492 
fillShaderSpec(const TestCaseInfo & testCaseInfo,ComputeShaderSpec & csSpec) const2493 void ComputeTestGroupBuilder::fillShaderSpec(const TestCaseInfo& testCaseInfo,
2494 											 ComputeShaderSpec& csSpec) const
2495 {
2496 	// LUT storing functions used to verify test results
2497 	const VerifyIOFunc checkFloatsLUT[] =
2498 	{
2499 		checkFloats<Float16, deFloat16>,
2500 		checkFloats<Float32, float>,
2501 		checkFloats<Float64, double>
2502 	};
2503 
2504 	const Operation&			testOperation	= testCaseInfo.operation;
2505 	const OperationTestCase&	testCase		= testCaseInfo.testCase;
2506 	FloatType					outFloatType	= testCaseInfo.outFloatType;
2507 
2508 	SpecializedOperation specOpData;
2509 	specializeOperation(testCaseInfo, specOpData);
2510 
2511 	TypeSnippetsSP	inTypeSnippets		= specOpData.inTypeSnippets;
2512 	TypeSnippetsSP	outTypeSnippets		= specOpData.outTypeSnippets;
2513 	FloatType		inFloatType			= specOpData.inFloatType;
2514 
2515 	// UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
2516 	// internaly operates on fp16 and this type should be used by float controls
2517 	FloatType		inFloatTypeForCaps		= inFloatType;
2518 	string			inFloatWidthForCaps		= inTypeSnippets->bitWidth;
2519 	if (testCase.operationId == O_UPH_DENORM)
2520 	{
2521 		inFloatTypeForCaps	= FP16;
2522 		inFloatWidthForCaps	= "16";
2523 	}
2524 
2525 	string behaviorCapability;
2526 	string behaviorExecutionMode;
2527 	getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags,
2528 										  inFloatWidthForCaps,
2529 										  outTypeSnippets->bitWidth,
2530 										  behaviorCapability,
2531 										  behaviorExecutionMode);
2532 
2533 	string capabilities		= behaviorCapability + outTypeSnippets->capabilities;
2534 	string extensions		= outTypeSnippets->extensions;
2535 	string annotations		= inTypeSnippets->inputAnnotationsSnippet + outTypeSnippets->outputAnnotationsSnippet +
2536 							  outTypeSnippets->typeAnnotationsSnippet;
2537 	string types			= outTypeSnippets->typeDefinitionsSnippet;
2538 	string constants		= outTypeSnippets->constantsDefinitionsSnippet;
2539 	string ioDefinitions	= inTypeSnippets->inputDefinitionsSnippet + outTypeSnippets->outputDefinitionsSnippet;
2540 
2541 	if (testOperation.isInputTypeRestricted)
2542 	{
2543 		annotations		+= inTypeSnippets->typeAnnotationsSnippet;
2544 		capabilities	+= inTypeSnippets->capabilities;
2545 		extensions		+= inTypeSnippets->extensions;
2546 		types			+= inTypeSnippets->typeDefinitionsSnippet;
2547 		constants		+= inTypeSnippets->constantsDefinitionsSnippet;
2548 	}
2549 
2550 	map<string, string> specializations;
2551 	specializations["capabilities"]		= capabilities;
2552 	specializations["extensions"]		= extensions;
2553 	specializations["execution_mode"]	= behaviorExecutionMode;
2554 	specializations["annotations"]		= annotations + specOpData.annotations;
2555 	specializations["types"]			= types + specOpData.types;
2556 	specializations["constants"]		= constants + specOpData.constans;
2557 	specializations["io_definitions"]	= ioDefinitions;
2558 	specializations["arguments"]		= specOpData.arguments;
2559 	specializations["variables"]		= specOpData.variables;
2560 	specializations["commands"]			= specOpData.commands;
2561 	specializations["save_result"]		= outTypeSnippets->storeResultsSnippet;
2562 
2563 	// specialize shader
2564 	const string shaderCode = m_shaderTemplate.specialize(specializations);
2565 
2566 	// construct input and output buffers of proper types
2567 	TypeValuesSP inTypeValues	= m_typeData.at(inFloatType).values;
2568 	TypeValuesSP outTypeValues	= m_typeData.at(outFloatType).values;
2569 	BufferSp inBufferSp			= inTypeValues->constructInputBuffer(testCase.input);
2570 	BufferSp outBufferSp		= outTypeValues->constructOutputBuffer(testCase.expectedOutput);
2571 	csSpec.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
2572 	csSpec.outputs.push_back(Resource(outBufferSp));
2573 
2574 	// check which format features are needed
2575 	bool float16FeatureRequired = (outFloatType == FP16) || (inFloatType == FP16);
2576 	bool float64FeatureRequired = (outFloatType == FP64) || (inFloatType == FP64);
2577 
2578 	setupVulkanFeatures(inFloatTypeForCaps,		// usualy same as inFloatType - different only for UnpackHalf2x16
2579 						outFloatType,
2580 						testCase.behaviorFlags,
2581 						float64FeatureRequired,
2582 						csSpec.requestedVulkanFeatures);
2583 
2584 	csSpec.assembly			= shaderCode;
2585 	csSpec.numWorkGroups	= IVec3(1, 1, 1);
2586 	csSpec.verifyIO			= checkFloatsLUT[outFloatType];
2587 
2588 	csSpec.extensions.push_back("VK_KHR_shader_float_controls");
2589 	if (float16FeatureRequired)
2590 	{
2591 		csSpec.extensions.push_back("VK_KHR_16bit_storage");
2592 		csSpec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
2593 	}
2594 	if (float64FeatureRequired)
2595 		csSpec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
2596 }
2597 
getGraphicsShaderCode(vk::SourceCollections & dst,InstanceContext context)2598 void getGraphicsShaderCode (vk::SourceCollections& dst, InstanceContext context)
2599 {
2600 	// this function is used only by GraphicsTestGroupBuilder but it couldn't
2601 	// be implemented as a method because of how addFunctionCaseWithPrograms
2602 	// was implemented
2603 
2604 	SpirvVersion	targetSpirvVersion	= context.resources.spirvVersion;
2605 	const deUint32	vulkanVersion		= dst.usedVulkanVersion;
2606 
2607 	static const string vertexTemplate =
2608 		"OpCapability Shader\n"
2609 		"${vert_capabilities}"
2610 
2611 		"OpExtension \"SPV_KHR_float_controls\"\n"
2612 		"${vert_extensions}"
2613 
2614 		"%std450            = OpExtInstImport \"GLSL.std.450\"\n"
2615 		"OpMemoryModel Logical GLSL450\n"
2616 		"OpEntryPoint Vertex %main \"main\" %BP_stream %BP_position %BP_color %BP_gl_VertexIndex %BP_gl_InstanceIndex %BP_vertex_color %BP_vertex_result \n"
2617 		"${vert_execution_mode}"
2618 
2619 		"OpMemberDecorate %BP_gl_PerVertex 0 BuiltIn Position\n"
2620 		"OpMemberDecorate %BP_gl_PerVertex 1 BuiltIn PointSize\n"
2621 		"OpMemberDecorate %BP_gl_PerVertex 2 BuiltIn ClipDistance\n"
2622 		"OpMemberDecorate %BP_gl_PerVertex 3 BuiltIn CullDistance\n"
2623 		"OpDecorate %BP_gl_PerVertex Block\n"
2624 		"OpDecorate %BP_position Location 0\n"
2625 		"OpDecorate %BP_color Location 1\n"
2626 		"OpDecorate %BP_vertex_color Location 1\n"
2627 		"OpDecorate %BP_vertex_result Location 2\n"
2628 		"OpDecorate %BP_vertex_result Flat\n"
2629 		"OpDecorate %BP_gl_VertexIndex BuiltIn VertexIndex\n"
2630 		"OpDecorate %BP_gl_InstanceIndex BuiltIn InstanceIndex\n"
2631 
2632 		// some tests require additional annotations
2633 		"${vert_annotations}"
2634 
2635 		// types required by most of tests
2636 		"%type_void            = OpTypeVoid\n"
2637 		"%type_voidf           = OpTypeFunction %type_void\n"
2638 		"%type_bool            = OpTypeBool\n"
2639 		"%type_i32             = OpTypeInt 32 1\n"
2640 		"%type_u32             = OpTypeInt 32 0\n"
2641 		"%type_u32_vec2        = OpTypeVector %type_u32 2\n"
2642 		"%type_i32_iptr        = OpTypePointer Input %type_i32\n"
2643 		"%type_i32_optr        = OpTypePointer Output %type_i32\n"
2644 		"%type_i32_fptr        = OpTypePointer Function %type_i32\n"
2645 
2646 		// constants required by most of tests
2647 		"%c_i32_0              = OpConstant %type_i32 0\n"
2648 		"%c_i32_1              = OpConstant %type_i32 1\n"
2649 		"%c_i32_2              = OpConstant %type_i32 2\n"
2650 		"%c_u32_1              = OpConstant %type_u32 1\n"
2651 
2652 		// if input float type has different width then output then
2653 		// both types are defined here along with all types derived from
2654 		// them that are commonly used by tests; some tests also define
2655 		// their own types (those that are needed just by this single test)
2656 		"${vert_types}"
2657 
2658 		// SSBO is not universally supported for storing
2659 		// data in vertex stages - it is onle read here
2660 		"${vert_io_definitions}"
2661 
2662 		"%BP_gl_PerVertex      = OpTypeStruct %type_f32_vec4 %type_f32 %type_f32_arr_1 %type_f32_arr_1\n"
2663 		"%BP_gl_PerVertex_optr = OpTypePointer Output %BP_gl_PerVertex\n"
2664 		"%BP_stream            = OpVariable %BP_gl_PerVertex_optr Output\n"
2665 		"%BP_position          = OpVariable %type_f32_vec4_iptr Input\n"
2666 		"%BP_color             = OpVariable %type_f32_vec4_iptr Input\n"
2667 		"%BP_gl_VertexIndex    = OpVariable %type_i32_iptr Input\n"
2668 		"%BP_gl_InstanceIndex  = OpVariable %type_i32_iptr Input\n"
2669 		"%BP_vertex_color      = OpVariable %type_f32_vec4_optr Output\n"
2670 
2671 		// set of default constants per float type is placed here,
2672 		// operation tests can also define additional constants;
2673 		// note that O_RETURN_VAL defines function here and because
2674 		// of that this token needs to be directly before main function
2675 		"${vert_constants}"
2676 
2677 		"%main                 = OpFunction %type_void None %type_voidf\n"
2678 		"%label                = OpLabel\n"
2679 
2680 		"${vert_variables}"
2681 
2682 		"%position             = OpLoad %type_f32_vec4 %BP_position\n"
2683 		"%gl_pos               = OpAccessChain %type_f32_vec4_optr %BP_stream %c_i32_0\n"
2684 		"OpStore %gl_pos %position\n"
2685 		"%color                = OpLoad %type_f32_vec4 %BP_color\n"
2686 		"OpStore %BP_vertex_color %color\n"
2687 
2688 		// this token is filled only when vertex stage is tested;
2689 		// depending on test case arguments are either read from input ssbo
2690 		// or generated in spir-v code - in later case ssbo is not used
2691 		"${vert_arguments}"
2692 
2693 		// when vertex shader is tested then test operations are performed
2694 		// here and passed to fragment stage; if fragment stage ts tested
2695 		// then ${comands} and ${vert_process_result} are rplaced with nop
2696 		"${vert_commands}"
2697 
2698 		"${vert_process_result}"
2699 
2700 		"OpReturn\n"
2701 		"OpFunctionEnd\n";
2702 
2703 
2704 	static const string fragmentTemplate =
2705 		"OpCapability Shader\n"
2706 		"${frag_capabilities}"
2707 
2708 		"OpExtension \"SPV_KHR_float_controls\"\n"
2709 		"${frag_extensions}"
2710 
2711 		"%std450            = OpExtInstImport \"GLSL.std.450\"\n"
2712 		"OpMemoryModel Logical GLSL450\n"
2713 		"OpEntryPoint Fragment %main \"main\" %BP_vertex_color %BP_vertex_result %BP_fragColor %BP_gl_FragCoord \n"
2714 		"OpExecutionMode %main OriginUpperLeft\n"
2715 		"${frag_execution_mode}"
2716 
2717 		"OpDecorate %BP_fragColor Location 0\n"
2718 		"OpDecorate %BP_vertex_color Location 1\n"
2719 		"OpDecorate %BP_vertex_result Location 2\n"
2720 		"OpDecorate %BP_vertex_result Flat\n"
2721 		"OpDecorate %BP_gl_FragCoord BuiltIn FragCoord\n"
2722 
2723 		// some tests require additional annotations
2724 		"${frag_annotations}"
2725 
2726 		// types required by most of tests
2727 		"%type_void            = OpTypeVoid\n"
2728 		"%type_voidf           = OpTypeFunction %type_void\n"
2729 		"%type_bool            = OpTypeBool\n"
2730 		"%type_i32             = OpTypeInt 32 1\n"
2731 		"%type_u32             = OpTypeInt 32 0\n"
2732 		"%type_u32_vec2        = OpTypeVector %type_u32 2\n"
2733 		"%type_i32_iptr        = OpTypePointer Input %type_i32\n"
2734 		"%type_i32_optr        = OpTypePointer Output %type_i32\n"
2735 		"%type_i32_fptr        = OpTypePointer Function %type_i32\n"
2736 
2737 		// constants required by most of tests
2738 		"%c_i32_0              = OpConstant %type_i32 0\n"
2739 		"%c_i32_1              = OpConstant %type_i32 1\n"
2740 		"%c_i32_2              = OpConstant %type_i32 2\n"
2741 		"%c_u32_1              = OpConstant %type_u32 1\n"
2742 
2743 		// if input float type has different width then output then
2744 		// both types are defined here along with all types derived from
2745 		// them that are commonly used by tests; some tests also define
2746 		// their own types (those that are needed just by this single test)
2747 		"${frag_types}"
2748 
2749 		"%BP_gl_FragCoord      = OpVariable %type_f32_vec4_iptr Input\n"
2750 		"%BP_vertex_color      = OpVariable %type_f32_vec4_iptr Input\n"
2751 		"%BP_fragColor         = OpVariable %type_f32_vec4_optr Output\n"
2752 
2753 		// SSBO definitions
2754 		"${frag_io_definitions}"
2755 
2756 		// set of default constants per float type is placed here,
2757 		// operation tests can also define additional constants;
2758 		// note that O_RETURN_VAL defines function here and because
2759 		// of that this token needs to be directly before main function
2760 		"${frag_constants}"
2761 
2762 		"%main                 = OpFunction %type_void None %type_voidf\n"
2763 		"%label                = OpLabel\n"
2764 
2765 		"${frag_variables}"
2766 
2767 		// just pass vertex color - rendered image is not important in our case
2768 		"%vertex_color         = OpLoad %type_f32_vec4 %BP_vertex_color\n"
2769 		"OpStore %BP_fragColor %vertex_color\n"
2770 
2771 		// this token is filled only when fragment stage is tested;
2772 		// depending on test case arguments are either read from input ssbo or
2773 		// generated in spir-v code - in later case ssbo is used only for output
2774 		"${frag_arguments}"
2775 
2776 		// when fragment shader is tested then test operations are performed
2777 		// here and saved to ssbo; if vertex stage was tested then its
2778 		// result is just saved to ssbo here
2779 		"${frag_commands}"
2780 		"${frag_process_result}"
2781 
2782 		"OpReturn\n"
2783 		"OpFunctionEnd\n";
2784 
2785 	dst.spirvAsmSources.add("vert", DE_NULL)
2786 		<< StringTemplate(vertexTemplate).specialize(context.testCodeFragments)
2787 		<< SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
2788 	dst.spirvAsmSources.add("frag", DE_NULL)
2789 		<< StringTemplate(fragmentTemplate).specialize(context.testCodeFragments)
2790 		<< SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
2791 }
2792 
// GraphicsTestGroupBuilder iterates over all test cases and creates tests for both
// vertex and fragment stages. As in most spirv-assembly tests, tests here are also
// executed using functionality defined in vktSpvAsmGraphicsShaderTestUtil.cpp, but
// because one of the requirements during development was that SSBOs won't be used in
// the vertex stage, we couldn't use the createTestForStage functions - we need a custom
// version for both vertex and fragment shaders at the same time. This was required
// as we needed to pass the result from the vertex stage to the fragment stage, where it
// could be saved to an SSBO. To achieve that, InstanceContext is created manually in
// the createInstanceContext method.
2802 class GraphicsTestGroupBuilder: public TestGroupBuilderBase
2803 {
2804 public:
2805 
2806 	void init();
2807 
2808 	void createTests(TestCaseGroup* group, FloatType floatType, bool argumentsFromInput);
2809 
2810 protected:
2811 
2812 	InstanceContext createInstanceContext(const TestCaseInfo& testCaseInfo) const;
2813 
2814 private:
2815 
2816 	TestCasesBuilder	m_testCaseBuilder;
2817 };
2818 
init()2819 void GraphicsTestGroupBuilder::init()
2820 {
2821 	m_testCaseBuilder.init();
2822 }
2823 
createTests(TestCaseGroup * group,FloatType floatType,bool argumentsFromInput)2824 void GraphicsTestGroupBuilder::createTests(TestCaseGroup* group, FloatType floatType, bool argumentsFromInput)
2825 {
2826 	// create test cases for vertex stage
2827 	TestCaseVect testCases;
2828 	m_testCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
2829 
2830 	TestCaseVect::const_iterator currTestCase = testCases.begin();
2831 	TestCaseVect::const_iterator lastTestCase = testCases.end();
2832 	while(currTestCase != lastTestCase)
2833 	{
2834 		const OperationTestCase& testCase = *currTestCase;
2835 		++currTestCase;
2836 
2837 		// skip cases with undefined output
2838 		if (testCase.expectedOutput == V_UNUSED)
2839 			continue;
2840 
2841 		// FPRoundingMode decoration can be applied only to conversion instruction that is used as the object
2842 		// argument of an OpStore storing through a pointer to a 16-bit floating-point object in Uniform, or
2843 		// PushConstant, or Input, or Output Storage Classes. SSBO writes are not commonly supported
2844 		// in VS so this test case needs to be skiped for vertex stage.
2845 		if ((testCase.operationId == O_ORTZ_ROUND) || (testCase.operationId == O_ORTE_ROUND))
2846 			continue;
2847 
2848 		TestCaseInfo testCaseInfo =
2849 		{
2850 			floatType,
2851 			argumentsFromInput,
2852 			VK_SHADER_STAGE_VERTEX_BIT,
2853 			m_testCaseBuilder.getOperation(testCase.operationId),
2854 			testCase
2855 		};
2856 
2857 		InstanceContext ctxVertex	= createInstanceContext(testCaseInfo);
2858 		string			testName	= replace(testCase.baseName, "op", testCaseInfo.operation.name);
2859 
2860 		addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_vert", "", getGraphicsShaderCode, runAndVerifyDefaultPipeline, ctxVertex);
2861 	}
2862 
2863 	// create test cases for fragment stage
2864 	testCases.clear();
2865 	m_testCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
2866 
2867 	currTestCase = testCases.begin();
2868 	lastTestCase = testCases.end();
2869 	while(currTestCase != lastTestCase)
2870 	{
2871 		const OperationTestCase& testCase = *currTestCase;
2872 		++currTestCase;
2873 
2874 		// skip cases with undefined output
2875 		if (testCase.expectedOutput == V_UNUSED)
2876 			continue;
2877 
2878 		TestCaseInfo testCaseInfo =
2879 		{
2880 			floatType,
2881 			argumentsFromInput,
2882 			VK_SHADER_STAGE_FRAGMENT_BIT,
2883 			m_testCaseBuilder.getOperation(testCase.operationId),
2884 			testCase
2885 		};
2886 
2887 		InstanceContext ctxFragment	= createInstanceContext(testCaseInfo);
2888 		string			testName	= replace(testCase.baseName, "op", testCaseInfo.operation.name);
2889 
2890 		addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_frag", "", getGraphicsShaderCode, runAndVerifyDefaultPipeline, ctxFragment);
2891 	}
2892 }
2893 
createInstanceContext(const TestCaseInfo & testCaseInfo) const2894 InstanceContext GraphicsTestGroupBuilder::createInstanceContext(const TestCaseInfo& testCaseInfo) const
2895 {
2896 	// LUT storing functions used to verify test results
2897 	const VerifyIOFunc checkFloatsLUT[] =
2898 	{
2899 		checkFloats<Float16, deFloat16>,
2900 		checkFloats<Float32, float>,
2901 		checkFloats<Float64, double>
2902 	};
2903 
2904 	// 32-bit float types are always needed for standard operations on color
2905 	// if tested operation does not require fp32 for either input or output
2906 	// then this minimal type definitions must be appended to types section
2907 	const string f32TypeMinimalRequired =
2908 		"%type_f32             = OpTypeFloat 32\n"
2909 		"%type_f32_arr_1       = OpTypeArray %type_f32 %c_i32_1\n"
2910 		"%type_f32_iptr        = OpTypePointer Input %type_f32\n"
2911 		"%type_f32_optr        = OpTypePointer Output %type_f32\n"
2912 		"%type_f32_vec4        = OpTypeVector %type_f32 4\n"
2913 		"%type_f32_vec4_iptr   = OpTypePointer Input %type_f32_vec4\n"
2914 		"%type_f32_vec4_optr   = OpTypePointer Output %type_f32_vec4\n";
2915 
2916 	const Operation&			testOperation	= testCaseInfo.operation;
2917 	const OperationTestCase&	testCase		= testCaseInfo.testCase;
2918 	FloatType					outFloatType	= testCaseInfo.outFloatType;
2919 	VkShaderStageFlagBits		testedStage		= testCaseInfo.testedStage;
2920 
2921 	DE_ASSERT((testedStage == VK_SHADER_STAGE_VERTEX_BIT) || (testedStage == VK_SHADER_STAGE_FRAGMENT_BIT));
2922 
2923 	SpecializedOperation specOpData;
2924 	specializeOperation(testCaseInfo, specOpData);
2925 
2926 	TypeSnippetsSP	inTypeSnippets		= specOpData.inTypeSnippets;
2927 	TypeSnippetsSP	outTypeSnippets		= specOpData.outTypeSnippets;
2928 	FloatType		inFloatType			= specOpData.inFloatType;
2929 
2930 	// UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
2931 	// internaly operates on fp16 and this type should be used by float controls
2932 	FloatType		inFloatTypeForCaps		= inFloatType;
2933 	string			inFloatWidthForCaps		= inTypeSnippets->bitWidth;
2934 	if (testCase.operationId == O_UPH_DENORM)
2935 	{
2936 		inFloatTypeForCaps	= FP16;
2937 		inFloatWidthForCaps	= "16";
2938 	}
2939 
2940 	string behaviorCapability;
2941 	string behaviorExecutionMode;
2942 	getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags,
2943 										  inFloatWidthForCaps,
2944 										  outTypeSnippets->bitWidth,
2945 										  behaviorCapability,
2946 										  behaviorExecutionMode);
2947 
2948 	// check which format features are needed
2949 	bool float16FeatureRequired = (inFloatType == FP16) || (outFloatType == FP16);
2950 	bool float64FeatureRequired = (inFloatType == FP64) || (outFloatType == FP64);
2951 
2952 	string vertExecutionMode;
2953 	string fragExecutionMode;
2954 	string vertCapabilities;
2955 	string fragCapabilities;
2956 	string vertExtensions;
2957 	string fragExtensions;
2958 	string vertAnnotations;
2959 	string fragAnnotations;
2960 	string vertTypes;
2961 	string fragTypes;
2962 	string vertConstants;
2963 	string fragConstants;
2964 	string vertIODefinitions;
2965 	string fragIODefinitions;
2966 	string vertArguments;
2967 	string fragArguments;
2968 	string vertVariables;
2969 	string fragVariables;
2970 	string vertCommands;
2971 	string fragCommands;
2972 	string vertProcessResult;
2973 	string fragProcessResult;
2974 
2975 	// check if operation should be executed in vertex stage
2976 	if (testedStage == VK_SHADER_STAGE_VERTEX_BIT)
2977 	{
2978 		vertAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet;
2979 		fragAnnotations = outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
2980 
2981 		// check if input type is different from tested type (conversion operations)
2982 		if (testOperation.isInputTypeRestricted)
2983 		{
2984 			vertCapabilities	= behaviorCapability + inTypeSnippets->capabilities + outTypeSnippets->capabilities;
2985 			fragCapabilities	= outTypeSnippets->capabilities;
2986 			vertExtensions		= inTypeSnippets->extensions + outTypeSnippets->extensions;
2987 			fragExtensions		= outTypeSnippets->extensions;
2988 			vertTypes			= inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
2989 			fragTypes			= outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
2990 			vertConstants		= inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
2991 			fragConstants		= outTypeSnippets->constantsDefinitionsSnippet;
2992 		}
2993 		else
2994 		{
2995 			// input and output types are the same (majority of operations)
2996 
2997 			vertCapabilities	= behaviorCapability + outTypeSnippets->capabilities;
2998 			fragCapabilities	= vertCapabilities;
2999 			vertExtensions		= outTypeSnippets->extensions;
3000 			fragExtensions		= vertExtensions;
3001 			vertTypes			= outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
3002 			fragTypes			= vertTypes;
3003 			vertConstants		= outTypeSnippets->constantsDefinitionsSnippet;
3004 			fragConstants		= vertConstants;
3005 		}
3006 
3007 		if (outFloatType != FP32)
3008 		{
3009 			fragTypes += f32TypeMinimalRequired;
3010 			if (inFloatType != FP32)
3011 				vertTypes += f32TypeMinimalRequired;
3012 		}
3013 
3014 		vertAnnotations += specOpData.annotations;
3015 		vertTypes		+= specOpData.types;
3016 		vertConstants	+= specOpData.constans;
3017 
3018 		vertExecutionMode		= behaviorExecutionMode;
3019 		fragExecutionMode		= "";
3020 		vertIODefinitions		= inTypeSnippets->inputDefinitionsSnippet + outTypeSnippets->outputVaryingsSnippet;
3021 		fragIODefinitions		= outTypeSnippets->outputDefinitionsSnippet + outTypeSnippets->inputVaryingsSnippet;
3022 		vertArguments			= specOpData.arguments;
3023 		fragArguments			= "";
3024 		vertVariables			= specOpData.variables;
3025 		fragVariables			= "";
3026 		vertCommands			= specOpData.commands;
3027 		fragCommands			= "";
3028 		vertProcessResult		= outTypeSnippets->storeVertexResultSnippet;
3029 		fragProcessResult		= outTypeSnippets->loadVertexResultSnippet + outTypeSnippets->storeResultsSnippet;
3030 	}
3031 	else // perform test in fragment stage - vertex stage is empty
3032 	{
3033 		// check if input type is different from tested type
3034 		if (testOperation.isInputTypeRestricted)
3035 		{
3036 			fragAnnotations		= inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
3037 								  outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
3038 			fragCapabilities	= behaviorCapability + inTypeSnippets->capabilities + outTypeSnippets->capabilities;
3039 			fragExtensions		= inTypeSnippets->extensions + outTypeSnippets->extensions;
3040 			fragTypes			= inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet;
3041 			fragConstants		= inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
3042 		}
3043 		else
3044 		{
3045 			// input and output types are the same
3046 
3047 			fragAnnotations		= inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
3048 								  outTypeSnippets->outputAnnotationsSnippet;
3049 			fragCapabilities	= behaviorCapability + outTypeSnippets->capabilities;
3050 			fragExtensions		= outTypeSnippets->extensions;
3051 			fragTypes			= outTypeSnippets->typeDefinitionsSnippet;
3052 			fragConstants		= outTypeSnippets->constantsDefinitionsSnippet;
3053 		}
3054 
3055 		// varying is not used but it needs to be specified so lets use type_i32 for it
3056 		string dummyVertVarying = "%BP_vertex_result     = OpVariable %type_i32_optr Output\n";
3057 		string dummyFragVarying = "%BP_vertex_result     = OpVariable %type_i32_iptr Input\n";
3058 
3059 		vertCapabilities	= "";
3060 		vertExtensions		= "";
3061 		vertAnnotations		= "OpDecorate %type_f32_arr_1 ArrayStride 4\n";
3062 		vertTypes			= f32TypeMinimalRequired;
3063 		vertConstants		= "";
3064 
3065 		if ((outFloatType != FP32) && (inFloatType != FP32))
3066 			fragTypes += f32TypeMinimalRequired;
3067 
3068 		fragAnnotations += specOpData.annotations;
3069 		fragTypes		+= specOpData.types;
3070 		fragConstants	+= specOpData.constans;
3071 
3072 		vertExecutionMode	= "";
3073 		fragExecutionMode	= behaviorExecutionMode;
3074 		vertIODefinitions	= dummyVertVarying;
3075 		fragIODefinitions	= inTypeSnippets->inputDefinitionsSnippet +
3076 							  outTypeSnippets->outputDefinitionsSnippet + dummyFragVarying;
3077 		vertArguments		= "";
3078 		fragArguments		= specOpData.arguments;
3079 		vertVariables		= "";
3080 		fragVariables		= specOpData.variables;
3081 		vertCommands		= "";
3082 		fragCommands		= specOpData.commands;
3083 		vertProcessResult	= "";
3084 		fragProcessResult	= outTypeSnippets->storeResultsSnippet;
3085 	}
3086 
3087 	map<string, string> specializations;
3088 	specializations["vert_capabilities"]	= vertCapabilities;
3089 	specializations["vert_extensions"]		= vertExtensions;
3090 	specializations["vert_execution_mode"]	= vertExecutionMode;
3091 	specializations["vert_annotations"]		= vertAnnotations;
3092 	specializations["vert_types"]			= vertTypes;
3093 	specializations["vert_constants"]		= vertConstants;
3094 	specializations["vert_io_definitions"]	= vertIODefinitions;
3095 	specializations["vert_arguments"]		= vertArguments;
3096 	specializations["vert_variables"]		= vertVariables;
3097 	specializations["vert_commands"]		= vertCommands;
3098 	specializations["vert_process_result"]	= vertProcessResult;
3099 	specializations["frag_capabilities"]	= fragCapabilities;
3100 	specializations["frag_extensions"]		= fragExtensions;
3101 	specializations["frag_execution_mode"]	= fragExecutionMode;
3102 	specializations["frag_annotations"]		= fragAnnotations;
3103 	specializations["frag_types"]			= fragTypes;
3104 	specializations["frag_constants"]		= fragConstants;
3105 	specializations["frag_io_definitions"]	= fragIODefinitions;
3106 	specializations["frag_arguments"]		= fragArguments;
3107 	specializations["frag_variables"]		= fragVariables;
3108 	specializations["frag_commands"]		= fragCommands;
3109 	specializations["frag_process_result"]	= fragProcessResult;
3110 
3111 	// colors are not used by the test - input is passed via uniform buffer
3112 	RGBA defaultColors[4] = { RGBA::white(), RGBA::red(), RGBA::green(), RGBA::blue() };
3113 
3114 	// construct input and output buffers of proper types
3115 	TypeValuesSP inTypeValues	= m_typeData.at(inFloatType).values;
3116 	TypeValuesSP outTypeValues	= m_typeData.at(outFloatType).values;
3117 	BufferSp inBufferSp			= inTypeValues->constructInputBuffer(testCase.input);
3118 	BufferSp outBufferSp		= outTypeValues->constructOutputBuffer(testCase.expectedOutput);
3119 
3120 	vkt::SpirVAssembly::GraphicsResources resources;
3121 	resources.inputs.push_back( Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3122 	resources.outputs.push_back(Resource(outBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3123 	resources.verifyIO = checkFloatsLUT[outFloatType];
3124 
3125 	StageToSpecConstantMap	noSpecConstants;
3126 	PushConstants			noPushConstants;
3127 	GraphicsInterfaces		noInterfaces;
3128 
3129 	VulkanFeatures vulkanFeatures;
3130 	setupVulkanFeatures(inFloatTypeForCaps,		// usualy same as inFloatType - different only for UnpackHalf2x16
3131 						outFloatType,
3132 						testCase.behaviorFlags,
3133 						float64FeatureRequired,
3134 						vulkanFeatures);
3135 	vulkanFeatures.coreFeatures.fragmentStoresAndAtomics = true;
3136 
3137 	vector<string> extensions;
3138 	extensions.push_back("VK_KHR_shader_float_controls");
3139 	if (float16FeatureRequired)
3140 	{
3141 		extensions.push_back("VK_KHR_16bit_storage");
3142 		vulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
3143 	}
3144 
3145 	InstanceContext ctx(defaultColors,
3146 						defaultColors,
3147 						specializations,
3148 						noSpecConstants,
3149 						noPushConstants,
3150 						resources,
3151 						noInterfaces,
3152 						extensions,
3153 						vulkanFeatures,
3154 						testedStage);
3155 
3156 	ctx.moduleMap["vert"].push_back(std::make_pair("main", VK_SHADER_STAGE_VERTEX_BIT));
3157 	ctx.moduleMap["frag"].push_back(std::make_pair("main", VK_SHADER_STAGE_FRAGMENT_BIT));
3158 
3159 	ctx.requiredStages			= static_cast<VkShaderStageFlagBits>(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
3160 	ctx.failResult				= QP_TEST_RESULT_FAIL;
3161 	ctx.failMessageTemplate		= "Output doesn't match with expected";
3162 
3163 	return ctx;
3164 }
3165 
3166 } // anonymous
3167 
createFloatControlsTestGroup(TestContext & testCtx,TestGroupBuilderBase * groupBuilder)3168 tcu::TestCaseGroup* createFloatControlsTestGroup (TestContext& testCtx, TestGroupBuilderBase* groupBuilder)
3169 {
3170 	de::MovePtr<TestCaseGroup>	group(new TestCaseGroup(testCtx, "float_controls", "Tests for VK_KHR_shader_float_controls extension"));
3171 
3172 	struct TestGroup
3173 	{
3174 		FloatType		floatType;
3175 		const char*		groupName;
3176 	};
3177 	TestGroup testGroups[] =
3178 	{
3179 		{ FP16, "fp16" },
3180 		{ FP32, "fp32" },
3181 		{ FP64, "fp64" },
3182 	};
3183 
3184 	for (int i = 0 ; i < DE_LENGTH_OF_ARRAY(testGroups) ; ++i)
3185 	{
3186 		const TestGroup& testGroup = testGroups[i];
3187 		TestCaseGroup* typeGroup = new TestCaseGroup(testCtx, testGroup.groupName, "");
3188 		group->addChild(typeGroup);
3189 
3190 		TestCaseGroup* inputArgsGroup = new TestCaseGroup(testCtx, "input_args", "");
3191 		groupBuilder->createTests(inputArgsGroup, testGroup.floatType, true);
3192 		typeGroup->addChild(inputArgsGroup);
3193 
3194 		TestCaseGroup* generatedArgsGroup = new TestCaseGroup(testCtx, "generated_args", "");
3195 		groupBuilder->createTests(generatedArgsGroup, testGroup.floatType, false);
3196 		typeGroup->addChild(generatedArgsGroup);
3197 	}
3198 
3199 	return group.release();
3200 }
3201 
createFloatControlsComputeGroup(TestContext & testCtx)3202 tcu::TestCaseGroup* createFloatControlsComputeGroup (TestContext& testCtx)
3203 {
3204 	ComputeTestGroupBuilder computeTestGroupBuilder;
3205 	computeTestGroupBuilder.init();
3206 
3207 	return createFloatControlsTestGroup(testCtx, &computeTestGroupBuilder);
3208 }
3209 
createFloatControlsGraphicsGroup(TestContext & testCtx)3210 tcu::TestCaseGroup* createFloatControlsGraphicsGroup (TestContext& testCtx)
3211 {
3212 	GraphicsTestGroupBuilder graphicsTestGroupBuilder;
3213 	graphicsTestGroupBuilder.init();
3214 
3215 	return createFloatControlsTestGroup(testCtx, &graphicsTestGroupBuilder);
3216 }
3217 
3218 } // SpirVAssembly
3219 } // vkt
3220