1 #ifndef _VKTSPVASMCOMPUTESHADERTESTUTIL_HPP
2 #define _VKTSPVASMCOMPUTESHADERTESTUTIL_HPP
3 /*-------------------------------------------------------------------------
4  * Vulkan Conformance Tests
5  * ------------------------
6  *
7  * Copyright (c) 2015 Google Inc.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Compute Shader Based Test Case Utility Structs/Functions
24  *//*--------------------------------------------------------------------*/
25 
26 #include "deDefs.h"
27 #include "deFloat16.h"
28 #include "deRandom.hpp"
29 #include "tcuTestLog.hpp"
30 #include "tcuVector.hpp"
31 #include "tcuTestLog.hpp"
32 #include "vkMemUtil.hpp"
33 #include "vktSpvAsmUtils.hpp"
34 
35 #include <string>
36 #include <vector>
37 #include <map>
38 
39 using namespace vk;
40 
41 namespace vkt
42 {
43 namespace SpirVAssembly
44 {
45 
// Identifies which atomic operation an OpAtomic test case exercises.
// Values are contiguous starting at zero, so OPATOMIC_LAST equals the number of operations.
enum OpAtomicType
{
	OPATOMIC_IADD	= 0,
	OPATOMIC_ISUB	= 1,
	OPATOMIC_IINC	= 2,
	OPATOMIC_IDEC	= 3,
	OPATOMIC_LOAD	= 4,
	OPATOMIC_STORE	= 5,
	OPATOMIC_COMPEX	= 6,

	OPATOMIC_LAST	= 7		// sentinel, not an operation
};
58 
// Selects which contents an OpAtomicBuffer produces.
// Values are contiguous starting at zero, so BUFFERTYPE_LAST equals the number of buffer types.
enum BufferType
{
	BUFFERTYPE_INPUT		= 0,
	BUFFERTYPE_EXPECTED		= 1,
	BUFFERTYPE_ATOMIC_RET	= 2,

	BUFFERTYPE_LAST			= 3		// sentinel, not a buffer type
};
67 
fillRandomScalars(de::Random & rnd,deInt32 minValue,deInt32 maxValue,deInt32 * dst,deInt32 numValues)68 static void fillRandomScalars (de::Random& rnd, deInt32 minValue, deInt32 maxValue, deInt32* dst, deInt32 numValues)
69 {
70 	for (int i = 0; i < numValues; i++)
71 		dst[i] = rnd.getInt(minValue, maxValue);
72 }
73 
74 /*--------------------------------------------------------------------*//*!
75 * \brief Concrete class for an input/output storage buffer object used for OpAtomic tests
76 *//*--------------------------------------------------------------------*/
77 class OpAtomicBuffer : public BufferInterface
78 {
79 public:
OpAtomicBuffer(const deUint32 numInputElements,const deUint32 numOuptutElements,const OpAtomicType opAtomic,const BufferType type)80 						OpAtomicBuffer		(const deUint32 numInputElements, const deUint32 numOuptutElements, const OpAtomicType opAtomic, const BufferType type)
81 							: m_numInputElements	(numInputElements)
82 							, m_numOutputElements	(numOuptutElements)
83 							, m_opAtomic			(opAtomic)
84 							, m_type				(type)
85 						{}
86 
getBytes(std::vector<deUint8> & bytes) const87 	void getBytes (std::vector<deUint8>& bytes) const
88 	{
89 		std::vector<deInt32>	inputInts	(m_numInputElements, 0);
90 		de::Random				rnd			(m_opAtomic);
91 
92 		fillRandomScalars(rnd, 1, 100, &inputInts.front(), m_numInputElements);
93 
94 		// Return input values as is
95 		if (m_type == BUFFERTYPE_INPUT)
96 		{
97 			size_t					inputSize	= m_numInputElements * sizeof(deInt32);
98 
99 			bytes.resize(inputSize);
100 			deMemcpy(&bytes.front(), &inputInts.front(), inputSize);
101 		}
102 		// Calculate expected output values
103 		else if (m_type == BUFFERTYPE_EXPECTED)
104 		{
105 			size_t					outputSize	= m_numOutputElements * sizeof(deInt32);
106 			bytes.resize(outputSize, 0xffu);
107 
108 			for (size_t ndx = 0; ndx < m_numInputElements; ndx++)
109 			{
110 				deInt32* const bytesAsInt = reinterpret_cast<deInt32*>(&bytes.front());
111 
112 				switch (m_opAtomic)
113 				{
114 					case OPATOMIC_IADD:		bytesAsInt[0] += inputInts[ndx];						break;
115 					case OPATOMIC_ISUB:		bytesAsInt[0] -= inputInts[ndx];						break;
116 					case OPATOMIC_IINC:		bytesAsInt[0]++;										break;
117 					case OPATOMIC_IDEC:		bytesAsInt[0]--;										break;
118 					case OPATOMIC_LOAD:		bytesAsInt[ndx] = inputInts[ndx];						break;
119 					case OPATOMIC_STORE:	bytesAsInt[ndx] = inputInts[ndx];						break;
120 					case OPATOMIC_COMPEX:	bytesAsInt[ndx] = (inputInts[ndx] % 2) == 0 ? -1 : 1;	break;
121 					default:				DE_FATAL("Unknown OpAtomic type");
122 				}
123 			}
124 		}
125 		else if (m_type == BUFFERTYPE_ATOMIC_RET)
126 		{
127 			bytes.resize(m_numInputElements * sizeof(deInt32), 0xff);
128 
129 			if (m_opAtomic == OPATOMIC_COMPEX)
130 			{
131 				deInt32* const bytesAsInt = reinterpret_cast<deInt32*>(&bytes.front());
132 				for (size_t ndx = 0; ndx < m_numInputElements; ndx++)
133 					bytesAsInt[ndx] = inputInts[ndx] % 2;
134 			}
135 		}
136 		else
137 			DE_FATAL("Unknown buffer type");
138 	}
139 
getPackedBytes(std::vector<deUint8> & bytes) const140 	void getPackedBytes (std::vector<deUint8>& bytes) const
141 	{
142 		return getBytes(bytes);
143 	}
144 
getByteSize(void) const145 	size_t getByteSize (void) const
146 	{
147 		switch (m_type)
148 		{
149 			case BUFFERTYPE_ATOMIC_RET:
150 			case BUFFERTYPE_INPUT:
151 				return m_numInputElements * sizeof(deInt32);
152 			case BUFFERTYPE_EXPECTED:
153 				return m_numOutputElements * sizeof(deInt32);
154 			default:
155 				DE_FATAL("Unknown buffer type");
156 				return 0;
157 		}
158 	}
159 
160 	template <int OpAtomic>
compareWithRetvals(const std::vector<Resource> & inputs,const std::vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,tcu::TestLog & log)161 	static bool compareWithRetvals (const std::vector<Resource>& inputs, const std::vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, tcu::TestLog& log)
162 	{
163 		if (outputAllocs.size() != 2 || inputs.size() != 1)
164 			DE_FATAL("Wrong number of buffers to compare");
165 
166 		for (size_t i = 0; i < outputAllocs.size(); ++i)
167 		{
168 			const deUint32*	values = reinterpret_cast<deUint32*>(outputAllocs[i]->getHostPtr());
169 
170 			if (i == 1 && OpAtomic != OPATOMIC_COMPEX)
171 			{
172 				// BUFFERTYPE_ATOMIC_RET for arithmetic operations must be verified manually by matching return values to inputs
173 				std::vector<deUint8>	inputBytes;
174 				inputs[0].getBytes(inputBytes);
175 
176 				const deUint32*			inputValues			= reinterpret_cast<deUint32*>(&inputBytes.front());
177 				const size_t			inputValuesCount	= inputBytes.size() / sizeof(deUint32);
178 
179 				// result of all atomic operations
180 				const deUint32			resultValue			= *reinterpret_cast<deUint32*>(outputAllocs[0]->getHostPtr());
181 
182 				if (!compareRetVals<OpAtomic>(inputValues, inputValuesCount, resultValue, values))
183 				{
184 					log << tcu::TestLog::Message << "Wrong contents of buffer with return values after atomic operation." << tcu::TestLog::EndMessage;
185 					return false;
186 				}
187 			}
188 			else
189 			{
190 				const BufferSp&			expectedOutput = expectedOutputs[i].getBuffer();
191 				std::vector<deUint8>	expectedBytes;
192 
193 				expectedOutput->getBytes(expectedBytes);
194 
195 				if (deMemCmp(&expectedBytes.front(), values, expectedBytes.size()))
196 				{
197 					log << tcu::TestLog::Message << "Wrong contents of buffer after atomic operation" << tcu::TestLog::EndMessage;
198 					return false;
199 				}
200 			}
201 		}
202 		return true;
203 	}
204 
205 	template <int OpAtomic>
compareRetVals(const deUint32 * inputValues,const size_t inputValuesCount,const deUint32 resultValue,const deUint32 * returnValues)206 	static bool compareRetVals (const deUint32* inputValues, const size_t inputValuesCount, const deUint32 resultValue, const deUint32* returnValues)
207 	{
208 		// as the order of execution is undefined, validation of return values for atomic operations is tricky:
209 		// each inputValue stands for one atomic operation. Iterate through all of
210 		// done operations in time, each time finding one matching current result and un-doing it.
211 
212 		std::vector<bool>		operationsUndone (inputValuesCount, false);
213 		deUint32				currentResult	 = resultValue;
214 
215 		for (size_t operationUndone = 0; operationUndone < inputValuesCount; ++operationUndone)
216 		{
217 			// find which of operations was done at this moment
218 			size_t ndx;
219 			for (ndx = 0; ndx < inputValuesCount; ++ndx)
220 			{
221 				if (operationsUndone[ndx]) continue;
222 
223 				deUint32 previousResult = currentResult;
224 
225 				switch (OpAtomic)
226 				{
227 					// operations are undone here, so the actual opeation is reversed
228 					case OPATOMIC_IADD:		previousResult -= inputValues[ndx];						break;
229 					case OPATOMIC_ISUB:		previousResult += inputValues[ndx];						break;
230 					case OPATOMIC_IINC:		previousResult--;										break;
231 					case OPATOMIC_IDEC:		previousResult++;										break;
232 					default:				DE_FATAL("Unsupported OpAtomic type for return value compare");
233 				}
234 
235 				if (previousResult == returnValues[ndx])
236 				{
237 					// found matching operation
238 					currentResult			= returnValues[ndx];
239 					operationsUndone[ndx]	= true;
240 					break;
241 				}
242 			}
243 			if (ndx == inputValuesCount)
244 			{
245 				// no operation matches the current result value
246 				return false;
247 			}
248 		}
249 		return true;
250 	}
251 
252 private:
253 	const deUint32		m_numInputElements;
254 	const deUint32		m_numOutputElements;
255 	const OpAtomicType	m_opAtomic;
256 	const BufferType	m_type;
257 };
258 
259 /*--------------------------------------------------------------------*//*!
260  * \brief Concrete class for an input/output storage buffer object
261  *//*--------------------------------------------------------------------*/
262 template<typename E>
263 class Buffer : public BufferInterface
264 {
265 public:
Buffer(const std::vector<E> & elements,deUint32 padding=0)266 	Buffer	(const std::vector<E>& elements, deUint32 padding = 0 /* in bytes */)
267 			: m_elements(elements)
268 			, m_padding(padding)
269 			{}
270 
getBytes(std::vector<deUint8> & bytes) const271 	void getBytes (std::vector<deUint8>& bytes) const
272 	{
273 		const size_t	count			= m_elements.size();
274 		const size_t	perSegmentSize	= sizeof(E) + m_padding;
275 		const size_t	size			= count * perSegmentSize;
276 
277 		bytes.resize(size);
278 
279 		if (m_padding == 0)
280 		{
281 			deMemcpy(&bytes.front(), &m_elements.front(), size);
282 		}
283 		else
284 		{
285 			deMemset(&bytes.front(), 0xff, size);
286 
287 			for (deUint32 elementIdx = 0; elementIdx < count; ++elementIdx)
288 				deMemcpy(&bytes[elementIdx * perSegmentSize], &m_elements[elementIdx], sizeof(E));
289 		}
290 	}
291 
getPackedBytes(std::vector<deUint8> & bytes) const292 	void getPackedBytes (std::vector<deUint8>& bytes) const
293 	{
294 		const size_t size = m_elements.size() * sizeof(E);
295 
296 		bytes.resize(size);
297 
298 		deMemcpy(&bytes.front(), &m_elements.front(), size);
299 	}
300 
getByteSize(void) const301 	size_t getByteSize (void) const
302 	{
303 		return m_elements.size() * (sizeof(E) + m_padding);
304 	}
305 
306 private:
307 	std::vector<E>		m_elements;
308 	deUint32			m_padding;
309 };
310 
// Compile-time check that tcu::Vec4 is exactly four floats wide. Buffer<E> serialises its
// elements as sizeof(E) raw bytes, so this presumably protects the byte layout of Vec4Buffer.
DE_STATIC_ASSERT(sizeof(tcu::Vec4) == 4 * sizeof(float));

// Shorthand names for Buffer<E> instantiated with the element types used by the tests.
typedef Buffer<float>		Float32Buffer;
typedef Buffer<deFloat16>	Float16Buffer;
typedef Buffer<double>		Float64Buffer;
typedef Buffer<deInt64>		Int64Buffer;
typedef Buffer<deInt32>		Int32Buffer;
typedef Buffer<deInt16>		Int16Buffer;
typedef Buffer<deInt8>		Int8Buffer;
typedef Buffer<deUint8>		Uint8Buffer;
typedef Buffer<deUint16>	Uint16Buffer;
typedef Buffer<deUint32>	Uint32Buffer;
typedef Buffer<deUint64>	Uint64Buffer;
typedef Buffer<tcu::Vec4>	Vec4Buffer;

// Pointer to a function that receives a ProgramBinary and returns bool.
// NOTE(review): presumably true means the binary passed verification -- confirm at the call site.
typedef bool (*ComputeVerifyBinaryFunc) (const ProgramBinary&	binary);
327 
/*--------------------------------------------------------------------*//*!
 * \brief Specification for a compute shader.
 *
 * This struct bundles SPIR-V assembly code, input and expected output
 * together.
 *
 * The constructor only sets the defaults listed next to the members
 * below; members without a listed default are default-constructed.
 *//*--------------------------------------------------------------------*/
struct ComputeShaderSpec
{
	// SPIR-V assembly code of the shader.
	std::string								assembly;
	// Defaults to "main".
	std::string								entryPoint;
	// Input resources and expected-output resources.
	std::vector<Resource>					inputs;
	std::vector<Resource>					outputs;
	// NOTE(review): presumably the work group counts used for the dispatch -- confirm against the test runner.
	tcu::IVec3								numWorkGroups;
	SpecConstants							specConstants;
	// Defaults to DE_NULL.
	BufferSp								pushConstants;
	// NOTE(review): presumably names of required extensions -- confirm against the test runner.
	std::vector<std::string>				extensions;
	// Value-initialized by the constructor.
	VulkanFeatures							requestedVulkanFeatures;
	// Defaults to QP_TEST_RESULT_FAIL.
	qpTestResult							failResult;
	// Defaults to "Output doesn't match with expected".
	std::string								failMessage;
	// If null, a default verification will be performed by comparing the memory pointed to by outputAllocations
	// and the contents of expectedOutputs. Otherwise the function pointed to by verifyIO will be called.
	// If true is returned, then the test case is assumed to have passed, if false is returned, then the test
	// case is assumed to have failed. Exact meaning of failure can be customized with failResult.
	VerifyIOFunc							verifyIO;
	// Defaults to DE_NULL.
	ComputeVerifyBinaryFunc					verifyBinary;
	// Defaults to SPIRV_VERSION_1_0.
	SpirvVersion							spirvVersion;
	// Defaults to false.
	bool									coherentMemory;

											ComputeShaderSpec (void)
												: entryPoint					("main")
												, pushConstants					(DE_NULL)
												, requestedVulkanFeatures		()
												, failResult					(QP_TEST_RESULT_FAIL)
												, failMessage					("Output doesn't match with expected")
												, verifyIO						(DE_NULL)
												, verifyBinary					(DE_NULL)
												, spirvVersion					(SPIRV_VERSION_1_0)
												, coherentMemory				(false)
											{}
};
368 
369 /*--------------------------------------------------------------------*//*!
370  * \brief Helper functions for SPIR-V assembly shared by various tests
371  *//*--------------------------------------------------------------------*/
372 
// Declarations only; the definitions live outside this header.
// getComputeAsmShaderPreamble: all three arguments default to the empty string.
// NOTE(review): presumably they are assembly snippets added to the returned preamble -- confirm in the definition.
std::string getComputeAsmShaderPreamble				(const std::string& capabilities = "", const std::string& extensions = "", const std::string& exeModes = "");
const char* getComputeAsmShaderPreambleWithoutLocalSize         (void);
// blockStorageClass defaults to "Uniform".
std::string getComputeAsmCommonTypes				(std::string blockStorageClass = "Uniform");
const char*	getComputeAsmCommonInt64Types			(void);
377 
/*--------------------------------------------------------------------*//*!
 * Declares two uniform variables (indata, outdata) of type
 * "struct { float[] }". Depends on type "f32arr" (for "float[]").
 *//*--------------------------------------------------------------------*/
const char* getComputeAsmInputOutputBuffer			(void);
/*--------------------------------------------------------------------*//*!
 * Declares buffer type and layout for uniform variables indata and
 * outdata. Both of them are SSBOs bound to descriptor set 0.
 * indata is at binding point 0, while outdata is at binding point 1.
 *//*--------------------------------------------------------------------*/
const char* getComputeAsmInputOutputBufferTraits	(void);
389 
// Has the same parameter list as OpAtomicBuffer::compareWithRetvals; the first parameter is unnamed
// in this declaration.
// NOTE(review): presumably compares outputAllocs against expectedOutputs and reports through log -- confirm in the definition.
bool verifyOutput									(const std::vector<Resource>&,
													const std::vector<AllocationSp>&	outputAllocs,
													const std::vector<Resource>&		expectedOutputs,
													tcu::TestLog&						log);

// Creates compute-shader assembly by specializing a boilerplate StringTemplate with the given fragments.
// NOTE(review): the comment previously said "vertex-shader", which looks like a copy-paste slip given the function name -- confirm.

std::string makeComputeShaderAssembly(const std::map<std::string, std::string>& fragments);
398 
399 } // SpirVAssembly
400 } // vkt
401 
402 #endif // _VKTSPVASMCOMPUTESHADERTESTUTIL_HPP
403