1 #ifndef _TCUFLOAT_HPP
2 #define _TCUFLOAT_HPP
3 /*-------------------------------------------------------------------------
4  * drawElements Quality Program Tester Core
5  * ----------------------------------------
6  *
7  * Copyright 2014 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Reconfigurable floating-point value template.
24  *//*--------------------------------------------------------------------*/
25 
26 #include "tcuDefs.hpp"
27 
28 // For memcpy().
29 #include <string.h>
30 
31 namespace tcu
32 {
33 
/*--------------------------------------------------------------------*//*!
 * \brief Feature bits accepted by the Flags parameter of Float
 *
 * Each enumerator is a distinct single-bit mask, so values can be
 * combined with bitwise OR.
 *//*--------------------------------------------------------------------*/
enum FloatFlags
{
	FLOAT_HAS_SIGN			= 0x1,	//!< Format carries a sign bit directly above the exponent field.
	FLOAT_SUPPORT_DENORM	= 0x2	//!< Conversion may produce denormalized results instead of flushing underflow to zero.
};
39 
40 /*--------------------------------------------------------------------*//*!
41  * \brief Floating-point format template
42  *
43  * This template implements arbitrary floating-point handling. Template
44  * can be used for conversion between different formats and checking
45  * various properties of floating-point values.
46  *//*--------------------------------------------------------------------*/
47 template <typename StorageType_, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
48 class Float
49 {
50 public:
51 	typedef StorageType_ StorageType;
52 
53 	enum
54 	{
55 		EXPONENT_BITS	= ExponentBits,
56 		MANTISSA_BITS	= MantissaBits,
57 		EXPONENT_BIAS	= ExponentBias,
58 		FLAGS			= Flags,
59 	};
60 
61 							Float			(void);
62 	explicit				Float			(StorageType value);
63 	explicit				Float			(float v);
64 	explicit				Float			(double v);
65 
66 	template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
67 	static Float			convert			(const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& src);
68 
convert(const Float<StorageType,ExponentBits,MantissaBits,ExponentBias,Flags> & src)69 	static inline Float		convert			(const Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>& src) { return src; }
70 
71 	/*--------------------------------------------------------------------*//*!
72 	 * \brief Construct floating point value
73 	 * \param sign		Sign. Must be +1/-1
74 	 * \param exponent	Exponent in range [1-ExponentBias, ExponentBias+1]
75 	 * \param mantissa	Mantissa bits with implicit leading bit explicitly set
76 	 * \return The specified float
77 	 *
78 	 * This function constructs a floating point value from its inputs.
79 	 * The normally implicit leading bit of the mantissa must be explicitly set.
80 	 * The exponent normally used for zero/subnormals is an invalid input. Such
81 	 * values are specified with the leading mantissa bit of zero and the lowest
82 	 * normal exponent (1-ExponentBias). Additionally having both exponent and
83 	 * mantissa set to zero is a shorthand notation for the correctly signed
84 	 * floating point zero. Inf and NaN must be specified directly with an
85 	 * exponent of ExponentBias+1 and the appropriate mantissa (with leading
86 	 * bit set)
87 	 *//*--------------------------------------------------------------------*/
88 	static inline Float		construct		(int sign, int exponent, StorageType mantissa);
89 
90 	/*--------------------------------------------------------------------*//*!
91 	 * \brief Construct floating point value. Explicit version
92 	 * \param sign		Sign. Must be +1/-1
93 	 * \param exponent	Exponent in range [-ExponentBias, ExponentBias+1]
94 	 * \param mantissa	Mantissa bits
95 	 * \return The specified float
96 	 *
97 	 * This function constructs a floating point value from its inputs with
98 	 * minimal intervention.
99 	 * The sign is turned into a sign bit and the exponent bias is added.
100 	 * See IEEE-754 for additional information on the inputs and
101 	 * the encoding of special values.
102 	 *//*--------------------------------------------------------------------*/
103 	static Float			constructBits	(int sign, int exponent, StorageType mantissaBits);
104 
bits(void) const105 	StorageType				bits			(void) const	{ return m_value;															}
106 	float					asFloat			(void) const;
107 	double					asDouble		(void) const;
108 
signBit(void) const109 	inline int				signBit			(void) const	{ return (int)(m_value >> (ExponentBits+MantissaBits)) & 1;					}
exponentBits(void) const110 	inline StorageType		exponentBits	(void) const	{ return (m_value >> MantissaBits) & ((StorageType(1)<<ExponentBits)-1);	}
mantissaBits(void) const111 	inline StorageType		mantissaBits	(void) const	{ return m_value & ((StorageType(1)<<MantissaBits)-1);						}
112 
sign(void) const113 	inline int				sign			(void) const	{ return signBit() ? -1 : 1;																			}
exponent(void) const114 	inline int				exponent		(void) const	{ return isDenorm() ? 1	- ExponentBias : (int)exponentBits() - ExponentBias;							}
mantissa(void) const115 	inline StorageType		mantissa		(void) const	{ return isZero() || isDenorm() ? mantissaBits() : (mantissaBits() | (StorageType(1)<<MantissaBits));	}
116 
isInf(void) const117 	inline bool				isInf			(void) const	{ return exponentBits() == ((1<<ExponentBits)-1)	&& mantissaBits() == 0;	}
isNaN(void) const118 	inline bool				isNaN			(void) const	{ return exponentBits() == ((1<<ExponentBits)-1)	&& mantissaBits() != 0;	}
isZero(void) const119 	inline bool				isZero			(void) const	{ return exponentBits() == 0						&& mantissaBits() == 0;	}
isDenorm(void) const120 	inline bool				isDenorm		(void) const	{ return exponentBits() == 0						&& mantissaBits() != 0;	}
121 
122 	static Float			zero			(int sign);
123 	static Float			inf				(int sign);
124 	static Float			nan				(void);
125 
126 private:
127 	StorageType				m_value;
128 } DE_WARN_UNUSED_TYPE;
129 
130 // Common floating-point types.
131 typedef Float<deUint16,  5, 10,   15, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>	Float16;	//!< IEEE 754-2008 16-bit floating-point value
132 typedef Float<deUint32,  8, 23,  127, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>	Float32;	//!< IEEE 754 32-bit floating-point value
133 typedef Float<deUint64, 11, 52, 1023, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>	Float64;	//!< IEEE 754 64-bit floating-point value
134 
135 typedef Float<deUint16,  5, 10,   15, FLOAT_HAS_SIGN>	Float16Denormless;	//!< IEEE 754-2008 16-bit floating-point value without denormalized support
136 
137 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(void)138 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (void)
139 	: m_value(0)
140 {
141 }
142 
143 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(StorageType value)144 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (StorageType value)
145 	: m_value(value)
146 {
147 }
148 
149 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(float value)150 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (float value)
151 	: m_value(0)
152 {
153 	deUint32 u32;
154 	memcpy(&u32, &value, sizeof(deUint32));
155 	*this = convert(Float32(u32));
156 }
157 
158 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(double value)159 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (double value)
160 	: m_value(0)
161 {
162 	deUint64 u64;
163 	memcpy(&u64, &value, sizeof(deUint64));
164 	*this = convert(Float64(u64));
165 }
166 
167 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
asFloat(void) const168 inline float Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asFloat (void) const
169 {
170 	float		v;
171 	deUint32	u32		= Float32::convert(*this).bits();
172 	memcpy(&v, &u32, sizeof(deUint32));
173 	return v;
174 }
175 
176 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
asDouble(void) const177 inline double Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asDouble (void) const
178 {
179 	double		v;
180 	deUint64	u64		= Float64::convert(*this).bits();
181 	memcpy(&v, &u64, sizeof(deUint64));
182 	return v;
183 }
184 
185 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
zero(int sign)186 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::zero (int sign)
187 {
188 	DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
189 	return Float(StorageType((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)));
190 }
191 
192 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
inf(int sign)193 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::inf (int sign)
194 {
195 	DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
196 	return Float(StorageType(((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)) | (((1ull<<ExponentBits)-1) << MantissaBits)));
197 }
198 
199 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
nan(void)200 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::nan (void)
201 {
202 	return Float(StorageType((1ull<<(ExponentBits+MantissaBits))-1));
203 }
204 
205 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
206 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
construct(int sign,int exponent,StorageType mantissa)207 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct
208 	(int sign, int exponent, StorageType mantissa)
209 {
210 	// Repurpose this otherwise invalid input as a shorthand notation for zero (no need for caller to care about internal representation)
211 	const bool			isShorthandZero	= exponent == 0 && mantissa == 0;
212 
213 	// Handles the typical notation for zero (min exponent, mantissa 0). Note that the exponent usually used exponent (-ExponentBias) for zero/subnormals is not used.
214 	// Instead zero/subnormals have the (normally implicit) leading mantissa bit set to zero.
215 	const bool			isDenormOrZero	= (exponent == 1 - ExponentBias) && (mantissa >> MantissaBits == 0);
216 	const StorageType	s				= StorageType((StorageType(sign < 0 ? 1 : 0)) << (StorageType(ExponentBits+MantissaBits)));
217 	const StorageType	exp				= (isShorthandZero  || isDenormOrZero) ? StorageType(0) : StorageType(exponent + ExponentBias);
218 
219 	DE_ASSERT(sign == +1 || sign == -1);
220 	DE_ASSERT(isShorthandZero || isDenormOrZero || mantissa >> MantissaBits == 1);
221 	DE_ASSERT(exp >> ExponentBits == 0);
222 
223 	return Float(StorageType(s | (exp << MantissaBits) | (mantissa & ((StorageType(1)<<MantissaBits)-1))));
224 }
225 
226 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
227 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
constructBits(int sign,int exponent,StorageType mantissaBits)228 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::constructBits
229 	(int sign, int exponent, StorageType mantissaBits)
230 {
231 	const StorageType signBit		= static_cast<StorageType>(sign < 0 ? 1 : 0);
232 	const StorageType exponentBits	= static_cast<StorageType>(exponent + ExponentBias);
233 
234 	DE_ASSERT(sign == +1 || sign == -1 );
235 	DE_ASSERT(exponentBits >> ExponentBits == 0);
236 	DE_ASSERT(mantissaBits >> MantissaBits == 0);
237 
238 	return Float(StorageType((signBit << (ExponentBits+MantissaBits)) | (exponentBits << MantissaBits) | (mantissaBits)));
239 }
240 
241 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
242 template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
243 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
convert(const Float<OtherStorageType,OtherExponentBits,OtherMantissaBits,OtherExponentBias,OtherFlags> & other)244 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::convert
245 	(const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& other)
246 {
247 	if (!(Flags & FLOAT_HAS_SIGN) && other.sign() < 0)
248 	{
249 		// Negative number, truncate to zero.
250 		return zero(+1);
251 	}
252 	else if (other.isInf())
253 	{
254 		return inf(other.sign());
255 	}
256 	else if (other.isNaN())
257 	{
258 		return nan();
259 	}
260 	else if (other.isZero())
261 	{
262 		return zero(other.sign());
263 	}
264 	else
265 	{
266 		const int			eMin	= 1 - ExponentBias;
267 		const int			eMax	= ((1<<ExponentBits)-2) - ExponentBias;
268 
269 		const StorageType	s		= StorageType((StorageType(other.signBit())) << (StorageType(ExponentBits+MantissaBits))); // \note Not sign, but sign bit.
270 		int					e		= other.exponent();
271 		deUint64			m		= other.mantissa();
272 
273 		// Normalize denormalized values prior to conversion.
274 		while (!(m & (1ull<<OtherMantissaBits)))
275 		{
276 			m <<= 1;
277 			e  -= 1;
278 		}
279 
280 		if (e < eMin)
281 		{
282 			// Underflow.
283 			if ((Flags & FLOAT_SUPPORT_DENORM) && (eMin-e-1 <= MantissaBits))
284 			{
285 				// Shift and round (RTE).
286 				int			bitDiff	= (OtherMantissaBits-MantissaBits) + (eMin-e);
287 				deUint64	half	= (1ull << (bitDiff - 1)) - 1;
288 				deUint64	bias	= (m >> bitDiff) & 1;
289 
290 				return Float(StorageType(s | (m + half + bias) >> bitDiff));
291 			}
292 			else
293 				return zero(other.sign());
294 		}
295 		else
296 		{
297 			// Remove leading 1.
298 			m = m & ~(1ull<<OtherMantissaBits);
299 
300 			if (MantissaBits < OtherMantissaBits)
301 			{
302 				// Round mantissa (round to nearest even).
303 				int			bitDiff	= OtherMantissaBits-MantissaBits;
304 				deUint64	half	= (1ull << (bitDiff - 1)) - 1;
305 				deUint64	bias	= (m >> bitDiff) & 1;
306 
307 				m = (m + half + bias) >> bitDiff;
308 
309 				if (m & (1ull<<MantissaBits))
310 				{
311 					// Overflow in mantissa.
312 					m  = 0;
313 					e += 1;
314 				}
315 			}
316 			else
317 			{
318 				int bitDiff = MantissaBits-OtherMantissaBits;
319 				m = m << bitDiff;
320 			}
321 
322 			if (e > eMax)
323 			{
324 				// Overflow.
325 				return inf(other.sign());
326 			}
327 			else
328 			{
329 				DE_ASSERT(de::inRange(e, eMin, eMax));
330 				DE_ASSERT(((e + ExponentBias) & ~((1ull<<ExponentBits)-1)) == 0);
331 				DE_ASSERT((m & ~((1ull<<MantissaBits)-1)) == 0);
332 
333 				return Float(StorageType(s | (StorageType(e + ExponentBias) << MantissaBits) | m));
334 			}
335 		}
336 	}
337 }
338 
339 } // tcu
340 
341 #endif // _TCUFLOAT_HPP
342