1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program Tester Core
3  * ----------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Adjustable-precision floating point operations.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "tcuFloatFormat.hpp"
25 
26 #include "deMath.h"
27 #include "deUniquePtr.hpp"
28 
29 #include <sstream>
30 #include <iomanip>
31 #include <limits>
32 
33 namespace tcu
34 {
35 namespace
36 {
37 
chooseInterval(YesNoMaybe choice,const Interval & no,const Interval & yes)38 Interval chooseInterval(YesNoMaybe choice, const Interval& no, const Interval& yes)
39 {
40 	switch (choice)
41 	{
42 		case NO:	return no;
43 		case YES:	return yes;
44 		case MAYBE:	return no | yes;
45 		default:	DE_FATAL("Impossible case");
46 	}
47 
48 	return Interval();
49 }
50 
computeMaxValue(int maxExp,int fractionBits)51 double computeMaxValue (int maxExp, int fractionBits)
52 {
53 	return (deLdExp(1.0, maxExp) +
54 			deLdExp(double((1ull << fractionBits) - 1), maxExp - fractionBits));
55 }
56 
57 } // anonymous
58 
FloatFormat(int minExp,int maxExp,int fractionBits,bool exactPrecision,YesNoMaybe hasSubnormal_,YesNoMaybe hasInf_,YesNoMaybe hasNaN_)59 FloatFormat::FloatFormat (int			minExp,
60 						  int			maxExp,
61 						  int			fractionBits,
62 						  bool			exactPrecision,
63 						  YesNoMaybe	hasSubnormal_,
64 						  YesNoMaybe	hasInf_,
65 						  YesNoMaybe	hasNaN_)
66 	: m_minExp			(minExp)
67 	, m_maxExp			(maxExp)
68 	, m_fractionBits	(fractionBits)
69 	, m_hasSubnormal	(hasSubnormal_)
70 	, m_hasInf			(hasInf_)
71 	, m_hasNaN			(hasNaN_)
72 	, m_exactPrecision	(exactPrecision)
73 	, m_maxValue		(computeMaxValue(maxExp, fractionBits))
74 {
75 	DE_ASSERT(minExp <= maxExp);
76 }
77 
78 /*-------------------------------------------------------------------------
79  * On the definition of ULP
80  *
81  * The GLSL spec does not define ULP. However, it refers to IEEE 754, which
82  * (reportedly) uses Harrison's definition:
83  *
84  * ULP(x) is the distance between the closest floating point numbers
85  * a and be such that a <= x <= b and a != b
86  *
87  * Note that this means that when x = 2^n, ULP(x) = 2^(n-p-1), i.e. it is the
88  * distance to the next lowest float, not next highest.
89  *
90  * Furthermore, it is assumed that ULP is calculated relative to the exact
91  * value, not the approximation. This is because otherwise a less accurate
92  * approximation could be closer in ULPs, because its ULPs are bigger.
93  *
94  * For details, see "On the definition of ulp(x)" by Jean-Michel Muller
95  *
96  *-----------------------------------------------------------------------*/
97 
ulp(double x,double count) const98 double FloatFormat::ulp (double x, double count) const
99 {
100 	int				exp		= 0;
101 	const double	frac	= deFractExp(deAbs(x), &exp);
102 
103 	if (deIsNaN(frac))
104 		return TCU_NAN;
105 	else if (deIsInf(frac))
106 		return deLdExp(1.0, m_maxExp - m_fractionBits);
107 	else if (frac == 1.0)
108 	{
109 		// Harrison's ULP: choose distance to closest (i.e. next lower) at binade
110 		// boundary.
111 		--exp;
112 	}
113 	else if (frac == 0.0)
114 		exp = m_minExp;
115 
116 	// ULP cannot be lower than the smallest quantum.
117 	exp = de::max(exp, m_minExp);
118 
119 	{
120 		const double		oneULP	= deLdExp(1.0, exp - m_fractionBits);
121 		ScopedRoundingMode	ctx		(DE_ROUNDINGMODE_TO_POSITIVE_INF);
122 
123 		return oneULP * count;
124 	}
125 }
126 
127 //! Return the difference between the given nominal exponent and
128 //! the exponent of the lowest significand bit of the
129 //! representation of a number with this format.
130 //! For normal numbers this is the number of significand bits, but
131 //! for subnormals it is less and for values of exp where 2^exp is too
132 //! small to represent it is <0
exponentShift(int exp) const133 int FloatFormat::exponentShift (int exp) const
134 {
135 	return m_fractionBits - de::max(m_minExp - exp, 0);
136 }
137 
138 //! Return the number closest to `d` that is exactly representable with the
139 //! significand bits and minimum exponent of the floatformat. Round up if
140 //! `upward` is true, otherwise down.
round(double d,bool upward) const141 double FloatFormat::round (double d, bool upward) const
142 {
143 	int				exp			= 0;
144 	const double	frac		= deFractExp(d, &exp);
145 	const int		shift		= exponentShift(exp);
146 	const double	shiftFrac	= deLdExp(frac, shift);
147 	const double	roundFrac	= upward ? deCeil(shiftFrac) : deFloor(shiftFrac);
148 
149 	return deLdExp(roundFrac, exp - shift);
150 }
151 
152 //! Return the range of numbers that `d` might be converted to in the
153 //! floatformat, given its limitations with infinities, subnormals and maximum
154 //! exponent.
clampValue(double d) const155 Interval FloatFormat::clampValue (double d) const
156 {
157 	const double	rSign		= deSign(d);
158 	int				rExp		= 0;
159 
160 	DE_ASSERT(!deIsNaN(d));
161 
162 	deFractExp(d, &rExp);
163 	if (rExp < m_minExp)
164 		return chooseInterval(m_hasSubnormal, rSign * 0.0, d);
165 	else if (deIsInf(d) || rExp > m_maxExp)
166 		return chooseInterval(m_hasInf, rSign * getMaxValue(), rSign * TCU_INFINITY);
167 
168 	return Interval(d);
169 }
170 
171 //! Return the range of numbers that might be used with this format to
172 //! represent a number within `x`.
convert(const Interval & x) const173 Interval FloatFormat::convert (const Interval& x) const
174 {
175 	Interval ret;
176 	Interval tmp = x;
177 
178 	if (x.hasNaN())
179 	{
180 		// If NaN might be supported, NaN is a legal return value
181 		if (m_hasNaN != NO)
182 			ret |= TCU_NAN;
183 
184 		// If NaN might not be supported, any (non-NaN) value is legal,
185 		// _subject_ to clamping. Hence we modify tmp, not ret.
186 		if (m_hasNaN != YES)
187 			tmp = Interval::unbounded();
188 	}
189 
190 	// Round both bounds _inwards_ to closest representable values.
191 	if (!tmp.empty())
192 		ret |= clampValue(round(tmp.lo(), false)) | clampValue(round(tmp.hi(), true));
193 
194 	// If this format's precision is not exact, the (possibly out-of-bounds)
195 	// original value is also a possible result.
196 	if (!m_exactPrecision)
197 		ret |= x;
198 
199 	return ret;
200 }
201 
roundOut(double d,bool upward,bool roundUnderOverflow) const202 double FloatFormat::roundOut (double d, bool upward, bool roundUnderOverflow) const
203 {
204 	int	exp	= 0;
205 	deFractExp(d, &exp);
206 
207 	if (roundUnderOverflow && exp > m_maxExp && (upward == (d < 0.0)))
208 		return deSign(d) * getMaxValue();
209 	else
210 		return round(d, upward);
211 }
212 
213 //! Round output of an operation.
214 //! \param roundUnderOverflow Can +/-inf rounded to min/max representable;
215 //!							  should be false if any of operands was inf, true otherwise.
roundOut(const Interval & x,bool roundUnderOverflow) const216 Interval FloatFormat::roundOut (const Interval& x, bool roundUnderOverflow) const
217 {
218 	Interval ret = x.nan();
219 
220 	if (!x.empty())
221 		ret |= Interval(roundOut(x.lo(), false, roundUnderOverflow),
222 						roundOut(x.hi(), true, roundUnderOverflow));
223 
224 	return ret;
225 }
226 
floatToHex(double x) const227 std::string	FloatFormat::floatToHex	(double x) const
228 {
229 	if (deIsNaN(x))
230 		return "NaN";
231 	else if (deIsInf(x))
232 		return (x < 0.0 ? "-" : "+") + std::string("inf");
233 	else if (x == 0.0) // \todo [2014-03-27 lauri] Negative zero
234 		return "0.0";
235 
236 	int					exp			= 0;
237 	const double		frac		= deFractExp(deAbs(x), &exp);
238 	const int			shift		= exponentShift(exp);
239 	const deUint64		bits		= deUint64(deLdExp(frac, shift));
240 	const deUint64		whole		= bits >> m_fractionBits;
241 	const deUint64		fraction	= bits & ((deUint64(1) << m_fractionBits) - 1);
242 	const int			exponent	= exp + m_fractionBits - shift;
243 	const int			numDigits	= (m_fractionBits + 3) / 4;
244 	const deUint64		aligned		= fraction << (numDigits * 4 - m_fractionBits);
245 	std::ostringstream	oss;
246 
247 	oss << (x < 0 ? "-" : "")
248 		<< "0x" << whole << "."
249 		<< std::hex << std::setw(numDigits) << std::setfill('0') << aligned
250 		<< "p" << std::dec << std::setw(0) << exponent;
251 
252 	return oss.str();
253 }
254 
intervalToHex(const Interval & interval) const255 std::string FloatFormat::intervalToHex (const Interval& interval) const
256 {
257 	if (interval.empty())
258 		return interval.hasNaN() ? "{ NaN }" : "{}";
259 
260 	else if (interval.lo() == interval.hi())
261 		return (std::string(interval.hasNaN() ? "{ NaN, " : "{ ") +
262 				floatToHex(interval.lo()) + " }");
263 	else if (interval == Interval::unbounded(true))
264 		return "<any>";
265 
266 	return (std::string(interval.hasNaN() ? "{ NaN } | " : "") +
267 			"[" + floatToHex(interval.lo()) + ", " + floatToHex(interval.hi()) + "]");
268 }
269 
270 template <typename T>
nativeFormat(void)271 static FloatFormat nativeFormat (void)
272 {
273 	typedef std::numeric_limits<T> Limits;
274 
275 	DE_ASSERT(Limits::radix == 2);
276 
277 	return FloatFormat(Limits::min_exponent - 1,	// These have a built-in offset of one
278 					   Limits::max_exponent - 1,
279 					   Limits::digits - 1,			// don't count the hidden bit
280 					   Limits::has_denorm != std::denorm_absent,
281 					   Limits::has_infinity ? YES : NO,
282 					   Limits::has_quiet_NaN ? YES : NO,
283 					   ((Limits::has_denorm == std::denorm_present) ? YES :
284 						(Limits::has_denorm == std::denorm_absent) ? NO :
285 						MAYBE));
286 }
287 
nativeFloat(void)288 FloatFormat	FloatFormat::nativeFloat (void)
289 {
290 	return nativeFormat<float>();
291 }
292 
nativeDouble(void)293 FloatFormat	FloatFormat::nativeDouble (void)
294 {
295 	return nativeFormat<double>();
296 }
297 
NormalizedFormat(int fractionBits)298 NormalizedFormat::NormalizedFormat (int		fractionBits)
299 	: FloatFormat(0, 0, fractionBits, true, tcu::YES)
300 {
301 
302 }
303 
round(double d,bool upward) const304 double NormalizedFormat::round(double d, bool upward) const
305 {
306 	const int fractionBits = getFractionBits();
307 
308 	if (fractionBits <= 0)
309 		return d;
310 
311 	const int maxIntValue = (1 << fractionBits) - 1;
312 	const double value = d * maxIntValue;
313 	const double normValue = upward ? deCeil(value) : deFloor(value);
314 	return normValue / maxIntValue;
315 }
316 
ulp(double x,double count) const317 double NormalizedFormat::ulp(double x, double count) const
318 {
319 	(void) x;
320 
321 	const int maxIntValue = (1 << getFractionBits()) - 1;
322 	const double precision = 1.0 / maxIntValue;
323 	return precision * count;
324 }
325 
roundOut(double d,bool upward,bool roundUnderOverflow) const326 double NormalizedFormat::roundOut (double d, bool upward, bool roundUnderOverflow) const
327 {
328 	if (roundUnderOverflow && deAbs(d) > 1.0 && (upward == (d < 0.0)))
329 		return deSign(d);
330 	else
331 		return round(d, upward);
332 }
333 
334 namespace
335 {
336 
337 using std::string;
338 using std::ostringstream;
339 using de::MovePtr;
340 using de::UniquePtr;
341 
342 class Test
343 {
344 protected:
345 
Test(MovePtr<FloatFormat> fmt)346 							Test		(MovePtr<FloatFormat> fmt) : m_fmt(fmt) {}
p(int e) const347 	double					p			(int e) const				{ return deLdExp(1.0, e); }
348 	void					check		(const string&	expr,
349 										 double			result,
350 										 double			reference) const;
351 	void					testULP		(double arg, double ref) const;
352 	void					testRound	(double arg, double refDown, double refUp) const;
353 
354 	UniquePtr<FloatFormat>	m_fmt;
355 };
356 
check(const string & expr,double result,double reference) const357 void Test::check (const string& expr, double result, double reference) const
358 {
359 	if (result != reference)
360 	{
361 		ostringstream oss;
362 		oss << expr << " returned " << result << ", expected " << reference;
363 		TCU_FAIL(oss.str().c_str());
364 	}
365 }
366 
testULP(double arg,double ref) const367 void Test::testULP (double arg, double ref) const
368 {
369 	ostringstream	oss;
370 
371 	oss << "ulp(" << arg << ")";
372 	check(oss.str(), m_fmt->ulp(arg), ref);
373 }
374 
testRound(double arg,double refDown,double refUp) const375 void Test::testRound (double arg, double refDown, double refUp) const
376 {
377 	{
378 		ostringstream oss;
379 		oss << "round(" << arg << ", false)";
380 		check(oss.str(), m_fmt->round(arg, false), refDown);
381 	}
382 	{
383 		ostringstream oss;
384 		oss << "round(" << arg << ", true)";
385 		check(oss.str(), m_fmt->round(arg, true), refUp);
386 	}
387 }
388 
389 class TestBinary32 : public Test
390 {
391 public:
TestBinary32(void)392 			TestBinary32 (void)
393 				: Test (MovePtr<FloatFormat>(new FloatFormat(-126, 127, 23, true))) {}
394 
395 	void	runTest	(void) const;
396 };
397 
runTest(void) const398 void TestBinary32::runTest (void) const
399 {
400 	testULP(p(0),				p(-24));
401 	testULP(p(0) + p(-23),		p(-23));
402 	testULP(p(-124),			p(-148));
403 	testULP(p(-125),			p(-149));
404 	testULP(p(-125) + p(-140),	p(-148));
405 	testULP(p(-126),			p(-149));
406 	testULP(p(-130),			p(-149));
407 
408 	testRound(p(0) + p(-20) + p(-40),	p(0) + p(-20),		p(0) + p(-20) + p(-23));
409 	testRound(p(-126) - p(-150),		p(-126) - p(-149),	p(-126));
410 
411 	TCU_CHECK(m_fmt->floatToHex(p(0)) == "0x1.000000p0");
412 	TCU_CHECK(m_fmt->floatToHex(p(8) + p(-4)) == "0x1.001000p8");
413 	TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126");
414 	TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126");
415 	TCU_CHECK(m_fmt->floatToHex(p(-126) + p(-125)) == "0x1.800000p-125");
416 }
417 
418 } // anonymous
419 
FloatFormat_selfTest(void)420 void FloatFormat_selfTest (void)
421 {
422 	TestBinary32	test32;
423 	test32.runTest();
424 }
425 
426 } // tcu
427