1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/MathExtras.h"
24 #include <cstring>
25 #include <limits.h>
26
27 using namespace llvm;
28
29 /// A macro used to combine two fcCategory enums into one key which can be used
30 /// in a switch statement to classify how the interaction of two APFloat's
31 /// categories affects an operation.
32 ///
33 /// TODO: If clang source code is ever allowed to use constexpr in its own
34 /// codebase, change this into a static inline function.
35 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
36
37 /* Assumed in hexadecimal significand parsing, and conversion to
38 hexadecimal strings. */
39 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
40
41 namespace llvm {
42
43 /* Represents floating point arithmetic semantics. */
44 struct fltSemantics {
45 /* The largest E such that 2^E is representable; this matches the
46 definition of IEEE 754. */
47 APFloat::ExponentType maxExponent;
48
49 /* The smallest E such that 2^E is a normalized number; this
50 matches the definition of IEEE 754. */
51 APFloat::ExponentType minExponent;
52
53 /* Number of bits in the significand. This includes the integer
54 bit. */
55 unsigned int precision;
56
57 /* Number of bits actually used in the semantics. */
58 unsigned int sizeInBits;
59 };
60
61 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, 16 };
62 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, 32 };
63 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, 64 };
64 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, 128 };
65 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, 80 };
66 const fltSemantics APFloat::Bogus = { 0, 0, 0, 0 };
67
68 /* The PowerPC format consists of two doubles. It does not map cleanly
69 onto the usual format above. It is approximated using twice the
70 mantissa bits. Note that for exponents near the double minimum,
71 we no longer can represent the full 106 mantissa bits, so those
72 will be treated as denormal numbers.
73
74 FIXME: While this approximation is equivalent to what GCC uses for
75 compile-time arithmetic on PPC double-double numbers, it is not able
76 to represent all possible values held by a PPC double-double number,
77 for example: (long double) 1.0 + (long double) 0x1p-106
78 Should this be replaced by a full emulation of PPC double-double? */
79 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53, 128 };
80
81 /* A tight upper bound on number of parts required to hold the value
82 pow(5, power) is
83
84 power * 815 / (351 * integerPartWidth) + 1
85
86 However, whilst the result may require only this many parts,
87 because we are multiplying two values to get it, the
88 multiplication may require an extra part with the excess part
89 being zero (consider the trivial case of 1 * 1, tcFullMultiply
90 requires two parts to hold the single-part result). So we add an
91 extra one to guarantee enough space whilst multiplying. */
92 const unsigned int maxExponent = 16383;
93 const unsigned int maxPrecision = 113;
94 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
95 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
96 / (351 * integerPartWidth));
97 }
98
99 /* A bunch of private, handy routines. */
100
101 static inline unsigned int
partCountForBits(unsigned int bits)102 partCountForBits(unsigned int bits)
103 {
104 return ((bits) + integerPartWidth - 1) / integerPartWidth;
105 }
106
107 /* Returns 0U-9U. Return values >= 10U are not digits. */
108 static inline unsigned int
decDigitValue(unsigned int c)109 decDigitValue(unsigned int c)
110 {
111 return c - '0';
112 }
113
114 /* Return the value of a decimal exponent of the form
115 [+-]ddddddd.
116
117 If the exponent overflows, returns a large exponent with the
118 appropriate sign. */
119 static int
readExponent(StringRef::iterator begin,StringRef::iterator end)120 readExponent(StringRef::iterator begin, StringRef::iterator end)
121 {
122 bool isNegative;
123 unsigned int absExponent;
124 const unsigned int overlargeExponent = 24000; /* FIXME. */
125 StringRef::iterator p = begin;
126
127 assert(p != end && "Exponent has no digits");
128
129 isNegative = (*p == '-');
130 if (*p == '-' || *p == '+') {
131 p++;
132 assert(p != end && "Exponent has no digits");
133 }
134
135 absExponent = decDigitValue(*p++);
136 assert(absExponent < 10U && "Invalid character in exponent");
137
138 for (; p != end; ++p) {
139 unsigned int value;
140
141 value = decDigitValue(*p);
142 assert(value < 10U && "Invalid character in exponent");
143
144 value += absExponent * 10;
145 if (absExponent >= overlargeExponent) {
146 absExponent = overlargeExponent;
147 p = end; /* outwit assert below */
148 break;
149 }
150 absExponent = value;
151 }
152
153 assert(p == end && "Invalid exponent in exponent");
154
155 if (isNegative)
156 return -(int) absExponent;
157 else
158 return (int) absExponent;
159 }
160
161 /* This is ugly and needs cleaning up, but I don't immediately see
162 how whilst remaining safe. */
163 static int
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)164 totalExponent(StringRef::iterator p, StringRef::iterator end,
165 int exponentAdjustment)
166 {
167 int unsignedExponent;
168 bool negative, overflow;
169 int exponent = 0;
170
171 assert(p != end && "Exponent has no digits");
172
173 negative = *p == '-';
174 if (*p == '-' || *p == '+') {
175 p++;
176 assert(p != end && "Exponent has no digits");
177 }
178
179 unsignedExponent = 0;
180 overflow = false;
181 for (; p != end; ++p) {
182 unsigned int value;
183
184 value = decDigitValue(*p);
185 assert(value < 10U && "Invalid character in exponent");
186
187 unsignedExponent = unsignedExponent * 10 + value;
188 if (unsignedExponent > 32767) {
189 overflow = true;
190 break;
191 }
192 }
193
194 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
195 overflow = true;
196
197 if (!overflow) {
198 exponent = unsignedExponent;
199 if (negative)
200 exponent = -exponent;
201 exponent += exponentAdjustment;
202 if (exponent > 32767 || exponent < -32768)
203 overflow = true;
204 }
205
206 if (overflow)
207 exponent = negative ? -32768: 32767;
208
209 return exponent;
210 }
211
212 static StringRef::iterator
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)213 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
214 StringRef::iterator *dot)
215 {
216 StringRef::iterator p = begin;
217 *dot = end;
218 while (p != end && *p == '0')
219 p++;
220
221 if (p != end && *p == '.') {
222 *dot = p++;
223
224 assert(end - begin != 1 && "Significand has no digits");
225
226 while (p != end && *p == '0')
227 p++;
228 }
229
230 return p;
231 }
232
233 /* Given a normal decimal floating point number of the form
234
235 dddd.dddd[eE][+-]ddd
236
237 where the decimal point and exponent are optional, fill out the
238 structure D. Exponent is appropriate if the significand is
239 treated as an integer, and normalizedExponent if the significand
240 is taken to have the decimal point after a single leading
241 non-zero digit.
242
243 If the value is zero, V->firstSigDigit points to a non-digit, and
244 the return exponent is zero.
245 */
246 struct decimalInfo {
247 const char *firstSigDigit;
248 const char *lastSigDigit;
249 int exponent;
250 int normalizedExponent;
251 };
252
253 static void
interpretDecimal(StringRef::iterator begin,StringRef::iterator end,decimalInfo * D)254 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
255 decimalInfo *D)
256 {
257 StringRef::iterator dot = end;
258 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
259
260 D->firstSigDigit = p;
261 D->exponent = 0;
262 D->normalizedExponent = 0;
263
264 for (; p != end; ++p) {
265 if (*p == '.') {
266 assert(dot == end && "String contains multiple dots");
267 dot = p++;
268 if (p == end)
269 break;
270 }
271 if (decDigitValue(*p) >= 10U)
272 break;
273 }
274
275 if (p != end) {
276 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
277 assert(p != begin && "Significand has no digits");
278 assert((dot == end || p - begin != 1) && "Significand has no digits");
279
280 /* p points to the first non-digit in the string */
281 D->exponent = readExponent(p + 1, end);
282
283 /* Implied decimal point? */
284 if (dot == end)
285 dot = p;
286 }
287
288 /* If number is all zeroes accept any exponent. */
289 if (p != D->firstSigDigit) {
290 /* Drop insignificant trailing zeroes. */
291 if (p != begin) {
292 do
293 do
294 p--;
295 while (p != begin && *p == '0');
296 while (p != begin && *p == '.');
297 }
298
299 /* Adjust the exponents for any decimal point. */
300 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
301 D->normalizedExponent = (D->exponent +
302 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
303 - (dot > D->firstSigDigit && dot < p)));
304 }
305
306 D->lastSigDigit = p;
307 }
308
309 /* Return the trailing fraction of a hexadecimal number.
310 DIGITVALUE is the first hex digit of the fraction, P points to
311 the next digit. */
312 static lostFraction
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)313 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
314 unsigned int digitValue)
315 {
316 unsigned int hexDigit;
317
318 /* If the first trailing digit isn't 0 or 8 we can work out the
319 fraction immediately. */
320 if (digitValue > 8)
321 return lfMoreThanHalf;
322 else if (digitValue < 8 && digitValue > 0)
323 return lfLessThanHalf;
324
325 // Otherwise we need to find the first non-zero digit.
326 while (p != end && (*p == '0' || *p == '.'))
327 p++;
328
329 assert(p != end && "Invalid trailing hexadecimal fraction!");
330
331 hexDigit = hexDigitValue(*p);
332
333 /* If we ran off the end it is exactly zero or one-half, otherwise
334 a little more. */
335 if (hexDigit == -1U)
336 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
337 else
338 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
339 }
340
341 /* Return the fraction lost were a bignum truncated losing the least
342 significant BITS bits. */
343 static lostFraction
lostFractionThroughTruncation(const integerPart * parts,unsigned int partCount,unsigned int bits)344 lostFractionThroughTruncation(const integerPart *parts,
345 unsigned int partCount,
346 unsigned int bits)
347 {
348 unsigned int lsb;
349
350 lsb = APInt::tcLSB(parts, partCount);
351
352 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
353 if (bits <= lsb)
354 return lfExactlyZero;
355 if (bits == lsb + 1)
356 return lfExactlyHalf;
357 if (bits <= partCount * integerPartWidth &&
358 APInt::tcExtractBit(parts, bits - 1))
359 return lfMoreThanHalf;
360
361 return lfLessThanHalf;
362 }
363
364 /* Shift DST right BITS bits noting lost fraction. */
365 static lostFraction
shiftRight(integerPart * dst,unsigned int parts,unsigned int bits)366 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
367 {
368 lostFraction lost_fraction;
369
370 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
371
372 APInt::tcShiftRight(dst, parts, bits);
373
374 return lost_fraction;
375 }
376
377 /* Combine the effect of two lost fractions. */
378 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)379 combineLostFractions(lostFraction moreSignificant,
380 lostFraction lessSignificant)
381 {
382 if (lessSignificant != lfExactlyZero) {
383 if (moreSignificant == lfExactlyZero)
384 moreSignificant = lfLessThanHalf;
385 else if (moreSignificant == lfExactlyHalf)
386 moreSignificant = lfMoreThanHalf;
387 }
388
389 return moreSignificant;
390 }
391
392 /* The error from the true value, in half-ulps, on multiplying two
393 floating point numbers, which differ from the value they
394 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
395 than the returned value.
396
397 See "How to Read Floating Point Numbers Accurately" by William D
398 Clinger. */
399 static unsigned int
HUerrBound(bool inexactMultiply,unsigned int HUerr1,unsigned int HUerr2)400 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
401 {
402 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
403
404 if (HUerr1 + HUerr2 == 0)
405 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
406 else
407 return inexactMultiply + 2 * (HUerr1 + HUerr2);
408 }
409
410 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
411 when the least significant BITS are truncated. BITS cannot be
412 zero. */
413 static integerPart
ulpsFromBoundary(const integerPart * parts,unsigned int bits,bool isNearest)414 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
415 {
416 unsigned int count, partBits;
417 integerPart part, boundary;
418
419 assert(bits != 0);
420
421 bits--;
422 count = bits / integerPartWidth;
423 partBits = bits % integerPartWidth + 1;
424
425 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
426
427 if (isNearest)
428 boundary = (integerPart) 1 << (partBits - 1);
429 else
430 boundary = 0;
431
432 if (count == 0) {
433 if (part - boundary <= boundary - part)
434 return part - boundary;
435 else
436 return boundary - part;
437 }
438
439 if (part == boundary) {
440 while (--count)
441 if (parts[count])
442 return ~(integerPart) 0; /* A lot. */
443
444 return parts[0];
445 } else if (part == boundary - 1) {
446 while (--count)
447 if (~parts[count])
448 return ~(integerPart) 0; /* A lot. */
449
450 return -parts[0];
451 }
452
453 return ~(integerPart) 0; /* A lot. */
454 }
455
456 /* Place pow(5, power) in DST, and return the number of parts used.
457 DST must be at least one part larger than size of the answer. */
458 static unsigned int
powerOf5(integerPart * dst,unsigned int power)459 powerOf5(integerPart *dst, unsigned int power)
460 {
461 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
462 15625, 78125 };
463 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
464 pow5s[0] = 78125 * 5;
465
466 unsigned int partsCount[16] = { 1 };
467 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
468 unsigned int result;
469 assert(power <= maxExponent);
470
471 p1 = dst;
472 p2 = scratch;
473
474 *p1 = firstEightPowers[power & 7];
475 power >>= 3;
476
477 result = 1;
478 pow5 = pow5s;
479
480 for (unsigned int n = 0; power; power >>= 1, n++) {
481 unsigned int pc;
482
483 pc = partsCount[n];
484
485 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
486 if (pc == 0) {
487 pc = partsCount[n - 1];
488 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
489 pc *= 2;
490 if (pow5[pc - 1] == 0)
491 pc--;
492 partsCount[n] = pc;
493 }
494
495 if (power & 1) {
496 integerPart *tmp;
497
498 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
499 result += pc;
500 if (p2[result - 1] == 0)
501 result--;
502
503 /* Now result is in p1 with partsCount parts and p2 is scratch
504 space. */
505 tmp = p1;
506 p1 = p2;
507 p2 = tmp;
508 }
509
510 pow5 += pc;
511 }
512
513 if (p1 != dst)
514 APInt::tcAssign(dst, p1, result);
515
516 return result;
517 }
518
519 /* Zero at the end to avoid modular arithmetic when adding one; used
520 when rounding up during hexadecimal output. */
521 static const char hexDigitsLower[] = "0123456789abcdef0";
522 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
523 static const char infinityL[] = "infinity";
524 static const char infinityU[] = "INFINITY";
525 static const char NaNL[] = "nan";
526 static const char NaNU[] = "NAN";
527
528 /* Write out an integerPart in hexadecimal, starting with the most
529 significant nibble. Write out exactly COUNT hexdigits, return
530 COUNT. */
531 static unsigned int
partAsHex(char * dst,integerPart part,unsigned int count,const char * hexDigitChars)532 partAsHex (char *dst, integerPart part, unsigned int count,
533 const char *hexDigitChars)
534 {
535 unsigned int result = count;
536
537 assert(count != 0 && count <= integerPartWidth / 4);
538
539 part >>= (integerPartWidth - 4 * count);
540 while (count--) {
541 dst[count] = hexDigitChars[part & 0xf];
542 part >>= 4;
543 }
544
545 return result;
546 }
547
548 /* Write out an unsigned decimal integer. */
549 static char *
writeUnsignedDecimal(char * dst,unsigned int n)550 writeUnsignedDecimal (char *dst, unsigned int n)
551 {
552 char buff[40], *p;
553
554 p = buff;
555 do
556 *p++ = '0' + n % 10;
557 while (n /= 10);
558
559 do
560 *dst++ = *--p;
561 while (p != buff);
562
563 return dst;
564 }
565
566 /* Write out a signed decimal integer. */
567 static char *
writeSignedDecimal(char * dst,int value)568 writeSignedDecimal (char *dst, int value)
569 {
570 if (value < 0) {
571 *dst++ = '-';
572 dst = writeUnsignedDecimal(dst, -(unsigned) value);
573 } else
574 dst = writeUnsignedDecimal(dst, value);
575
576 return dst;
577 }
578
579 /* Constructors. */
580 void
initialize(const fltSemantics * ourSemantics)581 APFloat::initialize(const fltSemantics *ourSemantics)
582 {
583 unsigned int count;
584
585 semantics = ourSemantics;
586 count = partCount();
587 if (count > 1)
588 significand.parts = new integerPart[count];
589 }
590
591 void
freeSignificand()592 APFloat::freeSignificand()
593 {
594 if (needsCleanup())
595 delete [] significand.parts;
596 }
597
598 void
assign(const APFloat & rhs)599 APFloat::assign(const APFloat &rhs)
600 {
601 assert(semantics == rhs.semantics);
602
603 sign = rhs.sign;
604 category = rhs.category;
605 exponent = rhs.exponent;
606 if (isFiniteNonZero() || category == fcNaN)
607 copySignificand(rhs);
608 }
609
610 void
copySignificand(const APFloat & rhs)611 APFloat::copySignificand(const APFloat &rhs)
612 {
613 assert(isFiniteNonZero() || category == fcNaN);
614 assert(rhs.partCount() >= partCount());
615
616 APInt::tcAssign(significandParts(), rhs.significandParts(),
617 partCount());
618 }
619
620 /* Make this number a NaN, with an arbitrary but deterministic value
621 for the significand. If double or longer, this is a signalling NaN,
622 which may not be ideal. If float, this is QNaN(0). */
makeNaN(bool SNaN,bool Negative,const APInt * fill)623 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
624 {
625 category = fcNaN;
626 sign = Negative;
627
628 integerPart *significand = significandParts();
629 unsigned numParts = partCount();
630
631 // Set the significand bits to the fill.
632 if (!fill || fill->getNumWords() < numParts)
633 APInt::tcSet(significand, 0, numParts);
634 if (fill) {
635 APInt::tcAssign(significand, fill->getRawData(),
636 std::min(fill->getNumWords(), numParts));
637
638 // Zero out the excess bits of the significand.
639 unsigned bitsToPreserve = semantics->precision - 1;
640 unsigned part = bitsToPreserve / 64;
641 bitsToPreserve %= 64;
642 significand[part] &= ((1ULL << bitsToPreserve) - 1);
643 for (part++; part != numParts; ++part)
644 significand[part] = 0;
645 }
646
647 unsigned QNaNBit = semantics->precision - 2;
648
649 if (SNaN) {
650 // We always have to clear the QNaN bit to make it an SNaN.
651 APInt::tcClearBit(significand, QNaNBit);
652
653 // If there are no bits set in the payload, we have to set
654 // *something* to make it a NaN instead of an infinity;
655 // conventionally, this is the next bit down from the QNaN bit.
656 if (APInt::tcIsZero(significand, numParts))
657 APInt::tcSetBit(significand, QNaNBit - 1);
658 } else {
659 // We always have to set the QNaN bit to make it a QNaN.
660 APInt::tcSetBit(significand, QNaNBit);
661 }
662
663 // For x87 extended precision, we want to make a NaN, not a
664 // pseudo-NaN. Maybe we should expose the ability to make
665 // pseudo-NaNs?
666 if (semantics == &APFloat::x87DoubleExtended)
667 APInt::tcSetBit(significand, QNaNBit + 1);
668 }
669
makeNaN(const fltSemantics & Sem,bool SNaN,bool Negative,const APInt * fill)670 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
671 const APInt *fill) {
672 APFloat value(Sem, uninitialized);
673 value.makeNaN(SNaN, Negative, fill);
674 return value;
675 }
676
677 APFloat &
operator =(const APFloat & rhs)678 APFloat::operator=(const APFloat &rhs)
679 {
680 if (this != &rhs) {
681 if (semantics != rhs.semantics) {
682 freeSignificand();
683 initialize(rhs.semantics);
684 }
685 assign(rhs);
686 }
687
688 return *this;
689 }
690
691 APFloat &
operator =(APFloat && rhs)692 APFloat::operator=(APFloat &&rhs) {
693 freeSignificand();
694
695 semantics = rhs.semantics;
696 significand = rhs.significand;
697 exponent = rhs.exponent;
698 category = rhs.category;
699 sign = rhs.sign;
700
701 rhs.semantics = &Bogus;
702 return *this;
703 }
704
705 bool
isDenormal() const706 APFloat::isDenormal() const {
707 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
708 (APInt::tcExtractBit(significandParts(),
709 semantics->precision - 1) == 0);
710 }
711
712 bool
isSmallest() const713 APFloat::isSmallest() const {
714 // The smallest number by magnitude in our format will be the smallest
715 // denormal, i.e. the floating point number with exponent being minimum
716 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
717 return isFiniteNonZero() && exponent == semantics->minExponent &&
718 significandMSB() == 0;
719 }
720
isSignificandAllOnes() const721 bool APFloat::isSignificandAllOnes() const {
722 // Test if the significand excluding the integral bit is all ones. This allows
723 // us to test for binade boundaries.
724 const integerPart *Parts = significandParts();
725 const unsigned PartCount = partCount();
726 for (unsigned i = 0; i < PartCount - 1; i++)
727 if (~Parts[i])
728 return false;
729
730 // Set the unused high bits to all ones when we compare.
731 const unsigned NumHighBits =
732 PartCount*integerPartWidth - semantics->precision + 1;
733 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
734 "fill than integerPartWidth");
735 const integerPart HighBitFill =
736 ~integerPart(0) << (integerPartWidth - NumHighBits);
737 if (~(Parts[PartCount - 1] | HighBitFill))
738 return false;
739
740 return true;
741 }
742
isSignificandAllZeros() const743 bool APFloat::isSignificandAllZeros() const {
744 // Test if the significand excluding the integral bit is all zeros. This
745 // allows us to test for binade boundaries.
746 const integerPart *Parts = significandParts();
747 const unsigned PartCount = partCount();
748
749 for (unsigned i = 0; i < PartCount - 1; i++)
750 if (Parts[i])
751 return false;
752
753 const unsigned NumHighBits =
754 PartCount*integerPartWidth - semantics->precision + 1;
755 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
756 "clear than integerPartWidth");
757 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
758
759 if (Parts[PartCount - 1] & HighBitMask)
760 return false;
761
762 return true;
763 }
764
765 bool
isLargest() const766 APFloat::isLargest() const {
767 // The largest number by magnitude in our format will be the floating point
768 // number with maximum exponent and with significand that is all ones.
769 return isFiniteNonZero() && exponent == semantics->maxExponent
770 && isSignificandAllOnes();
771 }
772
773 bool
isInteger() const774 APFloat::isInteger() const {
775 // This could be made more efficient; I'm going for obviously correct.
776 if (!isFinite()) return false;
777 APFloat truncated = *this;
778 truncated.roundToIntegral(rmTowardZero);
779 return compare(truncated) == cmpEqual;
780 }
781
782 bool
bitwiseIsEqual(const APFloat & rhs) const783 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
784 if (this == &rhs)
785 return true;
786 if (semantics != rhs.semantics ||
787 category != rhs.category ||
788 sign != rhs.sign)
789 return false;
790 if (category==fcZero || category==fcInfinity)
791 return true;
792
793 if (isFiniteNonZero() && exponent != rhs.exponent)
794 return false;
795
796 return std::equal(significandParts(), significandParts() + partCount(),
797 rhs.significandParts());
798 }
799
APFloat(const fltSemantics & ourSemantics,integerPart value)800 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
801 initialize(&ourSemantics);
802 sign = 0;
803 category = fcNormal;
804 zeroSignificand();
805 exponent = ourSemantics.precision - 1;
806 significandParts()[0] = value;
807 normalize(rmNearestTiesToEven, lfExactlyZero);
808 }
809
APFloat(const fltSemantics & ourSemantics)810 APFloat::APFloat(const fltSemantics &ourSemantics) {
811 initialize(&ourSemantics);
812 category = fcZero;
813 sign = false;
814 }
815
APFloat(const fltSemantics & ourSemantics,uninitializedTag tag)816 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
817 // Allocates storage if necessary but does not initialize it.
818 initialize(&ourSemantics);
819 }
820
APFloat(const fltSemantics & ourSemantics,StringRef text)821 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
822 initialize(&ourSemantics);
823 convertFromString(text, rmNearestTiesToEven);
824 }
825
APFloat(const APFloat & rhs)826 APFloat::APFloat(const APFloat &rhs) {
827 initialize(rhs.semantics);
828 assign(rhs);
829 }
830
APFloat(APFloat && rhs)831 APFloat::APFloat(APFloat &&rhs) : semantics(&Bogus) {
832 *this = std::move(rhs);
833 }
834
~APFloat()835 APFloat::~APFloat()
836 {
837 freeSignificand();
838 }
839
840 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
Profile(FoldingSetNodeID & ID) const841 void APFloat::Profile(FoldingSetNodeID& ID) const {
842 ID.Add(bitcastToAPInt());
843 }
844
845 unsigned int
partCount() const846 APFloat::partCount() const
847 {
848 return partCountForBits(semantics->precision + 1);
849 }
850
851 unsigned int
semanticsPrecision(const fltSemantics & semantics)852 APFloat::semanticsPrecision(const fltSemantics &semantics)
853 {
854 return semantics.precision;
855 }
856 APFloat::ExponentType
semanticsMaxExponent(const fltSemantics & semantics)857 APFloat::semanticsMaxExponent(const fltSemantics &semantics)
858 {
859 return semantics.maxExponent;
860 }
861 APFloat::ExponentType
semanticsMinExponent(const fltSemantics & semantics)862 APFloat::semanticsMinExponent(const fltSemantics &semantics)
863 {
864 return semantics.minExponent;
865 }
866 unsigned int
semanticsSizeInBits(const fltSemantics & semantics)867 APFloat::semanticsSizeInBits(const fltSemantics &semantics)
868 {
869 return semantics.sizeInBits;
870 }
871
872 const integerPart *
significandParts() const873 APFloat::significandParts() const
874 {
875 return const_cast<APFloat *>(this)->significandParts();
876 }
877
878 integerPart *
significandParts()879 APFloat::significandParts()
880 {
881 if (partCount() > 1)
882 return significand.parts;
883 else
884 return &significand.part;
885 }
886
887 void
zeroSignificand()888 APFloat::zeroSignificand()
889 {
890 APInt::tcSet(significandParts(), 0, partCount());
891 }
892
893 /* Increment an fcNormal floating point number's significand. */
894 void
incrementSignificand()895 APFloat::incrementSignificand()
896 {
897 integerPart carry;
898
899 carry = APInt::tcIncrement(significandParts(), partCount());
900
901 /* Our callers should never cause us to overflow. */
902 assert(carry == 0);
903 (void)carry;
904 }
905
906 /* Add the significand of the RHS. Returns the carry flag. */
907 integerPart
addSignificand(const APFloat & rhs)908 APFloat::addSignificand(const APFloat &rhs)
909 {
910 integerPart *parts;
911
912 parts = significandParts();
913
914 assert(semantics == rhs.semantics);
915 assert(exponent == rhs.exponent);
916
917 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
918 }
919
920 /* Subtract the significand of the RHS with a borrow flag. Returns
921 the borrow flag. */
922 integerPart
subtractSignificand(const APFloat & rhs,integerPart borrow)923 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
924 {
925 integerPart *parts;
926
927 parts = significandParts();
928
929 assert(semantics == rhs.semantics);
930 assert(exponent == rhs.exponent);
931
932 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
933 partCount());
934 }
935
936 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
937 on to the full-precision result of the multiplication. Returns the
938 lost fraction. */
939 lostFraction
multiplySignificand(const APFloat & rhs,const APFloat * addend)940 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
941 {
942 unsigned int omsb; // One, not zero, based MSB.
943 unsigned int partsCount, newPartsCount, precision;
944 integerPart *lhsSignificand;
945 integerPart scratch[4];
946 integerPart *fullSignificand;
947 lostFraction lost_fraction;
948 bool ignored;
949
950 assert(semantics == rhs.semantics);
951
952 precision = semantics->precision;
953
954 // Allocate space for twice as many bits as the original significand, plus one
955 // extra bit for the addition to overflow into.
956 newPartsCount = partCountForBits(precision * 2 + 1);
957
958 if (newPartsCount > 4)
959 fullSignificand = new integerPart[newPartsCount];
960 else
961 fullSignificand = scratch;
962
963 lhsSignificand = significandParts();
964 partsCount = partCount();
965
966 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
967 rhs.significandParts(), partsCount, partsCount);
968
969 lost_fraction = lfExactlyZero;
970 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
971 exponent += rhs.exponent;
972
973 // Assume the operands involved in the multiplication are single-precision
974 // FP, and the two multiplicants are:
975 // *this = a23 . a22 ... a0 * 2^e1
976 // rhs = b23 . b22 ... b0 * 2^e2
977 // the result of multiplication is:
978 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
979 // Note that there are three significant bits at the left-hand side of the
980 // radix point: two for the multiplication, and an overflow bit for the
981 // addition (that will always be zero at this point). Move the radix point
982 // toward left by two bits, and adjust exponent accordingly.
983 exponent += 2;
984
985 if (addend && addend->isNonZero()) {
986 // The intermediate result of the multiplication has "2 * precision"
987 // signicant bit; adjust the addend to be consistent with mul result.
988 //
989 Significand savedSignificand = significand;
990 const fltSemantics *savedSemantics = semantics;
991 fltSemantics extendedSemantics;
992 opStatus status;
993 unsigned int extendedPrecision;
994
995 // Normalize our MSB to one below the top bit to allow for overflow.
996 extendedPrecision = 2 * precision + 1;
997 if (omsb != extendedPrecision - 1) {
998 assert(extendedPrecision > omsb);
999 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1000 (extendedPrecision - 1) - omsb);
1001 exponent -= (extendedPrecision - 1) - omsb;
1002 }
1003
1004 /* Create new semantics. */
1005 extendedSemantics = *semantics;
1006 extendedSemantics.precision = extendedPrecision;
1007
1008 if (newPartsCount == 1)
1009 significand.part = fullSignificand[0];
1010 else
1011 significand.parts = fullSignificand;
1012 semantics = &extendedSemantics;
1013
1014 APFloat extendedAddend(*addend);
1015 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1016 assert(status == opOK);
1017 (void)status;
1018
1019 // Shift the significand of the addend right by one bit. This guarantees
1020 // that the high bit of the significand is zero (same as fullSignificand),
1021 // so the addition will overflow (if it does overflow at all) into the top bit.
1022 lost_fraction = extendedAddend.shiftSignificandRight(1);
1023 assert(lost_fraction == lfExactlyZero &&
1024 "Lost precision while shifting addend for fused-multiply-add.");
1025
1026 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1027
1028 /* Restore our state. */
1029 if (newPartsCount == 1)
1030 fullSignificand[0] = significand.part;
1031 significand = savedSignificand;
1032 semantics = savedSemantics;
1033
1034 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1035 }
1036
1037 // Convert the result having "2 * precision" significant-bits back to the one
1038 // having "precision" significant-bits. First, move the radix point from
1039 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1040 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1041 exponent -= precision + 1;
1042
1043 // In case MSB resides at the left-hand side of radix point, shift the
1044 // mantissa right by some amount to make sure the MSB reside right before
1045 // the radix point (i.e. "MSB . rest-significant-bits").
1046 //
1047 // Note that the result is not normalized when "omsb < precision". So, the
1048 // caller needs to call APFloat::normalize() if normalized value is expected.
1049 if (omsb > precision) {
1050 unsigned int bits, significantParts;
1051 lostFraction lf;
1052
1053 bits = omsb - precision;
1054 significantParts = partCountForBits(omsb);
1055 lf = shiftRight(fullSignificand, significantParts, bits);
1056 lost_fraction = combineLostFractions(lf, lost_fraction);
1057 exponent += bits;
1058 }
1059
1060 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1061
1062 if (newPartsCount > 4)
1063 delete [] fullSignificand;
1064
1065 return lost_fraction;
1066 }
1067
1068 /* Multiply the significands of LHS and RHS to DST. */
1069 lostFraction
divideSignificand(const APFloat & rhs)1070 APFloat::divideSignificand(const APFloat &rhs)
1071 {
1072 unsigned int bit, i, partsCount;
1073 const integerPart *rhsSignificand;
1074 integerPart *lhsSignificand, *dividend, *divisor;
1075 integerPart scratch[4];
1076 lostFraction lost_fraction;
1077
1078 assert(semantics == rhs.semantics);
1079
1080 lhsSignificand = significandParts();
1081 rhsSignificand = rhs.significandParts();
1082 partsCount = partCount();
1083
1084 if (partsCount > 2)
1085 dividend = new integerPart[partsCount * 2];
1086 else
1087 dividend = scratch;
1088
1089 divisor = dividend + partsCount;
1090
1091 /* Copy the dividend and divisor as they will be modified in-place. */
1092 for (i = 0; i < partsCount; i++) {
1093 dividend[i] = lhsSignificand[i];
1094 divisor[i] = rhsSignificand[i];
1095 lhsSignificand[i] = 0;
1096 }
1097
1098 exponent -= rhs.exponent;
1099
1100 unsigned int precision = semantics->precision;
1101
1102 /* Normalize the divisor. */
1103 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1104 if (bit) {
1105 exponent += bit;
1106 APInt::tcShiftLeft(divisor, partsCount, bit);
1107 }
1108
1109 /* Normalize the dividend. */
1110 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1111 if (bit) {
1112 exponent -= bit;
1113 APInt::tcShiftLeft(dividend, partsCount, bit);
1114 }
1115
1116 /* Ensure the dividend >= divisor initially for the loop below.
1117 Incidentally, this means that the division loop below is
1118 guaranteed to set the integer bit to one. */
1119 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1120 exponent--;
1121 APInt::tcShiftLeft(dividend, partsCount, 1);
1122 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1123 }
1124
1125 /* Long division. */
1126 for (bit = precision; bit; bit -= 1) {
1127 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1128 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1129 APInt::tcSetBit(lhsSignificand, bit - 1);
1130 }
1131
1132 APInt::tcShiftLeft(dividend, partsCount, 1);
1133 }
1134
1135 /* Figure out the lost fraction. */
1136 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1137
1138 if (cmp > 0)
1139 lost_fraction = lfMoreThanHalf;
1140 else if (cmp == 0)
1141 lost_fraction = lfExactlyHalf;
1142 else if (APInt::tcIsZero(dividend, partsCount))
1143 lost_fraction = lfExactlyZero;
1144 else
1145 lost_fraction = lfLessThanHalf;
1146
1147 if (partsCount > 2)
1148 delete [] dividend;
1149
1150 return lost_fraction;
1151 }
1152
1153 unsigned int
significandMSB() const1154 APFloat::significandMSB() const
1155 {
1156 return APInt::tcMSB(significandParts(), partCount());
1157 }
1158
1159 unsigned int
significandLSB() const1160 APFloat::significandLSB() const
1161 {
1162 return APInt::tcLSB(significandParts(), partCount());
1163 }
1164
1165 /* Note that a zero result is NOT normalized to fcZero. */
1166 lostFraction
shiftSignificandRight(unsigned int bits)1167 APFloat::shiftSignificandRight(unsigned int bits)
1168 {
1169 /* Our exponent should not overflow. */
1170 assert((ExponentType) (exponent + bits) >= exponent);
1171
1172 exponent += bits;
1173
1174 return shiftRight(significandParts(), partCount(), bits);
1175 }
1176
1177 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1178 void
shiftSignificandLeft(unsigned int bits)1179 APFloat::shiftSignificandLeft(unsigned int bits)
1180 {
1181 assert(bits < semantics->precision);
1182
1183 if (bits) {
1184 unsigned int partsCount = partCount();
1185
1186 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1187 exponent -= bits;
1188
1189 assert(!APInt::tcIsZero(significandParts(), partsCount));
1190 }
1191 }
1192
1193 APFloat::cmpResult
compareAbsoluteValue(const APFloat & rhs) const1194 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1195 {
1196 int compare;
1197
1198 assert(semantics == rhs.semantics);
1199 assert(isFiniteNonZero());
1200 assert(rhs.isFiniteNonZero());
1201
1202 compare = exponent - rhs.exponent;
1203
1204 /* If exponents are equal, do an unsigned bignum comparison of the
1205 significands. */
1206 if (compare == 0)
1207 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1208 partCount());
1209
1210 if (compare > 0)
1211 return cmpGreaterThan;
1212 else if (compare < 0)
1213 return cmpLessThan;
1214 else
1215 return cmpEqual;
1216 }
1217
1218 /* Handle overflow. Sign is preserved. We either become infinity or
1219 the largest finite number. */
1220 APFloat::opStatus
handleOverflow(roundingMode rounding_mode)1221 APFloat::handleOverflow(roundingMode rounding_mode)
1222 {
1223 /* Infinity? */
1224 if (rounding_mode == rmNearestTiesToEven ||
1225 rounding_mode == rmNearestTiesToAway ||
1226 (rounding_mode == rmTowardPositive && !sign) ||
1227 (rounding_mode == rmTowardNegative && sign)) {
1228 category = fcInfinity;
1229 return (opStatus) (opOverflow | opInexact);
1230 }
1231
1232 /* Otherwise we become the largest finite number. */
1233 category = fcNormal;
1234 exponent = semantics->maxExponent;
1235 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1236 semantics->precision);
1237
1238 return opInexact;
1239 }
1240
1241 /* Returns TRUE if, when truncating the current number, with BIT the
1242 new LSB, with the given lost fraction and rounding mode, the result
1243 would need to be rounded away from zero (i.e., by increasing the
1244 signficand). This routine must work for fcZero of both signs, and
1245 fcNormal numbers. */
1246 bool
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1247 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1248 lostFraction lost_fraction,
1249 unsigned int bit) const
1250 {
1251 /* NaNs and infinities should not have lost fractions. */
1252 assert(isFiniteNonZero() || category == fcZero);
1253
1254 /* Current callers never pass this so we don't handle it. */
1255 assert(lost_fraction != lfExactlyZero);
1256
1257 switch (rounding_mode) {
1258 case rmNearestTiesToAway:
1259 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1260
1261 case rmNearestTiesToEven:
1262 if (lost_fraction == lfMoreThanHalf)
1263 return true;
1264
1265 /* Our zeroes don't have a significand to test. */
1266 if (lost_fraction == lfExactlyHalf && category != fcZero)
1267 return APInt::tcExtractBit(significandParts(), bit);
1268
1269 return false;
1270
1271 case rmTowardZero:
1272 return false;
1273
1274 case rmTowardPositive:
1275 return !sign;
1276
1277 case rmTowardNegative:
1278 return sign;
1279 }
1280 llvm_unreachable("Invalid rounding mode found");
1281 }
1282
1283 APFloat::opStatus
normalize(roundingMode rounding_mode,lostFraction lost_fraction)1284 APFloat::normalize(roundingMode rounding_mode,
1285 lostFraction lost_fraction)
1286 {
1287 unsigned int omsb; /* One, not zero, based MSB. */
1288 int exponentChange;
1289
1290 if (!isFiniteNonZero())
1291 return opOK;
1292
1293 /* Before rounding normalize the exponent of fcNormal numbers. */
1294 omsb = significandMSB() + 1;
1295
1296 if (omsb) {
1297 /* OMSB is numbered from 1. We want to place it in the integer
1298 bit numbered PRECISION if possible, with a compensating change in
1299 the exponent. */
1300 exponentChange = omsb - semantics->precision;
1301
1302 /* If the resulting exponent is too high, overflow according to
1303 the rounding mode. */
1304 if (exponent + exponentChange > semantics->maxExponent)
1305 return handleOverflow(rounding_mode);
1306
1307 /* Subnormal numbers have exponent minExponent, and their MSB
1308 is forced based on that. */
1309 if (exponent + exponentChange < semantics->minExponent)
1310 exponentChange = semantics->minExponent - exponent;
1311
1312 /* Shifting left is easy as we don't lose precision. */
1313 if (exponentChange < 0) {
1314 assert(lost_fraction == lfExactlyZero);
1315
1316 shiftSignificandLeft(-exponentChange);
1317
1318 return opOK;
1319 }
1320
1321 if (exponentChange > 0) {
1322 lostFraction lf;
1323
1324 /* Shift right and capture any new lost fraction. */
1325 lf = shiftSignificandRight(exponentChange);
1326
1327 lost_fraction = combineLostFractions(lf, lost_fraction);
1328
1329 /* Keep OMSB up-to-date. */
1330 if (omsb > (unsigned) exponentChange)
1331 omsb -= exponentChange;
1332 else
1333 omsb = 0;
1334 }
1335 }
1336
1337 /* Now round the number according to rounding_mode given the lost
1338 fraction. */
1339
1340 /* As specified in IEEE 754, since we do not trap we do not report
1341 underflow for exact results. */
1342 if (lost_fraction == lfExactlyZero) {
1343 /* Canonicalize zeroes. */
1344 if (omsb == 0)
1345 category = fcZero;
1346
1347 return opOK;
1348 }
1349
1350 /* Increment the significand if we're rounding away from zero. */
1351 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1352 if (omsb == 0)
1353 exponent = semantics->minExponent;
1354
1355 incrementSignificand();
1356 omsb = significandMSB() + 1;
1357
1358 /* Did the significand increment overflow? */
1359 if (omsb == (unsigned) semantics->precision + 1) {
1360 /* Renormalize by incrementing the exponent and shifting our
1361 significand right one. However if we already have the
1362 maximum exponent we overflow to infinity. */
1363 if (exponent == semantics->maxExponent) {
1364 category = fcInfinity;
1365
1366 return (opStatus) (opOverflow | opInexact);
1367 }
1368
1369 shiftSignificandRight(1);
1370
1371 return opInexact;
1372 }
1373 }
1374
1375 /* The normal case - we were and are not denormal, and any
1376 significand increment above didn't overflow. */
1377 if (omsb == semantics->precision)
1378 return opInexact;
1379
1380 /* We have a non-zero denormal. */
1381 assert(omsb < semantics->precision);
1382
1383 /* Canonicalize zeroes. */
1384 if (omsb == 0)
1385 category = fcZero;
1386
1387 /* The fcZero case is a denormal that underflowed to zero. */
1388 return (opStatus) (opUnderflow | opInexact);
1389 }
1390
1391 APFloat::opStatus
addOrSubtractSpecials(const APFloat & rhs,bool subtract)1392 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1393 {
1394 switch (PackCategoriesIntoKey(category, rhs.category)) {
1395 default:
1396 llvm_unreachable(nullptr);
1397
1398 case PackCategoriesIntoKey(fcNaN, fcZero):
1399 case PackCategoriesIntoKey(fcNaN, fcNormal):
1400 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1401 case PackCategoriesIntoKey(fcNaN, fcNaN):
1402 case PackCategoriesIntoKey(fcNormal, fcZero):
1403 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1404 case PackCategoriesIntoKey(fcInfinity, fcZero):
1405 return opOK;
1406
1407 case PackCategoriesIntoKey(fcZero, fcNaN):
1408 case PackCategoriesIntoKey(fcNormal, fcNaN):
1409 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1410 // We need to be sure to flip the sign here for subtraction because we
1411 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1412 sign = rhs.sign ^ subtract;
1413 category = fcNaN;
1414 copySignificand(rhs);
1415 return opOK;
1416
1417 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1418 case PackCategoriesIntoKey(fcZero, fcInfinity):
1419 category = fcInfinity;
1420 sign = rhs.sign ^ subtract;
1421 return opOK;
1422
1423 case PackCategoriesIntoKey(fcZero, fcNormal):
1424 assign(rhs);
1425 sign = rhs.sign ^ subtract;
1426 return opOK;
1427
1428 case PackCategoriesIntoKey(fcZero, fcZero):
1429 /* Sign depends on rounding mode; handled by caller. */
1430 return opOK;
1431
1432 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1433 /* Differently signed infinities can only be validly
1434 subtracted. */
1435 if (((sign ^ rhs.sign)!=0) != subtract) {
1436 makeNaN();
1437 return opInvalidOp;
1438 }
1439
1440 return opOK;
1441
1442 case PackCategoriesIntoKey(fcNormal, fcNormal):
1443 return opDivByZero;
1444 }
1445 }
1446
1447 /* Add or subtract two normal numbers. */
1448 lostFraction
addOrSubtractSignificand(const APFloat & rhs,bool subtract)1449 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1450 {
1451 integerPart carry;
1452 lostFraction lost_fraction;
1453 int bits;
1454
1455 /* Determine if the operation on the absolute values is effectively
1456 an addition or subtraction. */
1457 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1458
1459 /* Are we bigger exponent-wise than the RHS? */
1460 bits = exponent - rhs.exponent;
1461
1462 /* Subtraction is more subtle than one might naively expect. */
1463 if (subtract) {
1464 APFloat temp_rhs(rhs);
1465 bool reverse;
1466
1467 if (bits == 0) {
1468 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1469 lost_fraction = lfExactlyZero;
1470 } else if (bits > 0) {
1471 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1472 shiftSignificandLeft(1);
1473 reverse = false;
1474 } else {
1475 lost_fraction = shiftSignificandRight(-bits - 1);
1476 temp_rhs.shiftSignificandLeft(1);
1477 reverse = true;
1478 }
1479
1480 if (reverse) {
1481 carry = temp_rhs.subtractSignificand
1482 (*this, lost_fraction != lfExactlyZero);
1483 copySignificand(temp_rhs);
1484 sign = !sign;
1485 } else {
1486 carry = subtractSignificand
1487 (temp_rhs, lost_fraction != lfExactlyZero);
1488 }
1489
1490 /* Invert the lost fraction - it was on the RHS and
1491 subtracted. */
1492 if (lost_fraction == lfLessThanHalf)
1493 lost_fraction = lfMoreThanHalf;
1494 else if (lost_fraction == lfMoreThanHalf)
1495 lost_fraction = lfLessThanHalf;
1496
1497 /* The code above is intended to ensure that no borrow is
1498 necessary. */
1499 assert(!carry);
1500 (void)carry;
1501 } else {
1502 if (bits > 0) {
1503 APFloat temp_rhs(rhs);
1504
1505 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1506 carry = addSignificand(temp_rhs);
1507 } else {
1508 lost_fraction = shiftSignificandRight(-bits);
1509 carry = addSignificand(rhs);
1510 }
1511
1512 /* We have a guard bit; generating a carry cannot happen. */
1513 assert(!carry);
1514 (void)carry;
1515 }
1516
1517 return lost_fraction;
1518 }
1519
1520 APFloat::opStatus
multiplySpecials(const APFloat & rhs)1521 APFloat::multiplySpecials(const APFloat &rhs)
1522 {
1523 switch (PackCategoriesIntoKey(category, rhs.category)) {
1524 default:
1525 llvm_unreachable(nullptr);
1526
1527 case PackCategoriesIntoKey(fcNaN, fcZero):
1528 case PackCategoriesIntoKey(fcNaN, fcNormal):
1529 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1530 case PackCategoriesIntoKey(fcNaN, fcNaN):
1531 sign = false;
1532 return opOK;
1533
1534 case PackCategoriesIntoKey(fcZero, fcNaN):
1535 case PackCategoriesIntoKey(fcNormal, fcNaN):
1536 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1537 sign = false;
1538 category = fcNaN;
1539 copySignificand(rhs);
1540 return opOK;
1541
1542 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1543 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1544 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1545 category = fcInfinity;
1546 return opOK;
1547
1548 case PackCategoriesIntoKey(fcZero, fcNormal):
1549 case PackCategoriesIntoKey(fcNormal, fcZero):
1550 case PackCategoriesIntoKey(fcZero, fcZero):
1551 category = fcZero;
1552 return opOK;
1553
1554 case PackCategoriesIntoKey(fcZero, fcInfinity):
1555 case PackCategoriesIntoKey(fcInfinity, fcZero):
1556 makeNaN();
1557 return opInvalidOp;
1558
1559 case PackCategoriesIntoKey(fcNormal, fcNormal):
1560 return opOK;
1561 }
1562 }
1563
1564 APFloat::opStatus
divideSpecials(const APFloat & rhs)1565 APFloat::divideSpecials(const APFloat &rhs)
1566 {
1567 switch (PackCategoriesIntoKey(category, rhs.category)) {
1568 default:
1569 llvm_unreachable(nullptr);
1570
1571 case PackCategoriesIntoKey(fcZero, fcNaN):
1572 case PackCategoriesIntoKey(fcNormal, fcNaN):
1573 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1574 category = fcNaN;
1575 copySignificand(rhs);
1576 case PackCategoriesIntoKey(fcNaN, fcZero):
1577 case PackCategoriesIntoKey(fcNaN, fcNormal):
1578 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1579 case PackCategoriesIntoKey(fcNaN, fcNaN):
1580 sign = false;
1581 case PackCategoriesIntoKey(fcInfinity, fcZero):
1582 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1583 case PackCategoriesIntoKey(fcZero, fcInfinity):
1584 case PackCategoriesIntoKey(fcZero, fcNormal):
1585 return opOK;
1586
1587 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1588 category = fcZero;
1589 return opOK;
1590
1591 case PackCategoriesIntoKey(fcNormal, fcZero):
1592 category = fcInfinity;
1593 return opDivByZero;
1594
1595 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1596 case PackCategoriesIntoKey(fcZero, fcZero):
1597 makeNaN();
1598 return opInvalidOp;
1599
1600 case PackCategoriesIntoKey(fcNormal, fcNormal):
1601 return opOK;
1602 }
1603 }
1604
1605 APFloat::opStatus
modSpecials(const APFloat & rhs)1606 APFloat::modSpecials(const APFloat &rhs)
1607 {
1608 switch (PackCategoriesIntoKey(category, rhs.category)) {
1609 default:
1610 llvm_unreachable(nullptr);
1611
1612 case PackCategoriesIntoKey(fcNaN, fcZero):
1613 case PackCategoriesIntoKey(fcNaN, fcNormal):
1614 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1615 case PackCategoriesIntoKey(fcNaN, fcNaN):
1616 case PackCategoriesIntoKey(fcZero, fcInfinity):
1617 case PackCategoriesIntoKey(fcZero, fcNormal):
1618 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1619 return opOK;
1620
1621 case PackCategoriesIntoKey(fcZero, fcNaN):
1622 case PackCategoriesIntoKey(fcNormal, fcNaN):
1623 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1624 sign = false;
1625 category = fcNaN;
1626 copySignificand(rhs);
1627 return opOK;
1628
1629 case PackCategoriesIntoKey(fcNormal, fcZero):
1630 case PackCategoriesIntoKey(fcInfinity, fcZero):
1631 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1632 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1633 case PackCategoriesIntoKey(fcZero, fcZero):
1634 makeNaN();
1635 return opInvalidOp;
1636
1637 case PackCategoriesIntoKey(fcNormal, fcNormal):
1638 return opOK;
1639 }
1640 }
1641
1642 /* Change sign. */
1643 void
changeSign()1644 APFloat::changeSign()
1645 {
1646 /* Look mummy, this one's easy. */
1647 sign = !sign;
1648 }
1649
1650 void
clearSign()1651 APFloat::clearSign()
1652 {
1653 /* So is this one. */
1654 sign = 0;
1655 }
1656
1657 void
copySign(const APFloat & rhs)1658 APFloat::copySign(const APFloat &rhs)
1659 {
1660 /* And this one. */
1661 sign = rhs.sign;
1662 }
1663
1664 /* Normalized addition or subtraction. */
1665 APFloat::opStatus
addOrSubtract(const APFloat & rhs,roundingMode rounding_mode,bool subtract)1666 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1667 bool subtract)
1668 {
1669 opStatus fs;
1670
1671 fs = addOrSubtractSpecials(rhs, subtract);
1672
1673 /* This return code means it was not a simple case. */
1674 if (fs == opDivByZero) {
1675 lostFraction lost_fraction;
1676
1677 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1678 fs = normalize(rounding_mode, lost_fraction);
1679
1680 /* Can only be zero if we lost no fraction. */
1681 assert(category != fcZero || lost_fraction == lfExactlyZero);
1682 }
1683
1684 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1685 positive zero unless rounding to minus infinity, except that
1686 adding two like-signed zeroes gives that zero. */
1687 if (category == fcZero) {
1688 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1689 sign = (rounding_mode == rmTowardNegative);
1690 }
1691
1692 return fs;
1693 }
1694
1695 /* Normalized addition. */
1696 APFloat::opStatus
add(const APFloat & rhs,roundingMode rounding_mode)1697 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1698 {
1699 return addOrSubtract(rhs, rounding_mode, false);
1700 }
1701
1702 /* Normalized subtraction. */
1703 APFloat::opStatus
subtract(const APFloat & rhs,roundingMode rounding_mode)1704 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1705 {
1706 return addOrSubtract(rhs, rounding_mode, true);
1707 }
1708
1709 /* Normalized multiply. */
1710 APFloat::opStatus
multiply(const APFloat & rhs,roundingMode rounding_mode)1711 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1712 {
1713 opStatus fs;
1714
1715 sign ^= rhs.sign;
1716 fs = multiplySpecials(rhs);
1717
1718 if (isFiniteNonZero()) {
1719 lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1720 fs = normalize(rounding_mode, lost_fraction);
1721 if (lost_fraction != lfExactlyZero)
1722 fs = (opStatus) (fs | opInexact);
1723 }
1724
1725 return fs;
1726 }
1727
1728 /* Normalized divide. */
1729 APFloat::opStatus
divide(const APFloat & rhs,roundingMode rounding_mode)1730 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1731 {
1732 opStatus fs;
1733
1734 sign ^= rhs.sign;
1735 fs = divideSpecials(rhs);
1736
1737 if (isFiniteNonZero()) {
1738 lostFraction lost_fraction = divideSignificand(rhs);
1739 fs = normalize(rounding_mode, lost_fraction);
1740 if (lost_fraction != lfExactlyZero)
1741 fs = (opStatus) (fs | opInexact);
1742 }
1743
1744 return fs;
1745 }
1746
1747 /* Normalized remainder. This is not currently correct in all cases. */
1748 APFloat::opStatus
remainder(const APFloat & rhs)1749 APFloat::remainder(const APFloat &rhs)
1750 {
1751 opStatus fs;
1752 APFloat V = *this;
1753 unsigned int origSign = sign;
1754
1755 fs = V.divide(rhs, rmNearestTiesToEven);
1756 if (fs == opDivByZero)
1757 return fs;
1758
1759 int parts = partCount();
1760 integerPart *x = new integerPart[parts];
1761 bool ignored;
1762 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1763 rmNearestTiesToEven, &ignored);
1764 if (fs==opInvalidOp)
1765 return fs;
1766
1767 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1768 rmNearestTiesToEven);
1769 assert(fs==opOK); // should always work
1770
1771 fs = V.multiply(rhs, rmNearestTiesToEven);
1772 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1773
1774 fs = subtract(V, rmNearestTiesToEven);
1775 assert(fs==opOK || fs==opInexact); // likewise
1776
1777 if (isZero())
1778 sign = origSign; // IEEE754 requires this
1779 delete[] x;
1780 return fs;
1781 }
1782
1783 /* Normalized llvm frem (C fmod).
1784 This is not currently correct in all cases. */
1785 APFloat::opStatus
mod(const APFloat & rhs)1786 APFloat::mod(const APFloat &rhs)
1787 {
1788 opStatus fs;
1789 fs = modSpecials(rhs);
1790
1791 if (isFiniteNonZero() && rhs.isFiniteNonZero()) {
1792 APFloat V = *this;
1793 unsigned int origSign = sign;
1794
1795 fs = V.divide(rhs, rmNearestTiesToEven);
1796 if (fs == opDivByZero)
1797 return fs;
1798
1799 int parts = partCount();
1800 integerPart *x = new integerPart[parts];
1801 bool ignored;
1802 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1803 rmTowardZero, &ignored);
1804 if (fs==opInvalidOp)
1805 return fs;
1806
1807 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1808 rmNearestTiesToEven);
1809 assert(fs==opOK); // should always work
1810
1811 fs = V.multiply(rhs, rmNearestTiesToEven);
1812 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1813
1814 fs = subtract(V, rmNearestTiesToEven);
1815 assert(fs==opOK || fs==opInexact); // likewise
1816
1817 if (isZero())
1818 sign = origSign; // IEEE754 requires this
1819 delete[] x;
1820 }
1821 return fs;
1822 }
1823
1824 /* Normalized fused-multiply-add. */
1825 APFloat::opStatus
fusedMultiplyAdd(const APFloat & multiplicand,const APFloat & addend,roundingMode rounding_mode)1826 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1827 const APFloat &addend,
1828 roundingMode rounding_mode)
1829 {
1830 opStatus fs;
1831
1832 /* Post-multiplication sign, before addition. */
1833 sign ^= multiplicand.sign;
1834
1835 /* If and only if all arguments are normal do we need to do an
1836 extended-precision calculation. */
1837 if (isFiniteNonZero() &&
1838 multiplicand.isFiniteNonZero() &&
1839 addend.isFinite()) {
1840 lostFraction lost_fraction;
1841
1842 lost_fraction = multiplySignificand(multiplicand, &addend);
1843 fs = normalize(rounding_mode, lost_fraction);
1844 if (lost_fraction != lfExactlyZero)
1845 fs = (opStatus) (fs | opInexact);
1846
1847 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1848 positive zero unless rounding to minus infinity, except that
1849 adding two like-signed zeroes gives that zero. */
1850 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1851 sign = (rounding_mode == rmTowardNegative);
1852 } else {
1853 fs = multiplySpecials(multiplicand);
1854
1855 /* FS can only be opOK or opInvalidOp. There is no more work
1856 to do in the latter case. The IEEE-754R standard says it is
1857 implementation-defined in this case whether, if ADDEND is a
1858 quiet NaN, we raise invalid op; this implementation does so.
1859
1860 If we need to do the addition we can do so with normal
1861 precision. */
1862 if (fs == opOK)
1863 fs = addOrSubtract(addend, rounding_mode, false);
1864 }
1865
1866 return fs;
1867 }
1868
1869 /* Rounding-mode corrrect round to integral value. */
roundToIntegral(roundingMode rounding_mode)1870 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1871 opStatus fs;
1872
1873 // If the exponent is large enough, we know that this value is already
1874 // integral, and the arithmetic below would potentially cause it to saturate
1875 // to +/-Inf. Bail out early instead.
1876 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1877 return opOK;
1878
1879 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1880 // precision of our format, and then subtract it back off again. The choice
1881 // of rounding modes for the addition/subtraction determines the rounding mode
1882 // for our integral rounding as well.
1883 // NOTE: When the input value is negative, we do subtraction followed by
1884 // addition instead.
1885 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1886 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1887 APFloat MagicConstant(*semantics);
1888 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1889 rmNearestTiesToEven);
1890 MagicConstant.copySign(*this);
1891
1892 if (fs != opOK)
1893 return fs;
1894
1895 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1896 bool inputSign = isNegative();
1897
1898 fs = add(MagicConstant, rounding_mode);
1899 if (fs != opOK && fs != opInexact)
1900 return fs;
1901
1902 fs = subtract(MagicConstant, rounding_mode);
1903
1904 // Restore the input sign.
1905 if (inputSign != isNegative())
1906 changeSign();
1907
1908 return fs;
1909 }
1910
1911
1912 /* Comparison requires normalized numbers. */
1913 APFloat::cmpResult
compare(const APFloat & rhs) const1914 APFloat::compare(const APFloat &rhs) const
1915 {
1916 cmpResult result;
1917
1918 assert(semantics == rhs.semantics);
1919
1920 switch (PackCategoriesIntoKey(category, rhs.category)) {
1921 default:
1922 llvm_unreachable(nullptr);
1923
1924 case PackCategoriesIntoKey(fcNaN, fcZero):
1925 case PackCategoriesIntoKey(fcNaN, fcNormal):
1926 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1927 case PackCategoriesIntoKey(fcNaN, fcNaN):
1928 case PackCategoriesIntoKey(fcZero, fcNaN):
1929 case PackCategoriesIntoKey(fcNormal, fcNaN):
1930 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1931 return cmpUnordered;
1932
1933 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1934 case PackCategoriesIntoKey(fcInfinity, fcZero):
1935 case PackCategoriesIntoKey(fcNormal, fcZero):
1936 if (sign)
1937 return cmpLessThan;
1938 else
1939 return cmpGreaterThan;
1940
1941 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1942 case PackCategoriesIntoKey(fcZero, fcInfinity):
1943 case PackCategoriesIntoKey(fcZero, fcNormal):
1944 if (rhs.sign)
1945 return cmpGreaterThan;
1946 else
1947 return cmpLessThan;
1948
1949 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1950 if (sign == rhs.sign)
1951 return cmpEqual;
1952 else if (sign)
1953 return cmpLessThan;
1954 else
1955 return cmpGreaterThan;
1956
1957 case PackCategoriesIntoKey(fcZero, fcZero):
1958 return cmpEqual;
1959
1960 case PackCategoriesIntoKey(fcNormal, fcNormal):
1961 break;
1962 }
1963
1964 /* Two normal numbers. Do they have the same sign? */
1965 if (sign != rhs.sign) {
1966 if (sign)
1967 result = cmpLessThan;
1968 else
1969 result = cmpGreaterThan;
1970 } else {
1971 /* Compare absolute values; invert result if negative. */
1972 result = compareAbsoluteValue(rhs);
1973
1974 if (sign) {
1975 if (result == cmpLessThan)
1976 result = cmpGreaterThan;
1977 else if (result == cmpGreaterThan)
1978 result = cmpLessThan;
1979 }
1980 }
1981
1982 return result;
1983 }
1984
1985 /// APFloat::convert - convert a value of one floating point type to another.
1986 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1987 /// records whether the transformation lost information, i.e. whether
1988 /// converting the result back to the original type will produce the
1989 /// original value (this is almost the same as return value==fsOK, but there
1990 /// are edge cases where this is not so).
1991
1992 APFloat::opStatus
convert(const fltSemantics & toSemantics,roundingMode rounding_mode,bool * losesInfo)1993 APFloat::convert(const fltSemantics &toSemantics,
1994 roundingMode rounding_mode, bool *losesInfo)
1995 {
1996 lostFraction lostFraction;
1997 unsigned int newPartCount, oldPartCount;
1998 opStatus fs;
1999 int shift;
2000 const fltSemantics &fromSemantics = *semantics;
2001
2002 lostFraction = lfExactlyZero;
2003 newPartCount = partCountForBits(toSemantics.precision + 1);
2004 oldPartCount = partCount();
2005 shift = toSemantics.precision - fromSemantics.precision;
2006
2007 bool X86SpecialNan = false;
2008 if (&fromSemantics == &APFloat::x87DoubleExtended &&
2009 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
2010 (!(*significandParts() & 0x8000000000000000ULL) ||
2011 !(*significandParts() & 0x4000000000000000ULL))) {
2012 // x86 has some unusual NaNs which cannot be represented in any other
2013 // format; note them here.
2014 X86SpecialNan = true;
2015 }
2016
2017 // If this is a truncation of a denormal number, and the target semantics
2018 // has larger exponent range than the source semantics (this can happen
2019 // when truncating from PowerPC double-double to double format), the
2020 // right shift could lose result mantissa bits. Adjust exponent instead
2021 // of performing excessive shift.
2022 if (shift < 0 && isFiniteNonZero()) {
2023 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
2024 if (exponent + exponentChange < toSemantics.minExponent)
2025 exponentChange = toSemantics.minExponent - exponent;
2026 if (exponentChange < shift)
2027 exponentChange = shift;
2028 if (exponentChange < 0) {
2029 shift -= exponentChange;
2030 exponent += exponentChange;
2031 }
2032 }
2033
2034 // If this is a truncation, perform the shift before we narrow the storage.
2035 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2036 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2037
2038 // Fix the storage so it can hold to new value.
2039 if (newPartCount > oldPartCount) {
2040 // The new type requires more storage; make it available.
2041 integerPart *newParts;
2042 newParts = new integerPart[newPartCount];
2043 APInt::tcSet(newParts, 0, newPartCount);
2044 if (isFiniteNonZero() || category==fcNaN)
2045 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2046 freeSignificand();
2047 significand.parts = newParts;
2048 } else if (newPartCount == 1 && oldPartCount != 1) {
2049 // Switch to built-in storage for a single part.
2050 integerPart newPart = 0;
2051 if (isFiniteNonZero() || category==fcNaN)
2052 newPart = significandParts()[0];
2053 freeSignificand();
2054 significand.part = newPart;
2055 }
2056
2057 // Now that we have the right storage, switch the semantics.
2058 semantics = &toSemantics;
2059
2060 // If this is an extension, perform the shift now that the storage is
2061 // available.
2062 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2063 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2064
2065 if (isFiniteNonZero()) {
2066 fs = normalize(rounding_mode, lostFraction);
2067 *losesInfo = (fs != opOK);
2068 } else if (category == fcNaN) {
2069 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2070
2071 // For x87 extended precision, we want to make a NaN, not a special NaN if
2072 // the input wasn't special either.
2073 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
2074 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2075
2076 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2077 // does not give you back the same bits. This is dubious, and we
2078 // don't currently do it. You're really supposed to get
2079 // an invalid operation signal at runtime, but nobody does that.
2080 fs = opOK;
2081 } else {
2082 *losesInfo = false;
2083 fs = opOK;
2084 }
2085
2086 return fs;
2087 }
2088
2089 /* Convert a floating point number to an integer according to the
2090 rounding mode. If the rounded integer value is out of range this
2091 returns an invalid operation exception and the contents of the
2092 destination parts are unspecified. If the rounded value is in
2093 range but the floating point number is not the exact integer, the C
2094 standard doesn't require an inexact exception to be raised. IEEE
2095 854 does require it so we do that.
2096
2097 Note that for conversions to integer type the C standard requires
2098 round-to-zero to always be used. */
2099 APFloat::opStatus
convertToSignExtendedInteger(integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2100 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
2101 bool isSigned,
2102 roundingMode rounding_mode,
2103 bool *isExact) const
2104 {
2105 lostFraction lost_fraction;
2106 const integerPart *src;
2107 unsigned int dstPartsCount, truncatedBits;
2108
2109 *isExact = false;
2110
2111 /* Handle the three special cases first. */
2112 if (category == fcInfinity || category == fcNaN)
2113 return opInvalidOp;
2114
2115 dstPartsCount = partCountForBits(width);
2116
2117 if (category == fcZero) {
2118 APInt::tcSet(parts, 0, dstPartsCount);
2119 // Negative zero can't be represented as an int.
2120 *isExact = !sign;
2121 return opOK;
2122 }
2123
2124 src = significandParts();
2125
2126 /* Step 1: place our absolute value, with any fraction truncated, in
2127 the destination. */
2128 if (exponent < 0) {
2129 /* Our absolute value is less than one; truncate everything. */
2130 APInt::tcSet(parts, 0, dstPartsCount);
2131 /* For exponent -1 the integer bit represents .5, look at that.
2132 For smaller exponents leftmost truncated bit is 0. */
2133 truncatedBits = semantics->precision -1U - exponent;
2134 } else {
2135 /* We want the most significant (exponent + 1) bits; the rest are
2136 truncated. */
2137 unsigned int bits = exponent + 1U;
2138
2139 /* Hopelessly large in magnitude? */
2140 if (bits > width)
2141 return opInvalidOp;
2142
2143 if (bits < semantics->precision) {
2144 /* We truncate (semantics->precision - bits) bits. */
2145 truncatedBits = semantics->precision - bits;
2146 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2147 } else {
2148 /* We want at least as many bits as are available. */
2149 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2150 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2151 truncatedBits = 0;
2152 }
2153 }
2154
2155 /* Step 2: work out any lost fraction, and increment the absolute
2156 value if we would round away from zero. */
2157 if (truncatedBits) {
2158 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2159 truncatedBits);
2160 if (lost_fraction != lfExactlyZero &&
2161 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2162 if (APInt::tcIncrement(parts, dstPartsCount))
2163 return opInvalidOp; /* Overflow. */
2164 }
2165 } else {
2166 lost_fraction = lfExactlyZero;
2167 }
2168
2169 /* Step 3: check if we fit in the destination. */
2170 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2171
2172 if (sign) {
2173 if (!isSigned) {
2174 /* Negative numbers cannot be represented as unsigned. */
2175 if (omsb != 0)
2176 return opInvalidOp;
2177 } else {
2178 /* It takes omsb bits to represent the unsigned integer value.
2179 We lose a bit for the sign, but care is needed as the
2180 maximally negative integer is a special case. */
2181 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2182 return opInvalidOp;
2183
2184 /* This case can happen because of rounding. */
2185 if (omsb > width)
2186 return opInvalidOp;
2187 }
2188
2189 APInt::tcNegate (parts, dstPartsCount);
2190 } else {
2191 if (omsb >= width + !isSigned)
2192 return opInvalidOp;
2193 }
2194
2195 if (lost_fraction == lfExactlyZero) {
2196 *isExact = true;
2197 return opOK;
2198 } else
2199 return opInexact;
2200 }
2201
2202 /* Same as convertToSignExtendedInteger, except we provide
2203 deterministic values in case of an invalid operation exception,
2204 namely zero for NaNs and the minimal or maximal value respectively
2205 for underflow or overflow.
2206 The *isExact output tells whether the result is exact, in the sense
2207 that converting it back to the original floating point type produces
2208 the original value. This is almost equivalent to result==opOK,
2209 except for negative zeroes.
2210 */
2211 APFloat::opStatus
convertToInteger(integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2212 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2213 bool isSigned,
2214 roundingMode rounding_mode, bool *isExact) const
2215 {
2216 opStatus fs;
2217
2218 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2219 isExact);
2220
2221 if (fs == opInvalidOp) {
2222 unsigned int bits, dstPartsCount;
2223
2224 dstPartsCount = partCountForBits(width);
2225
2226 if (category == fcNaN)
2227 bits = 0;
2228 else if (sign)
2229 bits = isSigned;
2230 else
2231 bits = width - isSigned;
2232
2233 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2234 if (sign && isSigned)
2235 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2236 }
2237
2238 return fs;
2239 }
2240
2241 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2242 an APSInt, whose initial bit-width and signed-ness are used to determine the
2243 precision of the conversion.
2244 */
2245 APFloat::opStatus
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const2246 APFloat::convertToInteger(APSInt &result,
2247 roundingMode rounding_mode, bool *isExact) const
2248 {
2249 unsigned bitWidth = result.getBitWidth();
2250 SmallVector<uint64_t, 4> parts(result.getNumWords());
2251 opStatus status = convertToInteger(
2252 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2253 // Keeps the original signed-ness.
2254 result = APInt(bitWidth, parts);
2255 return status;
2256 }
2257
2258 /* Convert an unsigned integer SRC to a floating point number,
2259 rounding according to ROUNDING_MODE. The sign of the floating
2260 point number is not modified. */
2261 APFloat::opStatus
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2262 APFloat::convertFromUnsignedParts(const integerPart *src,
2263 unsigned int srcCount,
2264 roundingMode rounding_mode)
2265 {
2266 unsigned int omsb, precision, dstCount;
2267 integerPart *dst;
2268 lostFraction lost_fraction;
2269
2270 category = fcNormal;
2271 omsb = APInt::tcMSB(src, srcCount) + 1;
2272 dst = significandParts();
2273 dstCount = partCount();
2274 precision = semantics->precision;
2275
2276 /* We want the most significant PRECISION bits of SRC. There may not
2277 be that many; extract what we can. */
2278 if (precision <= omsb) {
2279 exponent = omsb - 1;
2280 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2281 omsb - precision);
2282 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2283 } else {
2284 exponent = precision - 1;
2285 lost_fraction = lfExactlyZero;
2286 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2287 }
2288
2289 return normalize(rounding_mode, lost_fraction);
2290 }
2291
2292 APFloat::opStatus
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2293 APFloat::convertFromAPInt(const APInt &Val,
2294 bool isSigned,
2295 roundingMode rounding_mode)
2296 {
2297 unsigned int partCount = Val.getNumWords();
2298 APInt api = Val;
2299
2300 sign = false;
2301 if (isSigned && api.isNegative()) {
2302 sign = true;
2303 api = -api;
2304 }
2305
2306 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2307 }
2308
2309 /* Convert a two's complement integer SRC to a floating point number,
2310 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2311 integer is signed, in which case it must be sign-extended. */
2312 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2313 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2314 unsigned int srcCount,
2315 bool isSigned,
2316 roundingMode rounding_mode)
2317 {
2318 opStatus status;
2319
2320 if (isSigned &&
2321 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2322 integerPart *copy;
2323
2324 /* If we're signed and negative negate a copy. */
2325 sign = true;
2326 copy = new integerPart[srcCount];
2327 APInt::tcAssign(copy, src, srcCount);
2328 APInt::tcNegate(copy, srcCount);
2329 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2330 delete [] copy;
2331 } else {
2332 sign = false;
2333 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2334 }
2335
2336 return status;
2337 }
2338
2339 /* FIXME: should this just take a const APInt reference? */
2340 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2341 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2342 unsigned int width, bool isSigned,
2343 roundingMode rounding_mode)
2344 {
2345 unsigned int partCount = partCountForBits(width);
2346 APInt api = APInt(width, makeArrayRef(parts, partCount));
2347
2348 sign = false;
2349 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2350 sign = true;
2351 api = -api;
2352 }
2353
2354 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2355 }
2356
2357 APFloat::opStatus
convertFromHexadecimalString(StringRef s,roundingMode rounding_mode)2358 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2359 {
2360 lostFraction lost_fraction = lfExactlyZero;
2361
2362 category = fcNormal;
2363 zeroSignificand();
2364 exponent = 0;
2365
2366 integerPart *significand = significandParts();
2367 unsigned partsCount = partCount();
2368 unsigned bitPos = partsCount * integerPartWidth;
2369 bool computedTrailingFraction = false;
2370
2371 // Skip leading zeroes and any (hexa)decimal point.
2372 StringRef::iterator begin = s.begin();
2373 StringRef::iterator end = s.end();
2374 StringRef::iterator dot;
2375 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2376 StringRef::iterator firstSignificantDigit = p;
2377
2378 while (p != end) {
2379 integerPart hex_value;
2380
2381 if (*p == '.') {
2382 assert(dot == end && "String contains multiple dots");
2383 dot = p++;
2384 continue;
2385 }
2386
2387 hex_value = hexDigitValue(*p);
2388 if (hex_value == -1U)
2389 break;
2390
2391 p++;
2392
2393 // Store the number while we have space.
2394 if (bitPos) {
2395 bitPos -= 4;
2396 hex_value <<= bitPos % integerPartWidth;
2397 significand[bitPos / integerPartWidth] |= hex_value;
2398 } else if (!computedTrailingFraction) {
2399 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2400 computedTrailingFraction = true;
2401 }
2402 }
2403
2404 /* Hex floats require an exponent but not a hexadecimal point. */
2405 assert(p != end && "Hex strings require an exponent");
2406 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2407 assert(p != begin && "Significand has no digits");
2408 assert((dot == end || p - begin != 1) && "Significand has no digits");
2409
2410 /* Ignore the exponent if we are zero. */
2411 if (p != firstSignificantDigit) {
2412 int expAdjustment;
2413
2414 /* Implicit hexadecimal point? */
2415 if (dot == end)
2416 dot = p;
2417
2418 /* Calculate the exponent adjustment implicit in the number of
2419 significant digits. */
2420 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2421 if (expAdjustment < 0)
2422 expAdjustment++;
2423 expAdjustment = expAdjustment * 4 - 1;
2424
2425 /* Adjust for writing the significand starting at the most
2426 significant nibble. */
2427 expAdjustment += semantics->precision;
2428 expAdjustment -= partsCount * integerPartWidth;
2429
2430 /* Adjust for the given exponent. */
2431 exponent = totalExponent(p + 1, end, expAdjustment);
2432 }
2433
2434 return normalize(rounding_mode, lost_fraction);
2435 }
2436
2437 APFloat::opStatus
roundSignificandWithExponent(const integerPart * decSigParts,unsigned sigPartCount,int exp,roundingMode rounding_mode)2438 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2439 unsigned sigPartCount, int exp,
2440 roundingMode rounding_mode)
2441 {
2442 unsigned int parts, pow5PartCount;
2443 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2444 integerPart pow5Parts[maxPowerOfFiveParts];
2445 bool isNearest;
2446
2447 isNearest = (rounding_mode == rmNearestTiesToEven ||
2448 rounding_mode == rmNearestTiesToAway);
2449
2450 parts = partCountForBits(semantics->precision + 11);
2451
2452 /* Calculate pow(5, abs(exp)). */
2453 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2454
2455 for (;; parts *= 2) {
2456 opStatus sigStatus, powStatus;
2457 unsigned int excessPrecision, truncatedBits;
2458
2459 calcSemantics.precision = parts * integerPartWidth - 1;
2460 excessPrecision = calcSemantics.precision - semantics->precision;
2461 truncatedBits = excessPrecision;
2462
2463 APFloat decSig = APFloat::getZero(calcSemantics, sign);
2464 APFloat pow5(calcSemantics);
2465
2466 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2467 rmNearestTiesToEven);
2468 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2469 rmNearestTiesToEven);
2470 /* Add exp, as 10^n = 5^n * 2^n. */
2471 decSig.exponent += exp;
2472
2473 lostFraction calcLostFraction;
2474 integerPart HUerr, HUdistance;
2475 unsigned int powHUerr;
2476
2477 if (exp >= 0) {
2478 /* multiplySignificand leaves the precision-th bit set to 1. */
2479 calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2480 powHUerr = powStatus != opOK;
2481 } else {
2482 calcLostFraction = decSig.divideSignificand(pow5);
2483 /* Denormal numbers have less precision. */
2484 if (decSig.exponent < semantics->minExponent) {
2485 excessPrecision += (semantics->minExponent - decSig.exponent);
2486 truncatedBits = excessPrecision;
2487 if (excessPrecision > calcSemantics.precision)
2488 excessPrecision = calcSemantics.precision;
2489 }
2490 /* Extra half-ulp lost in reciprocal of exponent. */
2491 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2492 }
2493
2494 /* Both multiplySignificand and divideSignificand return the
2495 result with the integer bit set. */
2496 assert(APInt::tcExtractBit
2497 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2498
2499 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2500 powHUerr);
2501 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2502 excessPrecision, isNearest);
2503
2504 /* Are we guaranteed to round correctly if we truncate? */
2505 if (HUdistance >= HUerr) {
2506 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2507 calcSemantics.precision - excessPrecision,
2508 excessPrecision);
2509 /* Take the exponent of decSig. If we tcExtract-ed less bits
2510 above we must adjust our exponent to compensate for the
2511 implicit right shift. */
2512 exponent = (decSig.exponent + semantics->precision
2513 - (calcSemantics.precision - excessPrecision));
2514 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2515 decSig.partCount(),
2516 truncatedBits);
2517 return normalize(rounding_mode, calcLostFraction);
2518 }
2519 }
2520 }
2521
2522 APFloat::opStatus
convertFromDecimalString(StringRef str,roundingMode rounding_mode)2523 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2524 {
2525 decimalInfo D;
2526 opStatus fs;
2527
2528 /* Scan the text. */
2529 StringRef::iterator p = str.begin();
2530 interpretDecimal(p, str.end(), &D);
2531
2532 /* Handle the quick cases. First the case of no significant digits,
2533 i.e. zero, and then exponents that are obviously too large or too
2534 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2535 definitely overflows if
2536
2537 (exp - 1) * L >= maxExponent
2538
2539 and definitely underflows to zero where
2540
2541 (exp + 1) * L <= minExponent - precision
2542
2543 With integer arithmetic the tightest bounds for L are
2544
2545 93/28 < L < 196/59 [ numerator <= 256 ]
2546 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2547 */
2548
2549 // Test if we have a zero number allowing for strings with no null terminators
2550 // and zero decimals with non-zero exponents.
2551 //
2552 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2553 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2554 // be at most one dot. On the other hand, if we have a zero with a non-zero
2555 // exponent, then we know that D.firstSigDigit will be non-numeric.
2556 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2557 category = fcZero;
2558 fs = opOK;
2559
2560 /* Check whether the normalized exponent is high enough to overflow
2561 max during the log-rebasing in the max-exponent check below. */
2562 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2563 fs = handleOverflow(rounding_mode);
2564
2565 /* If it wasn't, then it also wasn't high enough to overflow max
2566 during the log-rebasing in the min-exponent check. Check that it
2567 won't overflow min in either check, then perform the min-exponent
2568 check. */
2569 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2570 (D.normalizedExponent + 1) * 28738 <=
2571 8651 * (semantics->minExponent - (int) semantics->precision)) {
2572 /* Underflow to zero and round. */
2573 category = fcNormal;
2574 zeroSignificand();
2575 fs = normalize(rounding_mode, lfLessThanHalf);
2576
2577 /* We can finally safely perform the max-exponent check. */
2578 } else if ((D.normalizedExponent - 1) * 42039
2579 >= 12655 * semantics->maxExponent) {
2580 /* Overflow and round. */
2581 fs = handleOverflow(rounding_mode);
2582 } else {
2583 integerPart *decSignificand;
2584 unsigned int partCount;
2585
2586 /* A tight upper bound on number of bits required to hold an
2587 N-digit decimal integer is N * 196 / 59. Allocate enough space
2588 to hold the full significand, and an extra part required by
2589 tcMultiplyPart. */
2590 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2591 partCount = partCountForBits(1 + 196 * partCount / 59);
2592 decSignificand = new integerPart[partCount + 1];
2593 partCount = 0;
2594
2595 /* Convert to binary efficiently - we do almost all multiplication
2596 in an integerPart. When this would overflow do we do a single
2597 bignum multiplication, and then revert again to multiplication
2598 in an integerPart. */
2599 do {
2600 integerPart decValue, val, multiplier;
2601
2602 val = 0;
2603 multiplier = 1;
2604
2605 do {
2606 if (*p == '.') {
2607 p++;
2608 if (p == str.end()) {
2609 break;
2610 }
2611 }
2612 decValue = decDigitValue(*p++);
2613 assert(decValue < 10U && "Invalid character in significand");
2614 multiplier *= 10;
2615 val = val * 10 + decValue;
2616 /* The maximum number that can be multiplied by ten with any
2617 digit added without overflowing an integerPart. */
2618 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2619
2620 /* Multiply out the current part. */
2621 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2622 partCount, partCount + 1, false);
2623
2624 /* If we used another part (likely but not guaranteed), increase
2625 the count. */
2626 if (decSignificand[partCount])
2627 partCount++;
2628 } while (p <= D.lastSigDigit);
2629
2630 category = fcNormal;
2631 fs = roundSignificandWithExponent(decSignificand, partCount,
2632 D.exponent, rounding_mode);
2633
2634 delete [] decSignificand;
2635 }
2636
2637 return fs;
2638 }
2639
2640 bool
convertFromStringSpecials(StringRef str)2641 APFloat::convertFromStringSpecials(StringRef str) {
2642 if (str.equals("inf") || str.equals("INFINITY")) {
2643 makeInf(false);
2644 return true;
2645 }
2646
2647 if (str.equals("-inf") || str.equals("-INFINITY")) {
2648 makeInf(true);
2649 return true;
2650 }
2651
2652 if (str.equals("nan") || str.equals("NaN")) {
2653 makeNaN(false, false);
2654 return true;
2655 }
2656
2657 if (str.equals("-nan") || str.equals("-NaN")) {
2658 makeNaN(false, true);
2659 return true;
2660 }
2661
2662 return false;
2663 }
2664
2665 APFloat::opStatus
convertFromString(StringRef str,roundingMode rounding_mode)2666 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2667 {
2668 assert(!str.empty() && "Invalid string length");
2669
2670 // Handle special cases.
2671 if (convertFromStringSpecials(str))
2672 return opOK;
2673
2674 /* Handle a leading minus sign. */
2675 StringRef::iterator p = str.begin();
2676 size_t slen = str.size();
2677 sign = *p == '-' ? 1 : 0;
2678 if (*p == '-' || *p == '+') {
2679 p++;
2680 slen--;
2681 assert(slen && "String has no digits");
2682 }
2683
2684 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2685 assert(slen - 2 && "Invalid string");
2686 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2687 rounding_mode);
2688 }
2689
2690 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2691 }
2692
2693 /* Write out a hexadecimal representation of the floating point value
2694 to DST, which must be of sufficient size, in the C99 form
2695 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2696 excluding the terminating NUL.
2697
2698 If UPPERCASE, the output is in upper case, otherwise in lower case.
2699
2700 HEXDIGITS digits appear altogether, rounding the value if
2701 necessary. If HEXDIGITS is 0, the minimal precision to display the
2702 number precisely is used instead. If nothing would appear after
2703 the decimal point it is suppressed.
2704
2705 The decimal exponent is always printed and has at least one digit.
2706 Zero values display an exponent of zero. Infinities and NaNs
2707 appear as "infinity" or "nan" respectively.
2708
2709 The above rules are as specified by C99. There is ambiguity about
2710 what the leading hexadecimal digit should be. This implementation
2711 uses whatever is necessary so that the exponent is displayed as
2712 stored. This implies the exponent will fall within the IEEE format
2713 range, and the leading hexadecimal digit will be 0 (for denormals),
2714 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2715 any other digits zero).
2716 */
2717 unsigned int
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2718 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2719 bool upperCase, roundingMode rounding_mode) const
2720 {
2721 char *p;
2722
2723 p = dst;
2724 if (sign)
2725 *dst++ = '-';
2726
2727 switch (category) {
2728 case fcInfinity:
2729 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2730 dst += sizeof infinityL - 1;
2731 break;
2732
2733 case fcNaN:
2734 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2735 dst += sizeof NaNU - 1;
2736 break;
2737
2738 case fcZero:
2739 *dst++ = '0';
2740 *dst++ = upperCase ? 'X': 'x';
2741 *dst++ = '0';
2742 if (hexDigits > 1) {
2743 *dst++ = '.';
2744 memset (dst, '0', hexDigits - 1);
2745 dst += hexDigits - 1;
2746 }
2747 *dst++ = upperCase ? 'P': 'p';
2748 *dst++ = '0';
2749 break;
2750
2751 case fcNormal:
2752 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2753 break;
2754 }
2755
2756 *dst = 0;
2757
2758 return static_cast<unsigned int>(dst - p);
2759 }
2760
2761 /* Does the hard work of outputting the correctly rounded hexadecimal
2762 form of a normal floating point number with the specified number of
2763 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2764 digits necessary to print the value precisely is output. */
2765 char *
convertNormalToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2766 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2767 bool upperCase,
2768 roundingMode rounding_mode) const
2769 {
2770 unsigned int count, valueBits, shift, partsCount, outputDigits;
2771 const char *hexDigitChars;
2772 const integerPart *significand;
2773 char *p;
2774 bool roundUp;
2775
2776 *dst++ = '0';
2777 *dst++ = upperCase ? 'X': 'x';
2778
2779 roundUp = false;
2780 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2781
2782 significand = significandParts();
2783 partsCount = partCount();
2784
2785 /* +3 because the first digit only uses the single integer bit, so
2786 we have 3 virtual zero most-significant-bits. */
2787 valueBits = semantics->precision + 3;
2788 shift = integerPartWidth - valueBits % integerPartWidth;
2789
2790 /* The natural number of digits required ignoring trailing
2791 insignificant zeroes. */
2792 outputDigits = (valueBits - significandLSB () + 3) / 4;
2793
2794 /* hexDigits of zero means use the required number for the
2795 precision. Otherwise, see if we are truncating. If we are,
2796 find out if we need to round away from zero. */
2797 if (hexDigits) {
2798 if (hexDigits < outputDigits) {
2799 /* We are dropping non-zero bits, so need to check how to round.
2800 "bits" is the number of dropped bits. */
2801 unsigned int bits;
2802 lostFraction fraction;
2803
2804 bits = valueBits - hexDigits * 4;
2805 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2806 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2807 }
2808 outputDigits = hexDigits;
2809 }
2810
2811 /* Write the digits consecutively, and start writing in the location
2812 of the hexadecimal point. We move the most significant digit
2813 left and add the hexadecimal point later. */
2814 p = ++dst;
2815
2816 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2817
2818 while (outputDigits && count) {
2819 integerPart part;
2820
2821 /* Put the most significant integerPartWidth bits in "part". */
2822 if (--count == partsCount)
2823 part = 0; /* An imaginary higher zero part. */
2824 else
2825 part = significand[count] << shift;
2826
2827 if (count && shift)
2828 part |= significand[count - 1] >> (integerPartWidth - shift);
2829
2830 /* Convert as much of "part" to hexdigits as we can. */
2831 unsigned int curDigits = integerPartWidth / 4;
2832
2833 if (curDigits > outputDigits)
2834 curDigits = outputDigits;
2835 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2836 outputDigits -= curDigits;
2837 }
2838
2839 if (roundUp) {
2840 char *q = dst;
2841
2842 /* Note that hexDigitChars has a trailing '0'. */
2843 do {
2844 q--;
2845 *q = hexDigitChars[hexDigitValue (*q) + 1];
2846 } while (*q == '0');
2847 assert(q >= p);
2848 } else {
2849 /* Add trailing zeroes. */
2850 memset (dst, '0', outputDigits);
2851 dst += outputDigits;
2852 }
2853
2854 /* Move the most significant digit to before the point, and if there
2855 is something after the decimal point add it. This must come
2856 after rounding above. */
2857 p[-1] = p[0];
2858 if (dst -1 == p)
2859 dst--;
2860 else
2861 p[0] = '.';
2862
2863 /* Finally output the exponent. */
2864 *dst++ = upperCase ? 'P': 'p';
2865
2866 return writeSignedDecimal (dst, exponent);
2867 }
2868
hash_value(const APFloat & Arg)2869 hash_code llvm::hash_value(const APFloat &Arg) {
2870 if (!Arg.isFiniteNonZero())
2871 return hash_combine((uint8_t)Arg.category,
2872 // NaN has no sign, fix it at zero.
2873 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2874 Arg.semantics->precision);
2875
2876 // Normal floats need their exponent and significand hashed.
2877 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2878 Arg.semantics->precision, Arg.exponent,
2879 hash_combine_range(
2880 Arg.significandParts(),
2881 Arg.significandParts() + Arg.partCount()));
2882 }
2883
2884 // Conversion from APFloat to/from host float/double. It may eventually be
2885 // possible to eliminate these and have everybody deal with APFloats, but that
2886 // will take a while. This approach will not easily extend to long double.
2887 // Current implementation requires integerPartWidth==64, which is correct at
2888 // the moment but could be made more general.
2889
2890 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2891 // the actual IEEE respresentations. We compensate for that here.
2892
2893 APInt
convertF80LongDoubleAPFloatToAPInt() const2894 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2895 {
2896 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2897 assert(partCount()==2);
2898
2899 uint64_t myexponent, mysignificand;
2900
2901 if (isFiniteNonZero()) {
2902 myexponent = exponent+16383; //bias
2903 mysignificand = significandParts()[0];
2904 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2905 myexponent = 0; // denormal
2906 } else if (category==fcZero) {
2907 myexponent = 0;
2908 mysignificand = 0;
2909 } else if (category==fcInfinity) {
2910 myexponent = 0x7fff;
2911 mysignificand = 0x8000000000000000ULL;
2912 } else {
2913 assert(category == fcNaN && "Unknown category");
2914 myexponent = 0x7fff;
2915 mysignificand = significandParts()[0];
2916 }
2917
2918 uint64_t words[2];
2919 words[0] = mysignificand;
2920 words[1] = ((uint64_t)(sign & 1) << 15) |
2921 (myexponent & 0x7fffLL);
2922 return APInt(80, words);
2923 }
2924
2925 APInt
convertPPCDoubleDoubleAPFloatToAPInt() const2926 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2927 {
2928 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2929 assert(partCount()==2);
2930
2931 uint64_t words[2];
2932 opStatus fs;
2933 bool losesInfo;
2934
2935 // Convert number to double. To avoid spurious underflows, we re-
2936 // normalize against the "double" minExponent first, and only *then*
2937 // truncate the mantissa. The result of that second conversion
2938 // may be inexact, but should never underflow.
2939 // Declare fltSemantics before APFloat that uses it (and
2940 // saves pointer to it) to ensure correct destruction order.
2941 fltSemantics extendedSemantics = *semantics;
2942 extendedSemantics.minExponent = IEEEdouble.minExponent;
2943 APFloat extended(*this);
2944 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2945 assert(fs == opOK && !losesInfo);
2946 (void)fs;
2947
2948 APFloat u(extended);
2949 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2950 assert(fs == opOK || fs == opInexact);
2951 (void)fs;
2952 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2953
2954 // If conversion was exact or resulted in a special case, we're done;
2955 // just set the second double to zero. Otherwise, re-convert back to
2956 // the extended format and compute the difference. This now should
2957 // convert exactly to double.
2958 if (u.isFiniteNonZero() && losesInfo) {
2959 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2960 assert(fs == opOK && !losesInfo);
2961 (void)fs;
2962
2963 APFloat v(extended);
2964 v.subtract(u, rmNearestTiesToEven);
2965 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2966 assert(fs == opOK && !losesInfo);
2967 (void)fs;
2968 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2969 } else {
2970 words[1] = 0;
2971 }
2972
2973 return APInt(128, words);
2974 }
2975
2976 APInt
convertQuadrupleAPFloatToAPInt() const2977 APFloat::convertQuadrupleAPFloatToAPInt() const
2978 {
2979 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2980 assert(partCount()==2);
2981
2982 uint64_t myexponent, mysignificand, mysignificand2;
2983
2984 if (isFiniteNonZero()) {
2985 myexponent = exponent+16383; //bias
2986 mysignificand = significandParts()[0];
2987 mysignificand2 = significandParts()[1];
2988 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2989 myexponent = 0; // denormal
2990 } else if (category==fcZero) {
2991 myexponent = 0;
2992 mysignificand = mysignificand2 = 0;
2993 } else if (category==fcInfinity) {
2994 myexponent = 0x7fff;
2995 mysignificand = mysignificand2 = 0;
2996 } else {
2997 assert(category == fcNaN && "Unknown category!");
2998 myexponent = 0x7fff;
2999 mysignificand = significandParts()[0];
3000 mysignificand2 = significandParts()[1];
3001 }
3002
3003 uint64_t words[2];
3004 words[0] = mysignificand;
3005 words[1] = ((uint64_t)(sign & 1) << 63) |
3006 ((myexponent & 0x7fff) << 48) |
3007 (mysignificand2 & 0xffffffffffffLL);
3008
3009 return APInt(128, words);
3010 }
3011
3012 APInt
convertDoubleAPFloatToAPInt() const3013 APFloat::convertDoubleAPFloatToAPInt() const
3014 {
3015 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
3016 assert(partCount()==1);
3017
3018 uint64_t myexponent, mysignificand;
3019
3020 if (isFiniteNonZero()) {
3021 myexponent = exponent+1023; //bias
3022 mysignificand = *significandParts();
3023 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
3024 myexponent = 0; // denormal
3025 } else if (category==fcZero) {
3026 myexponent = 0;
3027 mysignificand = 0;
3028 } else if (category==fcInfinity) {
3029 myexponent = 0x7ff;
3030 mysignificand = 0;
3031 } else {
3032 assert(category == fcNaN && "Unknown category!");
3033 myexponent = 0x7ff;
3034 mysignificand = *significandParts();
3035 }
3036
3037 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3038 ((myexponent & 0x7ff) << 52) |
3039 (mysignificand & 0xfffffffffffffLL))));
3040 }
3041
3042 APInt
convertFloatAPFloatToAPInt() const3043 APFloat::convertFloatAPFloatToAPInt() const
3044 {
3045 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
3046 assert(partCount()==1);
3047
3048 uint32_t myexponent, mysignificand;
3049
3050 if (isFiniteNonZero()) {
3051 myexponent = exponent+127; //bias
3052 mysignificand = (uint32_t)*significandParts();
3053 if (myexponent == 1 && !(mysignificand & 0x800000))
3054 myexponent = 0; // denormal
3055 } else if (category==fcZero) {
3056 myexponent = 0;
3057 mysignificand = 0;
3058 } else if (category==fcInfinity) {
3059 myexponent = 0xff;
3060 mysignificand = 0;
3061 } else {
3062 assert(category == fcNaN && "Unknown category!");
3063 myexponent = 0xff;
3064 mysignificand = (uint32_t)*significandParts();
3065 }
3066
3067 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3068 (mysignificand & 0x7fffff)));
3069 }
3070
3071 APInt
convertHalfAPFloatToAPInt() const3072 APFloat::convertHalfAPFloatToAPInt() const
3073 {
3074 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
3075 assert(partCount()==1);
3076
3077 uint32_t myexponent, mysignificand;
3078
3079 if (isFiniteNonZero()) {
3080 myexponent = exponent+15; //bias
3081 mysignificand = (uint32_t)*significandParts();
3082 if (myexponent == 1 && !(mysignificand & 0x400))
3083 myexponent = 0; // denormal
3084 } else if (category==fcZero) {
3085 myexponent = 0;
3086 mysignificand = 0;
3087 } else if (category==fcInfinity) {
3088 myexponent = 0x1f;
3089 mysignificand = 0;
3090 } else {
3091 assert(category == fcNaN && "Unknown category!");
3092 myexponent = 0x1f;
3093 mysignificand = (uint32_t)*significandParts();
3094 }
3095
3096 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3097 (mysignificand & 0x3ff)));
3098 }
3099
3100 // This function creates an APInt that is just a bit map of the floating
3101 // point constant as it would appear in memory. It is not a conversion,
3102 // and treating the result as a normal integer is unlikely to be useful.
3103
3104 APInt
bitcastToAPInt() const3105 APFloat::bitcastToAPInt() const
3106 {
3107 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
3108 return convertHalfAPFloatToAPInt();
3109
3110 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
3111 return convertFloatAPFloatToAPInt();
3112
3113 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
3114 return convertDoubleAPFloatToAPInt();
3115
3116 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
3117 return convertQuadrupleAPFloatToAPInt();
3118
3119 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
3120 return convertPPCDoubleDoubleAPFloatToAPInt();
3121
3122 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
3123 "unknown format!");
3124 return convertF80LongDoubleAPFloatToAPInt();
3125 }
3126
3127 float
convertToFloat() const3128 APFloat::convertToFloat() const
3129 {
3130 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
3131 "Float semantics are not IEEEsingle");
3132 APInt api = bitcastToAPInt();
3133 return api.bitsToFloat();
3134 }
3135
3136 double
convertToDouble() const3137 APFloat::convertToDouble() const
3138 {
3139 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
3140 "Float semantics are not IEEEdouble");
3141 APInt api = bitcastToAPInt();
3142 return api.bitsToDouble();
3143 }
3144
3145 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3146 /// does not support these bit patterns:
3147 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3148 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3149 /// exponent = 0, integer bit 1 ("pseudodenormal")
3150 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3151 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3152 void
initFromF80LongDoubleAPInt(const APInt & api)3153 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
3154 {
3155 assert(api.getBitWidth()==80);
3156 uint64_t i1 = api.getRawData()[0];
3157 uint64_t i2 = api.getRawData()[1];
3158 uint64_t myexponent = (i2 & 0x7fff);
3159 uint64_t mysignificand = i1;
3160
3161 initialize(&APFloat::x87DoubleExtended);
3162 assert(partCount()==2);
3163
3164 sign = static_cast<unsigned int>(i2>>15);
3165 if (myexponent==0 && mysignificand==0) {
3166 // exponent, significand meaningless
3167 category = fcZero;
3168 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3169 // exponent, significand meaningless
3170 category = fcInfinity;
3171 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3172 // exponent meaningless
3173 category = fcNaN;
3174 significandParts()[0] = mysignificand;
3175 significandParts()[1] = 0;
3176 } else {
3177 category = fcNormal;
3178 exponent = myexponent - 16383;
3179 significandParts()[0] = mysignificand;
3180 significandParts()[1] = 0;
3181 if (myexponent==0) // denormal
3182 exponent = -16382;
3183 }
3184 }
3185
3186 void
initFromPPCDoubleDoubleAPInt(const APInt & api)3187 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3188 {
3189 assert(api.getBitWidth()==128);
3190 uint64_t i1 = api.getRawData()[0];
3191 uint64_t i2 = api.getRawData()[1];
3192 opStatus fs;
3193 bool losesInfo;
3194
3195 // Get the first double and convert to our format.
3196 initFromDoubleAPInt(APInt(64, i1));
3197 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3198 assert(fs == opOK && !losesInfo);
3199 (void)fs;
3200
3201 // Unless we have a special case, add in second double.
3202 if (isFiniteNonZero()) {
3203 APFloat v(IEEEdouble, APInt(64, i2));
3204 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3205 assert(fs == opOK && !losesInfo);
3206 (void)fs;
3207
3208 add(v, rmNearestTiesToEven);
3209 }
3210 }
3211
3212 void
initFromQuadrupleAPInt(const APInt & api)3213 APFloat::initFromQuadrupleAPInt(const APInt &api)
3214 {
3215 assert(api.getBitWidth()==128);
3216 uint64_t i1 = api.getRawData()[0];
3217 uint64_t i2 = api.getRawData()[1];
3218 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3219 uint64_t mysignificand = i1;
3220 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3221
3222 initialize(&APFloat::IEEEquad);
3223 assert(partCount()==2);
3224
3225 sign = static_cast<unsigned int>(i2>>63);
3226 if (myexponent==0 &&
3227 (mysignificand==0 && mysignificand2==0)) {
3228 // exponent, significand meaningless
3229 category = fcZero;
3230 } else if (myexponent==0x7fff &&
3231 (mysignificand==0 && mysignificand2==0)) {
3232 // exponent, significand meaningless
3233 category = fcInfinity;
3234 } else if (myexponent==0x7fff &&
3235 (mysignificand!=0 || mysignificand2 !=0)) {
3236 // exponent meaningless
3237 category = fcNaN;
3238 significandParts()[0] = mysignificand;
3239 significandParts()[1] = mysignificand2;
3240 } else {
3241 category = fcNormal;
3242 exponent = myexponent - 16383;
3243 significandParts()[0] = mysignificand;
3244 significandParts()[1] = mysignificand2;
3245 if (myexponent==0) // denormal
3246 exponent = -16382;
3247 else
3248 significandParts()[1] |= 0x1000000000000LL; // integer bit
3249 }
3250 }
3251
3252 void
initFromDoubleAPInt(const APInt & api)3253 APFloat::initFromDoubleAPInt(const APInt &api)
3254 {
3255 assert(api.getBitWidth()==64);
3256 uint64_t i = *api.getRawData();
3257 uint64_t myexponent = (i >> 52) & 0x7ff;
3258 uint64_t mysignificand = i & 0xfffffffffffffLL;
3259
3260 initialize(&APFloat::IEEEdouble);
3261 assert(partCount()==1);
3262
3263 sign = static_cast<unsigned int>(i>>63);
3264 if (myexponent==0 && mysignificand==0) {
3265 // exponent, significand meaningless
3266 category = fcZero;
3267 } else if (myexponent==0x7ff && mysignificand==0) {
3268 // exponent, significand meaningless
3269 category = fcInfinity;
3270 } else if (myexponent==0x7ff && mysignificand!=0) {
3271 // exponent meaningless
3272 category = fcNaN;
3273 *significandParts() = mysignificand;
3274 } else {
3275 category = fcNormal;
3276 exponent = myexponent - 1023;
3277 *significandParts() = mysignificand;
3278 if (myexponent==0) // denormal
3279 exponent = -1022;
3280 else
3281 *significandParts() |= 0x10000000000000LL; // integer bit
3282 }
3283 }
3284
3285 void
initFromFloatAPInt(const APInt & api)3286 APFloat::initFromFloatAPInt(const APInt & api)
3287 {
3288 assert(api.getBitWidth()==32);
3289 uint32_t i = (uint32_t)*api.getRawData();
3290 uint32_t myexponent = (i >> 23) & 0xff;
3291 uint32_t mysignificand = i & 0x7fffff;
3292
3293 initialize(&APFloat::IEEEsingle);
3294 assert(partCount()==1);
3295
3296 sign = i >> 31;
3297 if (myexponent==0 && mysignificand==0) {
3298 // exponent, significand meaningless
3299 category = fcZero;
3300 } else if (myexponent==0xff && mysignificand==0) {
3301 // exponent, significand meaningless
3302 category = fcInfinity;
3303 } else if (myexponent==0xff && mysignificand!=0) {
3304 // sign, exponent, significand meaningless
3305 category = fcNaN;
3306 *significandParts() = mysignificand;
3307 } else {
3308 category = fcNormal;
3309 exponent = myexponent - 127; //bias
3310 *significandParts() = mysignificand;
3311 if (myexponent==0) // denormal
3312 exponent = -126;
3313 else
3314 *significandParts() |= 0x800000; // integer bit
3315 }
3316 }
3317
3318 void
initFromHalfAPInt(const APInt & api)3319 APFloat::initFromHalfAPInt(const APInt & api)
3320 {
3321 assert(api.getBitWidth()==16);
3322 uint32_t i = (uint32_t)*api.getRawData();
3323 uint32_t myexponent = (i >> 10) & 0x1f;
3324 uint32_t mysignificand = i & 0x3ff;
3325
3326 initialize(&APFloat::IEEEhalf);
3327 assert(partCount()==1);
3328
3329 sign = i >> 15;
3330 if (myexponent==0 && mysignificand==0) {
3331 // exponent, significand meaningless
3332 category = fcZero;
3333 } else if (myexponent==0x1f && mysignificand==0) {
3334 // exponent, significand meaningless
3335 category = fcInfinity;
3336 } else if (myexponent==0x1f && mysignificand!=0) {
3337 // sign, exponent, significand meaningless
3338 category = fcNaN;
3339 *significandParts() = mysignificand;
3340 } else {
3341 category = fcNormal;
3342 exponent = myexponent - 15; //bias
3343 *significandParts() = mysignificand;
3344 if (myexponent==0) // denormal
3345 exponent = -14;
3346 else
3347 *significandParts() |= 0x400; // integer bit
3348 }
3349 }
3350
3351 /// Treat api as containing the bits of a floating point number. Currently
3352 /// we infer the floating point type from the size of the APInt. The
3353 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3354 /// when the size is anything else).
3355 void
initFromAPInt(const fltSemantics * Sem,const APInt & api)3356 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
3357 {
3358 if (Sem == &IEEEhalf)
3359 return initFromHalfAPInt(api);
3360 if (Sem == &IEEEsingle)
3361 return initFromFloatAPInt(api);
3362 if (Sem == &IEEEdouble)
3363 return initFromDoubleAPInt(api);
3364 if (Sem == &x87DoubleExtended)
3365 return initFromF80LongDoubleAPInt(api);
3366 if (Sem == &IEEEquad)
3367 return initFromQuadrupleAPInt(api);
3368 if (Sem == &PPCDoubleDouble)
3369 return initFromPPCDoubleDoubleAPInt(api);
3370
3371 llvm_unreachable(nullptr);
3372 }
3373
3374 APFloat
getAllOnesValue(unsigned BitWidth,bool isIEEE)3375 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3376 {
3377 switch (BitWidth) {
3378 case 16:
3379 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
3380 case 32:
3381 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
3382 case 64:
3383 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
3384 case 80:
3385 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
3386 case 128:
3387 if (isIEEE)
3388 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
3389 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
3390 default:
3391 llvm_unreachable("Unknown floating bit width");
3392 }
3393 }
3394
getSizeInBits(const fltSemantics & Sem)3395 unsigned APFloat::getSizeInBits(const fltSemantics &Sem) {
3396 return Sem.sizeInBits;
3397 }
3398
3399 /// Make this number the largest magnitude normal number in the given
3400 /// semantics.
makeLargest(bool Negative)3401 void APFloat::makeLargest(bool Negative) {
3402 // We want (in interchange format):
3403 // sign = {Negative}
3404 // exponent = 1..10
3405 // significand = 1..1
3406 category = fcNormal;
3407 sign = Negative;
3408 exponent = semantics->maxExponent;
3409
3410 // Use memset to set all but the highest integerPart to all ones.
3411 integerPart *significand = significandParts();
3412 unsigned PartCount = partCount();
3413 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3414
3415 // Set the high integerPart especially setting all unused top bits for
3416 // internal consistency.
3417 const unsigned NumUnusedHighBits =
3418 PartCount*integerPartWidth - semantics->precision;
3419 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3420 ? (~integerPart(0) >> NumUnusedHighBits)
3421 : 0;
3422 }
3423
3424 /// Make this number the smallest magnitude denormal number in the given
3425 /// semantics.
makeSmallest(bool Negative)3426 void APFloat::makeSmallest(bool Negative) {
3427 // We want (in interchange format):
3428 // sign = {Negative}
3429 // exponent = 0..0
3430 // significand = 0..01
3431 category = fcNormal;
3432 sign = Negative;
3433 exponent = semantics->minExponent;
3434 APInt::tcSet(significandParts(), 1, partCount());
3435 }
3436
3437
getLargest(const fltSemantics & Sem,bool Negative)3438 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3439 // We want (in interchange format):
3440 // sign = {Negative}
3441 // exponent = 1..10
3442 // significand = 1..1
3443 APFloat Val(Sem, uninitialized);
3444 Val.makeLargest(Negative);
3445 return Val;
3446 }
3447
getSmallest(const fltSemantics & Sem,bool Negative)3448 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3449 // We want (in interchange format):
3450 // sign = {Negative}
3451 // exponent = 0..0
3452 // significand = 0..01
3453 APFloat Val(Sem, uninitialized);
3454 Val.makeSmallest(Negative);
3455 return Val;
3456 }
3457
getSmallestNormalized(const fltSemantics & Sem,bool Negative)3458 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3459 APFloat Val(Sem, uninitialized);
3460
3461 // We want (in interchange format):
3462 // sign = {Negative}
3463 // exponent = 0..0
3464 // significand = 10..0
3465
3466 Val.category = fcNormal;
3467 Val.zeroSignificand();
3468 Val.sign = Negative;
3469 Val.exponent = Sem.minExponent;
3470 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3471 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3472
3473 return Val;
3474 }
3475
APFloat(const fltSemantics & Sem,const APInt & API)3476 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
3477 initFromAPInt(&Sem, API);
3478 }
3479
APFloat(float f)3480 APFloat::APFloat(float f) {
3481 initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
3482 }
3483
APFloat(double d)3484 APFloat::APFloat(double d) {
3485 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
3486 }
3487
3488 namespace {
append(SmallVectorImpl<char> & Buffer,StringRef Str)3489 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3490 Buffer.append(Str.begin(), Str.end());
3491 }
3492
3493 /// Removes data from the given significand until it is no more
3494 /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)3495 void AdjustToPrecision(APInt &significand,
3496 int &exp, unsigned FormatPrecision) {
3497 unsigned bits = significand.getActiveBits();
3498
3499 // 196/59 is a very slight overestimate of lg_2(10).
3500 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3501
3502 if (bits <= bitsRequired) return;
3503
3504 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3505 if (!tensRemovable) return;
3506
3507 exp += tensRemovable;
3508
3509 APInt divisor(significand.getBitWidth(), 1);
3510 APInt powten(significand.getBitWidth(), 10);
3511 while (true) {
3512 if (tensRemovable & 1)
3513 divisor *= powten;
3514 tensRemovable >>= 1;
3515 if (!tensRemovable) break;
3516 powten *= powten;
3517 }
3518
3519 significand = significand.udiv(divisor);
3520
3521 // Truncate the significand down to its active bit count.
3522 significand = significand.trunc(significand.getActiveBits());
3523 }
3524
3525
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)3526 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3527 int &exp, unsigned FormatPrecision) {
3528 unsigned N = buffer.size();
3529 if (N <= FormatPrecision) return;
3530
3531 // The most significant figures are the last ones in the buffer.
3532 unsigned FirstSignificant = N - FormatPrecision;
3533
3534 // Round.
3535 // FIXME: this probably shouldn't use 'round half up'.
3536
3537 // Rounding down is just a truncation, except we also want to drop
3538 // trailing zeros from the new result.
3539 if (buffer[FirstSignificant - 1] < '5') {
3540 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3541 FirstSignificant++;
3542
3543 exp += FirstSignificant;
3544 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3545 return;
3546 }
3547
3548 // Rounding up requires a decimal add-with-carry. If we continue
3549 // the carry, the newly-introduced zeros will just be truncated.
3550 for (unsigned I = FirstSignificant; I != N; ++I) {
3551 if (buffer[I] == '9') {
3552 FirstSignificant++;
3553 } else {
3554 buffer[I]++;
3555 break;
3556 }
3557 }
3558
3559 // If we carried through, we have exactly one digit of precision.
3560 if (FirstSignificant == N) {
3561 exp += FirstSignificant;
3562 buffer.clear();
3563 buffer.push_back('1');
3564 return;
3565 }
3566
3567 exp += FirstSignificant;
3568 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3569 }
3570 }
3571
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding) const3572 void APFloat::toString(SmallVectorImpl<char> &Str,
3573 unsigned FormatPrecision,
3574 unsigned FormatMaxPadding) const {
3575 switch (category) {
3576 case fcInfinity:
3577 if (isNegative())
3578 return append(Str, "-Inf");
3579 else
3580 return append(Str, "+Inf");
3581
3582 case fcNaN: return append(Str, "NaN");
3583
3584 case fcZero:
3585 if (isNegative())
3586 Str.push_back('-');
3587
3588 if (!FormatMaxPadding)
3589 append(Str, "0.0E+0");
3590 else
3591 Str.push_back('0');
3592 return;
3593
3594 case fcNormal:
3595 break;
3596 }
3597
3598 if (isNegative())
3599 Str.push_back('-');
3600
3601 // Decompose the number into an APInt and an exponent.
3602 int exp = exponent - ((int) semantics->precision - 1);
3603 APInt significand(semantics->precision,
3604 makeArrayRef(significandParts(),
3605 partCountForBits(semantics->precision)));
3606
3607 // Set FormatPrecision if zero. We want to do this before we
3608 // truncate trailing zeros, as those are part of the precision.
3609 if (!FormatPrecision) {
3610 // We use enough digits so the number can be round-tripped back to an
3611 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3612 // Accurately" by Steele and White.
3613 // FIXME: Using a formula based purely on the precision is conservative;
3614 // we can print fewer digits depending on the actual value being printed.
3615
3616 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3617 FormatPrecision = 2 + semantics->precision * 59 / 196;
3618 }
3619
3620 // Ignore trailing binary zeros.
3621 int trailingZeros = significand.countTrailingZeros();
3622 exp += trailingZeros;
3623 significand = significand.lshr(trailingZeros);
3624
3625 // Change the exponent from 2^e to 10^e.
3626 if (exp == 0) {
3627 // Nothing to do.
3628 } else if (exp > 0) {
3629 // Just shift left.
3630 significand = significand.zext(semantics->precision + exp);
3631 significand <<= exp;
3632 exp = 0;
3633 } else { /* exp < 0 */
3634 int texp = -exp;
3635
3636 // We transform this using the identity:
3637 // (N)(2^-e) == (N)(5^e)(10^-e)
3638 // This means we have to multiply N (the significand) by 5^e.
3639 // To avoid overflow, we have to operate on numbers large
3640 // enough to store N * 5^e:
3641 // log2(N * 5^e) == log2(N) + e * log2(5)
3642 // <= semantics->precision + e * 137 / 59
3643 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3644
3645 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3646
3647 // Multiply significand by 5^e.
3648 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3649 significand = significand.zext(precision);
3650 APInt five_to_the_i(precision, 5);
3651 while (true) {
3652 if (texp & 1) significand *= five_to_the_i;
3653
3654 texp >>= 1;
3655 if (!texp) break;
3656 five_to_the_i *= five_to_the_i;
3657 }
3658 }
3659
3660 AdjustToPrecision(significand, exp, FormatPrecision);
3661
3662 SmallVector<char, 256> buffer;
3663
3664 // Fill the buffer.
3665 unsigned precision = significand.getBitWidth();
3666 APInt ten(precision, 10);
3667 APInt digit(precision, 0);
3668
3669 bool inTrail = true;
3670 while (significand != 0) {
3671 // digit <- significand % 10
3672 // significand <- significand / 10
3673 APInt::udivrem(significand, ten, significand, digit);
3674
3675 unsigned d = digit.getZExtValue();
3676
3677 // Drop trailing zeros.
3678 if (inTrail && !d) exp++;
3679 else {
3680 buffer.push_back((char) ('0' + d));
3681 inTrail = false;
3682 }
3683 }
3684
3685 assert(!buffer.empty() && "no characters in buffer!");
3686
3687 // Drop down to FormatPrecision.
3688 // TODO: don't do more precise calculations above than are required.
3689 AdjustToPrecision(buffer, exp, FormatPrecision);
3690
3691 unsigned NDigits = buffer.size();
3692
3693 // Check whether we should use scientific notation.
3694 bool FormatScientific;
3695 if (!FormatMaxPadding)
3696 FormatScientific = true;
3697 else {
3698 if (exp >= 0) {
3699 // 765e3 --> 765000
3700 // ^^^
3701 // But we shouldn't make the number look more precise than it is.
3702 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3703 NDigits + (unsigned) exp > FormatPrecision);
3704 } else {
3705 // Power of the most significant digit.
3706 int MSD = exp + (int) (NDigits - 1);
3707 if (MSD >= 0) {
3708 // 765e-2 == 7.65
3709 FormatScientific = false;
3710 } else {
3711 // 765e-5 == 0.00765
3712 // ^ ^^
3713 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3714 }
3715 }
3716 }
3717
3718 // Scientific formatting is pretty straightforward.
3719 if (FormatScientific) {
3720 exp += (NDigits - 1);
3721
3722 Str.push_back(buffer[NDigits-1]);
3723 Str.push_back('.');
3724 if (NDigits == 1)
3725 Str.push_back('0');
3726 else
3727 for (unsigned I = 1; I != NDigits; ++I)
3728 Str.push_back(buffer[NDigits-1-I]);
3729 Str.push_back('E');
3730
3731 Str.push_back(exp >= 0 ? '+' : '-');
3732 if (exp < 0) exp = -exp;
3733 SmallVector<char, 6> expbuf;
3734 do {
3735 expbuf.push_back((char) ('0' + (exp % 10)));
3736 exp /= 10;
3737 } while (exp);
3738 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3739 Str.push_back(expbuf[E-1-I]);
3740 return;
3741 }
3742
3743 // Non-scientific, positive exponents.
3744 if (exp >= 0) {
3745 for (unsigned I = 0; I != NDigits; ++I)
3746 Str.push_back(buffer[NDigits-1-I]);
3747 for (unsigned I = 0; I != (unsigned) exp; ++I)
3748 Str.push_back('0');
3749 return;
3750 }
3751
3752 // Non-scientific, negative exponents.
3753
3754 // The number of digits to the left of the decimal point.
3755 int NWholeDigits = exp + (int) NDigits;
3756
3757 unsigned I = 0;
3758 if (NWholeDigits > 0) {
3759 for (; I != (unsigned) NWholeDigits; ++I)
3760 Str.push_back(buffer[NDigits-I-1]);
3761 Str.push_back('.');
3762 } else {
3763 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3764
3765 Str.push_back('0');
3766 Str.push_back('.');
3767 for (unsigned Z = 1; Z != NZeros; ++Z)
3768 Str.push_back('0');
3769 }
3770
3771 for (; I != NDigits; ++I)
3772 Str.push_back(buffer[NDigits-I-1]);
3773 }
3774
getExactInverse(APFloat * inv) const3775 bool APFloat::getExactInverse(APFloat *inv) const {
3776 // Special floats and denormals have no exact inverse.
3777 if (!isFiniteNonZero())
3778 return false;
3779
3780 // Check that the number is a power of two by making sure that only the
3781 // integer bit is set in the significand.
3782 if (significandLSB() != semantics->precision - 1)
3783 return false;
3784
3785 // Get the inverse.
3786 APFloat reciprocal(*semantics, 1ULL);
3787 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3788 return false;
3789
3790 // Avoid multiplication with a denormal, it is not safe on all platforms and
3791 // may be slower than a normal division.
3792 if (reciprocal.isDenormal())
3793 return false;
3794
3795 assert(reciprocal.isFiniteNonZero() &&
3796 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3797
3798 if (inv)
3799 *inv = reciprocal;
3800
3801 return true;
3802 }
3803
isSignaling() const3804 bool APFloat::isSignaling() const {
3805 if (!isNaN())
3806 return false;
3807
3808 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3809 // first bit of the trailing significand being 0.
3810 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3811 }
3812
3813 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3814 ///
3815 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3816 /// appropriate sign switching before/after the computation.
next(bool nextDown)3817 APFloat::opStatus APFloat::next(bool nextDown) {
3818 // If we are performing nextDown, swap sign so we have -x.
3819 if (nextDown)
3820 changeSign();
3821
3822 // Compute nextUp(x)
3823 opStatus result = opOK;
3824
3825 // Handle each float category separately.
3826 switch (category) {
3827 case fcInfinity:
3828 // nextUp(+inf) = +inf
3829 if (!isNegative())
3830 break;
3831 // nextUp(-inf) = -getLargest()
3832 makeLargest(true);
3833 break;
3834 case fcNaN:
3835 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3836 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3837 // change the payload.
3838 if (isSignaling()) {
3839 result = opInvalidOp;
3840 // For consistency, propagate the sign of the sNaN to the qNaN.
3841 makeNaN(false, isNegative(), nullptr);
3842 }
3843 break;
3844 case fcZero:
3845 // nextUp(pm 0) = +getSmallest()
3846 makeSmallest(false);
3847 break;
3848 case fcNormal:
3849 // nextUp(-getSmallest()) = -0
3850 if (isSmallest() && isNegative()) {
3851 APInt::tcSet(significandParts(), 0, partCount());
3852 category = fcZero;
3853 exponent = 0;
3854 break;
3855 }
3856
3857 // nextUp(getLargest()) == INFINITY
3858 if (isLargest() && !isNegative()) {
3859 APInt::tcSet(significandParts(), 0, partCount());
3860 category = fcInfinity;
3861 exponent = semantics->maxExponent + 1;
3862 break;
3863 }
3864
3865 // nextUp(normal) == normal + inc.
3866 if (isNegative()) {
3867 // If we are negative, we need to decrement the significand.
3868
3869 // We only cross a binade boundary that requires adjusting the exponent
3870 // if:
3871 // 1. exponent != semantics->minExponent. This implies we are not in the
3872 // smallest binade or are dealing with denormals.
3873 // 2. Our significand excluding the integral bit is all zeros.
3874 bool WillCrossBinadeBoundary =
3875 exponent != semantics->minExponent && isSignificandAllZeros();
3876
3877 // Decrement the significand.
3878 //
3879 // We always do this since:
3880 // 1. If we are dealing with a non-binade decrement, by definition we
3881 // just decrement the significand.
3882 // 2. If we are dealing with a normal -> normal binade decrement, since
3883 // we have an explicit integral bit the fact that all bits but the
3884 // integral bit are zero implies that subtracting one will yield a
3885 // significand with 0 integral bit and 1 in all other spots. Thus we
3886 // must just adjust the exponent and set the integral bit to 1.
3887 // 3. If we are dealing with a normal -> denormal binade decrement,
3888 // since we set the integral bit to 0 when we represent denormals, we
3889 // just decrement the significand.
3890 integerPart *Parts = significandParts();
3891 APInt::tcDecrement(Parts, partCount());
3892
3893 if (WillCrossBinadeBoundary) {
3894 // Our result is a normal number. Do the following:
3895 // 1. Set the integral bit to 1.
3896 // 2. Decrement the exponent.
3897 APInt::tcSetBit(Parts, semantics->precision - 1);
3898 exponent--;
3899 }
3900 } else {
3901 // If we are positive, we need to increment the significand.
3902
3903 // We only cross a binade boundary that requires adjusting the exponent if
3904 // the input is not a denormal and all of said input's significand bits
3905 // are set. If all of said conditions are true: clear the significand, set
3906 // the integral bit to 1, and increment the exponent. If we have a
3907 // denormal always increment since moving denormals and the numbers in the
3908 // smallest normal binade have the same exponent in our representation.
3909 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3910
3911 if (WillCrossBinadeBoundary) {
3912 integerPart *Parts = significandParts();
3913 APInt::tcSet(Parts, 0, partCount());
3914 APInt::tcSetBit(Parts, semantics->precision - 1);
3915 assert(exponent != semantics->maxExponent &&
3916 "We can not increment an exponent beyond the maxExponent allowed"
3917 " by the given floating point semantics.");
3918 exponent++;
3919 } else {
3920 incrementSignificand();
3921 }
3922 }
3923 break;
3924 }
3925
3926 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3927 if (nextDown)
3928 changeSign();
3929
3930 return result;
3931 }
3932
3933 void
makeInf(bool Negative)3934 APFloat::makeInf(bool Negative) {
3935 category = fcInfinity;
3936 sign = Negative;
3937 exponent = semantics->maxExponent + 1;
3938 APInt::tcSet(significandParts(), 0, partCount());
3939 }
3940
3941 void
makeZero(bool Negative)3942 APFloat::makeZero(bool Negative) {
3943 category = fcZero;
3944 sign = Negative;
3945 exponent = semantics->minExponent-1;
3946 APInt::tcSet(significandParts(), 0, partCount());
3947 }
3948
makeQuiet()3949 void APFloat::makeQuiet() {
3950 assert(isNaN());
3951 APInt::tcSetBit(significandParts(), semantics->precision - 2);
3952 }
3953
ilogb(const APFloat & Arg)3954 int llvm::ilogb(const APFloat &Arg) {
3955 if (Arg.isNaN())
3956 return APFloat::IEK_NaN;
3957 if (Arg.isZero())
3958 return APFloat::IEK_Zero;
3959 if (Arg.isInfinity())
3960 return APFloat::IEK_Inf;
3961 if (!Arg.isDenormal())
3962 return Arg.exponent;
3963
3964 APFloat Normalized(Arg);
3965 int SignificandBits = Arg.getSemantics().precision - 1;
3966
3967 Normalized.exponent += SignificandBits;
3968 Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
3969 return Normalized.exponent - SignificandBits;
3970 }
3971
scalbn(APFloat X,int Exp,APFloat::roundingMode RoundingMode)3972 APFloat llvm::scalbn(APFloat X, int Exp, APFloat::roundingMode RoundingMode) {
3973 auto MaxExp = X.getSemantics().maxExponent;
3974 auto MinExp = X.getSemantics().minExponent;
3975
3976 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3977 // overflow; clamp it to a safe range before adding, but ensure that the range
3978 // is large enough that the clamp does not change the result. The range we
3979 // need to support is the difference between the largest possible exponent and
3980 // the normalized exponent of half the smallest denormal.
3981
3982 int SignificandBits = X.getSemantics().precision - 1;
3983 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
3984
3985 // Clamp to one past the range ends to let normalize handle overlflow.
3986 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
3987 X.normalize(RoundingMode, lfExactlyZero);
3988 if (X.isNaN())
3989 X.makeQuiet();
3990 return X;
3991 }
3992
frexp(const APFloat & Val,int & Exp,APFloat::roundingMode RM)3993 APFloat llvm::frexp(const APFloat &Val, int &Exp, APFloat::roundingMode RM) {
3994 Exp = ilogb(Val);
3995
3996 // Quiet signalling nans.
3997 if (Exp == APFloat::IEK_NaN) {
3998 APFloat Quiet(Val);
3999 Quiet.makeQuiet();
4000 return Quiet;
4001 }
4002
4003 if (Exp == APFloat::IEK_Inf)
4004 return Val;
4005
4006 // 1 is added because frexp is defined to return a normalized fraction in
4007 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4008 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4009 return scalbn(Val, -Exp, RM);
4010 }
4011