1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
23 #include <cstring>
24 #include <limits.h>
25 
26 using namespace llvm;
27 
28 /// A macro used to combine two fcCategory enums into one key which can be used
29 /// in a switch statement to classify how the interaction of two APFloat's
30 /// categories affects an operation.
31 ///
32 /// TODO: If clang source code is ever allowed to use constexpr in its own
33 /// codebase, change this into a static inline function.
34 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
35 
36 /* Assumed in hexadecimal significand parsing, and conversion to
37    hexadecimal strings.  */
38 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
39 
40 namespace llvm {
41 
42   /* Represents floating point arithmetic semantics.  */
43   struct fltSemantics {
44     /* The largest E such that 2^E is representable; this matches the
45        definition of IEEE 754.  */
46     APFloat::ExponentType maxExponent;
47 
48     /* The smallest E such that 2^E is a normalized number; this
49        matches the definition of IEEE 754.  */
50     APFloat::ExponentType minExponent;
51 
52     /* Number of bits in the significand.  This includes the integer
53        bit.  */
54     unsigned int precision;
55   };
56 
57   const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 };
58   const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
59   const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
60   const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
61   const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
62   const fltSemantics APFloat::Bogus = { 0, 0, 0 };
63 
64   /* The PowerPC format consists of two doubles.  It does not map cleanly
65      onto the usual format above.  It is approximated using twice the
66      mantissa bits.  Note that for exponents near the double minimum,
67      we no longer can represent the full 106 mantissa bits, so those
68      will be treated as denormal numbers.
69 
70      FIXME: While this approximation is equivalent to what GCC uses for
71      compile-time arithmetic on PPC double-double numbers, it is not able
72      to represent all possible values held by a PPC double-double number,
73      for example: (long double) 1.0 + (long double) 0x1p-106
74      Should this be replaced by a full emulation of PPC double-double?  */
75   const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 };
76 
77   /* A tight upper bound on number of parts required to hold the value
78      pow(5, power) is
79 
80        power * 815 / (351 * integerPartWidth) + 1
81 
82      However, whilst the result may require only this many parts,
83      because we are multiplying two values to get it, the
84      multiplication may require an extra part with the excess part
85      being zero (consider the trivial case of 1 * 1, tcFullMultiply
86      requires two parts to hold the single-part result).  So we add an
87      extra one to guarantee enough space whilst multiplying.  */
88   const unsigned int maxExponent = 16383;
89   const unsigned int maxPrecision = 113;
90   const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
91   const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
92                                                 / (351 * integerPartWidth));
93 }
94 
95 /* A bunch of private, handy routines.  */
96 
97 static inline unsigned int
partCountForBits(unsigned int bits)98 partCountForBits(unsigned int bits)
99 {
100   return ((bits) + integerPartWidth - 1) / integerPartWidth;
101 }
102 
103 /* Returns 0U-9U.  Return values >= 10U are not digits.  */
104 static inline unsigned int
decDigitValue(unsigned int c)105 decDigitValue(unsigned int c)
106 {
107   return c - '0';
108 }
109 
110 /* Return the value of a decimal exponent of the form
111    [+-]ddddddd.
112 
113    If the exponent overflows, returns a large exponent with the
114    appropriate sign.  */
115 static int
readExponent(StringRef::iterator begin,StringRef::iterator end)116 readExponent(StringRef::iterator begin, StringRef::iterator end)
117 {
118   bool isNegative;
119   unsigned int absExponent;
120   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
121   StringRef::iterator p = begin;
122 
123   assert(p != end && "Exponent has no digits");
124 
125   isNegative = (*p == '-');
126   if (*p == '-' || *p == '+') {
127     p++;
128     assert(p != end && "Exponent has no digits");
129   }
130 
131   absExponent = decDigitValue(*p++);
132   assert(absExponent < 10U && "Invalid character in exponent");
133 
134   for (; p != end; ++p) {
135     unsigned int value;
136 
137     value = decDigitValue(*p);
138     assert(value < 10U && "Invalid character in exponent");
139 
140     value += absExponent * 10;
141     if (absExponent >= overlargeExponent) {
142       absExponent = overlargeExponent;
143       p = end;  /* outwit assert below */
144       break;
145     }
146     absExponent = value;
147   }
148 
149   assert(p == end && "Invalid exponent in exponent");
150 
151   if (isNegative)
152     return -(int) absExponent;
153   else
154     return (int) absExponent;
155 }
156 
157 /* This is ugly and needs cleaning up, but I don't immediately see
158    how whilst remaining safe.  */
159 static int
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)160 totalExponent(StringRef::iterator p, StringRef::iterator end,
161               int exponentAdjustment)
162 {
163   int unsignedExponent;
164   bool negative, overflow;
165   int exponent = 0;
166 
167   assert(p != end && "Exponent has no digits");
168 
169   negative = *p == '-';
170   if (*p == '-' || *p == '+') {
171     p++;
172     assert(p != end && "Exponent has no digits");
173   }
174 
175   unsignedExponent = 0;
176   overflow = false;
177   for (; p != end; ++p) {
178     unsigned int value;
179 
180     value = decDigitValue(*p);
181     assert(value < 10U && "Invalid character in exponent");
182 
183     unsignedExponent = unsignedExponent * 10 + value;
184     if (unsignedExponent > 32767) {
185       overflow = true;
186       break;
187     }
188   }
189 
190   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
191     overflow = true;
192 
193   if (!overflow) {
194     exponent = unsignedExponent;
195     if (negative)
196       exponent = -exponent;
197     exponent += exponentAdjustment;
198     if (exponent > 32767 || exponent < -32768)
199       overflow = true;
200   }
201 
202   if (overflow)
203     exponent = negative ? -32768: 32767;
204 
205   return exponent;
206 }
207 
208 static StringRef::iterator
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)209 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
210                            StringRef::iterator *dot)
211 {
212   StringRef::iterator p = begin;
213   *dot = end;
214   while (p != end && *p == '0')
215     p++;
216 
217   if (p != end && *p == '.') {
218     *dot = p++;
219 
220     assert(end - begin != 1 && "Significand has no digits");
221 
222     while (p != end && *p == '0')
223       p++;
224   }
225 
226   return p;
227 }
228 
229 /* Given a normal decimal floating point number of the form
230 
231      dddd.dddd[eE][+-]ddd
232 
233    where the decimal point and exponent are optional, fill out the
234    structure D.  Exponent is appropriate if the significand is
235    treated as an integer, and normalizedExponent if the significand
236    is taken to have the decimal point after a single leading
237    non-zero digit.
238 
239    If the value is zero, V->firstSigDigit points to a non-digit, and
240    the return exponent is zero.
241 */
242 struct decimalInfo {
243   const char *firstSigDigit;
244   const char *lastSigDigit;
245   int exponent;
246   int normalizedExponent;
247 };
248 
249 static void
interpretDecimal(StringRef::iterator begin,StringRef::iterator end,decimalInfo * D)250 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
251                  decimalInfo *D)
252 {
253   StringRef::iterator dot = end;
254   StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
255 
256   D->firstSigDigit = p;
257   D->exponent = 0;
258   D->normalizedExponent = 0;
259 
260   for (; p != end; ++p) {
261     if (*p == '.') {
262       assert(dot == end && "String contains multiple dots");
263       dot = p++;
264       if (p == end)
265         break;
266     }
267     if (decDigitValue(*p) >= 10U)
268       break;
269   }
270 
271   if (p != end) {
272     assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
273     assert(p != begin && "Significand has no digits");
274     assert((dot == end || p - begin != 1) && "Significand has no digits");
275 
276     /* p points to the first non-digit in the string */
277     D->exponent = readExponent(p + 1, end);
278 
279     /* Implied decimal point?  */
280     if (dot == end)
281       dot = p;
282   }
283 
284   /* If number is all zeroes accept any exponent.  */
285   if (p != D->firstSigDigit) {
286     /* Drop insignificant trailing zeroes.  */
287     if (p != begin) {
288       do
289         do
290           p--;
291         while (p != begin && *p == '0');
292       while (p != begin && *p == '.');
293     }
294 
295     /* Adjust the exponents for any decimal point.  */
296     D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
297     D->normalizedExponent = (D->exponent +
298               static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
299                                       - (dot > D->firstSigDigit && dot < p)));
300   }
301 
302   D->lastSigDigit = p;
303 }
304 
305 /* Return the trailing fraction of a hexadecimal number.
306    DIGITVALUE is the first hex digit of the fraction, P points to
307    the next digit.  */
308 static lostFraction
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)309 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
310                             unsigned int digitValue)
311 {
312   unsigned int hexDigit;
313 
314   /* If the first trailing digit isn't 0 or 8 we can work out the
315      fraction immediately.  */
316   if (digitValue > 8)
317     return lfMoreThanHalf;
318   else if (digitValue < 8 && digitValue > 0)
319     return lfLessThanHalf;
320 
321   // Otherwise we need to find the first non-zero digit.
322   while (p != end && (*p == '0' || *p == '.'))
323     p++;
324 
325   assert(p != end && "Invalid trailing hexadecimal fraction!");
326 
327   hexDigit = hexDigitValue(*p);
328 
329   /* If we ran off the end it is exactly zero or one-half, otherwise
330      a little more.  */
331   if (hexDigit == -1U)
332     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
333   else
334     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
335 }
336 
337 /* Return the fraction lost were a bignum truncated losing the least
338    significant BITS bits.  */
339 static lostFraction
lostFractionThroughTruncation(const integerPart * parts,unsigned int partCount,unsigned int bits)340 lostFractionThroughTruncation(const integerPart *parts,
341                               unsigned int partCount,
342                               unsigned int bits)
343 {
344   unsigned int lsb;
345 
346   lsb = APInt::tcLSB(parts, partCount);
347 
348   /* Note this is guaranteed true if bits == 0, or LSB == -1U.  */
349   if (bits <= lsb)
350     return lfExactlyZero;
351   if (bits == lsb + 1)
352     return lfExactlyHalf;
353   if (bits <= partCount * integerPartWidth &&
354       APInt::tcExtractBit(parts, bits - 1))
355     return lfMoreThanHalf;
356 
357   return lfLessThanHalf;
358 }
359 
360 /* Shift DST right BITS bits noting lost fraction.  */
361 static lostFraction
shiftRight(integerPart * dst,unsigned int parts,unsigned int bits)362 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
363 {
364   lostFraction lost_fraction;
365 
366   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
367 
368   APInt::tcShiftRight(dst, parts, bits);
369 
370   return lost_fraction;
371 }
372 
373 /* Combine the effect of two lost fractions.  */
374 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)375 combineLostFractions(lostFraction moreSignificant,
376                      lostFraction lessSignificant)
377 {
378   if (lessSignificant != lfExactlyZero) {
379     if (moreSignificant == lfExactlyZero)
380       moreSignificant = lfLessThanHalf;
381     else if (moreSignificant == lfExactlyHalf)
382       moreSignificant = lfMoreThanHalf;
383   }
384 
385   return moreSignificant;
386 }
387 
388 /* The error from the true value, in half-ulps, on multiplying two
389    floating point numbers, which differ from the value they
390    approximate by at most HUE1 and HUE2 half-ulps, is strictly less
391    than the returned value.
392 
393    See "How to Read Floating Point Numbers Accurately" by William D
394    Clinger.  */
395 static unsigned int
HUerrBound(bool inexactMultiply,unsigned int HUerr1,unsigned int HUerr2)396 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
397 {
398   assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
399 
400   if (HUerr1 + HUerr2 == 0)
401     return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
402   else
403     return inexactMultiply + 2 * (HUerr1 + HUerr2);
404 }
405 
406 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
407    when the least significant BITS are truncated.  BITS cannot be
408    zero.  */
409 static integerPart
ulpsFromBoundary(const integerPart * parts,unsigned int bits,bool isNearest)410 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
411 {
412   unsigned int count, partBits;
413   integerPart part, boundary;
414 
415   assert(bits != 0);
416 
417   bits--;
418   count = bits / integerPartWidth;
419   partBits = bits % integerPartWidth + 1;
420 
421   part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
422 
423   if (isNearest)
424     boundary = (integerPart) 1 << (partBits - 1);
425   else
426     boundary = 0;
427 
428   if (count == 0) {
429     if (part - boundary <= boundary - part)
430       return part - boundary;
431     else
432       return boundary - part;
433   }
434 
435   if (part == boundary) {
436     while (--count)
437       if (parts[count])
438         return ~(integerPart) 0; /* A lot.  */
439 
440     return parts[0];
441   } else if (part == boundary - 1) {
442     while (--count)
443       if (~parts[count])
444         return ~(integerPart) 0; /* A lot.  */
445 
446     return -parts[0];
447   }
448 
449   return ~(integerPart) 0; /* A lot.  */
450 }
451 
452 /* Place pow(5, power) in DST, and return the number of parts used.
453    DST must be at least one part larger than size of the answer.  */
454 static unsigned int
powerOf5(integerPart * dst,unsigned int power)455 powerOf5(integerPart *dst, unsigned int power)
456 {
457   static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
458                                                   15625, 78125 };
459   integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
460   pow5s[0] = 78125 * 5;
461 
462   unsigned int partsCount[16] = { 1 };
463   integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
464   unsigned int result;
465   assert(power <= maxExponent);
466 
467   p1 = dst;
468   p2 = scratch;
469 
470   *p1 = firstEightPowers[power & 7];
471   power >>= 3;
472 
473   result = 1;
474   pow5 = pow5s;
475 
476   for (unsigned int n = 0; power; power >>= 1, n++) {
477     unsigned int pc;
478 
479     pc = partsCount[n];
480 
481     /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
482     if (pc == 0) {
483       pc = partsCount[n - 1];
484       APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
485       pc *= 2;
486       if (pow5[pc - 1] == 0)
487         pc--;
488       partsCount[n] = pc;
489     }
490 
491     if (power & 1) {
492       integerPart *tmp;
493 
494       APInt::tcFullMultiply(p2, p1, pow5, result, pc);
495       result += pc;
496       if (p2[result - 1] == 0)
497         result--;
498 
499       /* Now result is in p1 with partsCount parts and p2 is scratch
500          space.  */
501       tmp = p1, p1 = p2, p2 = tmp;
502     }
503 
504     pow5 += pc;
505   }
506 
507   if (p1 != dst)
508     APInt::tcAssign(dst, p1, result);
509 
510   return result;
511 }
512 
513 /* Zero at the end to avoid modular arithmetic when adding one; used
514    when rounding up during hexadecimal output.  */
515 static const char hexDigitsLower[] = "0123456789abcdef0";
516 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
517 static const char infinityL[] = "infinity";
518 static const char infinityU[] = "INFINITY";
519 static const char NaNL[] = "nan";
520 static const char NaNU[] = "NAN";
521 
522 /* Write out an integerPart in hexadecimal, starting with the most
523    significant nibble.  Write out exactly COUNT hexdigits, return
524    COUNT.  */
525 static unsigned int
partAsHex(char * dst,integerPart part,unsigned int count,const char * hexDigitChars)526 partAsHex (char *dst, integerPart part, unsigned int count,
527            const char *hexDigitChars)
528 {
529   unsigned int result = count;
530 
531   assert(count != 0 && count <= integerPartWidth / 4);
532 
533   part >>= (integerPartWidth - 4 * count);
534   while (count--) {
535     dst[count] = hexDigitChars[part & 0xf];
536     part >>= 4;
537   }
538 
539   return result;
540 }
541 
542 /* Write out an unsigned decimal integer.  */
543 static char *
writeUnsignedDecimal(char * dst,unsigned int n)544 writeUnsignedDecimal (char *dst, unsigned int n)
545 {
546   char buff[40], *p;
547 
548   p = buff;
549   do
550     *p++ = '0' + n % 10;
551   while (n /= 10);
552 
553   do
554     *dst++ = *--p;
555   while (p != buff);
556 
557   return dst;
558 }
559 
560 /* Write out a signed decimal integer.  */
561 static char *
writeSignedDecimal(char * dst,int value)562 writeSignedDecimal (char *dst, int value)
563 {
564   if (value < 0) {
565     *dst++ = '-';
566     dst = writeUnsignedDecimal(dst, -(unsigned) value);
567   } else
568     dst = writeUnsignedDecimal(dst, value);
569 
570   return dst;
571 }
572 
573 /* Constructors.  */
574 void
initialize(const fltSemantics * ourSemantics)575 APFloat::initialize(const fltSemantics *ourSemantics)
576 {
577   unsigned int count;
578 
579   semantics = ourSemantics;
580   count = partCount();
581   if (count > 1)
582     significand.parts = new integerPart[count];
583 }
584 
585 void
freeSignificand()586 APFloat::freeSignificand()
587 {
588   if (needsCleanup())
589     delete [] significand.parts;
590 }
591 
592 void
assign(const APFloat & rhs)593 APFloat::assign(const APFloat &rhs)
594 {
595   assert(semantics == rhs.semantics);
596 
597   sign = rhs.sign;
598   category = rhs.category;
599   exponent = rhs.exponent;
600   if (isFiniteNonZero() || category == fcNaN)
601     copySignificand(rhs);
602 }
603 
604 void
copySignificand(const APFloat & rhs)605 APFloat::copySignificand(const APFloat &rhs)
606 {
607   assert(isFiniteNonZero() || category == fcNaN);
608   assert(rhs.partCount() >= partCount());
609 
610   APInt::tcAssign(significandParts(), rhs.significandParts(),
611                   partCount());
612 }
613 
614 /* Make this number a NaN, with an arbitrary but deterministic value
615    for the significand.  If double or longer, this is a signalling NaN,
616    which may not be ideal.  If float, this is QNaN(0).  */
makeNaN(bool SNaN,bool Negative,const APInt * fill)617 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
618 {
619   category = fcNaN;
620   sign = Negative;
621 
622   integerPart *significand = significandParts();
623   unsigned numParts = partCount();
624 
625   // Set the significand bits to the fill.
626   if (!fill || fill->getNumWords() < numParts)
627     APInt::tcSet(significand, 0, numParts);
628   if (fill) {
629     APInt::tcAssign(significand, fill->getRawData(),
630                     std::min(fill->getNumWords(), numParts));
631 
632     // Zero out the excess bits of the significand.
633     unsigned bitsToPreserve = semantics->precision - 1;
634     unsigned part = bitsToPreserve / 64;
635     bitsToPreserve %= 64;
636     significand[part] &= ((1ULL << bitsToPreserve) - 1);
637     for (part++; part != numParts; ++part)
638       significand[part] = 0;
639   }
640 
641   unsigned QNaNBit = semantics->precision - 2;
642 
643   if (SNaN) {
644     // We always have to clear the QNaN bit to make it an SNaN.
645     APInt::tcClearBit(significand, QNaNBit);
646 
647     // If there are no bits set in the payload, we have to set
648     // *something* to make it a NaN instead of an infinity;
649     // conventionally, this is the next bit down from the QNaN bit.
650     if (APInt::tcIsZero(significand, numParts))
651       APInt::tcSetBit(significand, QNaNBit - 1);
652   } else {
653     // We always have to set the QNaN bit to make it a QNaN.
654     APInt::tcSetBit(significand, QNaNBit);
655   }
656 
657   // For x87 extended precision, we want to make a NaN, not a
658   // pseudo-NaN.  Maybe we should expose the ability to make
659   // pseudo-NaNs?
660   if (semantics == &APFloat::x87DoubleExtended)
661     APInt::tcSetBit(significand, QNaNBit + 1);
662 }
663 
makeNaN(const fltSemantics & Sem,bool SNaN,bool Negative,const APInt * fill)664 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
665                          const APInt *fill) {
666   APFloat value(Sem, uninitialized);
667   value.makeNaN(SNaN, Negative, fill);
668   return value;
669 }
670 
671 APFloat &
operator =(const APFloat & rhs)672 APFloat::operator=(const APFloat &rhs)
673 {
674   if (this != &rhs) {
675     if (semantics != rhs.semantics) {
676       freeSignificand();
677       initialize(rhs.semantics);
678     }
679     assign(rhs);
680   }
681 
682   return *this;
683 }
684 
685 APFloat &
operator =(APFloat && rhs)686 APFloat::operator=(APFloat &&rhs) {
687   freeSignificand();
688 
689   semantics = rhs.semantics;
690   significand = rhs.significand;
691   exponent = rhs.exponent;
692   category = rhs.category;
693   sign = rhs.sign;
694 
695   rhs.semantics = &Bogus;
696   return *this;
697 }
698 
699 bool
isDenormal() const700 APFloat::isDenormal() const {
701   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
702          (APInt::tcExtractBit(significandParts(),
703                               semantics->precision - 1) == 0);
704 }
705 
706 bool
isSmallest() const707 APFloat::isSmallest() const {
708   // The smallest number by magnitude in our format will be the smallest
709   // denormal, i.e. the floating point number with exponent being minimum
710   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
711   return isFiniteNonZero() && exponent == semantics->minExponent &&
712     significandMSB() == 0;
713 }
714 
isSignificandAllOnes() const715 bool APFloat::isSignificandAllOnes() const {
716   // Test if the significand excluding the integral bit is all ones. This allows
717   // us to test for binade boundaries.
718   const integerPart *Parts = significandParts();
719   const unsigned PartCount = partCount();
720   for (unsigned i = 0; i < PartCount - 1; i++)
721     if (~Parts[i])
722       return false;
723 
724   // Set the unused high bits to all ones when we compare.
725   const unsigned NumHighBits =
726     PartCount*integerPartWidth - semantics->precision + 1;
727   assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
728          "fill than integerPartWidth");
729   const integerPart HighBitFill =
730     ~integerPart(0) << (integerPartWidth - NumHighBits);
731   if (~(Parts[PartCount - 1] | HighBitFill))
732     return false;
733 
734   return true;
735 }
736 
isSignificandAllZeros() const737 bool APFloat::isSignificandAllZeros() const {
738   // Test if the significand excluding the integral bit is all zeros. This
739   // allows us to test for binade boundaries.
740   const integerPart *Parts = significandParts();
741   const unsigned PartCount = partCount();
742 
743   for (unsigned i = 0; i < PartCount - 1; i++)
744     if (Parts[i])
745       return false;
746 
747   const unsigned NumHighBits =
748     PartCount*integerPartWidth - semantics->precision + 1;
749   assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
750          "clear than integerPartWidth");
751   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
752 
753   if (Parts[PartCount - 1] & HighBitMask)
754     return false;
755 
756   return true;
757 }
758 
759 bool
isLargest() const760 APFloat::isLargest() const {
761   // The largest number by magnitude in our format will be the floating point
762   // number with maximum exponent and with significand that is all ones.
763   return isFiniteNonZero() && exponent == semantics->maxExponent
764     && isSignificandAllOnes();
765 }
766 
767 bool
bitwiseIsEqual(const APFloat & rhs) const768 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
769   if (this == &rhs)
770     return true;
771   if (semantics != rhs.semantics ||
772       category != rhs.category ||
773       sign != rhs.sign)
774     return false;
775   if (category==fcZero || category==fcInfinity)
776     return true;
777   else if (isFiniteNonZero() && exponent!=rhs.exponent)
778     return false;
779   else {
780     int i= partCount();
781     const integerPart* p=significandParts();
782     const integerPart* q=rhs.significandParts();
783     for (; i>0; i--, p++, q++) {
784       if (*p != *q)
785         return false;
786     }
787     return true;
788   }
789 }
790 
APFloat(const fltSemantics & ourSemantics,integerPart value)791 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
792   initialize(&ourSemantics);
793   sign = 0;
794   category = fcNormal;
795   zeroSignificand();
796   exponent = ourSemantics.precision - 1;
797   significandParts()[0] = value;
798   normalize(rmNearestTiesToEven, lfExactlyZero);
799 }
800 
APFloat(const fltSemantics & ourSemantics)801 APFloat::APFloat(const fltSemantics &ourSemantics) {
802   initialize(&ourSemantics);
803   category = fcZero;
804   sign = false;
805 }
806 
APFloat(const fltSemantics & ourSemantics,uninitializedTag tag)807 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
808   // Allocates storage if necessary but does not initialize it.
809   initialize(&ourSemantics);
810 }
811 
APFloat(const fltSemantics & ourSemantics,StringRef text)812 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
813   initialize(&ourSemantics);
814   convertFromString(text, rmNearestTiesToEven);
815 }
816 
APFloat(const APFloat & rhs)817 APFloat::APFloat(const APFloat &rhs) {
818   initialize(rhs.semantics);
819   assign(rhs);
820 }
821 
APFloat(APFloat && rhs)822 APFloat::APFloat(APFloat &&rhs) : semantics(&Bogus) {
823   *this = std::move(rhs);
824 }
825 
~APFloat()826 APFloat::~APFloat()
827 {
828   freeSignificand();
829 }
830 
831 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
Profile(FoldingSetNodeID & ID) const832 void APFloat::Profile(FoldingSetNodeID& ID) const {
833   ID.Add(bitcastToAPInt());
834 }
835 
836 unsigned int
partCount() const837 APFloat::partCount() const
838 {
839   return partCountForBits(semantics->precision + 1);
840 }
841 
842 unsigned int
semanticsPrecision(const fltSemantics & semantics)843 APFloat::semanticsPrecision(const fltSemantics &semantics)
844 {
845   return semantics.precision;
846 }
847 
848 const integerPart *
significandParts() const849 APFloat::significandParts() const
850 {
851   return const_cast<APFloat *>(this)->significandParts();
852 }
853 
854 integerPart *
significandParts()855 APFloat::significandParts()
856 {
857   if (partCount() > 1)
858     return significand.parts;
859   else
860     return &significand.part;
861 }
862 
863 void
zeroSignificand()864 APFloat::zeroSignificand()
865 {
866   APInt::tcSet(significandParts(), 0, partCount());
867 }
868 
869 /* Increment an fcNormal floating point number's significand.  */
870 void
incrementSignificand()871 APFloat::incrementSignificand()
872 {
873   integerPart carry;
874 
875   carry = APInt::tcIncrement(significandParts(), partCount());
876 
877   /* Our callers should never cause us to overflow.  */
878   assert(carry == 0);
879   (void)carry;
880 }
881 
882 /* Add the significand of the RHS.  Returns the carry flag.  */
883 integerPart
addSignificand(const APFloat & rhs)884 APFloat::addSignificand(const APFloat &rhs)
885 {
886   integerPart *parts;
887 
888   parts = significandParts();
889 
890   assert(semantics == rhs.semantics);
891   assert(exponent == rhs.exponent);
892 
893   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
894 }
895 
896 /* Subtract the significand of the RHS with a borrow flag.  Returns
897    the borrow flag.  */
898 integerPart
subtractSignificand(const APFloat & rhs,integerPart borrow)899 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
900 {
901   integerPart *parts;
902 
903   parts = significandParts();
904 
905   assert(semantics == rhs.semantics);
906   assert(exponent == rhs.exponent);
907 
908   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
909                            partCount());
910 }
911 
912 /* Multiply the significand of the RHS.  If ADDEND is non-NULL, add it
913    on to the full-precision result of the multiplication.  Returns the
914    lost fraction.  */
915 lostFraction
multiplySignificand(const APFloat & rhs,const APFloat * addend)916 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
917 {
918   unsigned int omsb;        // One, not zero, based MSB.
919   unsigned int partsCount, newPartsCount, precision;
920   integerPart *lhsSignificand;
921   integerPart scratch[4];
922   integerPart *fullSignificand;
923   lostFraction lost_fraction;
924   bool ignored;
925 
926   assert(semantics == rhs.semantics);
927 
928   precision = semantics->precision;
929 
930   // Allocate space for twice as many bits as the original significand, plus one
931   // extra bit for the addition to overflow into.
932   newPartsCount = partCountForBits(precision * 2 + 1);
933 
934   if (newPartsCount > 4)
935     fullSignificand = new integerPart[newPartsCount];
936   else
937     fullSignificand = scratch;
938 
939   lhsSignificand = significandParts();
940   partsCount = partCount();
941 
942   APInt::tcFullMultiply(fullSignificand, lhsSignificand,
943                         rhs.significandParts(), partsCount, partsCount);
944 
945   lost_fraction = lfExactlyZero;
946   omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
947   exponent += rhs.exponent;
948 
949   // Assume the operands involved in the multiplication are single-precision
950   // FP, and the two multiplicants are:
951   //   *this = a23 . a22 ... a0 * 2^e1
952   //     rhs = b23 . b22 ... b0 * 2^e2
953   // the result of multiplication is:
954   //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
955   // Note that there are three significant bits at the left-hand side of the
956   // radix point: two for the multiplication, and an overflow bit for the
957   // addition (that will always be zero at this point). Move the radix point
958   // toward left by two bits, and adjust exponent accordingly.
959   exponent += 2;
960 
961   if (addend && addend->isNonZero()) {
962     // The intermediate result of the multiplication has "2 * precision"
963     // signicant bit; adjust the addend to be consistent with mul result.
964     //
965     Significand savedSignificand = significand;
966     const fltSemantics *savedSemantics = semantics;
967     fltSemantics extendedSemantics;
968     opStatus status;
969     unsigned int extendedPrecision;
970 
971     // Normalize our MSB to one below the top bit to allow for overflow.
972     extendedPrecision = 2 * precision + 1;
973     if (omsb != extendedPrecision - 1) {
974       assert(extendedPrecision > omsb);
975       APInt::tcShiftLeft(fullSignificand, newPartsCount,
976                          (extendedPrecision - 1) - omsb);
977       exponent -= (extendedPrecision - 1) - omsb;
978     }
979 
980     /* Create new semantics.  */
981     extendedSemantics = *semantics;
982     extendedSemantics.precision = extendedPrecision;
983 
984     if (newPartsCount == 1)
985       significand.part = fullSignificand[0];
986     else
987       significand.parts = fullSignificand;
988     semantics = &extendedSemantics;
989 
990     APFloat extendedAddend(*addend);
991     status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
992     assert(status == opOK);
993     (void)status;
994 
995     // Shift the significand of the addend right by one bit. This guarantees
996     // that the high bit of the significand is zero (same as fullSignificand),
997     // so the addition will overflow (if it does overflow at all) into the top bit.
998     lost_fraction = extendedAddend.shiftSignificandRight(1);
999     assert(lost_fraction == lfExactlyZero &&
1000            "Lost precision while shifting addend for fused-multiply-add.");
1001 
1002     lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1003 
1004     /* Restore our state.  */
1005     if (newPartsCount == 1)
1006       fullSignificand[0] = significand.part;
1007     significand = savedSignificand;
1008     semantics = savedSemantics;
1009 
1010     omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1011   }
1012 
1013   // Convert the result having "2 * precision" significant-bits back to the one
1014   // having "precision" significant-bits. First, move the radix point from
1015   // poision "2*precision - 1" to "precision - 1". The exponent need to be
1016   // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1017   exponent -= precision + 1;
1018 
1019   // In case MSB resides at the left-hand side of radix point, shift the
1020   // mantissa right by some amount to make sure the MSB reside right before
1021   // the radix point (i.e. "MSB . rest-significant-bits").
1022   //
1023   // Note that the result is not normalized when "omsb < precision". So, the
1024   // caller needs to call APFloat::normalize() if normalized value is expected.
1025   if (omsb > precision) {
1026     unsigned int bits, significantParts;
1027     lostFraction lf;
1028 
1029     bits = omsb - precision;
1030     significantParts = partCountForBits(omsb);
1031     lf = shiftRight(fullSignificand, significantParts, bits);
1032     lost_fraction = combineLostFractions(lf, lost_fraction);
1033     exponent += bits;
1034   }
1035 
1036   APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1037 
1038   if (newPartsCount > 4)
1039     delete [] fullSignificand;
1040 
1041   return lost_fraction;
1042 }
1043 
1044 /* Multiply the significands of LHS and RHS to DST.  */
1045 lostFraction
divideSignificand(const APFloat & rhs)1046 APFloat::divideSignificand(const APFloat &rhs)
1047 {
1048   unsigned int bit, i, partsCount;
1049   const integerPart *rhsSignificand;
1050   integerPart *lhsSignificand, *dividend, *divisor;
1051   integerPart scratch[4];
1052   lostFraction lost_fraction;
1053 
1054   assert(semantics == rhs.semantics);
1055 
1056   lhsSignificand = significandParts();
1057   rhsSignificand = rhs.significandParts();
1058   partsCount = partCount();
1059 
1060   if (partsCount > 2)
1061     dividend = new integerPart[partsCount * 2];
1062   else
1063     dividend = scratch;
1064 
1065   divisor = dividend + partsCount;
1066 
1067   /* Copy the dividend and divisor as they will be modified in-place.  */
1068   for (i = 0; i < partsCount; i++) {
1069     dividend[i] = lhsSignificand[i];
1070     divisor[i] = rhsSignificand[i];
1071     lhsSignificand[i] = 0;
1072   }
1073 
1074   exponent -= rhs.exponent;
1075 
1076   unsigned int precision = semantics->precision;
1077 
1078   /* Normalize the divisor.  */
1079   bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1080   if (bit) {
1081     exponent += bit;
1082     APInt::tcShiftLeft(divisor, partsCount, bit);
1083   }
1084 
1085   /* Normalize the dividend.  */
1086   bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1087   if (bit) {
1088     exponent -= bit;
1089     APInt::tcShiftLeft(dividend, partsCount, bit);
1090   }
1091 
1092   /* Ensure the dividend >= divisor initially for the loop below.
1093      Incidentally, this means that the division loop below is
1094      guaranteed to set the integer bit to one.  */
1095   if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1096     exponent--;
1097     APInt::tcShiftLeft(dividend, partsCount, 1);
1098     assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1099   }
1100 
1101   /* Long division.  */
1102   for (bit = precision; bit; bit -= 1) {
1103     if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1104       APInt::tcSubtract(dividend, divisor, 0, partsCount);
1105       APInt::tcSetBit(lhsSignificand, bit - 1);
1106     }
1107 
1108     APInt::tcShiftLeft(dividend, partsCount, 1);
1109   }
1110 
1111   /* Figure out the lost fraction.  */
1112   int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1113 
1114   if (cmp > 0)
1115     lost_fraction = lfMoreThanHalf;
1116   else if (cmp == 0)
1117     lost_fraction = lfExactlyHalf;
1118   else if (APInt::tcIsZero(dividend, partsCount))
1119     lost_fraction = lfExactlyZero;
1120   else
1121     lost_fraction = lfLessThanHalf;
1122 
1123   if (partsCount > 2)
1124     delete [] dividend;
1125 
1126   return lost_fraction;
1127 }
1128 
1129 unsigned int
significandMSB() const1130 APFloat::significandMSB() const
1131 {
1132   return APInt::tcMSB(significandParts(), partCount());
1133 }
1134 
1135 unsigned int
significandLSB() const1136 APFloat::significandLSB() const
1137 {
1138   return APInt::tcLSB(significandParts(), partCount());
1139 }
1140 
1141 /* Note that a zero result is NOT normalized to fcZero.  */
1142 lostFraction
shiftSignificandRight(unsigned int bits)1143 APFloat::shiftSignificandRight(unsigned int bits)
1144 {
1145   /* Our exponent should not overflow.  */
1146   assert((ExponentType) (exponent + bits) >= exponent);
1147 
1148   exponent += bits;
1149 
1150   return shiftRight(significandParts(), partCount(), bits);
1151 }
1152 
1153 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
1154 void
shiftSignificandLeft(unsigned int bits)1155 APFloat::shiftSignificandLeft(unsigned int bits)
1156 {
1157   assert(bits < semantics->precision);
1158 
1159   if (bits) {
1160     unsigned int partsCount = partCount();
1161 
1162     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1163     exponent -= bits;
1164 
1165     assert(!APInt::tcIsZero(significandParts(), partsCount));
1166   }
1167 }
1168 
1169 APFloat::cmpResult
compareAbsoluteValue(const APFloat & rhs) const1170 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1171 {
1172   int compare;
1173 
1174   assert(semantics == rhs.semantics);
1175   assert(isFiniteNonZero());
1176   assert(rhs.isFiniteNonZero());
1177 
1178   compare = exponent - rhs.exponent;
1179 
1180   /* If exponents are equal, do an unsigned bignum comparison of the
1181      significands.  */
1182   if (compare == 0)
1183     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1184                                partCount());
1185 
1186   if (compare > 0)
1187     return cmpGreaterThan;
1188   else if (compare < 0)
1189     return cmpLessThan;
1190   else
1191     return cmpEqual;
1192 }
1193 
1194 /* Handle overflow.  Sign is preserved.  We either become infinity or
1195    the largest finite number.  */
1196 APFloat::opStatus
handleOverflow(roundingMode rounding_mode)1197 APFloat::handleOverflow(roundingMode rounding_mode)
1198 {
1199   /* Infinity?  */
1200   if (rounding_mode == rmNearestTiesToEven ||
1201       rounding_mode == rmNearestTiesToAway ||
1202       (rounding_mode == rmTowardPositive && !sign) ||
1203       (rounding_mode == rmTowardNegative && sign)) {
1204     category = fcInfinity;
1205     return (opStatus) (opOverflow | opInexact);
1206   }
1207 
1208   /* Otherwise we become the largest finite number.  */
1209   category = fcNormal;
1210   exponent = semantics->maxExponent;
1211   APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1212                                    semantics->precision);
1213 
1214   return opInexact;
1215 }
1216 
1217 /* Returns TRUE if, when truncating the current number, with BIT the
1218    new LSB, with the given lost fraction and rounding mode, the result
1219    would need to be rounded away from zero (i.e., by increasing the
1220    signficand).  This routine must work for fcZero of both signs, and
1221    fcNormal numbers.  */
1222 bool
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1223 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1224                            lostFraction lost_fraction,
1225                            unsigned int bit) const
1226 {
1227   /* NaNs and infinities should not have lost fractions.  */
1228   assert(isFiniteNonZero() || category == fcZero);
1229 
1230   /* Current callers never pass this so we don't handle it.  */
1231   assert(lost_fraction != lfExactlyZero);
1232 
1233   switch (rounding_mode) {
1234   case rmNearestTiesToAway:
1235     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1236 
1237   case rmNearestTiesToEven:
1238     if (lost_fraction == lfMoreThanHalf)
1239       return true;
1240 
1241     /* Our zeroes don't have a significand to test.  */
1242     if (lost_fraction == lfExactlyHalf && category != fcZero)
1243       return APInt::tcExtractBit(significandParts(), bit);
1244 
1245     return false;
1246 
1247   case rmTowardZero:
1248     return false;
1249 
1250   case rmTowardPositive:
1251     return !sign;
1252 
1253   case rmTowardNegative:
1254     return sign;
1255   }
1256   llvm_unreachable("Invalid rounding mode found");
1257 }
1258 
1259 APFloat::opStatus
normalize(roundingMode rounding_mode,lostFraction lost_fraction)1260 APFloat::normalize(roundingMode rounding_mode,
1261                    lostFraction lost_fraction)
1262 {
1263   unsigned int omsb;                /* One, not zero, based MSB.  */
1264   int exponentChange;
1265 
1266   if (!isFiniteNonZero())
1267     return opOK;
1268 
1269   /* Before rounding normalize the exponent of fcNormal numbers.  */
1270   omsb = significandMSB() + 1;
1271 
1272   if (omsb) {
1273     /* OMSB is numbered from 1.  We want to place it in the integer
1274        bit numbered PRECISION if possible, with a compensating change in
1275        the exponent.  */
1276     exponentChange = omsb - semantics->precision;
1277 
1278     /* If the resulting exponent is too high, overflow according to
1279        the rounding mode.  */
1280     if (exponent + exponentChange > semantics->maxExponent)
1281       return handleOverflow(rounding_mode);
1282 
1283     /* Subnormal numbers have exponent minExponent, and their MSB
1284        is forced based on that.  */
1285     if (exponent + exponentChange < semantics->minExponent)
1286       exponentChange = semantics->minExponent - exponent;
1287 
1288     /* Shifting left is easy as we don't lose precision.  */
1289     if (exponentChange < 0) {
1290       assert(lost_fraction == lfExactlyZero);
1291 
1292       shiftSignificandLeft(-exponentChange);
1293 
1294       return opOK;
1295     }
1296 
1297     if (exponentChange > 0) {
1298       lostFraction lf;
1299 
1300       /* Shift right and capture any new lost fraction.  */
1301       lf = shiftSignificandRight(exponentChange);
1302 
1303       lost_fraction = combineLostFractions(lf, lost_fraction);
1304 
1305       /* Keep OMSB up-to-date.  */
1306       if (omsb > (unsigned) exponentChange)
1307         omsb -= exponentChange;
1308       else
1309         omsb = 0;
1310     }
1311   }
1312 
1313   /* Now round the number according to rounding_mode given the lost
1314      fraction.  */
1315 
1316   /* As specified in IEEE 754, since we do not trap we do not report
1317      underflow for exact results.  */
1318   if (lost_fraction == lfExactlyZero) {
1319     /* Canonicalize zeroes.  */
1320     if (omsb == 0)
1321       category = fcZero;
1322 
1323     return opOK;
1324   }
1325 
1326   /* Increment the significand if we're rounding away from zero.  */
1327   if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1328     if (omsb == 0)
1329       exponent = semantics->minExponent;
1330 
1331     incrementSignificand();
1332     omsb = significandMSB() + 1;
1333 
1334     /* Did the significand increment overflow?  */
1335     if (omsb == (unsigned) semantics->precision + 1) {
1336       /* Renormalize by incrementing the exponent and shifting our
1337          significand right one.  However if we already have the
1338          maximum exponent we overflow to infinity.  */
1339       if (exponent == semantics->maxExponent) {
1340         category = fcInfinity;
1341 
1342         return (opStatus) (opOverflow | opInexact);
1343       }
1344 
1345       shiftSignificandRight(1);
1346 
1347       return opInexact;
1348     }
1349   }
1350 
1351   /* The normal case - we were and are not denormal, and any
1352      significand increment above didn't overflow.  */
1353   if (omsb == semantics->precision)
1354     return opInexact;
1355 
1356   /* We have a non-zero denormal.  */
1357   assert(omsb < semantics->precision);
1358 
1359   /* Canonicalize zeroes.  */
1360   if (omsb == 0)
1361     category = fcZero;
1362 
1363   /* The fcZero case is a denormal that underflowed to zero.  */
1364   return (opStatus) (opUnderflow | opInexact);
1365 }
1366 
1367 APFloat::opStatus
addOrSubtractSpecials(const APFloat & rhs,bool subtract)1368 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1369 {
1370   switch (PackCategoriesIntoKey(category, rhs.category)) {
1371   default:
1372     llvm_unreachable(nullptr);
1373 
1374   case PackCategoriesIntoKey(fcNaN, fcZero):
1375   case PackCategoriesIntoKey(fcNaN, fcNormal):
1376   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1377   case PackCategoriesIntoKey(fcNaN, fcNaN):
1378   case PackCategoriesIntoKey(fcNormal, fcZero):
1379   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1380   case PackCategoriesIntoKey(fcInfinity, fcZero):
1381     return opOK;
1382 
1383   case PackCategoriesIntoKey(fcZero, fcNaN):
1384   case PackCategoriesIntoKey(fcNormal, fcNaN):
1385   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1386     // We need to be sure to flip the sign here for subtraction because we
1387     // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1388     sign = rhs.sign ^ subtract;
1389     category = fcNaN;
1390     copySignificand(rhs);
1391     return opOK;
1392 
1393   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1394   case PackCategoriesIntoKey(fcZero, fcInfinity):
1395     category = fcInfinity;
1396     sign = rhs.sign ^ subtract;
1397     return opOK;
1398 
1399   case PackCategoriesIntoKey(fcZero, fcNormal):
1400     assign(rhs);
1401     sign = rhs.sign ^ subtract;
1402     return opOK;
1403 
1404   case PackCategoriesIntoKey(fcZero, fcZero):
1405     /* Sign depends on rounding mode; handled by caller.  */
1406     return opOK;
1407 
1408   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1409     /* Differently signed infinities can only be validly
1410        subtracted.  */
1411     if (((sign ^ rhs.sign)!=0) != subtract) {
1412       makeNaN();
1413       return opInvalidOp;
1414     }
1415 
1416     return opOK;
1417 
1418   case PackCategoriesIntoKey(fcNormal, fcNormal):
1419     return opDivByZero;
1420   }
1421 }
1422 
1423 /* Add or subtract two normal numbers.  */
1424 lostFraction
addOrSubtractSignificand(const APFloat & rhs,bool subtract)1425 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1426 {
1427   integerPart carry;
1428   lostFraction lost_fraction;
1429   int bits;
1430 
1431   /* Determine if the operation on the absolute values is effectively
1432      an addition or subtraction.  */
1433   subtract ^= static_cast<bool>(sign ^ rhs.sign);
1434 
1435   /* Are we bigger exponent-wise than the RHS?  */
1436   bits = exponent - rhs.exponent;
1437 
1438   /* Subtraction is more subtle than one might naively expect.  */
1439   if (subtract) {
1440     APFloat temp_rhs(rhs);
1441     bool reverse;
1442 
1443     if (bits == 0) {
1444       reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1445       lost_fraction = lfExactlyZero;
1446     } else if (bits > 0) {
1447       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1448       shiftSignificandLeft(1);
1449       reverse = false;
1450     } else {
1451       lost_fraction = shiftSignificandRight(-bits - 1);
1452       temp_rhs.shiftSignificandLeft(1);
1453       reverse = true;
1454     }
1455 
1456     if (reverse) {
1457       carry = temp_rhs.subtractSignificand
1458         (*this, lost_fraction != lfExactlyZero);
1459       copySignificand(temp_rhs);
1460       sign = !sign;
1461     } else {
1462       carry = subtractSignificand
1463         (temp_rhs, lost_fraction != lfExactlyZero);
1464     }
1465 
1466     /* Invert the lost fraction - it was on the RHS and
1467        subtracted.  */
1468     if (lost_fraction == lfLessThanHalf)
1469       lost_fraction = lfMoreThanHalf;
1470     else if (lost_fraction == lfMoreThanHalf)
1471       lost_fraction = lfLessThanHalf;
1472 
1473     /* The code above is intended to ensure that no borrow is
1474        necessary.  */
1475     assert(!carry);
1476     (void)carry;
1477   } else {
1478     if (bits > 0) {
1479       APFloat temp_rhs(rhs);
1480 
1481       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1482       carry = addSignificand(temp_rhs);
1483     } else {
1484       lost_fraction = shiftSignificandRight(-bits);
1485       carry = addSignificand(rhs);
1486     }
1487 
1488     /* We have a guard bit; generating a carry cannot happen.  */
1489     assert(!carry);
1490     (void)carry;
1491   }
1492 
1493   return lost_fraction;
1494 }
1495 
1496 APFloat::opStatus
multiplySpecials(const APFloat & rhs)1497 APFloat::multiplySpecials(const APFloat &rhs)
1498 {
1499   switch (PackCategoriesIntoKey(category, rhs.category)) {
1500   default:
1501     llvm_unreachable(nullptr);
1502 
1503   case PackCategoriesIntoKey(fcNaN, fcZero):
1504   case PackCategoriesIntoKey(fcNaN, fcNormal):
1505   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1506   case PackCategoriesIntoKey(fcNaN, fcNaN):
1507     sign = false;
1508     return opOK;
1509 
1510   case PackCategoriesIntoKey(fcZero, fcNaN):
1511   case PackCategoriesIntoKey(fcNormal, fcNaN):
1512   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1513     sign = false;
1514     category = fcNaN;
1515     copySignificand(rhs);
1516     return opOK;
1517 
1518   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1519   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1520   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1521     category = fcInfinity;
1522     return opOK;
1523 
1524   case PackCategoriesIntoKey(fcZero, fcNormal):
1525   case PackCategoriesIntoKey(fcNormal, fcZero):
1526   case PackCategoriesIntoKey(fcZero, fcZero):
1527     category = fcZero;
1528     return opOK;
1529 
1530   case PackCategoriesIntoKey(fcZero, fcInfinity):
1531   case PackCategoriesIntoKey(fcInfinity, fcZero):
1532     makeNaN();
1533     return opInvalidOp;
1534 
1535   case PackCategoriesIntoKey(fcNormal, fcNormal):
1536     return opOK;
1537   }
1538 }
1539 
1540 APFloat::opStatus
divideSpecials(const APFloat & rhs)1541 APFloat::divideSpecials(const APFloat &rhs)
1542 {
1543   switch (PackCategoriesIntoKey(category, rhs.category)) {
1544   default:
1545     llvm_unreachable(nullptr);
1546 
1547   case PackCategoriesIntoKey(fcZero, fcNaN):
1548   case PackCategoriesIntoKey(fcNormal, fcNaN):
1549   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1550     category = fcNaN;
1551     copySignificand(rhs);
1552   case PackCategoriesIntoKey(fcNaN, fcZero):
1553   case PackCategoriesIntoKey(fcNaN, fcNormal):
1554   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1555   case PackCategoriesIntoKey(fcNaN, fcNaN):
1556     sign = false;
1557   case PackCategoriesIntoKey(fcInfinity, fcZero):
1558   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1559   case PackCategoriesIntoKey(fcZero, fcInfinity):
1560   case PackCategoriesIntoKey(fcZero, fcNormal):
1561     return opOK;
1562 
1563   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1564     category = fcZero;
1565     return opOK;
1566 
1567   case PackCategoriesIntoKey(fcNormal, fcZero):
1568     category = fcInfinity;
1569     return opDivByZero;
1570 
1571   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1572   case PackCategoriesIntoKey(fcZero, fcZero):
1573     makeNaN();
1574     return opInvalidOp;
1575 
1576   case PackCategoriesIntoKey(fcNormal, fcNormal):
1577     return opOK;
1578   }
1579 }
1580 
1581 APFloat::opStatus
modSpecials(const APFloat & rhs)1582 APFloat::modSpecials(const APFloat &rhs)
1583 {
1584   switch (PackCategoriesIntoKey(category, rhs.category)) {
1585   default:
1586     llvm_unreachable(nullptr);
1587 
1588   case PackCategoriesIntoKey(fcNaN, fcZero):
1589   case PackCategoriesIntoKey(fcNaN, fcNormal):
1590   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1591   case PackCategoriesIntoKey(fcNaN, fcNaN):
1592   case PackCategoriesIntoKey(fcZero, fcInfinity):
1593   case PackCategoriesIntoKey(fcZero, fcNormal):
1594   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1595     return opOK;
1596 
1597   case PackCategoriesIntoKey(fcZero, fcNaN):
1598   case PackCategoriesIntoKey(fcNormal, fcNaN):
1599   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1600     sign = false;
1601     category = fcNaN;
1602     copySignificand(rhs);
1603     return opOK;
1604 
1605   case PackCategoriesIntoKey(fcNormal, fcZero):
1606   case PackCategoriesIntoKey(fcInfinity, fcZero):
1607   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1608   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1609   case PackCategoriesIntoKey(fcZero, fcZero):
1610     makeNaN();
1611     return opInvalidOp;
1612 
1613   case PackCategoriesIntoKey(fcNormal, fcNormal):
1614     return opOK;
1615   }
1616 }
1617 
1618 /* Change sign.  */
1619 void
changeSign()1620 APFloat::changeSign()
1621 {
1622   /* Look mummy, this one's easy.  */
1623   sign = !sign;
1624 }
1625 
1626 void
clearSign()1627 APFloat::clearSign()
1628 {
1629   /* So is this one. */
1630   sign = 0;
1631 }
1632 
1633 void
copySign(const APFloat & rhs)1634 APFloat::copySign(const APFloat &rhs)
1635 {
1636   /* And this one. */
1637   sign = rhs.sign;
1638 }
1639 
1640 /* Normalized addition or subtraction.  */
1641 APFloat::opStatus
addOrSubtract(const APFloat & rhs,roundingMode rounding_mode,bool subtract)1642 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1643                        bool subtract)
1644 {
1645   opStatus fs;
1646 
1647   fs = addOrSubtractSpecials(rhs, subtract);
1648 
1649   /* This return code means it was not a simple case.  */
1650   if (fs == opDivByZero) {
1651     lostFraction lost_fraction;
1652 
1653     lost_fraction = addOrSubtractSignificand(rhs, subtract);
1654     fs = normalize(rounding_mode, lost_fraction);
1655 
1656     /* Can only be zero if we lost no fraction.  */
1657     assert(category != fcZero || lost_fraction == lfExactlyZero);
1658   }
1659 
1660   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1661      positive zero unless rounding to minus infinity, except that
1662      adding two like-signed zeroes gives that zero.  */
1663   if (category == fcZero) {
1664     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1665       sign = (rounding_mode == rmTowardNegative);
1666   }
1667 
1668   return fs;
1669 }
1670 
1671 /* Normalized addition.  */
1672 APFloat::opStatus
add(const APFloat & rhs,roundingMode rounding_mode)1673 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1674 {
1675   return addOrSubtract(rhs, rounding_mode, false);
1676 }
1677 
1678 /* Normalized subtraction.  */
1679 APFloat::opStatus
subtract(const APFloat & rhs,roundingMode rounding_mode)1680 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1681 {
1682   return addOrSubtract(rhs, rounding_mode, true);
1683 }
1684 
1685 /* Normalized multiply.  */
1686 APFloat::opStatus
multiply(const APFloat & rhs,roundingMode rounding_mode)1687 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1688 {
1689   opStatus fs;
1690 
1691   sign ^= rhs.sign;
1692   fs = multiplySpecials(rhs);
1693 
1694   if (isFiniteNonZero()) {
1695     lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1696     fs = normalize(rounding_mode, lost_fraction);
1697     if (lost_fraction != lfExactlyZero)
1698       fs = (opStatus) (fs | opInexact);
1699   }
1700 
1701   return fs;
1702 }
1703 
1704 /* Normalized divide.  */
1705 APFloat::opStatus
divide(const APFloat & rhs,roundingMode rounding_mode)1706 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1707 {
1708   opStatus fs;
1709 
1710   sign ^= rhs.sign;
1711   fs = divideSpecials(rhs);
1712 
1713   if (isFiniteNonZero()) {
1714     lostFraction lost_fraction = divideSignificand(rhs);
1715     fs = normalize(rounding_mode, lost_fraction);
1716     if (lost_fraction != lfExactlyZero)
1717       fs = (opStatus) (fs | opInexact);
1718   }
1719 
1720   return fs;
1721 }
1722 
1723 /* Normalized remainder.  This is not currently correct in all cases.  */
1724 APFloat::opStatus
remainder(const APFloat & rhs)1725 APFloat::remainder(const APFloat &rhs)
1726 {
1727   opStatus fs;
1728   APFloat V = *this;
1729   unsigned int origSign = sign;
1730 
1731   fs = V.divide(rhs, rmNearestTiesToEven);
1732   if (fs == opDivByZero)
1733     return fs;
1734 
1735   int parts = partCount();
1736   integerPart *x = new integerPart[parts];
1737   bool ignored;
1738   fs = V.convertToInteger(x, parts * integerPartWidth, true,
1739                           rmNearestTiesToEven, &ignored);
1740   if (fs==opInvalidOp)
1741     return fs;
1742 
1743   fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1744                                         rmNearestTiesToEven);
1745   assert(fs==opOK);   // should always work
1746 
1747   fs = V.multiply(rhs, rmNearestTiesToEven);
1748   assert(fs==opOK || fs==opInexact);   // should not overflow or underflow
1749 
1750   fs = subtract(V, rmNearestTiesToEven);
1751   assert(fs==opOK || fs==opInexact);   // likewise
1752 
1753   if (isZero())
1754     sign = origSign;    // IEEE754 requires this
1755   delete[] x;
1756   return fs;
1757 }
1758 
1759 /* Normalized llvm frem (C fmod).
1760    This is not currently correct in all cases.  */
1761 APFloat::opStatus
mod(const APFloat & rhs,roundingMode rounding_mode)1762 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1763 {
1764   opStatus fs;
1765   fs = modSpecials(rhs);
1766 
1767   if (isFiniteNonZero() && rhs.isFiniteNonZero()) {
1768     APFloat V = *this;
1769     unsigned int origSign = sign;
1770 
1771     fs = V.divide(rhs, rmNearestTiesToEven);
1772     if (fs == opDivByZero)
1773       return fs;
1774 
1775     int parts = partCount();
1776     integerPart *x = new integerPart[parts];
1777     bool ignored;
1778     fs = V.convertToInteger(x, parts * integerPartWidth, true,
1779                             rmTowardZero, &ignored);
1780     if (fs==opInvalidOp)
1781       return fs;
1782 
1783     fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1784                                           rmNearestTiesToEven);
1785     assert(fs==opOK);   // should always work
1786 
1787     fs = V.multiply(rhs, rounding_mode);
1788     assert(fs==opOK || fs==opInexact);   // should not overflow or underflow
1789 
1790     fs = subtract(V, rounding_mode);
1791     assert(fs==opOK || fs==opInexact);   // likewise
1792 
1793     if (isZero())
1794       sign = origSign;    // IEEE754 requires this
1795     delete[] x;
1796   }
1797   return fs;
1798 }
1799 
1800 /* Normalized fused-multiply-add.  */
1801 APFloat::opStatus
fusedMultiplyAdd(const APFloat & multiplicand,const APFloat & addend,roundingMode rounding_mode)1802 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1803                           const APFloat &addend,
1804                           roundingMode rounding_mode)
1805 {
1806   opStatus fs;
1807 
1808   /* Post-multiplication sign, before addition.  */
1809   sign ^= multiplicand.sign;
1810 
1811   /* If and only if all arguments are normal do we need to do an
1812      extended-precision calculation.  */
1813   if (isFiniteNonZero() &&
1814       multiplicand.isFiniteNonZero() &&
1815       addend.isFinite()) {
1816     lostFraction lost_fraction;
1817 
1818     lost_fraction = multiplySignificand(multiplicand, &addend);
1819     fs = normalize(rounding_mode, lost_fraction);
1820     if (lost_fraction != lfExactlyZero)
1821       fs = (opStatus) (fs | opInexact);
1822 
1823     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1824        positive zero unless rounding to minus infinity, except that
1825        adding two like-signed zeroes gives that zero.  */
1826     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1827       sign = (rounding_mode == rmTowardNegative);
1828   } else {
1829     fs = multiplySpecials(multiplicand);
1830 
1831     /* FS can only be opOK or opInvalidOp.  There is no more work
1832        to do in the latter case.  The IEEE-754R standard says it is
1833        implementation-defined in this case whether, if ADDEND is a
1834        quiet NaN, we raise invalid op; this implementation does so.
1835 
1836        If we need to do the addition we can do so with normal
1837        precision.  */
1838     if (fs == opOK)
1839       fs = addOrSubtract(addend, rounding_mode, false);
1840   }
1841 
1842   return fs;
1843 }
1844 
1845 /* Rounding-mode corrrect round to integral value.  */
roundToIntegral(roundingMode rounding_mode)1846 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1847   opStatus fs;
1848 
1849   // If the exponent is large enough, we know that this value is already
1850   // integral, and the arithmetic below would potentially cause it to saturate
1851   // to +/-Inf.  Bail out early instead.
1852   if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1853     return opOK;
1854 
1855   // The algorithm here is quite simple: we add 2^(p-1), where p is the
1856   // precision of our format, and then subtract it back off again.  The choice
1857   // of rounding modes for the addition/subtraction determines the rounding mode
1858   // for our integral rounding as well.
1859   // NOTE: When the input value is negative, we do subtraction followed by
1860   // addition instead.
1861   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1862   IntegerConstant <<= semanticsPrecision(*semantics)-1;
1863   APFloat MagicConstant(*semantics);
1864   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1865                                       rmNearestTiesToEven);
1866   MagicConstant.copySign(*this);
1867 
1868   if (fs != opOK)
1869     return fs;
1870 
1871   // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1872   bool inputSign = isNegative();
1873 
1874   fs = add(MagicConstant, rounding_mode);
1875   if (fs != opOK && fs != opInexact)
1876     return fs;
1877 
1878   fs = subtract(MagicConstant, rounding_mode);
1879 
1880   // Restore the input sign.
1881   if (inputSign != isNegative())
1882     changeSign();
1883 
1884   return fs;
1885 }
1886 
1887 
1888 /* Comparison requires normalized numbers.  */
1889 APFloat::cmpResult
compare(const APFloat & rhs) const1890 APFloat::compare(const APFloat &rhs) const
1891 {
1892   cmpResult result;
1893 
1894   assert(semantics == rhs.semantics);
1895 
1896   switch (PackCategoriesIntoKey(category, rhs.category)) {
1897   default:
1898     llvm_unreachable(nullptr);
1899 
1900   case PackCategoriesIntoKey(fcNaN, fcZero):
1901   case PackCategoriesIntoKey(fcNaN, fcNormal):
1902   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1903   case PackCategoriesIntoKey(fcNaN, fcNaN):
1904   case PackCategoriesIntoKey(fcZero, fcNaN):
1905   case PackCategoriesIntoKey(fcNormal, fcNaN):
1906   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1907     return cmpUnordered;
1908 
1909   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1910   case PackCategoriesIntoKey(fcInfinity, fcZero):
1911   case PackCategoriesIntoKey(fcNormal, fcZero):
1912     if (sign)
1913       return cmpLessThan;
1914     else
1915       return cmpGreaterThan;
1916 
1917   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1918   case PackCategoriesIntoKey(fcZero, fcInfinity):
1919   case PackCategoriesIntoKey(fcZero, fcNormal):
1920     if (rhs.sign)
1921       return cmpGreaterThan;
1922     else
1923       return cmpLessThan;
1924 
1925   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1926     if (sign == rhs.sign)
1927       return cmpEqual;
1928     else if (sign)
1929       return cmpLessThan;
1930     else
1931       return cmpGreaterThan;
1932 
1933   case PackCategoriesIntoKey(fcZero, fcZero):
1934     return cmpEqual;
1935 
1936   case PackCategoriesIntoKey(fcNormal, fcNormal):
1937     break;
1938   }
1939 
1940   /* Two normal numbers.  Do they have the same sign?  */
1941   if (sign != rhs.sign) {
1942     if (sign)
1943       result = cmpLessThan;
1944     else
1945       result = cmpGreaterThan;
1946   } else {
1947     /* Compare absolute values; invert result if negative.  */
1948     result = compareAbsoluteValue(rhs);
1949 
1950     if (sign) {
1951       if (result == cmpLessThan)
1952         result = cmpGreaterThan;
1953       else if (result == cmpGreaterThan)
1954         result = cmpLessThan;
1955     }
1956   }
1957 
1958   return result;
1959 }
1960 
1961 /// APFloat::convert - convert a value of one floating point type to another.
1962 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
1963 /// records whether the transformation lost information, i.e. whether
1964 /// converting the result back to the original type will produce the
1965 /// original value (this is almost the same as return value==fsOK, but there
1966 /// are edge cases where this is not so).
1967 
1968 APFloat::opStatus
convert(const fltSemantics & toSemantics,roundingMode rounding_mode,bool * losesInfo)1969 APFloat::convert(const fltSemantics &toSemantics,
1970                  roundingMode rounding_mode, bool *losesInfo)
1971 {
1972   lostFraction lostFraction;
1973   unsigned int newPartCount, oldPartCount;
1974   opStatus fs;
1975   int shift;
1976   const fltSemantics &fromSemantics = *semantics;
1977 
1978   lostFraction = lfExactlyZero;
1979   newPartCount = partCountForBits(toSemantics.precision + 1);
1980   oldPartCount = partCount();
1981   shift = toSemantics.precision - fromSemantics.precision;
1982 
1983   bool X86SpecialNan = false;
1984   if (&fromSemantics == &APFloat::x87DoubleExtended &&
1985       &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1986       (!(*significandParts() & 0x8000000000000000ULL) ||
1987        !(*significandParts() & 0x4000000000000000ULL))) {
1988     // x86 has some unusual NaNs which cannot be represented in any other
1989     // format; note them here.
1990     X86SpecialNan = true;
1991   }
1992 
1993   // If this is a truncation of a denormal number, and the target semantics
1994   // has larger exponent range than the source semantics (this can happen
1995   // when truncating from PowerPC double-double to double format), the
1996   // right shift could lose result mantissa bits.  Adjust exponent instead
1997   // of performing excessive shift.
1998   if (shift < 0 && isFiniteNonZero()) {
1999     int exponentChange = significandMSB() + 1 - fromSemantics.precision;
2000     if (exponent + exponentChange < toSemantics.minExponent)
2001       exponentChange = toSemantics.minExponent - exponent;
2002     if (exponentChange < shift)
2003       exponentChange = shift;
2004     if (exponentChange < 0) {
2005       shift -= exponentChange;
2006       exponent += exponentChange;
2007     }
2008   }
2009 
2010   // If this is a truncation, perform the shift before we narrow the storage.
2011   if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2012     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2013 
2014   // Fix the storage so it can hold to new value.
2015   if (newPartCount > oldPartCount) {
2016     // The new type requires more storage; make it available.
2017     integerPart *newParts;
2018     newParts = new integerPart[newPartCount];
2019     APInt::tcSet(newParts, 0, newPartCount);
2020     if (isFiniteNonZero() || category==fcNaN)
2021       APInt::tcAssign(newParts, significandParts(), oldPartCount);
2022     freeSignificand();
2023     significand.parts = newParts;
2024   } else if (newPartCount == 1 && oldPartCount != 1) {
2025     // Switch to built-in storage for a single part.
2026     integerPart newPart = 0;
2027     if (isFiniteNonZero() || category==fcNaN)
2028       newPart = significandParts()[0];
2029     freeSignificand();
2030     significand.part = newPart;
2031   }
2032 
2033   // Now that we have the right storage, switch the semantics.
2034   semantics = &toSemantics;
2035 
2036   // If this is an extension, perform the shift now that the storage is
2037   // available.
2038   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2039     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2040 
2041   if (isFiniteNonZero()) {
2042     fs = normalize(rounding_mode, lostFraction);
2043     *losesInfo = (fs != opOK);
2044   } else if (category == fcNaN) {
2045     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2046 
2047     // For x87 extended precision, we want to make a NaN, not a special NaN if
2048     // the input wasn't special either.
2049     if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
2050       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2051 
2052     // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2053     // does not give you back the same bits.  This is dubious, and we
2054     // don't currently do it.  You're really supposed to get
2055     // an invalid operation signal at runtime, but nobody does that.
2056     fs = opOK;
2057   } else {
2058     *losesInfo = false;
2059     fs = opOK;
2060   }
2061 
2062   return fs;
2063 }
2064 
2065 /* Convert a floating point number to an integer according to the
2066    rounding mode.  If the rounded integer value is out of range this
2067    returns an invalid operation exception and the contents of the
2068    destination parts are unspecified.  If the rounded value is in
2069    range but the floating point number is not the exact integer, the C
2070    standard doesn't require an inexact exception to be raised.  IEEE
2071    854 does require it so we do that.
2072 
2073    Note that for conversions to integer type the C standard requires
2074    round-to-zero to always be used.  */
2075 APFloat::opStatus
convertToSignExtendedInteger(integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2076 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
2077                                       bool isSigned,
2078                                       roundingMode rounding_mode,
2079                                       bool *isExact) const
2080 {
2081   lostFraction lost_fraction;
2082   const integerPart *src;
2083   unsigned int dstPartsCount, truncatedBits;
2084 
2085   *isExact = false;
2086 
2087   /* Handle the three special cases first.  */
2088   if (category == fcInfinity || category == fcNaN)
2089     return opInvalidOp;
2090 
2091   dstPartsCount = partCountForBits(width);
2092 
2093   if (category == fcZero) {
2094     APInt::tcSet(parts, 0, dstPartsCount);
2095     // Negative zero can't be represented as an int.
2096     *isExact = !sign;
2097     return opOK;
2098   }
2099 
2100   src = significandParts();
2101 
2102   /* Step 1: place our absolute value, with any fraction truncated, in
2103      the destination.  */
2104   if (exponent < 0) {
2105     /* Our absolute value is less than one; truncate everything.  */
2106     APInt::tcSet(parts, 0, dstPartsCount);
2107     /* For exponent -1 the integer bit represents .5, look at that.
2108        For smaller exponents leftmost truncated bit is 0. */
2109     truncatedBits = semantics->precision -1U - exponent;
2110   } else {
2111     /* We want the most significant (exponent + 1) bits; the rest are
2112        truncated.  */
2113     unsigned int bits = exponent + 1U;
2114 
2115     /* Hopelessly large in magnitude?  */
2116     if (bits > width)
2117       return opInvalidOp;
2118 
2119     if (bits < semantics->precision) {
2120       /* We truncate (semantics->precision - bits) bits.  */
2121       truncatedBits = semantics->precision - bits;
2122       APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2123     } else {
2124       /* We want at least as many bits as are available.  */
2125       APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2126       APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2127       truncatedBits = 0;
2128     }
2129   }
2130 
2131   /* Step 2: work out any lost fraction, and increment the absolute
2132      value if we would round away from zero.  */
2133   if (truncatedBits) {
2134     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2135                                                   truncatedBits);
2136     if (lost_fraction != lfExactlyZero &&
2137         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2138       if (APInt::tcIncrement(parts, dstPartsCount))
2139         return opInvalidOp;     /* Overflow.  */
2140     }
2141   } else {
2142     lost_fraction = lfExactlyZero;
2143   }
2144 
2145   /* Step 3: check if we fit in the destination.  */
2146   unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2147 
2148   if (sign) {
2149     if (!isSigned) {
2150       /* Negative numbers cannot be represented as unsigned.  */
2151       if (omsb != 0)
2152         return opInvalidOp;
2153     } else {
2154       /* It takes omsb bits to represent the unsigned integer value.
2155          We lose a bit for the sign, but care is needed as the
2156          maximally negative integer is a special case.  */
2157       if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2158         return opInvalidOp;
2159 
2160       /* This case can happen because of rounding.  */
2161       if (omsb > width)
2162         return opInvalidOp;
2163     }
2164 
2165     APInt::tcNegate (parts, dstPartsCount);
2166   } else {
2167     if (omsb >= width + !isSigned)
2168       return opInvalidOp;
2169   }
2170 
2171   if (lost_fraction == lfExactlyZero) {
2172     *isExact = true;
2173     return opOK;
2174   } else
2175     return opInexact;
2176 }
2177 
2178 /* Same as convertToSignExtendedInteger, except we provide
2179    deterministic values in case of an invalid operation exception,
2180    namely zero for NaNs and the minimal or maximal value respectively
2181    for underflow or overflow.
2182    The *isExact output tells whether the result is exact, in the sense
2183    that converting it back to the original floating point type produces
2184    the original value.  This is almost equivalent to result==opOK,
2185    except for negative zeroes.
2186 */
2187 APFloat::opStatus
convertToInteger(integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2188 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2189                           bool isSigned,
2190                           roundingMode rounding_mode, bool *isExact) const
2191 {
2192   opStatus fs;
2193 
2194   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2195                                     isExact);
2196 
2197   if (fs == opInvalidOp) {
2198     unsigned int bits, dstPartsCount;
2199 
2200     dstPartsCount = partCountForBits(width);
2201 
2202     if (category == fcNaN)
2203       bits = 0;
2204     else if (sign)
2205       bits = isSigned;
2206     else
2207       bits = width - isSigned;
2208 
2209     APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2210     if (sign && isSigned)
2211       APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2212   }
2213 
2214   return fs;
2215 }
2216 
2217 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2218    an APSInt, whose initial bit-width and signed-ness are used to determine the
2219    precision of the conversion.
2220  */
2221 APFloat::opStatus
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const2222 APFloat::convertToInteger(APSInt &result,
2223                           roundingMode rounding_mode, bool *isExact) const
2224 {
2225   unsigned bitWidth = result.getBitWidth();
2226   SmallVector<uint64_t, 4> parts(result.getNumWords());
2227   opStatus status = convertToInteger(
2228     parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2229   // Keeps the original signed-ness.
2230   result = APInt(bitWidth, parts);
2231   return status;
2232 }
2233 
2234 /* Convert an unsigned integer SRC to a floating point number,
2235    rounding according to ROUNDING_MODE.  The sign of the floating
2236    point number is not modified.  */
2237 APFloat::opStatus
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2238 APFloat::convertFromUnsignedParts(const integerPart *src,
2239                                   unsigned int srcCount,
2240                                   roundingMode rounding_mode)
2241 {
2242   unsigned int omsb, precision, dstCount;
2243   integerPart *dst;
2244   lostFraction lost_fraction;
2245 
2246   category = fcNormal;
2247   omsb = APInt::tcMSB(src, srcCount) + 1;
2248   dst = significandParts();
2249   dstCount = partCount();
2250   precision = semantics->precision;
2251 
2252   /* We want the most significant PRECISION bits of SRC.  There may not
2253      be that many; extract what we can.  */
2254   if (precision <= omsb) {
2255     exponent = omsb - 1;
2256     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2257                                                   omsb - precision);
2258     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2259   } else {
2260     exponent = precision - 1;
2261     lost_fraction = lfExactlyZero;
2262     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2263   }
2264 
2265   return normalize(rounding_mode, lost_fraction);
2266 }
2267 
2268 APFloat::opStatus
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2269 APFloat::convertFromAPInt(const APInt &Val,
2270                           bool isSigned,
2271                           roundingMode rounding_mode)
2272 {
2273   unsigned int partCount = Val.getNumWords();
2274   APInt api = Val;
2275 
2276   sign = false;
2277   if (isSigned && api.isNegative()) {
2278     sign = true;
2279     api = -api;
2280   }
2281 
2282   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2283 }
2284 
2285 /* Convert a two's complement integer SRC to a floating point number,
2286    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2287    integer is signed, in which case it must be sign-extended.  */
2288 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2289 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2290                                         unsigned int srcCount,
2291                                         bool isSigned,
2292                                         roundingMode rounding_mode)
2293 {
2294   opStatus status;
2295 
2296   if (isSigned &&
2297       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2298     integerPart *copy;
2299 
2300     /* If we're signed and negative negate a copy.  */
2301     sign = true;
2302     copy = new integerPart[srcCount];
2303     APInt::tcAssign(copy, src, srcCount);
2304     APInt::tcNegate(copy, srcCount);
2305     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2306     delete [] copy;
2307   } else {
2308     sign = false;
2309     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2310   }
2311 
2312   return status;
2313 }
2314 
2315 /* FIXME: should this just take a const APInt reference?  */
2316 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2317 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2318                                         unsigned int width, bool isSigned,
2319                                         roundingMode rounding_mode)
2320 {
2321   unsigned int partCount = partCountForBits(width);
2322   APInt api = APInt(width, makeArrayRef(parts, partCount));
2323 
2324   sign = false;
2325   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2326     sign = true;
2327     api = -api;
2328   }
2329 
2330   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2331 }
2332 
2333 APFloat::opStatus
convertFromHexadecimalString(StringRef s,roundingMode rounding_mode)2334 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2335 {
2336   lostFraction lost_fraction = lfExactlyZero;
2337 
2338   category = fcNormal;
2339   zeroSignificand();
2340   exponent = 0;
2341 
2342   integerPart *significand = significandParts();
2343   unsigned partsCount = partCount();
2344   unsigned bitPos = partsCount * integerPartWidth;
2345   bool computedTrailingFraction = false;
2346 
2347   // Skip leading zeroes and any (hexa)decimal point.
2348   StringRef::iterator begin = s.begin();
2349   StringRef::iterator end = s.end();
2350   StringRef::iterator dot;
2351   StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2352   StringRef::iterator firstSignificantDigit = p;
2353 
2354   while (p != end) {
2355     integerPart hex_value;
2356 
2357     if (*p == '.') {
2358       assert(dot == end && "String contains multiple dots");
2359       dot = p++;
2360       continue;
2361     }
2362 
2363     hex_value = hexDigitValue(*p);
2364     if (hex_value == -1U)
2365       break;
2366 
2367     p++;
2368 
2369     // Store the number while we have space.
2370     if (bitPos) {
2371       bitPos -= 4;
2372       hex_value <<= bitPos % integerPartWidth;
2373       significand[bitPos / integerPartWidth] |= hex_value;
2374     } else if (!computedTrailingFraction) {
2375       lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2376       computedTrailingFraction = true;
2377     }
2378   }
2379 
2380   /* Hex floats require an exponent but not a hexadecimal point.  */
2381   assert(p != end && "Hex strings require an exponent");
2382   assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2383   assert(p != begin && "Significand has no digits");
2384   assert((dot == end || p - begin != 1) && "Significand has no digits");
2385 
2386   /* Ignore the exponent if we are zero.  */
2387   if (p != firstSignificantDigit) {
2388     int expAdjustment;
2389 
2390     /* Implicit hexadecimal point?  */
2391     if (dot == end)
2392       dot = p;
2393 
2394     /* Calculate the exponent adjustment implicit in the number of
2395        significant digits.  */
2396     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2397     if (expAdjustment < 0)
2398       expAdjustment++;
2399     expAdjustment = expAdjustment * 4 - 1;
2400 
2401     /* Adjust for writing the significand starting at the most
2402        significant nibble.  */
2403     expAdjustment += semantics->precision;
2404     expAdjustment -= partsCount * integerPartWidth;
2405 
2406     /* Adjust for the given exponent.  */
2407     exponent = totalExponent(p + 1, end, expAdjustment);
2408   }
2409 
2410   return normalize(rounding_mode, lost_fraction);
2411 }
2412 
2413 APFloat::opStatus
roundSignificandWithExponent(const integerPart * decSigParts,unsigned sigPartCount,int exp,roundingMode rounding_mode)2414 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2415                                       unsigned sigPartCount, int exp,
2416                                       roundingMode rounding_mode)
2417 {
2418   unsigned int parts, pow5PartCount;
2419   fltSemantics calcSemantics = { 32767, -32767, 0 };
2420   integerPart pow5Parts[maxPowerOfFiveParts];
2421   bool isNearest;
2422 
2423   isNearest = (rounding_mode == rmNearestTiesToEven ||
2424                rounding_mode == rmNearestTiesToAway);
2425 
2426   parts = partCountForBits(semantics->precision + 11);
2427 
2428   /* Calculate pow(5, abs(exp)).  */
2429   pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2430 
2431   for (;; parts *= 2) {
2432     opStatus sigStatus, powStatus;
2433     unsigned int excessPrecision, truncatedBits;
2434 
2435     calcSemantics.precision = parts * integerPartWidth - 1;
2436     excessPrecision = calcSemantics.precision - semantics->precision;
2437     truncatedBits = excessPrecision;
2438 
2439     APFloat decSig = APFloat::getZero(calcSemantics, sign);
2440     APFloat pow5(calcSemantics);
2441 
2442     sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2443                                                 rmNearestTiesToEven);
2444     powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2445                                               rmNearestTiesToEven);
2446     /* Add exp, as 10^n = 5^n * 2^n.  */
2447     decSig.exponent += exp;
2448 
2449     lostFraction calcLostFraction;
2450     integerPart HUerr, HUdistance;
2451     unsigned int powHUerr;
2452 
2453     if (exp >= 0) {
2454       /* multiplySignificand leaves the precision-th bit set to 1.  */
2455       calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2456       powHUerr = powStatus != opOK;
2457     } else {
2458       calcLostFraction = decSig.divideSignificand(pow5);
2459       /* Denormal numbers have less precision.  */
2460       if (decSig.exponent < semantics->minExponent) {
2461         excessPrecision += (semantics->minExponent - decSig.exponent);
2462         truncatedBits = excessPrecision;
2463         if (excessPrecision > calcSemantics.precision)
2464           excessPrecision = calcSemantics.precision;
2465       }
2466       /* Extra half-ulp lost in reciprocal of exponent.  */
2467       powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2468     }
2469 
2470     /* Both multiplySignificand and divideSignificand return the
2471        result with the integer bit set.  */
2472     assert(APInt::tcExtractBit
2473            (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2474 
2475     HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2476                        powHUerr);
2477     HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2478                                       excessPrecision, isNearest);
2479 
2480     /* Are we guaranteed to round correctly if we truncate?  */
2481     if (HUdistance >= HUerr) {
2482       APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2483                        calcSemantics.precision - excessPrecision,
2484                        excessPrecision);
2485       /* Take the exponent of decSig.  If we tcExtract-ed less bits
2486          above we must adjust our exponent to compensate for the
2487          implicit right shift.  */
2488       exponent = (decSig.exponent + semantics->precision
2489                   - (calcSemantics.precision - excessPrecision));
2490       calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2491                                                        decSig.partCount(),
2492                                                        truncatedBits);
2493       return normalize(rounding_mode, calcLostFraction);
2494     }
2495   }
2496 }
2497 
2498 APFloat::opStatus
convertFromDecimalString(StringRef str,roundingMode rounding_mode)2499 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2500 {
2501   decimalInfo D;
2502   opStatus fs;
2503 
2504   /* Scan the text.  */
2505   StringRef::iterator p = str.begin();
2506   interpretDecimal(p, str.end(), &D);
2507 
2508   /* Handle the quick cases.  First the case of no significant digits,
2509      i.e. zero, and then exponents that are obviously too large or too
2510      small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
2511      definitely overflows if
2512 
2513            (exp - 1) * L >= maxExponent
2514 
2515      and definitely underflows to zero where
2516 
2517            (exp + 1) * L <= minExponent - precision
2518 
2519      With integer arithmetic the tightest bounds for L are
2520 
2521            93/28 < L < 196/59            [ numerator <= 256 ]
2522            42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
2523   */
2524 
2525   // Test if we have a zero number allowing for strings with no null terminators
2526   // and zero decimals with non-zero exponents.
2527   //
2528   // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2529   // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2530   // be at most one dot. On the other hand, if we have a zero with a non-zero
2531   // exponent, then we know that D.firstSigDigit will be non-numeric.
2532   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2533     category = fcZero;
2534     fs = opOK;
2535 
2536   /* Check whether the normalized exponent is high enough to overflow
2537      max during the log-rebasing in the max-exponent check below. */
2538   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2539     fs = handleOverflow(rounding_mode);
2540 
2541   /* If it wasn't, then it also wasn't high enough to overflow max
2542      during the log-rebasing in the min-exponent check.  Check that it
2543      won't overflow min in either check, then perform the min-exponent
2544      check. */
2545   } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2546              (D.normalizedExponent + 1) * 28738 <=
2547                8651 * (semantics->minExponent - (int) semantics->precision)) {
2548     /* Underflow to zero and round.  */
2549     category = fcNormal;
2550     zeroSignificand();
2551     fs = normalize(rounding_mode, lfLessThanHalf);
2552 
2553   /* We can finally safely perform the max-exponent check. */
2554   } else if ((D.normalizedExponent - 1) * 42039
2555              >= 12655 * semantics->maxExponent) {
2556     /* Overflow and round.  */
2557     fs = handleOverflow(rounding_mode);
2558   } else {
2559     integerPart *decSignificand;
2560     unsigned int partCount;
2561 
2562     /* A tight upper bound on number of bits required to hold an
2563        N-digit decimal integer is N * 196 / 59.  Allocate enough space
2564        to hold the full significand, and an extra part required by
2565        tcMultiplyPart.  */
2566     partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2567     partCount = partCountForBits(1 + 196 * partCount / 59);
2568     decSignificand = new integerPart[partCount + 1];
2569     partCount = 0;
2570 
2571     /* Convert to binary efficiently - we do almost all multiplication
2572        in an integerPart.  When this would overflow do we do a single
2573        bignum multiplication, and then revert again to multiplication
2574        in an integerPart.  */
2575     do {
2576       integerPart decValue, val, multiplier;
2577 
2578       val = 0;
2579       multiplier = 1;
2580 
2581       do {
2582         if (*p == '.') {
2583           p++;
2584           if (p == str.end()) {
2585             break;
2586           }
2587         }
2588         decValue = decDigitValue(*p++);
2589         assert(decValue < 10U && "Invalid character in significand");
2590         multiplier *= 10;
2591         val = val * 10 + decValue;
2592         /* The maximum number that can be multiplied by ten with any
2593            digit added without overflowing an integerPart.  */
2594       } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2595 
2596       /* Multiply out the current part.  */
2597       APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2598                             partCount, partCount + 1, false);
2599 
2600       /* If we used another part (likely but not guaranteed), increase
2601          the count.  */
2602       if (decSignificand[partCount])
2603         partCount++;
2604     } while (p <= D.lastSigDigit);
2605 
2606     category = fcNormal;
2607     fs = roundSignificandWithExponent(decSignificand, partCount,
2608                                       D.exponent, rounding_mode);
2609 
2610     delete [] decSignificand;
2611   }
2612 
2613   return fs;
2614 }
2615 
2616 bool
convertFromStringSpecials(StringRef str)2617 APFloat::convertFromStringSpecials(StringRef str) {
2618   if (str.equals("inf") || str.equals("INFINITY")) {
2619     makeInf(false);
2620     return true;
2621   }
2622 
2623   if (str.equals("-inf") || str.equals("-INFINITY")) {
2624     makeInf(true);
2625     return true;
2626   }
2627 
2628   if (str.equals("nan") || str.equals("NaN")) {
2629     makeNaN(false, false);
2630     return true;
2631   }
2632 
2633   if (str.equals("-nan") || str.equals("-NaN")) {
2634     makeNaN(false, true);
2635     return true;
2636   }
2637 
2638   return false;
2639 }
2640 
2641 APFloat::opStatus
convertFromString(StringRef str,roundingMode rounding_mode)2642 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2643 {
2644   assert(!str.empty() && "Invalid string length");
2645 
2646   // Handle special cases.
2647   if (convertFromStringSpecials(str))
2648     return opOK;
2649 
2650   /* Handle a leading minus sign.  */
2651   StringRef::iterator p = str.begin();
2652   size_t slen = str.size();
2653   sign = *p == '-' ? 1 : 0;
2654   if (*p == '-' || *p == '+') {
2655     p++;
2656     slen--;
2657     assert(slen && "String has no digits");
2658   }
2659 
2660   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2661     assert(slen - 2 && "Invalid string");
2662     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2663                                         rounding_mode);
2664   }
2665 
2666   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2667 }
2668 
2669 /* Write out a hexadecimal representation of the floating point value
2670    to DST, which must be of sufficient size, in the C99 form
2671    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
2672    excluding the terminating NUL.
2673 
2674    If UPPERCASE, the output is in upper case, otherwise in lower case.
2675 
2676    HEXDIGITS digits appear altogether, rounding the value if
2677    necessary.  If HEXDIGITS is 0, the minimal precision to display the
2678    number precisely is used instead.  If nothing would appear after
2679    the decimal point it is suppressed.
2680 
2681    The decimal exponent is always printed and has at least one digit.
2682    Zero values display an exponent of zero.  Infinities and NaNs
2683    appear as "infinity" or "nan" respectively.
2684 
2685    The above rules are as specified by C99.  There is ambiguity about
2686    what the leading hexadecimal digit should be.  This implementation
2687    uses whatever is necessary so that the exponent is displayed as
2688    stored.  This implies the exponent will fall within the IEEE format
2689    range, and the leading hexadecimal digit will be 0 (for denormals),
2690    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2691    any other digits zero).
2692 */
2693 unsigned int
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2694 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2695                             bool upperCase, roundingMode rounding_mode) const
2696 {
2697   char *p;
2698 
2699   p = dst;
2700   if (sign)
2701     *dst++ = '-';
2702 
2703   switch (category) {
2704   case fcInfinity:
2705     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2706     dst += sizeof infinityL - 1;
2707     break;
2708 
2709   case fcNaN:
2710     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2711     dst += sizeof NaNU - 1;
2712     break;
2713 
2714   case fcZero:
2715     *dst++ = '0';
2716     *dst++ = upperCase ? 'X': 'x';
2717     *dst++ = '0';
2718     if (hexDigits > 1) {
2719       *dst++ = '.';
2720       memset (dst, '0', hexDigits - 1);
2721       dst += hexDigits - 1;
2722     }
2723     *dst++ = upperCase ? 'P': 'p';
2724     *dst++ = '0';
2725     break;
2726 
2727   case fcNormal:
2728     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2729     break;
2730   }
2731 
2732   *dst = 0;
2733 
2734   return static_cast<unsigned int>(dst - p);
2735 }
2736 
2737 /* Does the hard work of outputting the correctly rounded hexadecimal
2738    form of a normal floating point number with the specified number of
2739    hexadecimal digits.  If HEXDIGITS is zero the minimum number of
2740    digits necessary to print the value precisely is output.  */
2741 char *
convertNormalToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2742 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2743                                   bool upperCase,
2744                                   roundingMode rounding_mode) const
2745 {
2746   unsigned int count, valueBits, shift, partsCount, outputDigits;
2747   const char *hexDigitChars;
2748   const integerPart *significand;
2749   char *p;
2750   bool roundUp;
2751 
2752   *dst++ = '0';
2753   *dst++ = upperCase ? 'X': 'x';
2754 
2755   roundUp = false;
2756   hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2757 
2758   significand = significandParts();
2759   partsCount = partCount();
2760 
2761   /* +3 because the first digit only uses the single integer bit, so
2762      we have 3 virtual zero most-significant-bits.  */
2763   valueBits = semantics->precision + 3;
2764   shift = integerPartWidth - valueBits % integerPartWidth;
2765 
2766   /* The natural number of digits required ignoring trailing
2767      insignificant zeroes.  */
2768   outputDigits = (valueBits - significandLSB () + 3) / 4;
2769 
2770   /* hexDigits of zero means use the required number for the
2771      precision.  Otherwise, see if we are truncating.  If we are,
2772      find out if we need to round away from zero.  */
2773   if (hexDigits) {
2774     if (hexDigits < outputDigits) {
2775       /* We are dropping non-zero bits, so need to check how to round.
2776          "bits" is the number of dropped bits.  */
2777       unsigned int bits;
2778       lostFraction fraction;
2779 
2780       bits = valueBits - hexDigits * 4;
2781       fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2782       roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2783     }
2784     outputDigits = hexDigits;
2785   }
2786 
2787   /* Write the digits consecutively, and start writing in the location
2788      of the hexadecimal point.  We move the most significant digit
2789      left and add the hexadecimal point later.  */
2790   p = ++dst;
2791 
2792   count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2793 
2794   while (outputDigits && count) {
2795     integerPart part;
2796 
2797     /* Put the most significant integerPartWidth bits in "part".  */
2798     if (--count == partsCount)
2799       part = 0;  /* An imaginary higher zero part.  */
2800     else
2801       part = significand[count] << shift;
2802 
2803     if (count && shift)
2804       part |= significand[count - 1] >> (integerPartWidth - shift);
2805 
2806     /* Convert as much of "part" to hexdigits as we can.  */
2807     unsigned int curDigits = integerPartWidth / 4;
2808 
2809     if (curDigits > outputDigits)
2810       curDigits = outputDigits;
2811     dst += partAsHex (dst, part, curDigits, hexDigitChars);
2812     outputDigits -= curDigits;
2813   }
2814 
2815   if (roundUp) {
2816     char *q = dst;
2817 
2818     /* Note that hexDigitChars has a trailing '0'.  */
2819     do {
2820       q--;
2821       *q = hexDigitChars[hexDigitValue (*q) + 1];
2822     } while (*q == '0');
2823     assert(q >= p);
2824   } else {
2825     /* Add trailing zeroes.  */
2826     memset (dst, '0', outputDigits);
2827     dst += outputDigits;
2828   }
2829 
2830   /* Move the most significant digit to before the point, and if there
2831      is something after the decimal point add it.  This must come
2832      after rounding above.  */
2833   p[-1] = p[0];
2834   if (dst -1 == p)
2835     dst--;
2836   else
2837     p[0] = '.';
2838 
2839   /* Finally output the exponent.  */
2840   *dst++ = upperCase ? 'P': 'p';
2841 
2842   return writeSignedDecimal (dst, exponent);
2843 }
2844 
hash_value(const APFloat & Arg)2845 hash_code llvm::hash_value(const APFloat &Arg) {
2846   if (!Arg.isFiniteNonZero())
2847     return hash_combine((uint8_t)Arg.category,
2848                         // NaN has no sign, fix it at zero.
2849                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2850                         Arg.semantics->precision);
2851 
2852   // Normal floats need their exponent and significand hashed.
2853   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2854                       Arg.semantics->precision, Arg.exponent,
2855                       hash_combine_range(
2856                         Arg.significandParts(),
2857                         Arg.significandParts() + Arg.partCount()));
2858 }
2859 
2860 // Conversion from APFloat to/from host float/double.  It may eventually be
2861 // possible to eliminate these and have everybody deal with APFloats, but that
2862 // will take a while.  This approach will not easily extend to long double.
2863 // Current implementation requires integerPartWidth==64, which is correct at
2864 // the moment but could be made more general.
2865 
2866 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2867 // the actual IEEE respresentations.  We compensate for that here.
2868 
2869 APInt
convertF80LongDoubleAPFloatToAPInt() const2870 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2871 {
2872   assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2873   assert(partCount()==2);
2874 
2875   uint64_t myexponent, mysignificand;
2876 
2877   if (isFiniteNonZero()) {
2878     myexponent = exponent+16383; //bias
2879     mysignificand = significandParts()[0];
2880     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2881       myexponent = 0;   // denormal
2882   } else if (category==fcZero) {
2883     myexponent = 0;
2884     mysignificand = 0;
2885   } else if (category==fcInfinity) {
2886     myexponent = 0x7fff;
2887     mysignificand = 0x8000000000000000ULL;
2888   } else {
2889     assert(category == fcNaN && "Unknown category");
2890     myexponent = 0x7fff;
2891     mysignificand = significandParts()[0];
2892   }
2893 
2894   uint64_t words[2];
2895   words[0] = mysignificand;
2896   words[1] =  ((uint64_t)(sign & 1) << 15) |
2897               (myexponent & 0x7fffLL);
2898   return APInt(80, words);
2899 }
2900 
2901 APInt
convertPPCDoubleDoubleAPFloatToAPInt() const2902 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2903 {
2904   assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2905   assert(partCount()==2);
2906 
2907   uint64_t words[2];
2908   opStatus fs;
2909   bool losesInfo;
2910 
2911   // Convert number to double.  To avoid spurious underflows, we re-
2912   // normalize against the "double" minExponent first, and only *then*
2913   // truncate the mantissa.  The result of that second conversion
2914   // may be inexact, but should never underflow.
2915   // Declare fltSemantics before APFloat that uses it (and
2916   // saves pointer to it) to ensure correct destruction order.
2917   fltSemantics extendedSemantics = *semantics;
2918   extendedSemantics.minExponent = IEEEdouble.minExponent;
2919   APFloat extended(*this);
2920   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2921   assert(fs == opOK && !losesInfo);
2922   (void)fs;
2923 
2924   APFloat u(extended);
2925   fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2926   assert(fs == opOK || fs == opInexact);
2927   (void)fs;
2928   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2929 
2930   // If conversion was exact or resulted in a special case, we're done;
2931   // just set the second double to zero.  Otherwise, re-convert back to
2932   // the extended format and compute the difference.  This now should
2933   // convert exactly to double.
2934   if (u.isFiniteNonZero() && losesInfo) {
2935     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2936     assert(fs == opOK && !losesInfo);
2937     (void)fs;
2938 
2939     APFloat v(extended);
2940     v.subtract(u, rmNearestTiesToEven);
2941     fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2942     assert(fs == opOK && !losesInfo);
2943     (void)fs;
2944     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2945   } else {
2946     words[1] = 0;
2947   }
2948 
2949   return APInt(128, words);
2950 }
2951 
2952 APInt
convertQuadrupleAPFloatToAPInt() const2953 APFloat::convertQuadrupleAPFloatToAPInt() const
2954 {
2955   assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2956   assert(partCount()==2);
2957 
2958   uint64_t myexponent, mysignificand, mysignificand2;
2959 
2960   if (isFiniteNonZero()) {
2961     myexponent = exponent+16383; //bias
2962     mysignificand = significandParts()[0];
2963     mysignificand2 = significandParts()[1];
2964     if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2965       myexponent = 0;   // denormal
2966   } else if (category==fcZero) {
2967     myexponent = 0;
2968     mysignificand = mysignificand2 = 0;
2969   } else if (category==fcInfinity) {
2970     myexponent = 0x7fff;
2971     mysignificand = mysignificand2 = 0;
2972   } else {
2973     assert(category == fcNaN && "Unknown category!");
2974     myexponent = 0x7fff;
2975     mysignificand = significandParts()[0];
2976     mysignificand2 = significandParts()[1];
2977   }
2978 
2979   uint64_t words[2];
2980   words[0] = mysignificand;
2981   words[1] = ((uint64_t)(sign & 1) << 63) |
2982              ((myexponent & 0x7fff) << 48) |
2983              (mysignificand2 & 0xffffffffffffLL);
2984 
2985   return APInt(128, words);
2986 }
2987 
2988 APInt
convertDoubleAPFloatToAPInt() const2989 APFloat::convertDoubleAPFloatToAPInt() const
2990 {
2991   assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2992   assert(partCount()==1);
2993 
2994   uint64_t myexponent, mysignificand;
2995 
2996   if (isFiniteNonZero()) {
2997     myexponent = exponent+1023; //bias
2998     mysignificand = *significandParts();
2999     if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
3000       myexponent = 0;   // denormal
3001   } else if (category==fcZero) {
3002     myexponent = 0;
3003     mysignificand = 0;
3004   } else if (category==fcInfinity) {
3005     myexponent = 0x7ff;
3006     mysignificand = 0;
3007   } else {
3008     assert(category == fcNaN && "Unknown category!");
3009     myexponent = 0x7ff;
3010     mysignificand = *significandParts();
3011   }
3012 
3013   return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3014                      ((myexponent & 0x7ff) <<  52) |
3015                      (mysignificand & 0xfffffffffffffLL))));
3016 }
3017 
3018 APInt
convertFloatAPFloatToAPInt() const3019 APFloat::convertFloatAPFloatToAPInt() const
3020 {
3021   assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
3022   assert(partCount()==1);
3023 
3024   uint32_t myexponent, mysignificand;
3025 
3026   if (isFiniteNonZero()) {
3027     myexponent = exponent+127; //bias
3028     mysignificand = (uint32_t)*significandParts();
3029     if (myexponent == 1 && !(mysignificand & 0x800000))
3030       myexponent = 0;   // denormal
3031   } else if (category==fcZero) {
3032     myexponent = 0;
3033     mysignificand = 0;
3034   } else if (category==fcInfinity) {
3035     myexponent = 0xff;
3036     mysignificand = 0;
3037   } else {
3038     assert(category == fcNaN && "Unknown category!");
3039     myexponent = 0xff;
3040     mysignificand = (uint32_t)*significandParts();
3041   }
3042 
3043   return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3044                     (mysignificand & 0x7fffff)));
3045 }
3046 
3047 APInt
convertHalfAPFloatToAPInt() const3048 APFloat::convertHalfAPFloatToAPInt() const
3049 {
3050   assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
3051   assert(partCount()==1);
3052 
3053   uint32_t myexponent, mysignificand;
3054 
3055   if (isFiniteNonZero()) {
3056     myexponent = exponent+15; //bias
3057     mysignificand = (uint32_t)*significandParts();
3058     if (myexponent == 1 && !(mysignificand & 0x400))
3059       myexponent = 0;   // denormal
3060   } else if (category==fcZero) {
3061     myexponent = 0;
3062     mysignificand = 0;
3063   } else if (category==fcInfinity) {
3064     myexponent = 0x1f;
3065     mysignificand = 0;
3066   } else {
3067     assert(category == fcNaN && "Unknown category!");
3068     myexponent = 0x1f;
3069     mysignificand = (uint32_t)*significandParts();
3070   }
3071 
3072   return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3073                     (mysignificand & 0x3ff)));
3074 }
3075 
3076 // This function creates an APInt that is just a bit map of the floating
3077 // point constant as it would appear in memory.  It is not a conversion,
3078 // and treating the result as a normal integer is unlikely to be useful.
3079 
3080 APInt
bitcastToAPInt() const3081 APFloat::bitcastToAPInt() const
3082 {
3083   if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
3084     return convertHalfAPFloatToAPInt();
3085 
3086   if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
3087     return convertFloatAPFloatToAPInt();
3088 
3089   if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
3090     return convertDoubleAPFloatToAPInt();
3091 
3092   if (semantics == (const llvm::fltSemantics*)&IEEEquad)
3093     return convertQuadrupleAPFloatToAPInt();
3094 
3095   if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
3096     return convertPPCDoubleDoubleAPFloatToAPInt();
3097 
3098   assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
3099          "unknown format!");
3100   return convertF80LongDoubleAPFloatToAPInt();
3101 }
3102 
3103 float
convertToFloat() const3104 APFloat::convertToFloat() const
3105 {
3106   assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
3107          "Float semantics are not IEEEsingle");
3108   APInt api = bitcastToAPInt();
3109   return api.bitsToFloat();
3110 }
3111 
3112 double
convertToDouble() const3113 APFloat::convertToDouble() const
3114 {
3115   assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
3116          "Float semantics are not IEEEdouble");
3117   APInt api = bitcastToAPInt();
3118   return api.bitsToDouble();
3119 }
3120 
3121 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3122 /// does not support these bit patterns:
3123 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3124 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3125 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3126 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3127 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3128 void
initFromF80LongDoubleAPInt(const APInt & api)3129 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
3130 {
3131   assert(api.getBitWidth()==80);
3132   uint64_t i1 = api.getRawData()[0];
3133   uint64_t i2 = api.getRawData()[1];
3134   uint64_t myexponent = (i2 & 0x7fff);
3135   uint64_t mysignificand = i1;
3136 
3137   initialize(&APFloat::x87DoubleExtended);
3138   assert(partCount()==2);
3139 
3140   sign = static_cast<unsigned int>(i2>>15);
3141   if (myexponent==0 && mysignificand==0) {
3142     // exponent, significand meaningless
3143     category = fcZero;
3144   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3145     // exponent, significand meaningless
3146     category = fcInfinity;
3147   } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3148     // exponent meaningless
3149     category = fcNaN;
3150     significandParts()[0] = mysignificand;
3151     significandParts()[1] = 0;
3152   } else {
3153     category = fcNormal;
3154     exponent = myexponent - 16383;
3155     significandParts()[0] = mysignificand;
3156     significandParts()[1] = 0;
3157     if (myexponent==0)          // denormal
3158       exponent = -16382;
3159   }
3160 }
3161 
3162 void
initFromPPCDoubleDoubleAPInt(const APInt & api)3163 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3164 {
3165   assert(api.getBitWidth()==128);
3166   uint64_t i1 = api.getRawData()[0];
3167   uint64_t i2 = api.getRawData()[1];
3168   opStatus fs;
3169   bool losesInfo;
3170 
3171   // Get the first double and convert to our format.
3172   initFromDoubleAPInt(APInt(64, i1));
3173   fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3174   assert(fs == opOK && !losesInfo);
3175   (void)fs;
3176 
3177   // Unless we have a special case, add in second double.
3178   if (isFiniteNonZero()) {
3179     APFloat v(IEEEdouble, APInt(64, i2));
3180     fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3181     assert(fs == opOK && !losesInfo);
3182     (void)fs;
3183 
3184     add(v, rmNearestTiesToEven);
3185   }
3186 }
3187 
3188 void
initFromQuadrupleAPInt(const APInt & api)3189 APFloat::initFromQuadrupleAPInt(const APInt &api)
3190 {
3191   assert(api.getBitWidth()==128);
3192   uint64_t i1 = api.getRawData()[0];
3193   uint64_t i2 = api.getRawData()[1];
3194   uint64_t myexponent = (i2 >> 48) & 0x7fff;
3195   uint64_t mysignificand  = i1;
3196   uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3197 
3198   initialize(&APFloat::IEEEquad);
3199   assert(partCount()==2);
3200 
3201   sign = static_cast<unsigned int>(i2>>63);
3202   if (myexponent==0 &&
3203       (mysignificand==0 && mysignificand2==0)) {
3204     // exponent, significand meaningless
3205     category = fcZero;
3206   } else if (myexponent==0x7fff &&
3207              (mysignificand==0 && mysignificand2==0)) {
3208     // exponent, significand meaningless
3209     category = fcInfinity;
3210   } else if (myexponent==0x7fff &&
3211              (mysignificand!=0 || mysignificand2 !=0)) {
3212     // exponent meaningless
3213     category = fcNaN;
3214     significandParts()[0] = mysignificand;
3215     significandParts()[1] = mysignificand2;
3216   } else {
3217     category = fcNormal;
3218     exponent = myexponent - 16383;
3219     significandParts()[0] = mysignificand;
3220     significandParts()[1] = mysignificand2;
3221     if (myexponent==0)          // denormal
3222       exponent = -16382;
3223     else
3224       significandParts()[1] |= 0x1000000000000LL;  // integer bit
3225   }
3226 }
3227 
3228 void
initFromDoubleAPInt(const APInt & api)3229 APFloat::initFromDoubleAPInt(const APInt &api)
3230 {
3231   assert(api.getBitWidth()==64);
3232   uint64_t i = *api.getRawData();
3233   uint64_t myexponent = (i >> 52) & 0x7ff;
3234   uint64_t mysignificand = i & 0xfffffffffffffLL;
3235 
3236   initialize(&APFloat::IEEEdouble);
3237   assert(partCount()==1);
3238 
3239   sign = static_cast<unsigned int>(i>>63);
3240   if (myexponent==0 && mysignificand==0) {
3241     // exponent, significand meaningless
3242     category = fcZero;
3243   } else if (myexponent==0x7ff && mysignificand==0) {
3244     // exponent, significand meaningless
3245     category = fcInfinity;
3246   } else if (myexponent==0x7ff && mysignificand!=0) {
3247     // exponent meaningless
3248     category = fcNaN;
3249     *significandParts() = mysignificand;
3250   } else {
3251     category = fcNormal;
3252     exponent = myexponent - 1023;
3253     *significandParts() = mysignificand;
3254     if (myexponent==0)          // denormal
3255       exponent = -1022;
3256     else
3257       *significandParts() |= 0x10000000000000LL;  // integer bit
3258   }
3259 }
3260 
3261 void
initFromFloatAPInt(const APInt & api)3262 APFloat::initFromFloatAPInt(const APInt & api)
3263 {
3264   assert(api.getBitWidth()==32);
3265   uint32_t i = (uint32_t)*api.getRawData();
3266   uint32_t myexponent = (i >> 23) & 0xff;
3267   uint32_t mysignificand = i & 0x7fffff;
3268 
3269   initialize(&APFloat::IEEEsingle);
3270   assert(partCount()==1);
3271 
3272   sign = i >> 31;
3273   if (myexponent==0 && mysignificand==0) {
3274     // exponent, significand meaningless
3275     category = fcZero;
3276   } else if (myexponent==0xff && mysignificand==0) {
3277     // exponent, significand meaningless
3278     category = fcInfinity;
3279   } else if (myexponent==0xff && mysignificand!=0) {
3280     // sign, exponent, significand meaningless
3281     category = fcNaN;
3282     *significandParts() = mysignificand;
3283   } else {
3284     category = fcNormal;
3285     exponent = myexponent - 127;  //bias
3286     *significandParts() = mysignificand;
3287     if (myexponent==0)    // denormal
3288       exponent = -126;
3289     else
3290       *significandParts() |= 0x800000; // integer bit
3291   }
3292 }
3293 
3294 void
initFromHalfAPInt(const APInt & api)3295 APFloat::initFromHalfAPInt(const APInt & api)
3296 {
3297   assert(api.getBitWidth()==16);
3298   uint32_t i = (uint32_t)*api.getRawData();
3299   uint32_t myexponent = (i >> 10) & 0x1f;
3300   uint32_t mysignificand = i & 0x3ff;
3301 
3302   initialize(&APFloat::IEEEhalf);
3303   assert(partCount()==1);
3304 
3305   sign = i >> 15;
3306   if (myexponent==0 && mysignificand==0) {
3307     // exponent, significand meaningless
3308     category = fcZero;
3309   } else if (myexponent==0x1f && mysignificand==0) {
3310     // exponent, significand meaningless
3311     category = fcInfinity;
3312   } else if (myexponent==0x1f && mysignificand!=0) {
3313     // sign, exponent, significand meaningless
3314     category = fcNaN;
3315     *significandParts() = mysignificand;
3316   } else {
3317     category = fcNormal;
3318     exponent = myexponent - 15;  //bias
3319     *significandParts() = mysignificand;
3320     if (myexponent==0)    // denormal
3321       exponent = -14;
3322     else
3323       *significandParts() |= 0x400; // integer bit
3324   }
3325 }
3326 
3327 /// Treat api as containing the bits of a floating point number.  Currently
3328 /// we infer the floating point type from the size of the APInt.  The
3329 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3330 /// when the size is anything else).
3331 void
initFromAPInt(const fltSemantics * Sem,const APInt & api)3332 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
3333 {
3334   if (Sem == &IEEEhalf)
3335     return initFromHalfAPInt(api);
3336   if (Sem == &IEEEsingle)
3337     return initFromFloatAPInt(api);
3338   if (Sem == &IEEEdouble)
3339     return initFromDoubleAPInt(api);
3340   if (Sem == &x87DoubleExtended)
3341     return initFromF80LongDoubleAPInt(api);
3342   if (Sem == &IEEEquad)
3343     return initFromQuadrupleAPInt(api);
3344   if (Sem == &PPCDoubleDouble)
3345     return initFromPPCDoubleDoubleAPInt(api);
3346 
3347   llvm_unreachable(nullptr);
3348 }
3349 
3350 APFloat
getAllOnesValue(unsigned BitWidth,bool isIEEE)3351 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3352 {
3353   switch (BitWidth) {
3354   case 16:
3355     return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
3356   case 32:
3357     return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
3358   case 64:
3359     return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
3360   case 80:
3361     return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
3362   case 128:
3363     if (isIEEE)
3364       return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
3365     return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
3366   default:
3367     llvm_unreachable("Unknown floating bit width");
3368   }
3369 }
3370 
3371 /// Make this number the largest magnitude normal number in the given
3372 /// semantics.
makeLargest(bool Negative)3373 void APFloat::makeLargest(bool Negative) {
3374   // We want (in interchange format):
3375   //   sign = {Negative}
3376   //   exponent = 1..10
3377   //   significand = 1..1
3378   category = fcNormal;
3379   sign = Negative;
3380   exponent = semantics->maxExponent;
3381 
3382   // Use memset to set all but the highest integerPart to all ones.
3383   integerPart *significand = significandParts();
3384   unsigned PartCount = partCount();
3385   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3386 
3387   // Set the high integerPart especially setting all unused top bits for
3388   // internal consistency.
3389   const unsigned NumUnusedHighBits =
3390     PartCount*integerPartWidth - semantics->precision;
3391   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3392                                    ? (~integerPart(0) >> NumUnusedHighBits)
3393                                    : 0;
3394 }
3395 
3396 /// Make this number the smallest magnitude denormal number in the given
3397 /// semantics.
makeSmallest(bool Negative)3398 void APFloat::makeSmallest(bool Negative) {
3399   // We want (in interchange format):
3400   //   sign = {Negative}
3401   //   exponent = 0..0
3402   //   significand = 0..01
3403   category = fcNormal;
3404   sign = Negative;
3405   exponent = semantics->minExponent;
3406   APInt::tcSet(significandParts(), 1, partCount());
3407 }
3408 
3409 
getLargest(const fltSemantics & Sem,bool Negative)3410 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3411   // We want (in interchange format):
3412   //   sign = {Negative}
3413   //   exponent = 1..10
3414   //   significand = 1..1
3415   APFloat Val(Sem, uninitialized);
3416   Val.makeLargest(Negative);
3417   return Val;
3418 }
3419 
getSmallest(const fltSemantics & Sem,bool Negative)3420 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3421   // We want (in interchange format):
3422   //   sign = {Negative}
3423   //   exponent = 0..0
3424   //   significand = 0..01
3425   APFloat Val(Sem, uninitialized);
3426   Val.makeSmallest(Negative);
3427   return Val;
3428 }
3429 
getSmallestNormalized(const fltSemantics & Sem,bool Negative)3430 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3431   APFloat Val(Sem, uninitialized);
3432 
3433   // We want (in interchange format):
3434   //   sign = {Negative}
3435   //   exponent = 0..0
3436   //   significand = 10..0
3437 
3438   Val.category = fcNormal;
3439   Val.zeroSignificand();
3440   Val.sign = Negative;
3441   Val.exponent = Sem.minExponent;
3442   Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3443     (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3444 
3445   return Val;
3446 }
3447 
APFloat(const fltSemantics & Sem,const APInt & API)3448 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
3449   initFromAPInt(&Sem, API);
3450 }
3451 
APFloat(float f)3452 APFloat::APFloat(float f) {
3453   initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
3454 }
3455 
APFloat(double d)3456 APFloat::APFloat(double d) {
3457   initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
3458 }
3459 
3460 namespace {
append(SmallVectorImpl<char> & Buffer,StringRef Str)3461   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3462     Buffer.append(Str.begin(), Str.end());
3463   }
3464 
3465   /// Removes data from the given significand until it is no more
3466   /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)3467   void AdjustToPrecision(APInt &significand,
3468                          int &exp, unsigned FormatPrecision) {
3469     unsigned bits = significand.getActiveBits();
3470 
3471     // 196/59 is a very slight overestimate of lg_2(10).
3472     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3473 
3474     if (bits <= bitsRequired) return;
3475 
3476     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3477     if (!tensRemovable) return;
3478 
3479     exp += tensRemovable;
3480 
3481     APInt divisor(significand.getBitWidth(), 1);
3482     APInt powten(significand.getBitWidth(), 10);
3483     while (true) {
3484       if (tensRemovable & 1)
3485         divisor *= powten;
3486       tensRemovable >>= 1;
3487       if (!tensRemovable) break;
3488       powten *= powten;
3489     }
3490 
3491     significand = significand.udiv(divisor);
3492 
3493     // Truncate the significand down to its active bit count.
3494     significand = significand.trunc(significand.getActiveBits());
3495   }
3496 
3497 
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)3498   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3499                          int &exp, unsigned FormatPrecision) {
3500     unsigned N = buffer.size();
3501     if (N <= FormatPrecision) return;
3502 
3503     // The most significant figures are the last ones in the buffer.
3504     unsigned FirstSignificant = N - FormatPrecision;
3505 
3506     // Round.
3507     // FIXME: this probably shouldn't use 'round half up'.
3508 
3509     // Rounding down is just a truncation, except we also want to drop
3510     // trailing zeros from the new result.
3511     if (buffer[FirstSignificant - 1] < '5') {
3512       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3513         FirstSignificant++;
3514 
3515       exp += FirstSignificant;
3516       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3517       return;
3518     }
3519 
3520     // Rounding up requires a decimal add-with-carry.  If we continue
3521     // the carry, the newly-introduced zeros will just be truncated.
3522     for (unsigned I = FirstSignificant; I != N; ++I) {
3523       if (buffer[I] == '9') {
3524         FirstSignificant++;
3525       } else {
3526         buffer[I]++;
3527         break;
3528       }
3529     }
3530 
3531     // If we carried through, we have exactly one digit of precision.
3532     if (FirstSignificant == N) {
3533       exp += FirstSignificant;
3534       buffer.clear();
3535       buffer.push_back('1');
3536       return;
3537     }
3538 
3539     exp += FirstSignificant;
3540     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3541   }
3542 }
3543 
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding) const3544 void APFloat::toString(SmallVectorImpl<char> &Str,
3545                        unsigned FormatPrecision,
3546                        unsigned FormatMaxPadding) const {
3547   switch (category) {
3548   case fcInfinity:
3549     if (isNegative())
3550       return append(Str, "-Inf");
3551     else
3552       return append(Str, "+Inf");
3553 
3554   case fcNaN: return append(Str, "NaN");
3555 
3556   case fcZero:
3557     if (isNegative())
3558       Str.push_back('-');
3559 
3560     if (!FormatMaxPadding)
3561       append(Str, "0.0E+0");
3562     else
3563       Str.push_back('0');
3564     return;
3565 
3566   case fcNormal:
3567     break;
3568   }
3569 
3570   if (isNegative())
3571     Str.push_back('-');
3572 
3573   // Decompose the number into an APInt and an exponent.
3574   int exp = exponent - ((int) semantics->precision - 1);
3575   APInt significand(semantics->precision,
3576                     makeArrayRef(significandParts(),
3577                                  partCountForBits(semantics->precision)));
3578 
3579   // Set FormatPrecision if zero.  We want to do this before we
3580   // truncate trailing zeros, as those are part of the precision.
3581   if (!FormatPrecision) {
3582     // We use enough digits so the number can be round-tripped back to an
3583     // APFloat. The formula comes from "How to Print Floating-Point Numbers
3584     // Accurately" by Steele and White.
3585     // FIXME: Using a formula based purely on the precision is conservative;
3586     // we can print fewer digits depending on the actual value being printed.
3587 
3588     // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3589     FormatPrecision = 2 + semantics->precision * 59 / 196;
3590   }
3591 
3592   // Ignore trailing binary zeros.
3593   int trailingZeros = significand.countTrailingZeros();
3594   exp += trailingZeros;
3595   significand = significand.lshr(trailingZeros);
3596 
3597   // Change the exponent from 2^e to 10^e.
3598   if (exp == 0) {
3599     // Nothing to do.
3600   } else if (exp > 0) {
3601     // Just shift left.
3602     significand = significand.zext(semantics->precision + exp);
3603     significand <<= exp;
3604     exp = 0;
3605   } else { /* exp < 0 */
3606     int texp = -exp;
3607 
3608     // We transform this using the identity:
3609     //   (N)(2^-e) == (N)(5^e)(10^-e)
3610     // This means we have to multiply N (the significand) by 5^e.
3611     // To avoid overflow, we have to operate on numbers large
3612     // enough to store N * 5^e:
3613     //   log2(N * 5^e) == log2(N) + e * log2(5)
3614     //                 <= semantics->precision + e * 137 / 59
3615     //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3616 
3617     unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3618 
3619     // Multiply significand by 5^e.
3620     //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3621     significand = significand.zext(precision);
3622     APInt five_to_the_i(precision, 5);
3623     while (true) {
3624       if (texp & 1) significand *= five_to_the_i;
3625 
3626       texp >>= 1;
3627       if (!texp) break;
3628       five_to_the_i *= five_to_the_i;
3629     }
3630   }
3631 
3632   AdjustToPrecision(significand, exp, FormatPrecision);
3633 
3634   SmallVector<char, 256> buffer;
3635 
3636   // Fill the buffer.
3637   unsigned precision = significand.getBitWidth();
3638   APInt ten(precision, 10);
3639   APInt digit(precision, 0);
3640 
3641   bool inTrail = true;
3642   while (significand != 0) {
3643     // digit <- significand % 10
3644     // significand <- significand / 10
3645     APInt::udivrem(significand, ten, significand, digit);
3646 
3647     unsigned d = digit.getZExtValue();
3648 
3649     // Drop trailing zeros.
3650     if (inTrail && !d) exp++;
3651     else {
3652       buffer.push_back((char) ('0' + d));
3653       inTrail = false;
3654     }
3655   }
3656 
3657   assert(!buffer.empty() && "no characters in buffer!");
3658 
3659   // Drop down to FormatPrecision.
3660   // TODO: don't do more precise calculations above than are required.
3661   AdjustToPrecision(buffer, exp, FormatPrecision);
3662 
3663   unsigned NDigits = buffer.size();
3664 
3665   // Check whether we should use scientific notation.
3666   bool FormatScientific;
3667   if (!FormatMaxPadding)
3668     FormatScientific = true;
3669   else {
3670     if (exp >= 0) {
3671       // 765e3 --> 765000
3672       //              ^^^
3673       // But we shouldn't make the number look more precise than it is.
3674       FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3675                           NDigits + (unsigned) exp > FormatPrecision);
3676     } else {
3677       // Power of the most significant digit.
3678       int MSD = exp + (int) (NDigits - 1);
3679       if (MSD >= 0) {
3680         // 765e-2 == 7.65
3681         FormatScientific = false;
3682       } else {
3683         // 765e-5 == 0.00765
3684         //           ^ ^^
3685         FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3686       }
3687     }
3688   }
3689 
3690   // Scientific formatting is pretty straightforward.
3691   if (FormatScientific) {
3692     exp += (NDigits - 1);
3693 
3694     Str.push_back(buffer[NDigits-1]);
3695     Str.push_back('.');
3696     if (NDigits == 1)
3697       Str.push_back('0');
3698     else
3699       for (unsigned I = 1; I != NDigits; ++I)
3700         Str.push_back(buffer[NDigits-1-I]);
3701     Str.push_back('E');
3702 
3703     Str.push_back(exp >= 0 ? '+' : '-');
3704     if (exp < 0) exp = -exp;
3705     SmallVector<char, 6> expbuf;
3706     do {
3707       expbuf.push_back((char) ('0' + (exp % 10)));
3708       exp /= 10;
3709     } while (exp);
3710     for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3711       Str.push_back(expbuf[E-1-I]);
3712     return;
3713   }
3714 
3715   // Non-scientific, positive exponents.
3716   if (exp >= 0) {
3717     for (unsigned I = 0; I != NDigits; ++I)
3718       Str.push_back(buffer[NDigits-1-I]);
3719     for (unsigned I = 0; I != (unsigned) exp; ++I)
3720       Str.push_back('0');
3721     return;
3722   }
3723 
3724   // Non-scientific, negative exponents.
3725 
3726   // The number of digits to the left of the decimal point.
3727   int NWholeDigits = exp + (int) NDigits;
3728 
3729   unsigned I = 0;
3730   if (NWholeDigits > 0) {
3731     for (; I != (unsigned) NWholeDigits; ++I)
3732       Str.push_back(buffer[NDigits-I-1]);
3733     Str.push_back('.');
3734   } else {
3735     unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3736 
3737     Str.push_back('0');
3738     Str.push_back('.');
3739     for (unsigned Z = 1; Z != NZeros; ++Z)
3740       Str.push_back('0');
3741   }
3742 
3743   for (; I != NDigits; ++I)
3744     Str.push_back(buffer[NDigits-I-1]);
3745 }
3746 
getExactInverse(APFloat * inv) const3747 bool APFloat::getExactInverse(APFloat *inv) const {
3748   // Special floats and denormals have no exact inverse.
3749   if (!isFiniteNonZero())
3750     return false;
3751 
3752   // Check that the number is a power of two by making sure that only the
3753   // integer bit is set in the significand.
3754   if (significandLSB() != semantics->precision - 1)
3755     return false;
3756 
3757   // Get the inverse.
3758   APFloat reciprocal(*semantics, 1ULL);
3759   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3760     return false;
3761 
3762   // Avoid multiplication with a denormal, it is not safe on all platforms and
3763   // may be slower than a normal division.
3764   if (reciprocal.isDenormal())
3765     return false;
3766 
3767   assert(reciprocal.isFiniteNonZero() &&
3768          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3769 
3770   if (inv)
3771     *inv = reciprocal;
3772 
3773   return true;
3774 }
3775 
isSignaling() const3776 bool APFloat::isSignaling() const {
3777   if (!isNaN())
3778     return false;
3779 
3780   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3781   // first bit of the trailing significand being 0.
3782   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3783 }
3784 
3785 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3786 ///
3787 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3788 /// appropriate sign switching before/after the computation.
next(bool nextDown)3789 APFloat::opStatus APFloat::next(bool nextDown) {
3790   // If we are performing nextDown, swap sign so we have -x.
3791   if (nextDown)
3792     changeSign();
3793 
3794   // Compute nextUp(x)
3795   opStatus result = opOK;
3796 
3797   // Handle each float category separately.
3798   switch (category) {
3799   case fcInfinity:
3800     // nextUp(+inf) = +inf
3801     if (!isNegative())
3802       break;
3803     // nextUp(-inf) = -getLargest()
3804     makeLargest(true);
3805     break;
3806   case fcNaN:
3807     // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3808     // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3809     //                     change the payload.
3810     if (isSignaling()) {
3811       result = opInvalidOp;
3812       // For consistency, propagate the sign of the sNaN to the qNaN.
3813       makeNaN(false, isNegative(), nullptr);
3814     }
3815     break;
3816   case fcZero:
3817     // nextUp(pm 0) = +getSmallest()
3818     makeSmallest(false);
3819     break;
3820   case fcNormal:
3821     // nextUp(-getSmallest()) = -0
3822     if (isSmallest() && isNegative()) {
3823       APInt::tcSet(significandParts(), 0, partCount());
3824       category = fcZero;
3825       exponent = 0;
3826       break;
3827     }
3828 
3829     // nextUp(getLargest()) == INFINITY
3830     if (isLargest() && !isNegative()) {
3831       APInt::tcSet(significandParts(), 0, partCount());
3832       category = fcInfinity;
3833       exponent = semantics->maxExponent + 1;
3834       break;
3835     }
3836 
3837     // nextUp(normal) == normal + inc.
3838     if (isNegative()) {
3839       // If we are negative, we need to decrement the significand.
3840 
3841       // We only cross a binade boundary that requires adjusting the exponent
3842       // if:
3843       //   1. exponent != semantics->minExponent. This implies we are not in the
3844       //   smallest binade or are dealing with denormals.
3845       //   2. Our significand excluding the integral bit is all zeros.
3846       bool WillCrossBinadeBoundary =
3847         exponent != semantics->minExponent && isSignificandAllZeros();
3848 
3849       // Decrement the significand.
3850       //
3851       // We always do this since:
3852       //   1. If we are dealing with a non-binade decrement, by definition we
3853       //   just decrement the significand.
3854       //   2. If we are dealing with a normal -> normal binade decrement, since
3855       //   we have an explicit integral bit the fact that all bits but the
3856       //   integral bit are zero implies that subtracting one will yield a
3857       //   significand with 0 integral bit and 1 in all other spots. Thus we
3858       //   must just adjust the exponent and set the integral bit to 1.
3859       //   3. If we are dealing with a normal -> denormal binade decrement,
3860       //   since we set the integral bit to 0 when we represent denormals, we
3861       //   just decrement the significand.
3862       integerPart *Parts = significandParts();
3863       APInt::tcDecrement(Parts, partCount());
3864 
3865       if (WillCrossBinadeBoundary) {
3866         // Our result is a normal number. Do the following:
3867         // 1. Set the integral bit to 1.
3868         // 2. Decrement the exponent.
3869         APInt::tcSetBit(Parts, semantics->precision - 1);
3870         exponent--;
3871       }
3872     } else {
3873       // If we are positive, we need to increment the significand.
3874 
3875       // We only cross a binade boundary that requires adjusting the exponent if
3876       // the input is not a denormal and all of said input's significand bits
3877       // are set. If all of said conditions are true: clear the significand, set
3878       // the integral bit to 1, and increment the exponent. If we have a
3879       // denormal always increment since moving denormals and the numbers in the
3880       // smallest normal binade have the same exponent in our representation.
3881       bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3882 
3883       if (WillCrossBinadeBoundary) {
3884         integerPart *Parts = significandParts();
3885         APInt::tcSet(Parts, 0, partCount());
3886         APInt::tcSetBit(Parts, semantics->precision - 1);
3887         assert(exponent != semantics->maxExponent &&
3888                "We can not increment an exponent beyond the maxExponent allowed"
3889                " by the given floating point semantics.");
3890         exponent++;
3891       } else {
3892         incrementSignificand();
3893       }
3894     }
3895     break;
3896   }
3897 
3898   // If we are performing nextDown, swap sign so we have -nextUp(-x)
3899   if (nextDown)
3900     changeSign();
3901 
3902   return result;
3903 }
3904 
3905 void
makeInf(bool Negative)3906 APFloat::makeInf(bool Negative) {
3907   category = fcInfinity;
3908   sign = Negative;
3909   exponent = semantics->maxExponent + 1;
3910   APInt::tcSet(significandParts(), 0, partCount());
3911 }
3912 
3913 void
makeZero(bool Negative)3914 APFloat::makeZero(bool Negative) {
3915   category = fcZero;
3916   sign = Negative;
3917   exponent = semantics->minExponent-1;
3918   APInt::tcSet(significandParts(), 0, partCount());
3919 }
3920 
scalbn(APFloat X,int Exp)3921 APFloat llvm::scalbn(APFloat X, int Exp) {
3922   if (X.isInfinity() || X.isZero() || X.isNaN())
3923     return std::move(X);
3924 
3925   auto MaxExp = X.getSemantics().maxExponent;
3926   auto MinExp = X.getSemantics().minExponent;
3927   if (Exp > (MaxExp - X.exponent))
3928     // Overflow saturates to infinity.
3929     return APFloat::getInf(X.getSemantics(), X.isNegative());
3930   if (Exp < (MinExp - X.exponent))
3931     // Underflow saturates to zero.
3932     return APFloat::getZero(X.getSemantics(), X.isNegative());
3933 
3934   X.exponent += Exp;
3935   return std::move(X);
3936 }
3937