//===-- lib/Decimal/big-radix-floating-point.h ------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_ #define FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_ // This is a helper class for use in floating-point conversions // between binary decimal representations. It holds a multiple-precision // integer value using digits of a radix that is a large even power of ten // (10,000,000,000,000,000 by default, 10**16). These digits are accompanied // by a signed exponent that denotes multiplication by a power of ten. // The effective radix point is to the right of the digits (i.e., they do // not represent a fraction). // // The operations supported by this class are limited to those required // for conversions between binary and decimal representations; it is not // a general-purpose facility. #include "flang/Common/bit-population-count.h" #include "flang/Common/leading-zero-bit-count.h" #include "flang/Common/uint128.h" #include "flang/Decimal/binary-floating-point.h" #include "flang/Decimal/decimal.h" #include #include #include namespace Fortran::decimal { static constexpr std::uint64_t TenToThe(int power) { return power <= 0 ? 1 : 10 * TenToThe(power - 1); } // 10**(LOG10RADIX + 3) must be < 2**wordbits, and LOG10RADIX must be // even, so that pairs of decimal digits do not straddle Digits. // So LOG10RADIX must be 16 or 6. template class BigRadixFloatingPointNumber { public: using Real = BinaryFloatingPointNumber; static constexpr int log10Radix{LOG10RADIX}; private: static constexpr std::uint64_t uint64Radix{TenToThe(log10Radix)}; static constexpr int minDigitBits{ 64 - common::LeadingZeroBitCount(uint64Radix)}; using Digit = common::HostUnsignedIntType; static constexpr Digit radix{uint64Radix}; static_assert(radix < std::numeric_limits::max() / 1000, "radix is somehow too big"); static_assert(radix > std::numeric_limits::max() / 10000, "radix is somehow too small"); // The base-2 logarithm of the least significant bit that can arise // in a subnormal IEEE floating-point number. static constexpr int minLog2AnyBit{ -Real::exponentBias - Real::binaryPrecision}; // The number of Digits needed to represent the smallest subnormal. static constexpr int maxDigits{3 - minLog2AnyBit / log10Radix}; public: explicit BigRadixFloatingPointNumber( enum FortranRounding rounding = RoundNearest) : rounding_{rounding} {} // Converts a binary floating point value. explicit BigRadixFloatingPointNumber( Real, enum FortranRounding = RoundNearest); BigRadixFloatingPointNumber &SetToZero() { isNegative_ = false; digits_ = 0; exponent_ = 0; return *this; } // Converts decimal floating-point to binary. ConversionToBinaryResult ConvertToBinary(); // Parses and converts to binary. Handles leading spaces, // "NaN", & optionally-signed "Inf". Does not skip internal // spaces. // The argument is a reference to a pointer that is left // pointing to the first character that wasn't parsed. ConversionToBinaryResult ConvertToBinary(const char *&); // Formats a decimal floating-point number to a user buffer. // May emit "NaN" or "Inf", or an possibly-signed integer. // No decimal point is written, but if it were, it would be // after the last digit; the effective decimal exponent is // returned as part of the result structure so that it can be // formatted by the client. ConversionToDecimalResult ConvertToDecimal( char *, std::size_t, enum DecimalConversionFlags, int digits) const; // Discard decimal digits not needed to distinguish this value // from the decimal encodings of two others (viz., the nearest binary // floating-point numbers immediately below and above this one). // The last decimal digit may not be uniquely determined in all // cases, and will be the mean value when that is so (e.g., if // last decimal digit values 6-8 would all work, it'll be a 7). // This minimization necessarily assumes that the value will be // emitted and read back into the same (or less precise) format // with default rounding to the nearest value. void Minimize( BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more); template STREAM &Dump(STREAM &) const; private: BigRadixFloatingPointNumber(const BigRadixFloatingPointNumber &that) : digits_{that.digits_}, exponent_{that.exponent_}, isNegative_{that.isNegative_}, rounding_{that.rounding_} { for (int j{0}; j < digits_; ++j) { digit_[j] = that.digit_[j]; } } bool IsZero() const { // Don't assume normalization. for (int j{0}; j < digits_; ++j) { if (digit_[j] != 0) { return false; } } return true; } // Predicate: true when 10*value would cause a carry. // (When this happens during decimal-to-binary conversion, // there are more digits in the input string than can be // represented precisely.) bool IsFull() const { return digits_ == digitLimit_ && digit_[digits_ - 1] >= radix / 10; } // Sets *this to an unsigned integer value. // Returns any remainder. template UINT SetTo(UINT n) { static_assert( std::is_same_v || std::is_unsigned_v); SetToZero(); while (n != 0) { auto q{n / 10u}; if (n != q * 10) { break; } ++exponent_; n = q; } if constexpr (sizeof n < sizeof(Digit)) { if (n != 0) { digit_[digits_++] = n; } return 0; } else { while (n != 0 && digits_ < digitLimit_) { auto q{n / radix}; digit_[digits_++] = static_cast(n - q * radix); n = q; } return n; } } int RemoveLeastOrderZeroDigits() { int remove{0}; if (digits_ > 0 && digit_[0] == 0) { while (remove < digits_ && digit_[remove] == 0) { ++remove; } if (remove >= digits_) { digits_ = 0; } else if (remove > 0) { #if defined __GNUC__ && __GNUC__ < 8 // (&& j + remove < maxDigits) was added to avoid GCC < 8 build failure // on -Werror=array-bounds. This can be removed if -Werror is disable. for (int j{0}; j + remove < digits_ && (j + remove < maxDigits); ++j) { #else for (int j{0}; j + remove < digits_; ++j) { #endif digit_[j] = digit_[j + remove]; } digits_ -= remove; } } return remove; } void RemoveLeadingZeroDigits() { while (digits_ > 0 && digit_[digits_ - 1] == 0) { --digits_; } } void Normalize() { RemoveLeadingZeroDigits(); exponent_ += RemoveLeastOrderZeroDigits() * log10Radix; } // This limited divisibility test only works for even divisors of the radix, // which is fine since it's only ever used with 2 and 5. template bool IsDivisibleBy() const { static_assert(N > 1 && radix % N == 0, "bad modulus"); return digits_ == 0 || (digit_[0] % N) == 0; } template int DivideBy() { Digit remainder{0}; for (int j{digits_ - 1}; j >= 0; --j) { Digit q{digit_[j] / DIVISOR}; Digit nrem{digit_[j] - DIVISOR * q}; digit_[j] = q + (radix / DIVISOR) * remainder; remainder = nrem; } return remainder; } void DivideByPowerOfTwo(int twoPow) { // twoPow <= log10Radix Digit remainder{0}; auto mask{(Digit{1} << twoPow) - 1}; auto coeff{radix >> twoPow}; for (int j{digits_ - 1}; j >= 0; --j) { auto nrem{digit_[j] & mask}; digit_[j] = (digit_[j] >> twoPow) + coeff * remainder; remainder = nrem; } } // Returns true on overflow bool DivideByPowerOfTwoInPlace(int twoPow) { if (digits_ > 0) { while (twoPow > 0) { int chunk{twoPow > log10Radix ? log10Radix : twoPow}; if ((digit_[0] & ((Digit{1} << chunk) - 1)) == 0) { DivideByPowerOfTwo(chunk); twoPow -= chunk; continue; } twoPow -= chunk; if (digit_[digits_ - 1] >> chunk != 0) { if (digits_ == digitLimit_) { return true; // overflow } digit_[digits_++] = 0; } auto remainder{digit_[digits_ - 1]}; exponent_ -= log10Radix; auto coeff{radix >> chunk}; // precise; radix is (5*2)**log10Radix auto mask{(Digit{1} << chunk) - 1}; for (int j{digits_ - 1}; j >= 1; --j) { digit_[j] = (digit_[j - 1] >> chunk) + coeff * remainder; remainder = digit_[j - 1] & mask; } digit_[0] = coeff * remainder; } } return false; // no overflow } int AddCarry(int position = 0, int carry = 1) { for (; position < digits_; ++position) { Digit v{digit_[position] + carry}; if (v < radix) { digit_[position] = v; return 0; } digit_[position] = v - radix; carry = 1; } if (digits_ < digitLimit_) { digit_[digits_++] = carry; return 0; } Normalize(); if (digits_ < digitLimit_) { digit_[digits_++] = carry; return 0; } return carry; } void Decrement() { for (int j{0}; digit_[j]-- == 0; ++j) { digit_[j] = radix - 1; } } template int MultiplyByHelper(int carry = 0) { for (int j{0}; j < digits_; ++j) { auto v{N * digit_[j] + carry}; carry = v / radix; digit_[j] = v - carry * radix; // i.e., v % radix } return carry; } template int MultiplyBy(int carry = 0) { if (int newCarry{MultiplyByHelper(carry)}) { return AddCarry(digits_, newCarry); } else { return 0; } } template int MultiplyWithoutNormalization() { if (int carry{MultiplyByHelper(0)}) { if (digits_ < digitLimit_) { digit_[digits_++] = carry; return 0; } else { return carry; } } else { return 0; } } void LoseLeastSignificantDigit(); // with rounding void PushCarry(int carry) { if (digits_ == maxDigits && RemoveLeastOrderZeroDigits() == 0) { LoseLeastSignificantDigit(); digit_[digits_ - 1] += carry; } else { digit_[digits_++] = carry; } } // Adds another number and then divides by two. // Assumes same exponent and sign. // Returns true when the the result has effectively been rounded down. bool Mean(const BigRadixFloatingPointNumber &); bool ParseNumber(const char *&, bool &inexact); using Raw = typename Real::RawType; constexpr Raw SignBit() const { return Raw{isNegative_} << (Real::bits - 1); } constexpr Raw Infinity() const { return (Raw{Real::maxExponent} << Real::significandBits) | SignBit(); } static constexpr Raw NaN() { return (Raw{Real::maxExponent} << Real::significandBits) | (Raw{1} << (Real::significandBits - 2)); } Digit digit_[maxDigits]; // in little-endian order: digit_[0] is LSD int digits_{0}; // # of elements in digit_[] array; zero when zero int digitLimit_{maxDigits}; // precision clamp int exponent_{0}; // signed power of ten bool isNegative_{false}; enum FortranRounding rounding_ { RoundNearest }; }; } // namespace Fortran::decimal #endif