1 //===-- include/flang/Common/real.h -----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef FORTRAN_COMMON_REAL_H_
10 #define FORTRAN_COMMON_REAL_H_
11 
12 // Characteristics of IEEE-754 & related binary floating-point numbers.
13 // The various representations are distinguished by their binary precisions
14 // (number of explicit significand bits and any implicit MSB in the fraction).
15 
16 #include <cinttypes>
17 
18 namespace Fortran::common {
19 
// Maps a binary precision (the number of significand bits, counting any
// implicit MSB) to the total size in bits of the matching representation.
// Yields -1 when the precision matches none of the known representations.
static constexpr int BitsForBinaryPrecision(int binaryPrecision) {
  int totalBits{-1}; // remains -1 for an unrecognized precision
  switch (binaryPrecision) {
  case 8: // IEEE single (truncated): 1+8+7 with implicit bit
  case 11: // IEEE half precision: 1+5+10 with implicit bit
    totalBits = 16;
    break;
  case 24: // IEEE single precision: 1+8+23 with implicit bit
    totalBits = 32;
    break;
  case 53: // IEEE double precision: 1+11+52 with implicit bit
    totalBits = 64;
    break;
  case 64: // x87 extended precision: 1+15+64, no implicit bit
    totalBits = 80;
    break;
  case 106: // "double-double": 2*(1+11+52 with implicit bit)
  case 113: // IEEE quad precision: 1+15+112 with implicit bit
    totalBits = 128;
    break;
  default:
    break;
  }
  return totalBits;
}
41 
// Upper bound on the number of significant decimal digits in the fraction
// of an exact decimal conversion for each representation.  Each entry was
// computed by converting the value that has the minimum exponent (biased
// to 1) and every fractional bit set.  The representation is selected by
// its binary precision; -1 is returned for an unknown precision.
static constexpr int MaxDecimalConversionDigits(int binaryPrecision) {
  struct DigitCount {
    int precision; // binary precision identifying the representation
    int digits; // maximum significant decimal digits for that representation
  };
  constexpr DigitCount known[]{
      {8, 96}, // IEEE single (truncated): 1+8+7 with implicit bit
      {11, 21}, // IEEE half precision: 1+5+10 with implicit bit
      {24, 112}, // IEEE single precision: 1+8+23 with implicit bit
      {53, 767}, // IEEE double precision: 1+11+52 with implicit bit
      {64, 11514}, // x87 extended precision: 1+15+64, no implicit bit
      {106, 2 * 767}, // "double-double": 2*(1+11+52 with implicit bit)
      {113, 11563}, // IEEE quad precision: 1+15+112 with implicit bit
  };
  for (const DigitCount &entry : known) {
    if (entry.precision == binaryPrecision) {
      return entry.digits;
    }
  }
  return -1;
}
65 
// Maps a binary precision to the REAL kind number used for that
// representation.  Yields -1 when no kind is assigned to the precision.
static constexpr int RealKindForPrecision(int binaryPrecision) {
  if (binaryPrecision == 8) {
    return 3; // IEEE single (truncated): 1+8+7 with implicit bit
  }
  if (binaryPrecision == 11) {
    return 2; // IEEE half precision: 1+5+10 with implicit bit
  }
  if (binaryPrecision == 24) {
    return 4; // IEEE single precision: 1+8+23 with implicit bit
  }
  if (binaryPrecision == 53) {
    return 8; // IEEE double precision: 1+11+52 with implicit bit
  }
  if (binaryPrecision == 64) {
    return 10; // x87 extended precision: 1+15+64, no implicit bit
  }
  // TODO: binaryPrecision == 106: return kind for double/double
  if (binaryPrecision == 113) {
    return 16; // IEEE quad precision: 1+15+112 with implicit bit
  }
  return -1;
}
85 
// Maps a REAL kind number to the binary precision (significand bits,
// counting any implicit MSB) of its representation.  Yields -1 for a kind
// that has no entry here.
static constexpr int PrecisionOfRealKind(int kind) {
  // TODO: add the kind for double/double, which would map to 106.
  return kind == 2 ? 11 // IEEE half precision: 1+5+10 with implicit bit
      : kind == 3  ? 8 // IEEE single (truncated): 1+8+7 with implicit bit
      : kind == 4  ? 24 // IEEE single precision: 1+8+23 with implicit bit
      : kind == 8  ? 53 // IEEE double precision: 1+11+52 with implicit bit
      : kind == 10 ? 64 // x87 extended precision: 1+15+64, no implicit bit
      : kind == 16 ? 113 // IEEE quad precision: 1+15+112 with implicit bit
                   : -1;
}
105 
106 template <int BINARY_PRECISION> class RealDetails {
107 private:
108   // Converts bit widths to whole decimal digits
LogBaseTwoToLogBaseTen(int logb2)109   static constexpr int LogBaseTwoToLogBaseTen(int logb2) {
110     constexpr std::int64_t LogBaseTenOfTwoTimesTenToThe12th{301029995664};
111     constexpr std::int64_t TenToThe12th{1000000000000};
112     std::int64_t logb10{
113         (logb2 * LogBaseTenOfTwoTimesTenToThe12th) / TenToThe12th};
114     return static_cast<int>(logb10);
115   }
116 
117 public:
118   static constexpr int binaryPrecision{BINARY_PRECISION};
119   static constexpr int bits{BitsForBinaryPrecision(binaryPrecision)};
120   static constexpr bool isImplicitMSB{binaryPrecision != 64 /*x87*/};
121   static constexpr int significandBits{binaryPrecision - isImplicitMSB};
122   static constexpr int exponentBits{bits - significandBits - 1 /*sign*/};
123   static constexpr int maxExponent{(1 << exponentBits) - 1};
124   static constexpr int exponentBias{maxExponent / 2};
125 
126   static constexpr int decimalPrecision{
127       LogBaseTwoToLogBaseTen(binaryPrecision - 1)};
128   static constexpr int decimalRange{LogBaseTwoToLogBaseTen(exponentBias - 1)};
129 
130   // Number of significant decimal digits in the fraction of the
131   // exact conversion of the least nonzero subnormal.
132   static constexpr int maxDecimalConversionDigits{
133       MaxDecimalConversionDigits(binaryPrecision)};
134 
135   static_assert(binaryPrecision > 0);
136   static_assert(exponentBits > 1);
137   static_assert(exponentBits <= 15);
138 };
139 
140 } // namespace Fortran::common
141 #endif // FORTRAN_COMMON_REAL_H_
142