1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
18 #define BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
19 
20 #include <cmath>
21 #include <limits>
22 
23 #include "berberis/base/bit_util.h"
24 #include "berberis/intrinsics/common/intrinsics_float.h"
25 #include "berberis/intrinsics/common_to_x86/intrinsics_float.h"
26 #include "berberis/intrinsics/guest_cpu_flags.h"       // ToHostRoundingMode
27 #include "berberis/intrinsics/guest_rounding_modes.h"  // ScopedRoundingMode
28 #include "berberis/intrinsics/type_traits.h"
29 
30 namespace berberis::intrinsics {
31 
32 // x86 architecture doesn't support RMM (aka FE_TIESAWAY), but it can be easily emulated since it
33 // have support for 80bit floats: if calculations are done with one bit (or more) of extra precision
34 // in the FE_TOWARDZERO mode then we can easily adjust fraction part and would only need to remember
35 // this addition may overflow.
36 template <typename FloatType, typename OperationType, typename... Args>
ExecuteFloatOperationRmm(OperationType operation,Args...args)37 inline FloatType ExecuteFloatOperationRmm(OperationType operation, Args... args) {
38   using Wide = typename TypeTraits<FloatType>::Wide;
39   Wide wide_result = operation(static_cast<typename TypeTraits<Args>::Wide>(args)...);
40   if constexpr (std::is_same_v<FloatType, Float32>) {
41     // In the 32bit->64bit case everything happens almost automatically, we just need to clear low
42     // bits to ensure that we are getting ±∞ and not NaN.
43     auto int_result = bit_cast<std::make_unsigned_t<typename TypeTraits<Wide>::Int>>(wide_result);
44     if ((int_result & 0x7ff0'0000'0000'0000) == 0x7ff0'0000'0000'0000) {
45       return FloatType(wide_result);
46     }
47     int_result += 0x0000'0000'1000'0000;
48     int_result &= 0xffff'ffff'e000'0000;
49     wide_result = bit_cast<Wide>(int_result);
50   } else if constexpr (std::is_same_v<FloatType, Float64>) {
51     // In 64bit->80bit case we need to adjust significand bits to ensure we are creating ±∞ and not
52     // pseudo-infinity (supported on 8087/80287, but not on modern CPUs).
53     struct {
54       uint64_t significand;
55       uint16_t exponent;
56       uint8_t padding[sizeof(Wide) - sizeof(uint64_t) - sizeof(uint16_t)];
57     } fp80_parts;
58     static_assert(sizeof fp80_parts == sizeof(Wide));
59     memcpy(&fp80_parts, &wide_result, sizeof(wide_result));
60     // Don't try to round ±∞, NaNs and ±0 (denormals are not supported by RISC-V).
61     if ((fp80_parts.exponent & 0x7fff) == 0x7fff ||
62         (fp80_parts.significand & 0x8000'0000'0000'0000) == 0) {
63       return FloatType(wide_result);
64     }
65     fp80_parts.significand += 0x0000'0000'0000'0400;
66     fp80_parts.significand &= 0xffff'ffff'ffff'f800;
67     if (fp80_parts.significand == 0) {
68       fp80_parts.exponent++;
69       fp80_parts.significand = 0x8000'0000'0000'0000;
70     }
71     memcpy(&wide_result, &fp80_parts, sizeof(wide_result));
72   }
73   return FloatType(wide_result);
74 }
75 
76 // Note: first round of rm/frm verification must happen before that function because RISC-V
77 // postulates that invalid rm or frm should trigger illegal instruction exception.
78 // Here we can assume both rm and frm fields are valid.
79 template <typename FloatType, typename OperationType, typename... Args>
ExecuteFloatOperation(uint8_t requested_rm,uint8_t current_rm,OperationType operation,Args...args)80 inline FloatType ExecuteFloatOperation(uint8_t requested_rm,
81                                        uint8_t current_rm,
82                                        OperationType operation,
83                                        Args... args) {
84   int host_requested_rm = ToHostRoundingMode(requested_rm);
85   int host_current_rm = ToHostRoundingMode(current_rm);
86   if (requested_rm == FPFlags::DYN || host_requested_rm == host_current_rm) {
87     uint8_t rm = requested_rm == FPFlags::DYN ? current_rm : requested_rm;
88     if (rm == FPFlags::RMM) {
89       return ExecuteFloatOperationRmm<FloatType>(operation, args...);
90     }
91     return operation(args...);
92   }
93   ScopedRoundingMode scoped_rounding_mode{host_requested_rm};
94   if (requested_rm == FPFlags::RMM) {
95     return ExecuteFloatOperationRmm<FloatType>(operation, args...);
96   }
97   return operation(args...);
98 }
99 
100 // From RISC-V ISA manual: Single-Precision Floating-Point Computational Instructions.
101 // Covers behavior for both single and double precision floating point comparisons.
102 #define DEFINE_FLOAT_COMPARE_FUNC(FuncName, FloatType, ZeroVal, Intrinsic) \
103   inline FloatType FuncName(FloatType op1, FloatType op2) {                \
104     FPInfo op1_class = FPClassify(op1);                                    \
105     FPInfo op2_class = FPClassify(op2);                                    \
106     if (op1_class == FPInfo::kZero && op2_class == FPInfo::kZero &&        \
107         SignBit(op1) != SignBit(op2)) {                                    \
108       return FloatType(ZeroVal);                                           \
109     }                                                                      \
110     /* If both inputs are NaNs, the result is the canonical NaN. */        \
111     if (op1_class == FPInfo::kNaN && op2_class == FPInfo::kNaN) {          \
112       return std::numeric_limits<FloatType>::quiet_NaN();                  \
113     }                                                                      \
114     /* If only one operand is a NaN, the result is the non-NaN operand. */ \
115     if (op1_class == FPInfo::kNaN) {                                       \
116       return op2;                                                          \
117     }                                                                      \
118     if (op2_class == FPInfo::kNaN) {                                       \
119       return op1;                                                          \
120     }                                                                      \
121     return FloatType(Intrinsic(op1.value_, op2.value_));                   \
122   }
123 DEFINE_FLOAT_COMPARE_FUNC(Max, Float32, +0.f, std::fmax);
124 DEFINE_FLOAT_COMPARE_FUNC(Max, Float64, +0.f, std::fmax);
125 DEFINE_FLOAT_COMPARE_FUNC(Min, Float32, -0.f, std::fmin);
126 DEFINE_FLOAT_COMPARE_FUNC(Min, Float64, -0.f, std::fmin);
127 #undef DEFINE_FLOAT_COMPARE_FUNC
128 
129 // We only need Negative(long double) for FMA, b/120563432 doesn't affect this function.
Negative(const long double & v)130 inline long double Negative(const long double& v) {
131   return -v;
132 }
133 
Sqrt(const long double & v)134 inline long double Sqrt(const long double& v) {
135   return sqrt(v);
136 }
137 
MulAdd(const long double & v1,const long double & v2,const long double & v3)138 inline long double MulAdd(const long double& v1, const long double& v2, const long double& v3) {
139   return fma(v1, v2, v3);
140 }
141 
142 }  // namespace berberis::intrinsics
143 
144 #endif  // BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
145