1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef BERBERIS_INTRINSICS_INTRINSICS_FLOATING_POINT_IMPL_H_
18 #define BERBERIS_INTRINSICS_INTRINSICS_FLOATING_POINT_IMPL_H_
19 
20 #include <limits>
21 #include <tuple>
22 #include <type_traits>
23 
24 #include "berberis/base/bit_util.h"
25 #include "berberis/intrinsics/guest_cpu_flags.h"
26 #include "berberis/intrinsics/intrinsics.h"
27 #include "berberis/intrinsics/intrinsics_float.h"  // Float32/Float64/ProcessNans
28 #include "berberis/intrinsics/type_traits.h"
29 
30 namespace berberis::intrinsics {
31 
32 template <typename FloatType,
33           enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FAdd(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2)34 std::tuple<FloatType> FAdd(int8_t rm, int8_t frm, FloatType arg1, FloatType arg2) {
35   return intrinsics::ExecuteFloatOperation<FloatType>(
36       rm,
37       frm,
38       [](auto x, auto y) {
39         return std::get<0>(FAddHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y));
40       },
41       arg1,
42       arg2);
43 }
44 
45 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FAddHostRounding(FloatType arg1,FloatType arg2)46 std::tuple<FloatType> FAddHostRounding(FloatType arg1, FloatType arg2) {
47   return {arg1 + arg2};
48 }
49 
50 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FClass(FloatType arg)51 std::tuple<uint64_t> FClass(FloatType arg) {
52   using IntType = std::make_unsigned_t<typename TypeTraits<FloatType>::Int>;
53   constexpr IntType quiet_bit =
54       __builtin_bit_cast(IntType, std::numeric_limits<FloatType>::quiet_NaN()) &
55       ~__builtin_bit_cast(IntType, std::numeric_limits<FloatType>::signaling_NaN());
56   const IntType raw_bits = bit_cast<IntType>(arg);
57 
58   switch (FPClassify(arg)) {
59     case intrinsics::FPInfo::kNaN:
60       return (raw_bits & quiet_bit) ? 0b10'0000'0000 : 0b01'0000'0000;
61     case intrinsics::FPInfo::kInfinite:
62       return intrinsics::SignBit(arg) ? 0b00'0000'0001 : 0b00'1000'0000;
63     case intrinsics::FPInfo::kNormal:
64       return intrinsics::SignBit(arg) ? 0b00'0000'0010 : 0b00'0100'0000;
65     case intrinsics::FPInfo::kSubnormal:
66       return intrinsics::SignBit(arg) ? 0b00'0000'0100 : 0b00'0010'0000;
67     case intrinsics::FPInfo::kZero:
68       return intrinsics::SignBit(arg) ? 0b00'0000'1000 : 0b00'0001'0000;
69   }
70 }
71 
72 template <typename TargetOperandType,
73           typename SourceOperandType,
74           enum PreferredIntrinsicsImplementation>
FCvtFloatToFloat(int8_t rm,int8_t frm,SourceOperandType arg)75 std::tuple<TargetOperandType> FCvtFloatToFloat(int8_t rm, int8_t frm, SourceOperandType arg) {
76   static_assert(std::is_same_v<Float32, SourceOperandType> ||
77                 std::is_same_v<Float64, SourceOperandType>);
78   static_assert(std::is_same_v<Float32, TargetOperandType> ||
79                 std::is_same_v<Float64, TargetOperandType>);
80   if constexpr (sizeof(TargetOperandType) > sizeof(SourceOperandType)) {
81     // Conversion from narrow type to wide one ignores rm because all possible values from narrow
82     // type fit in the wide type.
83     return TargetOperandType(arg);
84   } else {
85     return intrinsics::ExecuteFloatOperation<TargetOperandType>(
86         rm, frm, [](auto x) { return typename TypeTraits<decltype(x)>::Narrow(x); }, arg);
87   }
88 }
89 
90 template <typename TargetOperandType,
91           typename SourceOperandType,
92           enum PreferredIntrinsicsImplementation>
FCvtFloatToInteger(int8_t rm,int8_t frm,SourceOperandType arg)93 std::tuple<TargetOperandType> FCvtFloatToInteger(int8_t rm, int8_t frm, SourceOperandType arg) {
94   static_assert(std::is_same_v<Float32, SourceOperandType> ||
95                 std::is_same_v<Float64, SourceOperandType>);
96   static_assert(std::is_integral_v<TargetOperandType>);
97   int8_t actual_rm = rm == FPFlags::DYN ? frm : rm;
98   SourceOperandType result = FPRound(arg, ToIntrinsicRoundingMode(actual_rm));
99   if constexpr (std::is_signed_v<TargetOperandType>) {
100     // Note: because of how two's complement numbers and floats work minimum negative number always
101     // either representable precisely or not prepresentable at all, but this is not true for minimal
102     // possible value.
103     // Use ~min() to guarantee no surprises with rounding.
104     constexpr float kMinInBoundsNegativeValue =
105         static_cast<float>(std::numeric_limits<TargetOperandType>::min());
106     constexpr float kMinNotInBoundsPositiveValue = static_cast<float>(-kMinInBoundsNegativeValue);
107     if (result < SourceOperandType{kMinInBoundsNegativeValue}) [[unlikely]] {
108       return std::numeric_limits<TargetOperandType>::min();
109     }
110     // Note: we have to ensure that NaN is properly handled by this comparison!
111     if (result < SourceOperandType{kMinNotInBoundsPositiveValue}) [[likely]] {
112       return static_cast<TargetOperandType>(result);
113     }
114   } else {
115     // Note: if value is less than zero then result of conversion from float/double to unsigned
116     // integer is undefined and thus clang/gcc happily use conversion cvttss2si without doing
117     // anything to handle negative numbers.  We need to handle that corner case here.
118     if (result < SourceOperandType{0.0f}) [[unlikely]] {
119       return 0;
120     }
121     // Similarly to signed interners case above, have to use -2.0f * min to properly handle NaNs.
122     constexpr float kMinNotInBoundsPositiveValue = static_cast<float>(
123         -2.0f *
124         static_cast<float>(std::numeric_limits<std::make_signed_t<TargetOperandType>>::min()));
125     // Note: we have to ensure that NaN is properly handled by this comparison!
126     if (result < SourceOperandType{kMinNotInBoundsPositiveValue}) [[likely]] {
127       return static_cast<TargetOperandType>(result);
128     }
129   }
130   // Handle too large numbers and NaN.
131   return std::numeric_limits<TargetOperandType>::max();
132 }
133 
134 template <typename TargetOperandType,
135           typename SourceOperandType,
136           enum PreferredIntrinsicsImplementation>
FCvtIntegerToFloat(int8_t,int8_t,SourceOperandType arg)137 std::tuple<TargetOperandType> FCvtIntegerToFloat(int8_t /*rm*/,
138                                                  int8_t /*frm*/,
139                                                  SourceOperandType arg) {
140   static_assert(std::is_integral_v<SourceOperandType>);
141   static_assert(std::is_same_v<Float32, TargetOperandType> ||
142                 std::is_same_v<Float64, TargetOperandType>);
143   // TODO(265372622): handle rm properly in integer-to-float and float-to-integer cases.
144   TargetOperandType result = static_cast<TargetOperandType>(arg);
145   return result;
146 }
147 
148 template <typename FloatType,
149           enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FDiv(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2)150 std::tuple<FloatType> FDiv(int8_t rm, int8_t frm, FloatType arg1, FloatType arg2) {
151   return intrinsics::ExecuteFloatOperation<FloatType>(
152       rm,
153       frm,
154       [](auto x, auto y) {
155         return std::get<0>(FDivHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y));
156       },
157       arg1,
158       arg2);
159 }
160 
161 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FDivHostRounding(FloatType arg1,FloatType arg2)162 std::tuple<FloatType> FDivHostRounding(FloatType arg1, FloatType arg2) {
163   return {arg1 / arg2};
164 }
165 
166 template <typename FloatType,
167           enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FMAdd(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2,FloatType arg3)168 std::tuple<FloatType> FMAdd(int8_t rm, int8_t frm, FloatType arg1, FloatType arg2, FloatType arg3) {
169   return intrinsics::ExecuteFloatOperation<FloatType>(
170       rm,
171       frm,
172       [](auto x, auto y, auto z) {
173         return std::get<0>(
174             FMAddHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y, z));
175       },
176       arg1,
177       arg2,
178       arg3);
179 }
180 
181 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FMAddHostRounding(FloatType arg1,FloatType arg2,FloatType arg3)182 std::tuple<FloatType> FMAddHostRounding(FloatType arg1, FloatType arg2, FloatType arg3) {
183   return {intrinsics::MulAdd(arg1, arg2, arg3)};
184 }
185 
186 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FMax(FloatType x,FloatType y)187 std::tuple<FloatType> FMax(FloatType x, FloatType y) {
188   return {Max(x, y)};
189 }
190 
191 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FMin(FloatType x,FloatType y)192 std::tuple<FloatType> FMin(FloatType x, FloatType y) {
193   return {Min(x, y)};
194 }
195 
196 template <typename FloatType,
197           enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FMSub(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2,FloatType arg3)198 std::tuple<FloatType> FMSub(int8_t rm, int8_t frm, FloatType arg1, FloatType arg2, FloatType arg3) {
199   return intrinsics::ExecuteFloatOperation<FloatType>(
200       rm,
201       frm,
202       [](auto x, auto y, auto z) {
203         return std::get<0>(
204             FMSubHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y, z));
205       },
206       arg1,
207       arg2,
208       arg3);
209 }
210 
211 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FMSubHostRounding(FloatType arg1,FloatType arg2,FloatType arg3)212 std::tuple<FloatType> FMSubHostRounding(FloatType arg1, FloatType arg2, FloatType arg3) {
213   return {intrinsics::MulAdd(arg1, arg2, intrinsics::Negative(arg3))};
214 }
215 
216 template <typename FloatType,
217           enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FMul(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2)218 std::tuple<FloatType> FMul(int8_t rm, int8_t frm, FloatType arg1, FloatType arg2) {
219   return intrinsics::ExecuteFloatOperation<FloatType>(
220       rm,
221       frm,
222       [](auto x, auto y) {
223         return std::get<0>(FMulHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y));
224       },
225       arg1,
226       arg2);
227 }
228 
229 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FMulHostRounding(FloatType arg1,FloatType arg2)230 std::tuple<FloatType> FMulHostRounding(FloatType arg1, FloatType arg2) {
231   return {arg1 * arg2};
232 }
233 
234 template <typename FloatType,
235           enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FNMAdd(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2,FloatType arg3)236 std::tuple<FloatType> FNMAdd(int8_t rm,
237                              int8_t frm,
238                              FloatType arg1,
239                              FloatType arg2,
240                              FloatType arg3) {
241   return intrinsics::ExecuteFloatOperation<FloatType>(
242       rm,
243       frm,
244       [](auto x, auto y, auto z) {
245         return std::get<0>(
246             FNMAddHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y, z));
247       },
248       arg1,
249       arg2,
250       arg3);
251 }
252 
253 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FNMAddHostRounding(FloatType arg1,FloatType arg2,FloatType arg3)254 std::tuple<FloatType> FNMAddHostRounding(FloatType arg1, FloatType arg2, FloatType arg3) {
255   return {intrinsics::MulAdd(intrinsics::Negative(arg1), arg2, arg3)};
256 }
257 
258 template <typename FloatType,
259           enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FNMSub(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2,FloatType arg3)260 std::tuple<FloatType> FNMSub(int8_t rm,
261                              int8_t frm,
262                              FloatType arg1,
263                              FloatType arg2,
264                              FloatType arg3) {
265   return intrinsics::ExecuteFloatOperation<FloatType>(
266       rm,
267       frm,
268       [](auto x, auto y, auto z) {
269         return std::get<0>(
270             FNMSubHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y, z));
271       },
272       arg1,
273       arg2,
274       arg3);
275 }
276 
277 template <typename FloatType>
CanonicalizeNanTuple(std::tuple<FloatType> arg)278 FloatType CanonicalizeNanTuple(std::tuple<FloatType> arg) {
279   return std::get<0>(CanonicalizeNan<FloatType>(std::get<0>(arg)));
280 }
281 
282 template <typename FloatType>
RSqrtEstimate(FloatType op)283 FloatType RSqrtEstimate(FloatType op) {
284   if (SignBit(op)) {
285     // If argument is negative - return default NaN.
286     return std::numeric_limits<FloatType>::quiet_NaN();
287   }
288   switch (FPClassify(op)) {
289     case FPInfo::kNaN:
290       // If argument is NaN - return default NaN.
291       return std::numeric_limits<FloatType>::quiet_NaN();
292     case FPInfo::kInfinite:
293       return FloatType{0.0};
294     case FPInfo::kSubnormal:
295     case FPInfo::kZero:
296       // If operand is too small - return the appropriate infinity.
297       return CopySignBit(std::numeric_limits<FloatType>::infinity(), op);
298     case FPInfo::kNormal:
299       if constexpr (std::is_same_v<FloatType, Float32>) {
300         uint32_t op_32 = bit_cast<uint32_t>(op);
301         op_32 &= ~0xffff;
302         op_32 += 0x8000;
303         Float32 fp32 = bit_cast<Float32>(op_32);
304         fp32 = (FloatType{1.0} / Sqrt(fp32));
305         op_32 = bit_cast<uint32_t>(fp32);
306         op_32 += 0x4000;
307         op_32 &= ~0x7fff;
308         return bit_cast<Float32>(op_32);
309       } else {
310         static_assert(std::is_same_v<FloatType, Float64>);
311         uint64_t op_64 = bit_cast<uint64_t>(op);
312         op_64 &= ~0x1fff'ffff'ffff;
313         op_64 += 0x1000'0000'0000;
314         Float64 fp64 = bit_cast<Float64>(op_64);
315         fp64 = (FloatType{1.0} / Sqrt(fp64));
316         op_64 = bit_cast<uint64_t>(fp64);
317         op_64 += 0x0800'0000'0000;
318         op_64 &= ~0x0fff'ffff'ffff;
319         return bit_cast<Float64>(op_64);
320       }
321   }
322 }
323 
324 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FNMSubHostRounding(FloatType arg1,FloatType arg2,FloatType arg3)325 std::tuple<FloatType> FNMSubHostRounding(FloatType arg1, FloatType arg2, FloatType arg3) {
326   return {intrinsics::MulAdd(intrinsics::Negative(arg1), arg2, intrinsics::Negative(arg3))};
327 }
328 
329 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FSgnj(FloatType x,FloatType y)330 std::tuple<FloatType> FSgnj(FloatType x, FloatType y) {
331   using Int = typename TypeTraits<FloatType>::Int;
332   using UInt = std::make_unsigned_t<Int>;
333   constexpr UInt sign_bit = std::numeric_limits<Int>::min();
334   constexpr UInt non_sign_bit = std::numeric_limits<Int>::max();
335   return {bit_cast<FloatType>((bit_cast<UInt>(x) & non_sign_bit) | (bit_cast<UInt>(y) & sign_bit))};
336 }
337 
338 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FSgnjn(FloatType x,FloatType y)339 std::tuple<FloatType> FSgnjn(FloatType x, FloatType y) {
340   return FSgnj(x, Negative(y));
341 }
342 
343 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FSgnjx(FloatType x,FloatType y)344 std::tuple<FloatType> FSgnjx(FloatType x, FloatType y) {
345   using Int = typename TypeTraits<FloatType>::Int;
346   using UInt = std::make_unsigned_t<Int>;
347   constexpr UInt sign_bit = std::numeric_limits<Int>::min();
348   return {bit_cast<FloatType>(bit_cast<UInt>(x) ^ (bit_cast<UInt>(y) & sign_bit))};
349 }
350 
351 template <typename FloatType,
352           enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FSqrt(int8_t rm,int8_t frm,FloatType arg)353 std::tuple<FloatType> FSqrt(int8_t rm, int8_t frm, FloatType arg) {
354   return intrinsics::ExecuteFloatOperation<FloatType>(
355       rm,
356       frm,
357       [](auto x) {
358         return std::get<0>(FSqrtHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x));
359       },
360       arg);
361 }
362 
363 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FSqrtHostRounding(FloatType arg)364 std::tuple<FloatType> FSqrtHostRounding(FloatType arg) {
365   return {Sqrt(arg)};
366 }
367 
368 template <typename FloatType,
369           enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FSub(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2)370 std::tuple<FloatType> FSub(int8_t rm, int8_t frm, FloatType arg1, FloatType arg2) {
371   return intrinsics::ExecuteFloatOperation<FloatType>(
372       rm,
373       frm,
374       [](auto x, auto y) {
375         return std::get<0>(FSubHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y));
376       },
377       arg1,
378       arg2);
379 }
380 
381 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FSubHostRounding(FloatType arg1,FloatType arg2)382 std::tuple<FloatType> FSubHostRounding(FloatType arg1, FloatType arg2) {
383   return {arg1 - arg2};
384 }
385 
386 }  // namespace berberis::intrinsics
387 
#endif  // BERBERIS_INTRINSICS_INTRINSICS_FLOATING_POINT_IMPL_H_
389