1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef BERBERIS_INTRINSICS_INTRINSICS_FLOATING_POINT_IMPL_H_
18 #define BERBERIS_INTRINSICS_INTRINSICS_FLOATING_POINT_IMPL_H_
19
20 #include <limits>
21 #include <tuple>
22 #include <type_traits>
23
24 #include "berberis/base/bit_util.h"
25 #include "berberis/intrinsics/guest_cpu_flags.h"
26 #include "berberis/intrinsics/intrinsics.h"
27 #include "berberis/intrinsics/intrinsics_float.h" // Float32/Float64/ProcessNans
28 #include "berberis/intrinsics/type_traits.h"
29
30 namespace berberis::intrinsics {
31
32 template <typename FloatType,
33 enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FAdd(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2)34 std::tuple<FloatType> FAdd(int8_t rm, int8_t frm, FloatType arg1, FloatType arg2) {
35 return intrinsics::ExecuteFloatOperation<FloatType>(
36 rm,
37 frm,
38 [](auto x, auto y) {
39 return std::get<0>(FAddHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y));
40 },
41 arg1,
42 arg2);
43 }
44
45 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FAddHostRounding(FloatType arg1,FloatType arg2)46 std::tuple<FloatType> FAddHostRounding(FloatType arg1, FloatType arg2) {
47 return {arg1 + arg2};
48 }
49
50 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FClass(FloatType arg)51 std::tuple<uint64_t> FClass(FloatType arg) {
52 using IntType = std::make_unsigned_t<typename TypeTraits<FloatType>::Int>;
53 constexpr IntType quiet_bit =
54 __builtin_bit_cast(IntType, std::numeric_limits<FloatType>::quiet_NaN()) &
55 ~__builtin_bit_cast(IntType, std::numeric_limits<FloatType>::signaling_NaN());
56 const IntType raw_bits = bit_cast<IntType>(arg);
57
58 switch (FPClassify(arg)) {
59 case intrinsics::FPInfo::kNaN:
60 return (raw_bits & quiet_bit) ? 0b10'0000'0000 : 0b01'0000'0000;
61 case intrinsics::FPInfo::kInfinite:
62 return intrinsics::SignBit(arg) ? 0b00'0000'0001 : 0b00'1000'0000;
63 case intrinsics::FPInfo::kNormal:
64 return intrinsics::SignBit(arg) ? 0b00'0000'0010 : 0b00'0100'0000;
65 case intrinsics::FPInfo::kSubnormal:
66 return intrinsics::SignBit(arg) ? 0b00'0000'0100 : 0b00'0010'0000;
67 case intrinsics::FPInfo::kZero:
68 return intrinsics::SignBit(arg) ? 0b00'0000'1000 : 0b00'0001'0000;
69 }
70 }
71
72 template <typename TargetOperandType,
73 typename SourceOperandType,
74 enum PreferredIntrinsicsImplementation>
FCvtFloatToFloat(int8_t rm,int8_t frm,SourceOperandType arg)75 std::tuple<TargetOperandType> FCvtFloatToFloat(int8_t rm, int8_t frm, SourceOperandType arg) {
76 static_assert(std::is_same_v<Float32, SourceOperandType> ||
77 std::is_same_v<Float64, SourceOperandType>);
78 static_assert(std::is_same_v<Float32, TargetOperandType> ||
79 std::is_same_v<Float64, TargetOperandType>);
80 if constexpr (sizeof(TargetOperandType) > sizeof(SourceOperandType)) {
81 // Conversion from narrow type to wide one ignores rm because all possible values from narrow
82 // type fit in the wide type.
83 return TargetOperandType(arg);
84 } else {
85 return intrinsics::ExecuteFloatOperation<TargetOperandType>(
86 rm, frm, [](auto x) { return typename TypeTraits<decltype(x)>::Narrow(x); }, arg);
87 }
88 }
89
90 template <typename TargetOperandType,
91 typename SourceOperandType,
92 enum PreferredIntrinsicsImplementation>
FCvtFloatToInteger(int8_t rm,int8_t frm,SourceOperandType arg)93 std::tuple<TargetOperandType> FCvtFloatToInteger(int8_t rm, int8_t frm, SourceOperandType arg) {
94 static_assert(std::is_same_v<Float32, SourceOperandType> ||
95 std::is_same_v<Float64, SourceOperandType>);
96 static_assert(std::is_integral_v<TargetOperandType>);
97 int8_t actual_rm = rm == FPFlags::DYN ? frm : rm;
98 SourceOperandType result = FPRound(arg, ToIntrinsicRoundingMode(actual_rm));
99 if constexpr (std::is_signed_v<TargetOperandType>) {
100 // Note: because of how two's complement numbers and floats work minimum negative number always
101 // either representable precisely or not prepresentable at all, but this is not true for minimal
102 // possible value.
103 // Use ~min() to guarantee no surprises with rounding.
104 constexpr float kMinInBoundsNegativeValue =
105 static_cast<float>(std::numeric_limits<TargetOperandType>::min());
106 constexpr float kMinNotInBoundsPositiveValue = static_cast<float>(-kMinInBoundsNegativeValue);
107 if (result < SourceOperandType{kMinInBoundsNegativeValue}) [[unlikely]] {
108 return std::numeric_limits<TargetOperandType>::min();
109 }
110 // Note: we have to ensure that NaN is properly handled by this comparison!
111 if (result < SourceOperandType{kMinNotInBoundsPositiveValue}) [[likely]] {
112 return static_cast<TargetOperandType>(result);
113 }
114 } else {
115 // Note: if value is less than zero then result of conversion from float/double to unsigned
116 // integer is undefined and thus clang/gcc happily use conversion cvttss2si without doing
117 // anything to handle negative numbers. We need to handle that corner case here.
118 if (result < SourceOperandType{0.0f}) [[unlikely]] {
119 return 0;
120 }
121 // Similarly to signed interners case above, have to use -2.0f * min to properly handle NaNs.
122 constexpr float kMinNotInBoundsPositiveValue = static_cast<float>(
123 -2.0f *
124 static_cast<float>(std::numeric_limits<std::make_signed_t<TargetOperandType>>::min()));
125 // Note: we have to ensure that NaN is properly handled by this comparison!
126 if (result < SourceOperandType{kMinNotInBoundsPositiveValue}) [[likely]] {
127 return static_cast<TargetOperandType>(result);
128 }
129 }
130 // Handle too large numbers and NaN.
131 return std::numeric_limits<TargetOperandType>::max();
132 }
133
134 template <typename TargetOperandType,
135 typename SourceOperandType,
136 enum PreferredIntrinsicsImplementation>
FCvtIntegerToFloat(int8_t,int8_t,SourceOperandType arg)137 std::tuple<TargetOperandType> FCvtIntegerToFloat(int8_t /*rm*/,
138 int8_t /*frm*/,
139 SourceOperandType arg) {
140 static_assert(std::is_integral_v<SourceOperandType>);
141 static_assert(std::is_same_v<Float32, TargetOperandType> ||
142 std::is_same_v<Float64, TargetOperandType>);
143 // TODO(265372622): handle rm properly in integer-to-float and float-to-integer cases.
144 TargetOperandType result = static_cast<TargetOperandType>(arg);
145 return result;
146 }
147
148 template <typename FloatType,
149 enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FDiv(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2)150 std::tuple<FloatType> FDiv(int8_t rm, int8_t frm, FloatType arg1, FloatType arg2) {
151 return intrinsics::ExecuteFloatOperation<FloatType>(
152 rm,
153 frm,
154 [](auto x, auto y) {
155 return std::get<0>(FDivHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y));
156 },
157 arg1,
158 arg2);
159 }
160
161 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FDivHostRounding(FloatType arg1,FloatType arg2)162 std::tuple<FloatType> FDivHostRounding(FloatType arg1, FloatType arg2) {
163 return {arg1 / arg2};
164 }
165
166 template <typename FloatType,
167 enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FMAdd(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2,FloatType arg3)168 std::tuple<FloatType> FMAdd(int8_t rm, int8_t frm, FloatType arg1, FloatType arg2, FloatType arg3) {
169 return intrinsics::ExecuteFloatOperation<FloatType>(
170 rm,
171 frm,
172 [](auto x, auto y, auto z) {
173 return std::get<0>(
174 FMAddHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y, z));
175 },
176 arg1,
177 arg2,
178 arg3);
179 }
180
181 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FMAddHostRounding(FloatType arg1,FloatType arg2,FloatType arg3)182 std::tuple<FloatType> FMAddHostRounding(FloatType arg1, FloatType arg2, FloatType arg3) {
183 return {intrinsics::MulAdd(arg1, arg2, arg3)};
184 }
185
186 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FMax(FloatType x,FloatType y)187 std::tuple<FloatType> FMax(FloatType x, FloatType y) {
188 return {Max(x, y)};
189 }
190
191 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FMin(FloatType x,FloatType y)192 std::tuple<FloatType> FMin(FloatType x, FloatType y) {
193 return {Min(x, y)};
194 }
195
196 template <typename FloatType,
197 enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FMSub(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2,FloatType arg3)198 std::tuple<FloatType> FMSub(int8_t rm, int8_t frm, FloatType arg1, FloatType arg2, FloatType arg3) {
199 return intrinsics::ExecuteFloatOperation<FloatType>(
200 rm,
201 frm,
202 [](auto x, auto y, auto z) {
203 return std::get<0>(
204 FMSubHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y, z));
205 },
206 arg1,
207 arg2,
208 arg3);
209 }
210
211 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FMSubHostRounding(FloatType arg1,FloatType arg2,FloatType arg3)212 std::tuple<FloatType> FMSubHostRounding(FloatType arg1, FloatType arg2, FloatType arg3) {
213 return {intrinsics::MulAdd(arg1, arg2, intrinsics::Negative(arg3))};
214 }
215
216 template <typename FloatType,
217 enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FMul(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2)218 std::tuple<FloatType> FMul(int8_t rm, int8_t frm, FloatType arg1, FloatType arg2) {
219 return intrinsics::ExecuteFloatOperation<FloatType>(
220 rm,
221 frm,
222 [](auto x, auto y) {
223 return std::get<0>(FMulHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y));
224 },
225 arg1,
226 arg2);
227 }
228
229 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FMulHostRounding(FloatType arg1,FloatType arg2)230 std::tuple<FloatType> FMulHostRounding(FloatType arg1, FloatType arg2) {
231 return {arg1 * arg2};
232 }
233
234 template <typename FloatType,
235 enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FNMAdd(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2,FloatType arg3)236 std::tuple<FloatType> FNMAdd(int8_t rm,
237 int8_t frm,
238 FloatType arg1,
239 FloatType arg2,
240 FloatType arg3) {
241 return intrinsics::ExecuteFloatOperation<FloatType>(
242 rm,
243 frm,
244 [](auto x, auto y, auto z) {
245 return std::get<0>(
246 FNMAddHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y, z));
247 },
248 arg1,
249 arg2,
250 arg3);
251 }
252
253 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FNMAddHostRounding(FloatType arg1,FloatType arg2,FloatType arg3)254 std::tuple<FloatType> FNMAddHostRounding(FloatType arg1, FloatType arg2, FloatType arg3) {
255 return {intrinsics::MulAdd(intrinsics::Negative(arg1), arg2, arg3)};
256 }
257
258 template <typename FloatType,
259 enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FNMSub(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2,FloatType arg3)260 std::tuple<FloatType> FNMSub(int8_t rm,
261 int8_t frm,
262 FloatType arg1,
263 FloatType arg2,
264 FloatType arg3) {
265 return intrinsics::ExecuteFloatOperation<FloatType>(
266 rm,
267 frm,
268 [](auto x, auto y, auto z) {
269 return std::get<0>(
270 FNMSubHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y, z));
271 },
272 arg1,
273 arg2,
274 arg3);
275 }
276
277 template <typename FloatType>
CanonicalizeNanTuple(std::tuple<FloatType> arg)278 FloatType CanonicalizeNanTuple(std::tuple<FloatType> arg) {
279 return std::get<0>(CanonicalizeNan<FloatType>(std::get<0>(arg)));
280 }
281
282 template <typename FloatType>
RSqrtEstimate(FloatType op)283 FloatType RSqrtEstimate(FloatType op) {
284 if (SignBit(op)) {
285 // If argument is negative - return default NaN.
286 return std::numeric_limits<FloatType>::quiet_NaN();
287 }
288 switch (FPClassify(op)) {
289 case FPInfo::kNaN:
290 // If argument is NaN - return default NaN.
291 return std::numeric_limits<FloatType>::quiet_NaN();
292 case FPInfo::kInfinite:
293 return FloatType{0.0};
294 case FPInfo::kSubnormal:
295 case FPInfo::kZero:
296 // If operand is too small - return the appropriate infinity.
297 return CopySignBit(std::numeric_limits<FloatType>::infinity(), op);
298 case FPInfo::kNormal:
299 if constexpr (std::is_same_v<FloatType, Float32>) {
300 uint32_t op_32 = bit_cast<uint32_t>(op);
301 op_32 &= ~0xffff;
302 op_32 += 0x8000;
303 Float32 fp32 = bit_cast<Float32>(op_32);
304 fp32 = (FloatType{1.0} / Sqrt(fp32));
305 op_32 = bit_cast<uint32_t>(fp32);
306 op_32 += 0x4000;
307 op_32 &= ~0x7fff;
308 return bit_cast<Float32>(op_32);
309 } else {
310 static_assert(std::is_same_v<FloatType, Float64>);
311 uint64_t op_64 = bit_cast<uint64_t>(op);
312 op_64 &= ~0x1fff'ffff'ffff;
313 op_64 += 0x1000'0000'0000;
314 Float64 fp64 = bit_cast<Float64>(op_64);
315 fp64 = (FloatType{1.0} / Sqrt(fp64));
316 op_64 = bit_cast<uint64_t>(fp64);
317 op_64 += 0x0800'0000'0000;
318 op_64 &= ~0x0fff'ffff'ffff;
319 return bit_cast<Float64>(op_64);
320 }
321 }
322 }
323
324 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FNMSubHostRounding(FloatType arg1,FloatType arg2,FloatType arg3)325 std::tuple<FloatType> FNMSubHostRounding(FloatType arg1, FloatType arg2, FloatType arg3) {
326 return {intrinsics::MulAdd(intrinsics::Negative(arg1), arg2, intrinsics::Negative(arg3))};
327 }
328
329 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FSgnj(FloatType x,FloatType y)330 std::tuple<FloatType> FSgnj(FloatType x, FloatType y) {
331 using Int = typename TypeTraits<FloatType>::Int;
332 using UInt = std::make_unsigned_t<Int>;
333 constexpr UInt sign_bit = std::numeric_limits<Int>::min();
334 constexpr UInt non_sign_bit = std::numeric_limits<Int>::max();
335 return {bit_cast<FloatType>((bit_cast<UInt>(x) & non_sign_bit) | (bit_cast<UInt>(y) & sign_bit))};
336 }
337
338 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FSgnjn(FloatType x,FloatType y)339 std::tuple<FloatType> FSgnjn(FloatType x, FloatType y) {
340 return FSgnj(x, Negative(y));
341 }
342
343 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FSgnjx(FloatType x,FloatType y)344 std::tuple<FloatType> FSgnjx(FloatType x, FloatType y) {
345 using Int = typename TypeTraits<FloatType>::Int;
346 using UInt = std::make_unsigned_t<Int>;
347 constexpr UInt sign_bit = std::numeric_limits<Int>::min();
348 return {bit_cast<FloatType>(bit_cast<UInt>(x) ^ (bit_cast<UInt>(y) & sign_bit))};
349 }
350
351 template <typename FloatType,
352 enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FSqrt(int8_t rm,int8_t frm,FloatType arg)353 std::tuple<FloatType> FSqrt(int8_t rm, int8_t frm, FloatType arg) {
354 return intrinsics::ExecuteFloatOperation<FloatType>(
355 rm,
356 frm,
357 [](auto x) {
358 return std::get<0>(FSqrtHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x));
359 },
360 arg);
361 }
362
363 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FSqrtHostRounding(FloatType arg)364 std::tuple<FloatType> FSqrtHostRounding(FloatType arg) {
365 return {Sqrt(arg)};
366 }
367
368 template <typename FloatType,
369 enum PreferredIntrinsicsImplementation kPreferredIntrinsicsImplementation>
FSub(int8_t rm,int8_t frm,FloatType arg1,FloatType arg2)370 std::tuple<FloatType> FSub(int8_t rm, int8_t frm, FloatType arg1, FloatType arg2) {
371 return intrinsics::ExecuteFloatOperation<FloatType>(
372 rm,
373 frm,
374 [](auto x, auto y) {
375 return std::get<0>(FSubHostRounding<decltype(x), kPreferredIntrinsicsImplementation>(x, y));
376 },
377 arg1,
378 arg2);
379 }
380
381 template <typename FloatType, enum PreferredIntrinsicsImplementation>
FSubHostRounding(FloatType arg1,FloatType arg2)382 std::tuple<FloatType> FSubHostRounding(FloatType arg1, FloatType arg2) {
383 return {arg1 - arg2};
384 }
385
386 } // namespace berberis::intrinsics
387
#endif  // BERBERIS_INTRINSICS_INTRINSICS_FLOATING_POINT_IMPL_H_
389