1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef COMMON_TO_X86_BERBERIS_INTRINSICS_INTRINSICS_FLOAT_H_
18 #define COMMON_TO_X86_BERBERIS_INTRINSICS_INTRINSICS_FLOAT_H_
19 
20 #include <cmath>
21 
22 #include "berberis/base/bit_util.h"
23 #include "berberis/base/logging.h"
24 #include "berberis/intrinsics/common/intrinsics_float.h"  // Float32/Float64
25 #include "berberis/intrinsics/guest_rounding_modes.h"     // FE_HOSTROUND/FE_TIESAWAY
26 
27 namespace berberis::intrinsics {
28 
29 #define MAKE_BINARY_OPERATOR(guest_name, operator_name, assignment_name)                \
30                                                                                         \
31   inline Float32 operator operator_name(const Float32& v1, const Float32& v2) {         \
32     Float32 result;                                                                     \
33     asm(#guest_name "ss %2,%0" : "=x"(result.value_) : "0"(v1.value_), "x"(v2.value_)); \
34     return result;                                                                      \
35   }                                                                                     \
36                                                                                         \
37   inline Float32& operator assignment_name(Float32& v1, const Float32& v2) {            \
38     asm(#guest_name "ss %2,%0" : "=x"(v1.value_) : "0"(v1.value_), "x"(v2.value_));     \
39     return v1;                                                                          \
40   }                                                                                     \
41                                                                                         \
42   inline Float64 operator operator_name(const Float64& v1, const Float64& v2) {         \
43     Float64 result;                                                                     \
44     asm(#guest_name "sd %2,%0" : "=x"(result.value_) : "0"(v1.value_), "x"(v2.value_)); \
45     return result;                                                                      \
46   }                                                                                     \
47                                                                                         \
48   inline Float64& operator assignment_name(Float64& v1, const Float64& v2) {            \
49     asm(#guest_name "sd %2,%0" : "=x"(v1.value_) : "0"(v1.value_), "x"(v2.value_));     \
50     return v1;                                                                          \
51   }
52 
53 MAKE_BINARY_OPERATOR(add, +, +=)
54 MAKE_BINARY_OPERATOR(sub, -, -=)
55 MAKE_BINARY_OPERATOR(mul, *, *=)
56 MAKE_BINARY_OPERATOR(div, /, /=)
57 
58 #undef MAKE_BINARY_OPERATOR
59 
60 inline bool operator<(const Float32& v1, const Float32& v2) {
61   bool result;
62   asm("ucomiss %1,%2\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
63   return result;
64 }
65 
66 inline bool operator<(const Float64& v1, const Float64& v2) {
67   bool result;
68   asm("ucomisd %1,%2\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
69   return result;
70 }
71 
72 inline bool operator>(const Float32& v1, const Float32& v2) {
73   bool result;
74   asm("ucomiss %2,%1\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
75   return result;
76 }
77 
78 inline bool operator>(const Float64& v1, const Float64& v2) {
79   bool result;
80   asm("ucomisd %2,%1\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
81   return result;
82 }
83 
84 inline bool operator<=(const Float32& v1, const Float32& v2) {
85   bool result;
86   asm("ucomiss %1,%2\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
87   return result;
88 }
89 
90 inline bool operator<=(const Float64& v1, const Float64& v2) {
91   bool result;
92   asm("ucomisd %1,%2\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
93   return result;
94 }
95 
96 inline bool operator>=(const Float32& v1, const Float32& v2) {
97   bool result;
98   asm("ucomiss %2,%1\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
99   return result;
100 }
101 
102 inline bool operator>=(const Float64& v1, const Float64& v2) {
103   bool result;
104   asm("ucomisd %2,%1\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
105   return result;
106 }
107 
108 inline bool operator==(const Float32& v1, const Float32& v2) {
109   float result;
110   asm("cmpeqss %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
111   return bit_cast<uint32_t, float>(result) & 0x1;
112 }
113 
114 inline bool operator==(const Float64& v1, const Float64& v2) {
115   double result;
116   asm("cmpeqsd %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
117   return bit_cast<uint64_t, double>(result) & 0x1;
118 }
119 
120 inline bool operator!=(const Float32& v1, const Float32& v2) {
121   float result;
122   asm("cmpneqss %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
123   return bit_cast<uint32_t, float>(result) & 0x1;
124 }
125 
126 inline bool operator!=(const Float64& v1, const Float64& v2) {
127   double result;
128   asm("cmpneqsd %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
129   return bit_cast<uint64_t, double>(result) & 0x1;
130 }
131 
// It's NOT safe to use ANY functions which return float or double.  That's because the IA32 ABI
// uses the x87 stack to return floating-point values (and does that even with -mfpmath=sse), and
// NaN float and double values would be corrupted if pushed on it.
//
// It's safe to use builtins here if this file is compiled with -mfpmath=sse (clang does not have
// such a flag but uses SSE whenever possible; GCC needs both -msse2 and -mfpmath=sse), since
// builtins DON'T use the official calling convention but are instead embedded in the function -
// even if all optimizations are disabled.
140 
// Returns a Float32 with the magnitude of v1 and the sign of v2.
inline Float32 CopySignBit(const Float32& v1, const Float32& v2) {
  const float combined = __builtin_copysignf(v1.value_, v2.value_);
  return Float32(combined);
}
144 
// Returns a Float64 with the magnitude of v1 and the sign of v2.
inline Float64 CopySignBit(const Float64& v1, const Float64& v2) {
  const double combined = __builtin_copysign(v1.value_, v2.value_);
  return Float64(combined);
}
148 
// Returns the absolute value of v.
inline Float32 Absolute(const Float32& v) {
  const float magnitude = __builtin_fabsf(v.value_);
  return Float32(magnitude);
}
152 
// Returns the absolute value of v.
inline Float64 Absolute(const Float64& v) {
  const double magnitude = __builtin_fabs(v.value_);
  return Float64(magnitude);
}
156 
// Returns v with its sign bit flipped.
//
// TODO(b/120563432): Simple -v.value_ doesn't work after a clang update, so the sign is toggled
// by XOR-ing bit 31 of the value inside an SSE register instead.
inline Float32 Negative(const Float32& v) {
  const uint64_t sign_mask = 0x80000000U;
  Float32 negated;
  asm("pxor %[mask], %[val]"
      : [val] "=x"(negated.value_)
      : "0"(v.value_), [mask] "x"(sign_mask));
  return negated;
}
164 
// Returns v with its sign bit flipped.
//
// TODO(b/120563432): Simple -v.value_ doesn't work after a clang update, so the sign is toggled
// by XOR-ing bit 63 of the value inside an SSE register instead.
inline Float64 Negative(const Float64& v) {
  const uint64_t sign_mask = 0x8000000000000000ULL;
  Float64 negated;
  asm("pxor %[mask], %[val]"
      : [val] "=x"(negated.value_)
      : "0"(v.value_), [mask] "x"(sign_mask));
  return negated;
}
172 
FPClassify(const Float32 & v)173 inline FPInfo FPClassify(const Float32& v) {
174   return static_cast<FPInfo>(__builtin_fpclassify(static_cast<int>(FPInfo::kNaN),
175                                                   static_cast<int>(FPInfo::kInfinite),
176                                                   static_cast<int>(FPInfo::kNormal),
177                                                   static_cast<int>(FPInfo::kSubnormal),
178                                                   static_cast<int>(FPInfo::kZero),
179                                                   v.value_));
180 }
181 
FPClassify(const Float64 & v)182 inline FPInfo FPClassify(const Float64& v) {
183   return static_cast<FPInfo>(__builtin_fpclassify(static_cast<int>(FPInfo::kNaN),
184                                                   static_cast<int>(FPInfo::kInfinite),
185                                                   static_cast<int>(FPInfo::kNormal),
186                                                   static_cast<int>(FPInfo::kSubnormal),
187                                                   static_cast<int>(FPInfo::kZero),
188                                                   v.value_));
189 }
190 
// Rounds `value` to an integral Float32 using the rounding mode selected by `round_control`.
//
// FE_HOSTROUND, FE_TONEAREST, FE_DOWNWARD, FE_UPWARD and FE_TOWARDZERO each map onto a single
// roundss instruction with the corresponding immediate.  FE_TIESAWAY has no direct x86 equivalent
// and is emulated with the other modes.  Any other `round_control` is reported through
// LOG_ALWAYS_FATAL.
inline Float32 FPRound(const Float32& value, uint32_t round_control) {
  Float32 result;
  switch (round_control) {
    case FE_HOSTROUND:
      // Immediate 4: use the rounding mode currently configured in MXCSR.
      asm("roundss $4,%[src],%[dst]" : [dst] "=x"(result.value_) : [src] "x"(value.value_));
      break;
    case FE_TONEAREST:
      asm("roundss $0,%[src],%[dst]" : [dst] "=x"(result.value_) : [src] "x"(value.value_));
      break;
    case FE_DOWNWARD:
      asm("roundss $1,%[src],%[dst]" : [dst] "=x"(result.value_) : [src] "x"(value.value_));
      break;
    case FE_UPWARD:
      asm("roundss $2,%[src],%[dst]" : [dst] "=x"(result.value_) : [src] "x"(value.value_));
      break;
    case FE_TOWARDZERO:
      asm("roundss $3,%[src],%[dst]" : [dst] "=x"(result.value_) : [src] "x"(value.value_));
      break;
    case FE_TIESAWAY: {
      // Since x86 does not support this rounding mode exactly, the ties (from (-)x.5) are handled
      // manually, in the same way as the Float64 overload.
      const Float32 floor_value = FPRound(value, FE_DOWNWARD);
      if (value == floor_value) {
        // Value is already an integer and can be returned as-is.  Checking this first avoids
        // dealing with numbers too large to have a fractional part, for which the `+ 0.5` below
        // is not exact (the case previously flagged in b/146437763).
        return value;
      }
      if (value == floor_value + Float32(0.5)) {
        // Fraction part is exactly 1/2: round away from zero.
        result = value > Float32(0.0) ? FPRound(value, FE_UPWARD) : floor_value;
      } else {
        // Any other case can be handled by to-nearest rounding.
        result = FPRound(value, FE_TONEAREST);
      }
      break;
    }
    default:
      LOG_ALWAYS_FATAL("Internal error: unknown round_control in FPRound!");
      result.value_ = 0.f;
  }
  return result;
}
223 
// Rounds `value` to an integral Float64 using the rounding mode selected by `round_control`.
//
// FE_HOSTROUND, FE_TONEAREST, FE_DOWNWARD, FE_UPWARD and FE_TOWARDZERO each map onto a single
// roundsd instruction with the corresponding immediate.  FE_TIESAWAY has no direct x86 equivalent
// and is emulated with the other modes.  Any other `round_control` is reported through
// LOG_ALWAYS_FATAL.
inline Float64 FPRound(const Float64& value, uint32_t round_control) {
  Float64 rounded;
  switch (round_control) {
    case FE_HOSTROUND:
      // Immediate 4: use the rounding mode currently configured in MXCSR.
      asm("roundsd $4,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_TONEAREST:
      asm("roundsd $0,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_DOWNWARD:
      asm("roundsd $1,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_UPWARD:
      asm("roundsd $2,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_TOWARDZERO:
      asm("roundsd $3,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_TIESAWAY: {
      // x86 has no ties-away mode, so halves ((-)x.5) are detected and rounded by hand.
      const Float64 floor_value = FPRound(value, FE_DOWNWARD);
      if (value == floor_value) {
        // Already integral: hand it back untouched.  Doing this test first sidesteps magnitudes
        // that are too large to carry a fractional part at all.
        return value;
      }
      if (value == floor_value + Float64(0.5)) {
        // Exactly halfway between two integers: move away from zero.
        if (value > Float64(0.0)) {
          rounded = FPRound(value, FE_UPWARD);
        } else {
          rounded = floor_value;
        }
      } else {
        // Not a tie, so ordinary to-nearest rounding gives the right answer.
        rounded = FPRound(value, FE_TONEAREST);
      }
      break;
    }
    default:
      LOG_ALWAYS_FATAL("Internal error: unknown round_control in FPRound!");
      rounded.value_ = 0.;
  }
  return rounded;
}
263 
IsNan(const Float32 & v)264 inline int IsNan(const Float32& v) {
265   return __builtin_isnan(v.value_);
266 }
267 
IsNan(const Float64 & v)268 inline int IsNan(const Float64& v) {
269   return __builtin_isnan(v.value_);
270 }
271 
SignBit(const Float32 & v)272 inline int SignBit(const Float32& v) {
273   return __builtin_signbitf(v.value_);
274 }
275 
SignBit(const Float64 & v)276 inline int SignBit(const Float64& v) {
277   return __builtin_signbit(v.value_);
278 }
279 
// Returns the square root of v.
inline Float32 Sqrt(const Float32& v) {
  const float root = __builtin_sqrtf(v.value_);
  return Float32(root);
}
283 
// Returns the square root of v.
inline Float64 Sqrt(const Float64& v) {
  const double root = __builtin_sqrt(v.value_);
  return Float64(root);
}
287 
288 // x*y + z
MulAdd(const Float32 & v1,const Float32 & v2,const Float32 & v3)289 inline Float32 MulAdd(const Float32& v1, const Float32& v2, const Float32& v3) {
290   return Float32(fmaf(v1.value_, v2.value_, v3.value_));
291 }
292 
// Computes v1 * v2 + v3 as a fused multiply-add.
//
// Uses the compiler builtin rather than calling fma by name, matching the other helpers in this
// header, which avoid ordinary functions returning float/double.  It also removes the reliance on
// <cmath> declaring fma in the global namespace.
// NOTE(review): on targets without hardware FMA the builtin may still be lowered to a libm call -
// confirm this is acceptable for IA32 builds.
inline Float64 MulAdd(const Float64& v1, const Float64& v2, const Float64& v3) {
  return Float64(__builtin_fma(v1.value_, v2.value_, v3.value_));
}
296 
297 }  // namespace berberis::intrinsics
298 
299 #endif  // COMMON_TO_X86_BERBERIS_INTRINSICS_INTRINSICS_FLOAT_H_
300