/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef COMMON_TO_X86_BERBERIS_INTRINSICS_INTRINSICS_FLOAT_H_
#define COMMON_TO_X86_BERBERIS_INTRINSICS_INTRINSICS_FLOAT_H_

#include <cmath>

#include "berberis/base/bit_util.h"
#include "berberis/base/logging.h"
#include "berberis/intrinsics/common/intrinsics_float.h"  // Float32/Float64
#include "berberis/intrinsics/guest_rounding_modes.h"     // FE_HOSTROUND/FE_TIESAWAY

namespace berberis::intrinsics {
#define MAKE_BINARY_OPERATOR(guest_name, operator_name, assignment_name)                 \
                                                                                          \
  inline Float32 operator operator_name(const Float32& v1, const Float32& v2) {          \
    Float32 result;                                                                       \
    asm(#guest_name "ss %2,%0" : "=x"(result.value_) : "0"(v1.value_), "x"(v2.value_));  \
    return result;                                                                        \
  }                                                                                       \
                                                                                          \
  inline Float32& operator assignment_name(Float32& v1, const Float32& v2) {             \
    asm(#guest_name "ss %2,%0" : "=x"(v1.value_) : "0"(v1.value_), "x"(v2.value_));      \
    return v1;                                                                            \
  }                                                                                       \
                                                                                          \
  inline Float64 operator operator_name(const Float64& v1, const Float64& v2) {          \
    Float64 result;                                                                       \
    asm(#guest_name "sd %2,%0" : "=x"(result.value_) : "0"(v1.value_), "x"(v2.value_));  \
    return result;                                                                        \
  }                                                                                       \
                                                                                          \
  inline Float64& operator assignment_name(Float64& v1, const Float64& v2) {             \
    asm(#guest_name "sd %2,%0" : "=x"(v1.value_) : "0"(v1.value_), "x"(v2.value_));      \
    return v1;                                                                            \
  }

MAKE_BINARY_OPERATOR(add, +, +=)
MAKE_BINARY_OPERATOR(sub, -, -=)
MAKE_BINARY_OPERATOR(mul, *, *=)
MAKE_BINARY_OPERATOR(div, /, /=)

#undef MAKE_BINARY_OPERATOR
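
// For reference, the MAKE_BINARY_OPERATOR(add, +, +=) invocation above expands (Float32 half
// shown) to roughly:
//
//   inline Float32 operator+(const Float32& v1, const Float32& v2) {
//     Float32 result;
//     asm("addss %2,%0" : "=x"(result.value_) : "0"(v1.value_), "x"(v2.value_));
//     return result;
//   }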
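// Ordered comparisons are implemented with UCOMISS/UCOMISD. The operand order below is chosen so
// that SETA (CF=0 and ZF=0) implements the strict comparison and SETNB (CF=0) the non-strict one.
// An unordered result (either operand is NaN) sets CF=ZF=PF=1, so all four operators return false
// for NaN inputs, matching IEEE 754 semantics.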
inline bool operator<(const Float32& v1, const Float32& v2) {
  bool result;
  asm("ucomiss %1,%2\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
  return result;
}

inline bool operator<(const Float64& v1, const Float64& v2) {
  bool result;
  asm("ucomisd %1,%2\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
  return result;
}

inline bool operator>(const Float32& v1, const Float32& v2) {
  bool result;
  asm("ucomiss %2,%1\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
  return result;
}

inline bool operator>(const Float64& v1, const Float64& v2) {
  bool result;
  asm("ucomisd %2,%1\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
  return result;
}

inline bool operator<=(const Float32& v1, const Float32& v2) {
  bool result;
  asm("ucomiss %1,%2\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
  return result;
}

inline bool operator<=(const Float64& v1, const Float64& v2) {
  bool result;
  asm("ucomisd %1,%2\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
  return result;
}

inline bool operator>=(const Float32& v1, const Float32& v2) {
  bool result;
  asm("ucomiss %2,%1\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
  return result;
}

inline bool operator>=(const Float64& v1, const Float64& v2) {
  bool result;
  asm("ucomisd %2,%1\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
  return result;
}

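// Equality comparisons use CMPEQSS/CMPNEQSS, which write an all-ones or all-zeroes mask into the
// destination register; bit 0 of that mask is the boolean result. CMPEQSS treats NaN as unequal
// and CMPNEQSS as not-equal, so operator== returns false and operator!= returns true when either
// operand is NaN, as IEEE 754 requires.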
inline bool operator==(const Float32& v1, const Float32& v2) {
  float result;
  asm("cmpeqss %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
  return bit_cast<uint32_t, float>(result) & 0x1;
}

inline bool operator==(const Float64& v1, const Float64& v2) {
  double result;
  asm("cmpeqsd %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
  return bit_cast<uint64_t, double>(result) & 0x1;
}

inline bool operator!=(const Float32& v1, const Float32& v2) {
  float result;
  asm("cmpneqss %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
  return bit_cast<uint32_t, float>(result) & 0x1;
}

inline bool operator!=(const Float64& v1, const Float64& v2) {
  double result;
  asm("cmpneqsd %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
  return bit_cast<uint64_t, double>(result) & 0x1;
}

// It is NOT safe to call ANY function that returns a float or double here: the IA32 ABI returns
// floating-point values on the x87 stack (and does so even with -mfpmath=sse), and NaN float and
// double values can be corrupted when they pass through it.
//
// It IS safe to use builtins, provided this file is compiled with -mfpmath=sse (clang has no such
// flag but uses SSE whenever possible; GCC needs both -msse2 and -mfpmath=sse), since builtins
// don't go through the official calling convention but are expanded directly inside the calling
// function - even with all optimizations disabled.
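// For example, Absolute() below uses __builtin_fabsf() rather than calling fabsf() from <cmath>,
// so no float value ever travels through the x87 return register.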

inline Float32 CopySignBit(const Float32& v1, const Float32& v2) {
  return Float32(__builtin_copysignf(v1.value_, v2.value_));
}

inline Float64 CopySignBit(const Float64& v1, const Float64& v2) {
  return Float64(__builtin_copysign(v1.value_, v2.value_));
}

inline Float32 Absolute(const Float32& v) {
  return Float32(__builtin_fabsf(v.value_));
}

inline Float64 Absolute(const Float64& v) {
  return Float64(__builtin_fabs(v.value_));
}

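// Negation flips the sign bit with PXOR in an SSE register. Only the sign bit is touched, so NaN
// payloads are preserved and no floating-point exceptions are raised.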
inline Float32 Negative(const Float32& v) {
  // TODO(b/120563432): Simple -v.value_ doesn't work after a clang update.
  Float32 result;
  uint64_t sign_bit = 0x80000000U;
  asm("pxor %2, %0" : "=x"(result.value_) : "0"(v.value_), "x"(sign_bit));
  return result;
}

inline Float64 Negative(const Float64& v) {
  // TODO(b/120563432): Simple -v.value_ doesn't work after a clang update.
  Float64 result;
  uint64_t sign_bit = 0x8000000000000000ULL;
  asm("pxor %2, %0" : "=x"(result.value_) : "0"(v.value_), "x"(sign_bit));
  return result;
}

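// FPClassify maps __builtin_fpclassify's result onto the FPInfo enum: the builtin returns
// whichever of its first five arguments matches the class of the value, so e.g.
// FPClassify(Float32(0.f)) yields FPInfo::kZero and FPClassify(Float32(1.f)) yields
// FPInfo::kNormal.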
inline FPInfo FPClassify(const Float32& v) {
  return static_cast<FPInfo>(__builtin_fpclassify(static_cast<int>(FPInfo::kNaN),
                                                  static_cast<int>(FPInfo::kInfinite),
                                                  static_cast<int>(FPInfo::kNormal),
                                                  static_cast<int>(FPInfo::kSubnormal),
                                                  static_cast<int>(FPInfo::kZero),
                                                  v.value_));
}

inline FPInfo FPClassify(const Float64& v) {
  return static_cast<FPInfo>(__builtin_fpclassify(static_cast<int>(FPInfo::kNaN),
                                                  static_cast<int>(FPInfo::kInfinite),
                                                  static_cast<int>(FPInfo::kNormal),
                                                  static_cast<int>(FPInfo::kSubnormal),
                                                  static_cast<int>(FPInfo::kZero),
                                                  v.value_));
}

inline Float32 FPRound(const Float32& value, uint32_t round_control) {
  Float32 result;
  switch (round_control) {
    case FE_HOSTROUND:
      asm("roundss $4,%1,%0" : "=x"(result.value_) : "x"(value.value_));
      break;
    case FE_TONEAREST:
      asm("roundss $0,%1,%0" : "=x"(result.value_) : "x"(value.value_));
      break;
    case FE_DOWNWARD:
      asm("roundss $1,%1,%0" : "=x"(result.value_) : "x"(value.value_));
      break;
    case FE_UPWARD:
      asm("roundss $2,%1,%0" : "=x"(result.value_) : "x"(value.value_));
      break;
    case FE_TOWARDZERO:
      asm("roundss $3,%1,%0" : "=x"(result.value_) : "x"(value.value_));
      break;
    case FE_TIESAWAY:
      // TODO(b/146437763): Might fail if value doesn't have a fractional part.
      if (value == FPRound(value, FE_DOWNWARD) + Float32(0.5)) {
        result = value > Float32(0.0) ? FPRound(value, FE_UPWARD) : FPRound(value, FE_DOWNWARD);
      } else {
        result = FPRound(value, FE_TONEAREST);
      }
      break;
    default:
      LOG_ALWAYS_FATAL("Internal error: unknown round_control in FPRound!");
      result.value_ = 0.f;
  }
  return result;
}

inline Float64 FPRound(const Float64& value, uint32_t round_control) {
  Float64 result;
  switch (round_control) {
    case FE_HOSTROUND:
      asm("roundsd $4,%1,%0" : "=x"(result.value_) : "x"(value.value_));
      break;
    case FE_TONEAREST:
      asm("roundsd $0,%1,%0" : "=x"(result.value_) : "x"(value.value_));
      break;
    case FE_DOWNWARD:
      asm("roundsd $1,%1,%0" : "=x"(result.value_) : "x"(value.value_));
      break;
    case FE_UPWARD:
      asm("roundsd $2,%1,%0" : "=x"(result.value_) : "x"(value.value_));
      break;
    case FE_TOWARDZERO:
      asm("roundsd $3,%1,%0" : "=x"(result.value_) : "x"(value.value_));
      break;
    case FE_TIESAWAY:
      // x86 does not support this rounding mode directly, so ties (values with a fractional part
      // of exactly 1/2) must be handled manually.
      if (value == FPRound(value, FE_DOWNWARD)) {
        // Value is already an integer and can be returned as-is. Checking this first avoids
        // dealing with numbers too large to have a fractional part.
        return value;
      } else if (value == FPRound(value, FE_DOWNWARD) + Float64(0.5)) {
        // The fractional part is exactly 1/2, so tie away from zero.
        result = value > Float64(0.0) ? FPRound(value, FE_UPWARD) : FPRound(value, FE_DOWNWARD);
      } else {
        // Every other case is handled correctly by to-nearest rounding.
        result = FPRound(value, FE_TONEAREST);
      }
      break;
    default:
      LOG_ALWAYS_FATAL("Internal error: unknown round_control in FPRound!");
      result.value_ = 0.;
  }
  return result;
}
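
// A quick sanity check of the FE_TIESAWAY path above (ties away from zero):
// FPRound(Float64(2.5), FE_TIESAWAY) gives 3.0 and FPRound(Float64(-2.5), FE_TIESAWAY) gives
// -3.0, while FPRound(Float64(2.25), FE_TIESAWAY) gives 2.0, the same result that plain
// FE_TONEAREST (ties-to-even) produces.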

inline int IsNan(const Float32& v) {
  return __builtin_isnan(v.value_);
}

inline int IsNan(const Float64& v) {
  return __builtin_isnan(v.value_);
}

inline int SignBit(const Float32& v) {
  return __builtin_signbitf(v.value_);
}

inline int SignBit(const Float64& v) {
  return __builtin_signbit(v.value_);
}

inline Float32 Sqrt(const Float32& v) {
  return Float32(__builtin_sqrtf(v.value_));
}

inline Float64 Sqrt(const Float64& v) {
  return Float64(__builtin_sqrt(v.value_));
}

// Fused multiply-add: computes x*y + z with a single rounding.
inline Float32 MulAdd(const Float32& v1, const Float32& v2, const Float32& v3) {
  return Float32(fmaf(v1.value_, v2.value_, v3.value_));
}

inline Float64 MulAdd(const Float64& v1, const Float64& v2, const Float64& v3) {
  return Float64(fma(v1.value_, v2.value_, v3.value_));
}

}  // namespace berberis::intrinsics

#endif  // COMMON_TO_X86_BERBERIS_INTRINSICS_INTRINSICS_FLOAT_H_