1 //===-- x86_64 floating point env manipulation functions --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_UTILS_FPUTIL_X86_64_FENV_H
10 #define LLVM_LIBC_UTILS_FPUTIL_X86_64_FENV_H
11 
12 #include <fenv.h>
13 #include <stdint.h>
14 
15 namespace __llvm_libc {
16 namespace fputil {
17 
18 namespace internal {
19 
20 // Normally, one should be able to define FE_* macros to the exact rounding mode
21 // encodings. However, since we want LLVM libc to be compiled against headers
22 // from other libcs, we cannot assume that FE_* macros are always defined in
23 // such a manner. So, we will define enums corresponding to the x86_64 bit
24 // encodings. The implementations can map from FE_* to the corresponding enum
25 // values.
26 
27 // The rounding control values in the x87 control register and the MXCSR
// register have the same 2-bit encoding but have different bit positions.
29 // See below for the bit positions.
30 struct RoundingControlValue {
31   static constexpr uint16_t ToNearest = 0x0;
32   static constexpr uint16_t Downward = 0x1;
33   static constexpr uint16_t Upward = 0x2;
34   static constexpr uint16_t TowardZero = 0x3;
35 };
36 
37 static constexpr uint16_t X87RoundingControlBitPosition = 10;
38 static constexpr uint16_t MXCSRRoundingControlBitPosition = 13;
39 
40 // The exception flags in the x87 status register and the MXCSR have the same
41 // encoding as well as the same bit positions.
42 struct ExceptionFlags {
43   static constexpr uint16_t Invalid = 0x1;
44   static constexpr uint16_t Denormal = 0x2; // This flag is not used
45   static constexpr uint16_t DivByZero = 0x4;
46   static constexpr uint16_t Overflow = 0x8;
47   static constexpr uint16_t Underflow = 0x10;
48   static constexpr uint16_t Inexact = 0x20;
49 };
50 
51 // The exception control bits occupy six bits, one bit for each exception.
52 // In the x87 control word, they occupy the first 6 bits. In the MXCSR
53 // register, they occupy bits 7 to 12.
54 static constexpr uint16_t X87ExceptionControlBitPosition = 0;
55 static constexpr uint16_t MXCSRExceptionContolBitPoistion = 7;
56 
57 // Exception flags are individual bits in the corresponding registers.
58 // So, we just OR the bit values to get the full set of exceptions.
getStatusValueForExcept(int excepts)59 static inline uint16_t getStatusValueForExcept(int excepts) {
60   // We will make use of the fact that exception control bits are single
61   // bit flags in the control registers.
62   return (excepts & FE_INVALID ? ExceptionFlags::Invalid : 0) |
63          (excepts & FE_DIVBYZERO ? ExceptionFlags::DivByZero : 0) |
64          (excepts & FE_OVERFLOW ? ExceptionFlags::Overflow : 0) |
65          (excepts & FE_UNDERFLOW ? ExceptionFlags::Underflow : 0) |
66          (excepts & FE_INEXACT ? ExceptionFlags::Inexact : 0);
67 }
68 
exceptionStatusToMacro(uint16_t status)69 static inline int exceptionStatusToMacro(uint16_t status) {
70   return (status & ExceptionFlags::Invalid ? FE_INVALID : 0) |
71          (status & ExceptionFlags::DivByZero ? FE_DIVBYZERO : 0) |
72          (status & ExceptionFlags::Overflow ? FE_OVERFLOW : 0) |
73          (status & ExceptionFlags::Underflow ? FE_UNDERFLOW : 0) |
74          (status & ExceptionFlags::Inexact ? FE_INEXACT : 0);
75 }
76 
77 struct X87State {
78   uint16_t ControlWord;
79   uint16_t Unused1;
80   uint16_t StatusWord;
81   uint16_t Unused2;
82   // TODO: Elaborate the remaining 20 bytes as required.
83   uint32_t _[5];
84 };
85 
getX87ControlWord()86 static inline uint16_t getX87ControlWord() {
87   uint16_t w;
88   __asm__ __volatile__("fnstcw %0" : "=m"(w)::);
89   return w;
90 }
91 
writeX87ControlWord(uint16_t w)92 static inline void writeX87ControlWord(uint16_t w) {
93   __asm__ __volatile__("fldcw %0" : : "m"(w) :);
94 }
95 
getX87StatusWord()96 static inline uint16_t getX87StatusWord() {
97   uint16_t w;
98   __asm__ __volatile__("fnstsw %0" : "=m"(w)::);
99   return w;
100 }
101 
clearX87Exceptions()102 static inline void clearX87Exceptions() {
103   __asm__ __volatile__("fnclex" : : :);
104 }
105 
getMXCSR()106 static inline uint32_t getMXCSR() {
107   uint32_t w;
108   __asm__ __volatile__("stmxcsr %0" : "=m"(w)::);
109   return w;
110 }
111 
writeMXCSR(uint32_t w)112 static inline void writeMXCSR(uint32_t w) {
113   __asm__ __volatile__("ldmxcsr %0" : : "m"(w) :);
114 }
115 
getX87State(X87State & s)116 static inline void getX87State(X87State &s) {
117   __asm__ __volatile__("fnstenv %0" : "=m"(s));
118 }
119 
writeX87State(const X87State & s)120 static inline void writeX87State(const X87State &s) {
121   __asm__ __volatile__("fldenv %0" : : "m"(s) :);
122 }
123 
fwait()124 static inline void fwait() { __asm__ __volatile__("fwait"); }
125 
126 } // namespace internal
127 
enableExcept(int excepts)128 static inline int enableExcept(int excepts) {
129   // In the x87 control word and in MXCSR, an exception is blocked
130   // if the corresponding bit is set. That is the reason for all the
131   // bit-flip operations below as we need to turn the bits to zero
132   // to enable them.
133 
134   uint16_t bitMask = internal::getStatusValueForExcept(excepts);
135 
136   uint16_t x87CW = internal::getX87ControlWord();
137   uint16_t oldExcepts = ~x87CW & 0x3F; // Save previously enabled exceptions.
138   x87CW &= ~bitMask;
139   internal::writeX87ControlWord(x87CW);
140 
141   // Enabling SSE exceptions via MXCSR is a nice thing to do but
142   // might not be of much use practically as SSE exceptions and the x87
143   // exceptions are independent of each other.
144   uint32_t mxcsr = internal::getMXCSR();
145   mxcsr &= ~(bitMask << internal::MXCSRExceptionContolBitPoistion);
146   internal::writeMXCSR(mxcsr);
147 
148   // Since the x87 exceptions and SSE exceptions are independent of each,
149   // it doesn't make much sence to report both in the return value. Most
150   // often, the standard floating point functions deal with FPU operations
151   // so we will retrun only the old x87 exceptions.
152   return internal::exceptionStatusToMacro(oldExcepts);
153 }
154 
disableExcept(int excepts)155 static inline int disableExcept(int excepts) {
156   // In the x87 control word and in MXCSR, an exception is blocked
157   // if the corresponding bit is set.
158 
159   uint16_t bitMask = internal::getStatusValueForExcept(excepts);
160 
161   uint16_t x87CW = internal::getX87ControlWord();
162   uint16_t oldExcepts = ~x87CW & 0x3F; // Save previously enabled exceptions.
163   x87CW |= bitMask;
164   internal::writeX87ControlWord(x87CW);
165 
166   // Just like in enableExcept, it is not clear if disabling SSE exceptions
167   // is required. But, we will still do it only as a "nice thing to do".
168   uint32_t mxcsr = internal::getMXCSR();
169   mxcsr |= (bitMask << internal::MXCSRExceptionContolBitPoistion);
170   internal::writeMXCSR(mxcsr);
171 
172   return internal::exceptionStatusToMacro(oldExcepts);
173 }
174 
clearExcept(int excepts)175 static inline int clearExcept(int excepts) {
176   // An instruction to write to x87 status word ins't available. So, we
177   // just clear all of the x87 exceptions.
178   // TODO: One can potentially use fegetenv/fesetenv to clear only the
179   // listed exceptions in the x87 status word. We can do this if it is
180   // really required.
181   internal::clearX87Exceptions();
182 
183   uint32_t mxcsr = internal::getMXCSR();
184   mxcsr &= ~internal::getStatusValueForExcept(excepts);
185   internal::writeMXCSR(mxcsr);
186   return 0;
187 }
188 
testExcept(int excepts)189 static inline int testExcept(int excepts) {
190   uint16_t statusValue = internal::getStatusValueForExcept(excepts);
191   // Check both x87 status word and MXCSR.
192   return internal::exceptionStatusToMacro(
193       (statusValue & internal::getX87StatusWord()) |
194       (statusValue & internal::getMXCSR()));
195 }
196 
raiseExcept(int excepts)197 static inline int raiseExcept(int excepts) {
198   uint16_t statusValue = internal::getStatusValueForExcept(excepts);
199 
200   // We set the status flag for exception one at a time and call the
201   // fwait instruction to actually get the processor to raise the
202   // exception by calling the exception handler. This scheme is per
203   // the description in in "8.6 X87 FPU EXCEPTION SYNCHRONIZATION"
204   // of the "Intel 64 and IA-32 Architectures Software Developer's
205   // Manual, Vol 1".
206 
207   // FPU status word is read for each exception seperately as the
208   // exception handler can potentially write to it (typically to clear
209   // the corresponding exception flag). By reading it separately, we
210   // ensure that the writes by the exception handler are maintained
211   // when raising the next exception.
212 
213   if (statusValue & internal::ExceptionFlags::Invalid) {
214     internal::X87State state;
215     internal::getX87State(state);
216     state.StatusWord |= internal::ExceptionFlags::Invalid;
217     internal::writeX87State(state);
218     internal::fwait();
219   }
220   if (statusValue & internal::ExceptionFlags::DivByZero) {
221     internal::X87State state;
222     internal::getX87State(state);
223     state.StatusWord |= internal::ExceptionFlags::DivByZero;
224     internal::writeX87State(state);
225     internal::fwait();
226   }
227   if (statusValue & internal::ExceptionFlags::Overflow) {
228     internal::X87State state;
229     internal::getX87State(state);
230     state.StatusWord |= internal::ExceptionFlags::Overflow;
231     internal::writeX87State(state);
232     internal::fwait();
233   }
234   if (statusValue & internal::ExceptionFlags::Underflow) {
235     internal::X87State state;
236     internal::getX87State(state);
237     state.StatusWord |= internal::ExceptionFlags::Underflow;
238     internal::writeX87State(state);
239     internal::fwait();
240   }
241   if (statusValue & internal::ExceptionFlags::Inexact) {
242     internal::X87State state;
243     internal::getX87State(state);
244     state.StatusWord |= internal::ExceptionFlags::Inexact;
245     internal::writeX87State(state);
246     internal::fwait();
247   }
248 
249   // There is no special synchronization scheme available to
250   // raise SEE exceptions. So, we will ignore that for now.
251   // Just plain writing to the MXCSR register does not guarantee
252   // the exception handler will be called.
253 
254   return 0;
255 }
256 
getRound()257 static inline int getRound() {
258   uint16_t bitValue =
259       (internal::getMXCSR() >> internal::MXCSRRoundingControlBitPosition) & 0x3;
260   switch (bitValue) {
261   case internal::RoundingControlValue::ToNearest:
262     return FE_TONEAREST;
263   case internal::RoundingControlValue::Downward:
264     return FE_DOWNWARD;
265   case internal::RoundingControlValue::Upward:
266     return FE_UPWARD;
267   case internal::RoundingControlValue::TowardZero:
268     return FE_TOWARDZERO;
269   default:
270     return -1; // Error value.
271   }
272 }
273 
setRound(int mode)274 static inline int setRound(int mode) {
275   uint16_t bitValue;
276   switch (mode) {
277   case FE_TONEAREST:
278     bitValue = internal::RoundingControlValue::ToNearest;
279     break;
280   case FE_DOWNWARD:
281     bitValue = internal::RoundingControlValue::Downward;
282     break;
283   case FE_UPWARD:
284     bitValue = internal::RoundingControlValue::Upward;
285     break;
286   case FE_TOWARDZERO:
287     bitValue = internal::RoundingControlValue::TowardZero;
288     break;
289   default:
290     return 1; // To indicate failure
291   }
292 
293   uint16_t x87Value = bitValue << internal::X87RoundingControlBitPosition;
294   uint16_t x87Control = internal::getX87ControlWord();
295   x87Control =
296       (x87Control & ~(0x3 << internal::X87RoundingControlBitPosition)) |
297       x87Value;
298   internal::writeX87ControlWord(x87Control);
299 
300   uint32_t mxcsrValue = bitValue << internal::MXCSRRoundingControlBitPosition;
301   uint32_t mxcsrControl = internal::getMXCSR();
302   mxcsrControl =
303       (mxcsrControl & ~(0x3 << internal::MXCSRRoundingControlBitPosition)) |
304       mxcsrValue;
305   internal::writeMXCSR(mxcsrControl);
306 
307   return 0;
308 }
309 
310 } // namespace fputil
311 } // namespace __llvm_libc
312 
313 #endif // LLVM_LIBC_UTILS_FPUTIL_X86_64_FENV_H
314