1 /*-
2  * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD: src/lib/msun/i387/fenv.c,v 1.2 2005/03/17 22:21:46 das Exp $
27  */
28 
29 #include <sys/cdefs.h>
30 #include <sys/types.h>
31 #include "npx.h"
32 #include "fenv.h"
33 
/* Union of all rounding-mode selectors; used to isolate the rounding field. */
#define ROUND_MASK   (FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO)

/*
 * Bit-position differences between the x87 control word and the SSE
 * unit's control word: the SSE rounding-control field sits 3 bits
 * higher, and the SSE exception-mask field sits 7 bits higher.
 */
#define _SSE_ROUND_SHIFT 3
#define _SSE_EMASK_SHIFT 7
43 
44 const fenv_t __fe_dfl_env = {
45   __INITIAL_NPXCW__, /*__control*/
46   0x0000,            /*__mxcsr_hi*/
47   0x0000,            /*__status*/
48   0x1f80,            /*__mxcsr_lo*/
49   0xffffffff,        /*__tag*/
50   { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
51     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff } /*__other*/
52 };
53 
/*
 * One-instruction wrappers for the x87 and SSE control/status registers.
 * The load-style macros take the object itself as a memory operand; the
 * store-style macros take a pointer to the destination.
 */

/* x87 control word. */
#define __fldcw(__cw)           __asm __volatile("fldcw %0" : : "m" (__cw))
#define __fnstcw(__cw)          __asm __volatile("fnstcw %0" : "=m" (*(__cw)))

/* x87 status word and exception state. */
#define __fnstsw(__sw)          __asm __volatile("fnstsw %0" : "=am" (*(__sw)))
#define __fnclex()              __asm __volatile("fnclex")
#define __fwait()               __asm __volatile("fwait")

/*
 * Whole x87 environment.  __fldenvx() is __fldenv() with every x87 stack
 * register additionally listed as clobbered.
 */
#define __fnstenv(__env)        __asm __volatile("fnstenv %0" : "=m" (*(__env)))
#define __fldenv(__env)         __asm __volatile("fldenv %0" : : "m" (__env))
#define __fldenvx(__env)        __asm __volatile("fldenv %0" : : "m" (__env)  \
                                : "st", "st(1)", "st(2)", "st(3)", "st(4)",   \
                                "st(5)", "st(6)", "st(7)")

/* SSE control/status register (MXCSR). */
#define __ldmxcsr(__csr)        __asm __volatile("ldmxcsr %0" : : "m" (__csr))
#define __stmxcsr(__csr)        __asm __volatile("stmxcsr %0" : "=m" (*(__csr)))
66 
/*
 * SSE availability.  When the compiler already targets SSE (__SSE__ is
 * defined) the answer is fixed at build time.  Otherwise the processor is
 * probed on first use by __test_sse() and the outcome is remembered in
 * __has_sse so the probe runs only while the state is still __SSE_UNK.
 */
enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK };

#ifdef __SSE__
#define __HAS_SSE()     1
enum __sse_support __has_sse = __SSE_YES;
#else
#define __HAS_SSE()     (__has_sse == __SSE_YES ||                      \
                        (__has_sse == __SSE_UNK && __test_sse()))
enum __sse_support __has_sse = __SSE_UNK;
#endif
82 
#ifndef __SSE__
/* Read EFLAGS into *x, write x into EFLAGS, and fetch the EDX result of CPUID leaf 1. */
#define getfl(x)    __asm __volatile("pushfl\n\tpopl %0" : "=mr" (*(x)))
#define setfl(x)    __asm __volatile("pushl %0\n\tpopfl" : : "g" (x))
#define cpuid_dx(x) __asm __volatile("pushl %%ebx\n\tmovl $1, %%eax\n\t"  \
                    "cpuid\n\tpopl %%ebx"          \
                    : "=d" (*(x)) : : "eax", "ecx")

/*
 * Probe the running processor for SSE and record the answer in __has_sse.
 *
 * The probe is needed because ldmxcsr/stmxcsr must be used to get correct
 * results when any part of the program was compiled for SSE floating
 * point, yet those instructions cannot be executed on older processors.
 *
 * Returns 1 when SSE is available and 0 otherwise.
 */
int
__test_sse(void)
{
  int before, after;
  int features;
  int found = 0;

  /*
   * Try to flip bit 0x200000 of EFLAGS.  If the change does not stick
   * we are on a 486-class part and CPUID must not be executed.
   */
  getfl(&before);
  after = before ^ 0x200000;
  setfl(after);
  getfl(&after);

  if (before != after) {
    /* CPUID is usable; bit 0x2000000 of its EDX word signals SSE. */
    cpuid_dx(&features);
    if (features & 0x2000000) {
      found = 1;
    }
  }

  __has_sse = found ? __SSE_YES : __SSE_NO;
  return (found);
}
#endif /* !__SSE__ */
119 
120 int
fesetexceptflag(const fexcept_t * flagp,int excepts)121 fesetexceptflag(const fexcept_t *flagp, int excepts)
122 {
123   fenv_t env;
124   __uint32_t mxcsr;
125 
126   excepts &= FE_ALL_EXCEPT;
127   if (excepts) { /* Do nothing if excepts is 0 */
128     __fnstenv(&env);
129     env.__status &= ~excepts;
130     env.__status |= *flagp & excepts;
131     __fnclex();
132     __fldenv(env);
133     if (__HAS_SSE()) {
134       __stmxcsr(&mxcsr);
135       mxcsr &= ~excepts;
136       mxcsr |= *flagp & excepts;
137       __ldmxcsr(mxcsr);
138     }
139   }
140 
141   return (0);
142 }
143 
144 int
feraiseexcept(int excepts)145 feraiseexcept(int excepts)
146 {
147   fexcept_t ex = excepts;
148 
149   fesetexceptflag(&ex, excepts);
150   __fwait();
151   return (0);
152 }
153 
154 int
fegetenv(fenv_t * envp)155 fegetenv(fenv_t *envp)
156 {
157   __uint32_t mxcsr;
158 
159   __fnstenv(envp);
160   /*
161    * fnstenv masks all exceptions, so we need to restore
162    * the old control word to avoid this side effect.
163    */
164   __fldcw(envp->__control);
165   if (__HAS_SSE()) {
166     __stmxcsr(&mxcsr);
167     envp->__mxcsr_hi = mxcsr >> 16;
168     envp->__mxcsr_lo = mxcsr & 0xffff;
169   }
170   return (0);
171 }
172 
173 int
feholdexcept(fenv_t * envp)174 feholdexcept(fenv_t *envp)
175 {
176   __uint32_t mxcsr;
177   fenv_t env;
178 
179   __fnstenv(&env);
180   *envp = env;
181   env.__status &= ~FE_ALL_EXCEPT;
182   env.__control |= FE_ALL_EXCEPT;
183   __fnclex();
184   __fldenv(env);
185   if (__HAS_SSE()) {
186     __stmxcsr(&mxcsr);
187     envp->__mxcsr_hi = mxcsr >> 16;
188     envp->__mxcsr_lo = mxcsr & 0xffff;
189     mxcsr &= ~FE_ALL_EXCEPT;
190     mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT;
191     __ldmxcsr(mxcsr);
192   }
193   return (0);
194 }
195 
196 int
feupdateenv(const fenv_t * envp)197 feupdateenv(const fenv_t *envp)
198 {
199   __uint32_t mxcsr;
200   __uint16_t status;
201 
202   __fnstsw(&status);
203   if (__HAS_SSE()) {
204     __stmxcsr(&mxcsr);
205   } else {
206     mxcsr = 0;
207   }
208   fesetenv(envp);
209   feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
210   return (0);
211 }
212 
213 int
feenableexcept(int mask)214 feenableexcept(int mask)
215 {
216   __uint32_t mxcsr;
217   __uint16_t control, omask;
218 
219   mask &= FE_ALL_EXCEPT;
220   __fnstcw(&control);
221   if (__HAS_SSE()) {
222     __stmxcsr(&mxcsr);
223   } else {
224     mxcsr = 0;
225   }
226   omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
227   if (mask) {
228     control &= ~mask;
229     __fldcw(control);
230     if (__HAS_SSE()) {
231       mxcsr &= ~(mask << _SSE_EMASK_SHIFT);
232       __ldmxcsr(mxcsr);
233     }
234   }
235   return (omask);
236 }
237 
238 int
fedisableexcept(int mask)239 fedisableexcept(int mask)
240 {
241   __uint32_t mxcsr;
242   __uint16_t control, omask;
243 
244   mask &= FE_ALL_EXCEPT;
245   __fnstcw(&control);
246   if (__HAS_SSE()) {
247     __stmxcsr(&mxcsr);
248   } else {
249     mxcsr = 0;
250   }
251   omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
252   if (mask) {
253     control |= mask;
254     __fldcw(control);
255     if (__HAS_SSE()) {
256       mxcsr |= mask << _SSE_EMASK_SHIFT;
257       __ldmxcsr(mxcsr);
258     }
259   }
260   return (omask);
261 }
262 
263 int
feclearexcept(int excepts)264 feclearexcept(int excepts)
265 {
266   fenv_t env;
267   __uint32_t mxcsr;
268 
269   excepts &= FE_ALL_EXCEPT;
270   if (excepts) { /* Do nothing if excepts is 0 */
271     __fnstenv(&env);
272     env.__status &= ~excepts;
273     __fnclex();
274     __fldenv(env);
275     if (__HAS_SSE()) {
276       __stmxcsr(&mxcsr);
277       mxcsr &= ~excepts;
278       __ldmxcsr(mxcsr);
279     }
280   }
281   return (0);
282 }
283 
284 int
fegetexceptflag(fexcept_t * flagp,int excepts)285 fegetexceptflag(fexcept_t *flagp, int excepts)
286 {
287   __uint32_t mxcsr;
288   __uint16_t status;
289 
290   excepts &= FE_ALL_EXCEPT;
291   __fnstsw(&status);
292   if (__HAS_SSE()) {
293     __stmxcsr(&mxcsr);
294   } else {
295     mxcsr = 0;
296   }
297   *flagp = (status | mxcsr) & excepts;
298   return (0);
299 }
300 
301 int
fetestexcept(int excepts)302 fetestexcept(int excepts)
303 {
304   __uint32_t mxcsr;
305   __uint16_t status;
306 
307   excepts &= FE_ALL_EXCEPT;
308   if (excepts) { /* Do nothing if excepts is 0 */
309     __fnstsw(&status);
310     if (__HAS_SSE()) {
311       __stmxcsr(&mxcsr);
312     } else {
313       mxcsr = 0;
314     }
315     return ((status | mxcsr) & excepts);
316   }
317   return (0);
318 }
319 
320 int
fegetround(void)321 fegetround(void)
322 {
323   __uint16_t control;
324 
325   /*
326    * We assume that the x87 and the SSE unit agree on the
327    * rounding mode.  Reading the control word on the x87 turns
328    * out to be about 5 times faster than reading it on the SSE
329    * unit on an Opteron 244.
330    */
331   __fnstcw(&control);
332   return (control & ROUND_MASK);
333 }
334 
335 int
fesetround(int round)336 fesetround(int round)
337 {
338   __uint32_t mxcsr;
339   __uint16_t control;
340 
341   if (round & ~ROUND_MASK) {
342     return (-1);
343   } else {
344     __fnstcw(&control);
345     control &= ~ROUND_MASK;
346     control |= round;
347     __fldcw(control);
348     if (__HAS_SSE()) {
349       __stmxcsr(&mxcsr);
350       mxcsr &= ~(ROUND_MASK << _SSE_ROUND_SHIFT);
351       mxcsr |= round << _SSE_ROUND_SHIFT;
352       __ldmxcsr(mxcsr);
353     }
354     return (0);
355   }
356 }
357 
358 int
fesetenv(const fenv_t * envp)359 fesetenv(const fenv_t *envp)
360 {
361   fenv_t env = *envp;
362   __uint32_t mxcsr;
363 
364   mxcsr = (env.__mxcsr_hi << 16) | (env.__mxcsr_lo);
365   env.__mxcsr_hi = 0xffff;
366   env.__mxcsr_lo = 0xffff;
367   /*
368    * XXX Using fldenvx() instead of fldenv() tells the compiler that this
369    * instruction clobbers the i387 register stack.  This happens because
370    * we restore the tag word from the saved environment.  Normally, this
371    * would happen anyway and we wouldn't care, because the ABI allows
372    * function calls to clobber the i387 regs.  However, fesetenv() is
373    * inlined, so we need to be more careful.
374    */
375   __fldenvx(env);
376   if (__HAS_SSE()) {
377     __ldmxcsr(mxcsr);
378   }
379   return (0);
380 }
381 
382 int
fegetexcept(void)383 fegetexcept(void)
384 {
385   __uint16_t control;
386 
387   /*
388    * We assume that the masks for the x87 and the SSE unit are
389    * the same.
390    */
391   __fnstcw(&control);
392   return (~control & FE_ALL_EXCEPT);
393 }
394