1 /*-
2 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: src/lib/msun/i387/fenv.c,v 1.2 2005/03/17 22:21:46 das Exp $
27 */
28
29 #include <sys/cdefs.h>
30 #include <sys/types.h>
31 #include "npx.h"
32 #include "fenv.h"
33
/* Every bit that can be set in a rounding-mode selector (FE_* rounding values). */
#define	ROUND_MASK	(FE_TOWARDZERO | FE_UPWARD | FE_DOWNWARD | FE_TONEAREST)

/*
 * Relative to the x87 control word, the SSE control word keeps its
 * rounding-control bits 3 positions to the left and its exception-mask
 * bits 7 positions to the left.
 */
#define	_SSE_ROUND_SHIFT	3
#define	_SSE_EMASK_SHIFT	7
43
44 const fenv_t __fe_dfl_env = {
45 __INITIAL_NPXCW__, /*__control*/
46 0x0000, /*__mxcsr_hi*/
47 0x0000, /*__status*/
48 0x1f80, /*__mxcsr_lo*/
49 0xffffffff, /*__tag*/
50 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
51 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff } /*__other*/
52 };
53
/*
 * One-instruction wrappers around the x87 and SSE instructions used to
 * read and write floating-point control and status state.
 *
 * Macros that load state take an lvalue and pass it as a memory operand.
 * Macros that store state take a pointer and write through it.
 */

/* x87: load state from memory. */
#define	__fldcw(__word)		__asm __volatile("fldcw %0" : : "m" (__word))
#define	__fldenv(__image)	__asm __volatile("fldenv %0" : : "m" (__image))
/* Same instruction as __fldenv(), but declares all eight x87 stack registers clobbered. */
#define	__fldenvx(__image)	__asm __volatile("fldenv %0" : : "m" (__image) \
				: "st", "st(1)", "st(2)", "st(3)", "st(4)", \
				"st(5)", "st(6)", "st(7)")

/* x87: instructions without operands. */
#define	__fnclex()		__asm __volatile("fnclex")
#define	__fwait()		__asm __volatile("fwait")

/* x87: store state through a pointer. */
#define	__fnstenv(__dst)	__asm __volatile("fnstenv %0" : "=m" (*(__dst)))
#define	__fnstcw(__dst)		__asm __volatile("fnstcw %0" : "=m" (*(__dst)))
/* The status word may be delivered in %ax or in memory. */
#define	__fnstsw(__dst)		__asm __volatile("fnstsw %0" : "=am" (*(__dst)))

/* SSE: load and store the MXCSR register. */
#define	__ldmxcsr(__word)	__asm __volatile("ldmxcsr %0" : : "m" (__word))
#define	__stmxcsr(__dst)	__asm __volatile("stmxcsr %0" : "=m" (*(__dst)))
66
/*
 * SSE availability.  When the compiler already targets SSE (__SSE__ is
 * defined) the answer is known at build time.  Otherwise the first
 * __HAS_SSE() query probes the CPU through __test_sse(), which records
 * the outcome in __has_sse so later queries are a plain comparison.
 */
enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK };

#ifdef __SSE__
#define	__HAS_SSE()	1
enum __sse_support __has_sse = __SSE_YES;
#else
#define	__HAS_SSE()	(__has_sse == __SSE_YES || \
			    (__has_sse == __SSE_UNK && __test_sse()))
enum __sse_support __has_sse = __SSE_UNK;
#endif
82
#ifndef __SSE__
/* Read the flags register into *x / write x into the flags register. */
#define	getfl(x)	__asm __volatile("pushfl\n\tpopl %0" : "=mr" (*(x)))
#define	setfl(x)	__asm __volatile("pushl %0\n\tpopfl" : : "g" (x))
/* Run CPUID with %eax = 1 and store %edx in *x; %ebx is saved and restored. */
#define	cpuid_dx(x)	__asm __volatile("pushl %%ebx\n\tmovl $1, %%eax\n\t" \
			    "cpuid\n\tpopl %%ebx" \
			    : "=d" (*(x)) : : "eax", "ecx")

/*
 * Probe the running processor for SSE support.
 *
 * This runtime check is needed because ldmxcsr/stmxcsr must be used to
 * get correct results whenever any part of the program was compiled to
 * use SSE floating-point, yet those instructions cannot be executed on
 * older processors.
 *
 * The result is cached in __has_sse.  Returns 1 when SSE is available
 * and 0 otherwise.
 */
int
__test_sse(void)
{
	int fl_before, fl_after;
	int features;

	/*
	 * Try to flip bit 0x200000 of the flags register.  If the change
	 * does not stick we are on a 486 and CPUID cannot be used.
	 */
	getfl(&fl_before);
	fl_after = fl_before ^ 0x200000;
	setfl(fl_after);
	getfl(&fl_after);
	if (fl_before == fl_after) {
		__has_sse = __SSE_NO;
		return (0);
	}

	/* Not a 486, so CPUID should work: look at the feature bit tested for SSE. */
	cpuid_dx(&features);
	if ((features & 0x2000000) == 0) {
		__has_sse = __SSE_NO;
		return (0);
	}

	__has_sse = __SSE_YES;
	return (1);
}
#endif /* !__SSE__ */
119
120 int
fesetexceptflag(const fexcept_t * flagp,int excepts)121 fesetexceptflag(const fexcept_t *flagp, int excepts)
122 {
123 fenv_t env;
124 __uint32_t mxcsr;
125
126 excepts &= FE_ALL_EXCEPT;
127 if (excepts) { /* Do nothing if excepts is 0 */
128 __fnstenv(&env);
129 env.__status &= ~excepts;
130 env.__status |= *flagp & excepts;
131 __fnclex();
132 __fldenv(env);
133 if (__HAS_SSE()) {
134 __stmxcsr(&mxcsr);
135 mxcsr &= ~excepts;
136 mxcsr |= *flagp & excepts;
137 __ldmxcsr(mxcsr);
138 }
139 }
140
141 return (0);
142 }
143
/*
 * Raise the exceptions named in excepts by setting their flags through
 * fesetexceptflag() and then issuing fwait (presumably so that a newly
 * flagged, unmasked x87 exception is delivered before returning --
 * confirm against the x87 documentation).  Always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t wanted;

	wanted = excepts;
	fesetexceptflag(&wanted, excepts);
	__fwait();

	return (0);
}
153
154 int
fegetenv(fenv_t * envp)155 fegetenv(fenv_t *envp)
156 {
157 __uint32_t mxcsr;
158
159 __fnstenv(envp);
160 /*
161 * fnstenv masks all exceptions, so we need to restore
162 * the old control word to avoid this side effect.
163 */
164 __fldcw(envp->__control);
165 if (__HAS_SSE()) {
166 __stmxcsr(&mxcsr);
167 envp->__mxcsr_hi = mxcsr >> 16;
168 envp->__mxcsr_lo = mxcsr & 0xffff;
169 }
170 return (0);
171 }
172
173 int
feholdexcept(fenv_t * envp)174 feholdexcept(fenv_t *envp)
175 {
176 __uint32_t mxcsr;
177 fenv_t env;
178
179 __fnstenv(&env);
180 *envp = env;
181 env.__status &= ~FE_ALL_EXCEPT;
182 env.__control |= FE_ALL_EXCEPT;
183 __fnclex();
184 __fldenv(env);
185 if (__HAS_SSE()) {
186 __stmxcsr(&mxcsr);
187 envp->__mxcsr_hi = mxcsr >> 16;
188 envp->__mxcsr_lo = mxcsr & 0xffff;
189 mxcsr &= ~FE_ALL_EXCEPT;
190 mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT;
191 __ldmxcsr(mxcsr);
192 }
193 return (0);
194 }
195
/*
 * Install the environment in *envp and then re-raise whichever
 * exceptions were flagged (x87 status word or MXCSR) at the time of
 * the call.  Always returns 0.
 */
int
feupdateenv(const fenv_t *envp)
{
	__uint16_t x87_status;
	__uint32_t sse_csr = 0;	/* contributes nothing without SSE */

	/* Snapshot the pending flags before the environment is replaced. */
	__fnstsw(&x87_status);
	if (__HAS_SSE()) {
		__stmxcsr(&sse_csr);
	}

	fesetenv(envp);
	feraiseexcept((sse_csr | x87_status) & FE_ALL_EXCEPT);

	return (0);
}
212
213 int
feenableexcept(int mask)214 feenableexcept(int mask)
215 {
216 __uint32_t mxcsr;
217 __uint16_t control, omask;
218
219 mask &= FE_ALL_EXCEPT;
220 __fnstcw(&control);
221 if (__HAS_SSE()) {
222 __stmxcsr(&mxcsr);
223 } else {
224 mxcsr = 0;
225 }
226 omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
227 if (mask) {
228 control &= ~mask;
229 __fldcw(control);
230 if (__HAS_SSE()) {
231 mxcsr &= ~(mask << _SSE_EMASK_SHIFT);
232 __ldmxcsr(mxcsr);
233 }
234 }
235 return (omask);
236 }
237
238 int
fedisableexcept(int mask)239 fedisableexcept(int mask)
240 {
241 __uint32_t mxcsr;
242 __uint16_t control, omask;
243
244 mask &= FE_ALL_EXCEPT;
245 __fnstcw(&control);
246 if (__HAS_SSE()) {
247 __stmxcsr(&mxcsr);
248 } else {
249 mxcsr = 0;
250 }
251 omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
252 if (mask) {
253 control |= mask;
254 __fldcw(control);
255 if (__HAS_SSE()) {
256 mxcsr |= mask << _SSE_EMASK_SHIFT;
257 __ldmxcsr(mxcsr);
258 }
259 }
260 return (omask);
261 }
262
263 int
feclearexcept(int excepts)264 feclearexcept(int excepts)
265 {
266 fenv_t env;
267 __uint32_t mxcsr;
268
269 excepts &= FE_ALL_EXCEPT;
270 if (excepts) { /* Do nothing if excepts is 0 */
271 __fnstenv(&env);
272 env.__status &= ~excepts;
273 __fnclex();
274 __fldenv(env);
275 if (__HAS_SSE()) {
276 __stmxcsr(&mxcsr);
277 mxcsr &= ~excepts;
278 __ldmxcsr(mxcsr);
279 }
280 }
281 return (0);
282 }
283
/*
 * Store in *flagp the current state of the exception flags named in
 * excepts.  A flag counts as set when it is set in the x87 status word
 * or, when SSE is available, in MXCSR.  Always returns 0.
 */
int
fegetexceptflag(fexcept_t *flagp, int excepts)
{
	__uint16_t x87_status;
	__uint32_t sse_csr = 0;	/* contributes nothing without SSE */

	excepts &= FE_ALL_EXCEPT;

	__fnstsw(&x87_status);
	if (__HAS_SSE()) {
		__stmxcsr(&sse_csr);
	}

	*flagp = (x87_status | sse_csr) & excepts;

	return (0);
}
300
/*
 * Return the subset of excepts whose flags are currently set in the
 * x87 status word or, when SSE is available, in MXCSR.  Bits outside
 * FE_ALL_EXCEPT are ignored; with nothing selected the hardware is not
 * queried and 0 is returned.
 */
int
fetestexcept(int excepts)
{
	__uint16_t x87_status;
	__uint32_t sse_csr = 0;	/* contributes nothing without SSE */

	excepts &= FE_ALL_EXCEPT;
	if (excepts == 0) {
		return (0);
	}

	__fnstsw(&x87_status);
	if (__HAS_SSE()) {
		__stmxcsr(&sse_csr);
	}

	return ((x87_status | sse_csr) & excepts);
}
319
320 int
fegetround(void)321 fegetround(void)
322 {
323 __uint16_t control;
324
325 /*
326 * We assume that the x87 and the SSE unit agree on the
327 * rounding mode. Reading the control word on the x87 turns
328 * out to be about 5 times faster than reading it on the SSE
329 * unit on an Opteron 244.
330 */
331 __fnstcw(&control);
332 return (control & ROUND_MASK);
333 }
334
335 int
fesetround(int round)336 fesetround(int round)
337 {
338 __uint32_t mxcsr;
339 __uint16_t control;
340
341 if (round & ~ROUND_MASK) {
342 return (-1);
343 } else {
344 __fnstcw(&control);
345 control &= ~ROUND_MASK;
346 control |= round;
347 __fldcw(control);
348 if (__HAS_SSE()) {
349 __stmxcsr(&mxcsr);
350 mxcsr &= ~(ROUND_MASK << _SSE_ROUND_SHIFT);
351 mxcsr |= round << _SSE_ROUND_SHIFT;
352 __ldmxcsr(mxcsr);
353 }
354 return (0);
355 }
356 }
357
358 int
fesetenv(const fenv_t * envp)359 fesetenv(const fenv_t *envp)
360 {
361 fenv_t env = *envp;
362 __uint32_t mxcsr;
363
364 mxcsr = (env.__mxcsr_hi << 16) | (env.__mxcsr_lo);
365 env.__mxcsr_hi = 0xffff;
366 env.__mxcsr_lo = 0xffff;
367 /*
368 * XXX Using fldenvx() instead of fldenv() tells the compiler that this
369 * instruction clobbers the i387 register stack. This happens because
370 * we restore the tag word from the saved environment. Normally, this
371 * would happen anyway and we wouldn't care, because the ABI allows
372 * function calls to clobber the i387 regs. However, fesetenv() is
373 * inlined, so we need to be more careful.
374 */
375 __fldenvx(env);
376 if (__HAS_SSE()) {
377 __ldmxcsr(mxcsr);
378 }
379 return (0);
380 }
381
/*
 * Return the exceptions that are currently unmasked, judged from the
 * x87 control word only.
 */
int
fegetexcept(void)
{
	__uint16_t x87_cw;
	int enabled;

	/*
	 * The x87 and the SSE unit are assumed to carry the same masks,
	 * so only the x87 control word is consulted.
	 */
	__fnstcw(&x87_cw);
	enabled = ~x87_cw & FE_ALL_EXCEPT;

	return (enabled);
}
394