1 /*
2  * rredf.h - trigonometric range reduction function written new for RVCT 4.1
3  *
4  * Copyright (c) 2009-2018, Arm Limited.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 /*
9  * This header file defines an inline function which all three of
10  * the single-precision trig functions (sinf, cosf, tanf) should use
11  * to perform range reduction. The inline function handles the
12  * quickest and most common cases inline, before handing off to an
13  * out-of-line function defined in rredf.c for everything else. Thus
14  * a reasonable compromise is struck between speed and space. (I
15  * hope.) In particular, this approach avoids a function call
16  * overhead in the common case.
17  */
18 
19 #ifndef _included_rredf_h
20 #define _included_rredf_h
21 
22 #include "math_private.h"
23 
24 #ifdef __cplusplus
25 extern "C" {
26 #endif /* __cplusplus */
27 
28 extern float __mathlib_rredf2(float x, int *q, unsigned k);
29 
30 /*
31  * Semantics of the function:
32  *  - x is the single-precision input value provided by the user
33  *  - the return value is in the range [-pi/4,pi/4], and is equal
34  *    (within reasonable accuracy bounds) to x minus n*pi/2 for some
35  *    integer n. (FIXME: perhaps some slippage on the output
36  *    interval is acceptable, requiring more range from the
37  *    following polynomial approximations but permitting more
38  *    approximate rred decisions?)
39  *  - *q is set to a positive value whose low two bits match those
40  *    of n. Alternatively, it comes back as -1 indicating that the
41  *    input value was trivial in some way (infinity, NaN, or so
42  *    small that we can safely return sin(x)=tan(x)=x,cos(x)=1).
43  */
__mathlib_rredf(float x,int * q)44 static __inline float __mathlib_rredf(float x, int *q)
45 {
46     /*
47      * First, extract the bit pattern of x as an integer, so that we
48      * can repeatedly compare things to it without multiple
49      * overheads in retrieving comparison results from the VFP.
50      */
51     unsigned k = fai(x);
52 
53     /*
54      * Deal immediately with the simplest possible case, in which x
55      * is already within the interval [-pi/4,pi/4]. This also
56      * identifies the subcase of ludicrously small x.
57      */
58     if ((k << 1) < (0x3f490fdb << 1)) {
59         if ((k << 1) < (0x39800000 << 1))
60             *q = -1;
61         else
62             *q = 0;
63         return x;
64     }
65 
66     /*
67      * The next plan is to multiply x by 2/pi and convert to an
68      * integer, which gives us n; then we subtract n*pi/2 from x to
69      * get our output value.
70      *
71      * By representing pi/2 in that final step by a prec-and-a-half
72      * approximation, we can arrange good accuracy for n strictly
73      * less than 2^13 (so that an FP representation of n has twelve
74      * zero bits at the bottom). So our threshold for this strategy
75      * is 2^13 * pi/2 - pi/4, otherwise known as 8191.75 * pi/2 or
76      * 4095.875*pi. (Or, for those perverse people interested in
77      * actual numbers rather than multiples of pi/2, about 12867.5.)
78      */
79     if (__builtin_expect((k & 0x7fffffff) < 0x46490e49, 1)) {
80         float nf = 0.636619772367581343f * x;
81         /*
82          * The difference between that single-precision constant and
83          * the real 2/pi is about 2.568e-8. Hence the product nf has a
84          * potential error of 2.568e-8|x| even before rounding; since
85          * |x| < 4096 pi, that gives us an error bound of about
86          * 0.0003305.
87          *
88          * nf is then rounded to single precision, with a max error of
89          * 1/2 ULP, and since nf goes up to just under 8192, half a
90          * ULP could be as big as 2^-12 ~= 0.0002441.
91          *
92          * So by the time we convert nf to an integer, it could be off
93          * by that much, causing the wrong integer to be selected, and
94          * causing us to return a value a little bit outside the
95          * theoretical [-pi/4,+pi/4] output interval.
96          *
97          * How much outside? Well, we subtract nf*pi/2 from x, so the
98          * error bounds above have be be multiplied by pi/2. And if
99          * both of the above sources of error suffer their worst cases
100          * at once, then the very largest value we could return is
101          * obtained by adding that lot to the interval bound pi/4 to
102          * get
103          *
104          *    pi/4 + ((2/pi - 0f_3f22f983)*4096*pi + 2^-12) * pi/2
105          *
106          * which comes to 0f_3f494b02. (Compare 0f_3f490fdb = pi/4.)
107          *
108          * So callers of this range reducer should be prepared to
109          * handle numbers up to that large.
110          */
111 #ifdef __TARGET_FPU_SOFTVFP
112         nf = _frnd(nf);
113 #else
114         if (k & 0x80000000)
115             nf = (nf - 8388608.0f) + 8388608.0f;
116         else
117             nf = (nf + 8388608.0f) - 8388608.0f;   /* round to _nearest_ integer. FIXME: use some sort of frnd in softfp */
118 #endif
119         *q = 3 & (int)nf;
120 #if 0
121         /*
122          * FIXME: now I need a bunch of special cases to avoid
123          * having to do the full four-word reduction every time.
124          * Also, adjust the comment at the top of this section!
125          */
126         if (__builtin_expect((k & 0x7fffffff) < 0x46490e49, 1))
127             return ((x - nf * 0x1.92p+0F) - nf * 0x1.fb4p-12F) - nf * 0x1.4442d2p-24F;
128         else
129 #endif
130             return ((x - nf * 0x1.92p+0F) - nf * 0x1.fb4p-12F) - nf * 0x1.444p-24F - nf * 0x1.68c234p-39F;
131     }
132 
133     /*
134      * That's enough to do in-line; if we're still playing, hand off
135      * to the out-of-line main range reducer.
136      */
137     return __mathlib_rredf2(x, q, k);
138 }
139 
140 #ifdef __cplusplus
141 } /* end of extern "C" */
142 #endif /* __cplusplus */
143 
144 #endif /* included */
145 
146 /* end of rredf.h */
147