1 /*
2  * e_logf.c - single precision log function
3  *
4  * Copyright (c) 2009-2018, Arm Limited.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 /*
9  * Algorithm was once taken from Cody & Waite, but has been munged
10  * out of all recognition by SGT.
11  */
12 
13 #include <math.h>
14 #include <errno.h>
15 #include "math_private.h"
16 
17 float
logf(float X)18 logf(float X)
19 {
20     int N = 0;
21     int aindex;
22     float a, x, s;
23     unsigned ix = fai(X);
24 
25     if (__builtin_expect((ix - 0x00800000) >= 0x7f800000 - 0x00800000, 0)) {
26         if ((ix << 1) > 0xff000000) /* NaN */
27             return FLOAT_INFNAN(X);
28         if (ix == 0x7f800000)          /* +inf */
29             return X;
30         if (X < 0) {                   /* anything negative */
31             return MATHERR_LOGF_NEG(X);
32         }
33         if (X == 0) {
34             return MATHERR_LOGF_0(X);
35         }
36         /* That leaves denormals. */
37         N = -23;
38         X *= 0x1p+23F;
39         ix = fai(X);
40     }
41 
42     /*
43      * Separate X into three parts:
44      *  - 2^N for some integer N
45      *  - a number a of the form (1+k/8) for k=0,...,7
46      *  - a residual which we compute as s = (x-a)/(x+a), for
47      *    x=X/2^N.
48      *
49      * We pick the _nearest_ (N,a) pair, so that (x-a) has magnitude
50      * at most 1/16. Hence, we must round things that are just
51      * _below_ a power of two up to the next power of two, so this
52      * isn't as simple as extracting the raw exponent of the FP
53      * number. Instead we must grab the exponent together with the
54      * top few bits of the mantissa, and round (in integers) there.
55      */
56     {
57         int rounded = ix + 0x00080000;
58         int Nnew = (rounded >> 23) - 127;
59         aindex = (rounded >> 20) & 7;
60         a = fhex(0x3f800000 + (aindex << 20));
61         N += Nnew;
62         x = fhex(ix - (Nnew << 23));
63     }
64 
65     if (!N && !aindex) {
66         /*
67          * Use an alternative strategy for very small |x|, which
68          * avoids the 1ULP of relative error introduced in the
69          * computation of s. If our nearest (N,a) pair is N=0,a=1,
70          * that means we have -1/32 < x-a < 1/16, on which interval
71          * the ordinary series for log(1+z) (setting z-x-a) will
72          * converge adequately fast; so we can simply find an
73          * approximation to log(1+z)/z good on that interval and
74          * scale it by z on the way out.
75          *
76          * Coefficients generated by the command
77 
78 ./auxiliary/remez.jl --variable=z --suffix=f -- '-1/BigFloat(32)' '+1/BigFloat(16)' 3 0 '(log1p(x)-x)/x^2'
79 
80          */
81         float z = x - 1.0f;
82         float p = z*z * (
83             -4.999999767382730053173434595877399055021398381370452534949864039404089549132551e-01f+z*(3.333416379155995401749506866323446447523793085809161350343357014272193712456391e-01f+z*(-2.501299948811686421962724839011563450757435183422532362736159418564644404218257e-01f+z*(1.903576945606738444146078468935429697455230136403008172485495359631510244557255e-01f)))
84             );
85 
86         return z + p;
87     }
88 
89     /*
90      * Now we have N, a and x correct, so that |x-a| <= 1/16.
91      * Compute s.
92      *
93      * (Since |x+a| >= 2, this means that |s| will be at most 1/32.)
94      */
95     s = (x - a) / (x + a);
96 
97     /*
98      * The point of computing s = (x-a)/(x+a) was that this makes x
99      * equal to a * (1+s)/(1-s). So we can now compute log(x) by
100      * means of computing log((1+s)/(1-s)) (which has a more
101      * efficiently converging series), and adding log(a) which we
102      * obtain from a lookup table.
103      *
104      * So our full answer to log(X) is now formed by adding together
105      * N*log(2) + log(a) + log((1+s)/(1-s)).
106      *
107      * Now log((1+s)/(1-s)) has the exact Taylor series
108      *
109      *   2s + 2s^3/3 + 2s^5/5 + ...
110      *
111      * and what we do is to compute all but the first term of that
112      * as a polynomial approximation in s^2, then add on the first
113      * term - and all the other bits and pieces above - in
114      * precision-and-a-half so as to keep the total error down.
115      */
116     {
117         float s2 = s*s;
118 
119         /*
120          * We want a polynomial L(s^2) such that
121          *
122          *    2s + s^3*L(s^2) = log((1+s)/(1-s))
123          *
124          * => L(s^2) = (log((1+s)/(1-s)) - 2s) / s^3
125          *
126          * => L(z) = (log((1+sqrt(z))/(1-sqrt(z))) - 2*sqrt(z)) / sqrt(z)^3
127          *
128          * The required range of the polynomial is only [0,1/32^2].
129          *
130          * Our accuracy requirement for the polynomial approximation
131          * is that we don't want to introduce any error more than
132          * about 2^-23 times the _top_ bit of s. But the value of
133          * the polynomial has magnitude about s^3; and since |s| <
134          * 2^-5, this gives us |s^3/s| < 2^-10. In other words,
135          * our approximation only needs to be accurate to 13 bits or
136          * so before its error is lost in the noise when we add it
137          * to everything else.
138          *
139          * Coefficients generated by the command
140 
141 ./auxiliary/remez.jl --variable=s2 --suffix=f -- '0' '1/BigFloat(32^2)' 1 0 '(abs(x) < 1e-20 ? BigFloat(2)/3 + 2*x/5 + 2*x^2/7 + 2*x^3/9 : (log((1+sqrt(x))/(1-sqrt(x)))-2*sqrt(x))/sqrt(x^3))'
142 
143          */
144         float p = s * s2 * (
145             6.666666325680271091157649745099739739798210281016897722498744752867165138320995e-01f+s2*(4.002792299542401431889592846825025487338520940900492146195427243856292349188402e-01f)
146             );
147 
148         static const float log2hi = 0x1.62ep-1F, log2lo = 0x1.0bfbe8p-15F;
149         static const float logahi[8] = { 0x0p+0F, 0x1.e26p-4F, 0x1.c8ep-3F, 0x1.46p-2F, 0x1.9f2p-2F, 0x1.f12p-2F, 0x1.1e8p-1F, 0x1.41cp-1F };
150         static const float logalo[8] = { 0x0p+0F, 0x1.076e2ap-16F, 0x1.f7c79ap-15F, 0x1.8bc21cp-14F, 0x1.23eccp-14F, 0x1.1ebf5ep-15F, 0x1.7d79c2p-15F, 0x1.8fe846p-13F };
151         return (N*log2hi + logahi[aindex]) + (2.0f*s + (N*log2lo + logalo[aindex] + p));
152     }
153 }
154