1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).
2 //
3 // ====================================================
4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 //
6 // Developed at SunSoft, a Sun Microsystems, Inc. business.
7 // Permission to use, copy, modify, and distribute this
8 // software is freely granted, provided that this notice
9 // is preserved.
10 // ====================================================
11 //
12 // The original source code covered by the above license above has been
13 // modified significantly by Google Inc.
14 // Copyright 2016 the V8 project authors. All rights reserved.
15 
16 #include "src/base/ieee754.h"
17 
18 #include <cmath>
19 #include <limits>
20 
21 #include "src/base/build_config.h"
22 #include "src/base/macros.h"
23 
24 namespace v8 {
25 namespace base {
26 namespace ieee754 {
27 
28 namespace {
29 
30 /* Disable "potential divide by 0" warning in Visual Studio compiler. */
31 
32 #if V8_CC_MSVC
33 
34 #pragma warning(disable : 4723)
35 
36 #endif
37 
38 /*
39  * The original fdlibm code used statements like:
40  *  n0 = ((*(int*)&one)>>29)^1;   * index of high word *
41  *  ix0 = *(n0+(int*)&x);     * high word of x *
42  *  ix1 = *((1-n0)+(int*)&x);   * low word of x *
43  * to dig two 32 bit words out of the 64 bit IEEE floating point
44  * value.  That is non-ANSI, and, moreover, the gcc instruction
45  * scheduler gets it wrong.  We instead use the following macros.
46  * Unlike the original code, we determine the endianness at compile
47  * time, not at run time; I don't see much benefit to selecting
48  * endianness at run time.
49  */
50 
51 /*
52  * A union which permits us to convert between a double and two 32 bit
53  * ints.
54  * TODO(jkummerow): This is undefined behavior. Use bit_cast instead.
55  */
56 
57 #if V8_TARGET_LITTLE_ENDIAN
58 
59 typedef union {
60   double value;
61   struct {
62     uint32_t lsw;
63     uint32_t msw;
64   } parts;
65   struct {
66     uint64_t w;
67   } xparts;
68 } ieee_double_shape_type;
69 
70 #else
71 
72 typedef union {
73   double value;
74   struct {
75     uint32_t msw;
76     uint32_t lsw;
77   } parts;
78   struct {
79     uint64_t w;
80   } xparts;
81 } ieee_double_shape_type;
82 
83 #endif
84 
85 /* Get two 32 bit ints from a double.  */
86 
87 #define EXTRACT_WORDS(ix0, ix1, d) \
88   do {                             \
89     ieee_double_shape_type ew_u;   \
90     ew_u.value = (d);              \
91     (ix0) = ew_u.parts.msw;        \
92     (ix1) = ew_u.parts.lsw;        \
93   } while (0)
94 
95 /* Get a 64-bit int from a double. */
96 #define EXTRACT_WORD64(ix, d)    \
97   do {                           \
98     ieee_double_shape_type ew_u; \
99     ew_u.value = (d);            \
100     (ix) = ew_u.xparts.w;        \
101   } while (0)
102 
103 /* Get the more significant 32 bit int from a double.  */
104 
105 #define GET_HIGH_WORD(i, d)      \
106   do {                           \
107     ieee_double_shape_type gh_u; \
108     gh_u.value = (d);            \
109     (i) = gh_u.parts.msw;        \
110   } while (0)
111 
112 /* Get the less significant 32 bit int from a double.  */
113 
114 #define GET_LOW_WORD(i, d)       \
115   do {                           \
116     ieee_double_shape_type gl_u; \
117     gl_u.value = (d);            \
118     (i) = gl_u.parts.lsw;        \
119   } while (0)
120 
121 /* Set a double from two 32 bit ints.  */
122 
123 #define INSERT_WORDS(d, ix0, ix1) \
124   do {                            \
125     ieee_double_shape_type iw_u;  \
126     iw_u.parts.msw = (ix0);       \
127     iw_u.parts.lsw = (ix1);       \
128     (d) = iw_u.value;             \
129   } while (0)
130 
131 /* Set a double from a 64-bit int. */
132 #define INSERT_WORD64(d, ix)     \
133   do {                           \
134     ieee_double_shape_type iw_u; \
135     iw_u.xparts.w = (ix);        \
136     (d) = iw_u.value;            \
137   } while (0)
138 
139 /* Set the more significant 32 bits of a double from an int.  */
140 
141 #define SET_HIGH_WORD(d, v)      \
142   do {                           \
143     ieee_double_shape_type sh_u; \
144     sh_u.value = (d);            \
145     sh_u.parts.msw = (v);        \
146     (d) = sh_u.value;            \
147   } while (0)
148 
149 /* Set the less significant 32 bits of a double from an int.  */
150 
151 #define SET_LOW_WORD(d, v)       \
152   do {                           \
153     ieee_double_shape_type sl_u; \
154     sl_u.value = (d);            \
155     sl_u.parts.lsw = (v);        \
156     (d) = sl_u.value;            \
157   } while (0)
158 
159 /* Support macro. */
160 
161 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval))
162 
163 int32_t __ieee754_rem_pio2(double x, double* y) V8_WARN_UNUSED_RESULT;
164 double __kernel_cos(double x, double y) V8_WARN_UNUSED_RESULT;
165 int __kernel_rem_pio2(double* x, double* y, int e0, int nx, int prec,
166                       const int32_t* ipio2) V8_WARN_UNUSED_RESULT;
167 double __kernel_sin(double x, double y, int iy) V8_WARN_UNUSED_RESULT;
168 
169 /* __ieee754_rem_pio2(x,y)
170  *
171  * return the remainder of x rem pi/2 in y[0]+y[1]
172  * use __kernel_rem_pio2()
173  */
__ieee754_rem_pio2(double x,double * y)174 int32_t __ieee754_rem_pio2(double x, double *y) {
175   /*
176    * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
177    */
178   static const int32_t two_over_pi[] = {
179       0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C,
180       0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649,
181       0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44,
182       0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B,
183       0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D,
184       0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
185       0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330,
186       0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08,
187       0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA,
188       0x73A8C9, 0x60E27B, 0xC08C6B,
189   };
190 
191   static const int32_t npio2_hw[] = {
192       0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
193       0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
194       0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
195       0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
196       0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
197       0x404858EB, 0x404921FB,
198   };
199 
200   /*
201    * invpio2:  53 bits of 2/pi
202    * pio2_1:   first  33 bit of pi/2
203    * pio2_1t:  pi/2 - pio2_1
204    * pio2_2:   second 33 bit of pi/2
205    * pio2_2t:  pi/2 - (pio2_1+pio2_2)
206    * pio2_3:   third  33 bit of pi/2
207    * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
208    */
209 
210   static const double
211       zero = 0.00000000000000000000e+00,    /* 0x00000000, 0x00000000 */
212       half = 5.00000000000000000000e-01,    /* 0x3FE00000, 0x00000000 */
213       two24 = 1.67772160000000000000e+07,   /* 0x41700000, 0x00000000 */
214       invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
215       pio2_1 = 1.57079632673412561417e+00,  /* 0x3FF921FB, 0x54400000 */
216       pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
217       pio2_2 = 6.07710050630396597660e-11,  /* 0x3DD0B461, 0x1A600000 */
218       pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
219       pio2_3 = 2.02226624871116645580e-21,  /* 0x3BA3198A, 0x2E000000 */
220       pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
221 
222   double z, w, t, r, fn;
223   double tx[3];
224   int32_t e0, i, j, nx, n, ix, hx;
225   uint32_t low;
226 
227   z = 0;
228   GET_HIGH_WORD(hx, x); /* high word of x */
229   ix = hx & 0x7FFFFFFF;
230   if (ix <= 0x3FE921FB) { /* |x| ~<= pi/4 , no need for reduction */
231     y[0] = x;
232     y[1] = 0;
233     return 0;
234   }
235   if (ix < 0x4002D97C) { /* |x| < 3pi/4, special case with n=+-1 */
236     if (hx > 0) {
237       z = x - pio2_1;
238       if (ix != 0x3FF921FB) { /* 33+53 bit pi is good enough */
239         y[0] = z - pio2_1t;
240         y[1] = (z - y[0]) - pio2_1t;
241       } else { /* near pi/2, use 33+33+53 bit pi */
242         z -= pio2_2;
243         y[0] = z - pio2_2t;
244         y[1] = (z - y[0]) - pio2_2t;
245       }
246       return 1;
247     } else { /* negative x */
248       z = x + pio2_1;
249       if (ix != 0x3FF921FB) { /* 33+53 bit pi is good enough */
250         y[0] = z + pio2_1t;
251         y[1] = (z - y[0]) + pio2_1t;
252       } else { /* near pi/2, use 33+33+53 bit pi */
253         z += pio2_2;
254         y[0] = z + pio2_2t;
255         y[1] = (z - y[0]) + pio2_2t;
256       }
257       return -1;
258     }
259   }
260   if (ix <= 0x413921FB) { /* |x| ~<= 2^19*(pi/2), medium size */
261     t = fabs(x);
262     n = static_cast<int32_t>(t * invpio2 + half);
263     fn = static_cast<double>(n);
264     r = t - fn * pio2_1;
265     w = fn * pio2_1t; /* 1st round good to 85 bit */
266     if (n < 32 && ix != npio2_hw[n - 1]) {
267       y[0] = r - w; /* quick check no cancellation */
268     } else {
269       uint32_t high;
270       j = ix >> 20;
271       y[0] = r - w;
272       GET_HIGH_WORD(high, y[0]);
273       i = j - ((high >> 20) & 0x7FF);
274       if (i > 16) { /* 2nd iteration needed, good to 118 */
275         t = r;
276         w = fn * pio2_2;
277         r = t - w;
278         w = fn * pio2_2t - ((t - r) - w);
279         y[0] = r - w;
280         GET_HIGH_WORD(high, y[0]);
281         i = j - ((high >> 20) & 0x7FF);
282         if (i > 49) { /* 3rd iteration need, 151 bits acc */
283           t = r;      /* will cover all possible cases */
284           w = fn * pio2_3;
285           r = t - w;
286           w = fn * pio2_3t - ((t - r) - w);
287           y[0] = r - w;
288         }
289       }
290     }
291     y[1] = (r - y[0]) - w;
292     if (hx < 0) {
293       y[0] = -y[0];
294       y[1] = -y[1];
295       return -n;
296     } else {
297       return n;
298     }
299   }
300   /*
301    * all other (large) arguments
302    */
303   if (ix >= 0x7FF00000) { /* x is inf or NaN */
304     y[0] = y[1] = x - x;
305     return 0;
306   }
307   /* set z = scalbn(|x|,ilogb(x)-23) */
308   GET_LOW_WORD(low, x);
309   SET_LOW_WORD(z, low);
310   e0 = (ix >> 20) - 1046; /* e0 = ilogb(z)-23; */
311   SET_HIGH_WORD(z, ix - static_cast<int32_t>(e0 << 20));
312   for (i = 0; i < 2; i++) {
313     tx[i] = static_cast<double>(static_cast<int32_t>(z));
314     z = (z - tx[i]) * two24;
315   }
316   tx[2] = z;
317   nx = 3;
318   while (tx[nx - 1] == zero) nx--; /* skip zero term */
319   n = __kernel_rem_pio2(tx, y, e0, nx, 2, two_over_pi);
320   if (hx < 0) {
321     y[0] = -y[0];
322     y[1] = -y[1];
323     return -n;
324   }
325   return n;
326 }
327 
328 /* __kernel_cos( x,  y )
329  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
330  * Input x is assumed to be bounded by ~pi/4 in magnitude.
331  * Input y is the tail of x.
332  *
333  * Algorithm
334  *      1. Since cos(-x) = cos(x), we need only to consider positive x.
335  *      2. if x < 2^-27 (hx<0x3E400000 0), return 1 with inexact if x!=0.
336  *      3. cos(x) is approximated by a polynomial of degree 14 on
337  *         [0,pi/4]
338  *                                       4            14
339  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
340  *         where the remez error is
341  *
342  *      |              2     4     6     8     10    12     14 |     -58
343  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
344  *      |                                                      |
345  *
346  *                     4     6     8     10    12     14
347  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
348  *             cos(x) = 1 - x*x/2 + r
349  *         since cos(x+y) ~ cos(x) - sin(x)*y
350  *                        ~ cos(x) - x*y,
351  *         a correction term is necessary in cos(x) and hence
352  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
353  *         For better accuracy when x > 0.3, let qx = |x|/4 with
354  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
355  *         Then
356  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
357  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
358  *         magnitude of the latter is at least a quarter of x*x/2,
359  *         thus, reducing the rounding error in the subtraction.
360  */
__kernel_cos(double x,double y)361 V8_INLINE double __kernel_cos(double x, double y) {
362   static const double
363       one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
364       C1 = 4.16666666666666019037e-02,  /* 0x3FA55555, 0x5555554C */
365       C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
366       C3 = 2.48015872894767294178e-05,  /* 0x3EFA01A0, 0x19CB1590 */
367       C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
368       C5 = 2.08757232129817482790e-09,  /* 0x3E21EE9E, 0xBDB4B1C4 */
369       C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
370 
371   double a, iz, z, r, qx;
372   int32_t ix;
373   GET_HIGH_WORD(ix, x);
374   ix &= 0x7FFFFFFF;                           /* ix = |x|'s high word*/
375   if (ix < 0x3E400000) {                      /* if x < 2**27 */
376     if (static_cast<int>(x) == 0) return one; /* generate inexact */
377   }
378   z = x * x;
379   r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6)))));
380   if (ix < 0x3FD33333) { /* if |x| < 0.3 */
381     return one - (0.5 * z - (z * r - x * y));
382   } else {
383     if (ix > 0x3FE90000) { /* x > 0.78125 */
384       qx = 0.28125;
385     } else {
386       INSERT_WORDS(qx, ix - 0x00200000, 0); /* x/4 */
387     }
388     iz = 0.5 * z - qx;
389     a = one - qx;
390     return a - (iz - (z * r - x * y));
391   }
392 }
393 
394 /* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
395  * double x[],y[]; int e0,nx,prec; int ipio2[];
396  *
397  * __kernel_rem_pio2 return the last three digits of N with
398  *              y = x - N*pi/2
399  * so that |y| < pi/2.
400  *
401  * The method is to compute the integer (mod 8) and fraction parts of
402  * (2/pi)*x without doing the full multiplication. In general we
403  * skip the part of the product that are known to be a huge integer (
404  * more accurately, = 0 mod 8 ). Thus the number of operations are
405  * independent of the exponent of the input.
406  *
407  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
408  *
409  * Input parameters:
410  *      x[]     The input value (must be positive) is broken into nx
411  *              pieces of 24-bit integers in double precision format.
412  *              x[i] will be the i-th 24 bit of x. The scaled exponent
413  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
414  *              match x's up to 24 bits.
415  *
416  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
417  *                      e0 = ilogb(z)-23
418  *                      z  = scalbn(z,-e0)
419  *              for i = 0,1,2
420  *                      x[i] = floor(z)
421  *                      z    = (z-x[i])*2**24
422  *
423  *
424  *      y[]     output result in an array of double precision numbers.
425  *              The dimension of y[] is:
426  *                      24-bit  precision       1
427  *                      53-bit  precision       2
428  *                      64-bit  precision       2
429  *                      113-bit precision       3
430  *              The actual value is the sum of them. Thus for 113-bit
431  *              precison, one may have to do something like:
432  *
433  *              long double t,w,r_head, r_tail;
434  *              t = (long double)y[2] + (long double)y[1];
435  *              w = (long double)y[0];
436  *              r_head = t+w;
437  *              r_tail = w - (r_head - t);
438  *
439  *      e0      The exponent of x[0]
440  *
441  *      nx      dimension of x[]
442  *
443  *      prec    an integer indicating the precision:
444  *                      0       24  bits (single)
445  *                      1       53  bits (double)
446  *                      2       64  bits (extended)
447  *                      3       113 bits (quad)
448  *
449  *      ipio2[]
450  *              integer array, contains the (24*i)-th to (24*i+23)-th
451  *              bit of 2/pi after binary point. The corresponding
452  *              floating value is
453  *
454  *                      ipio2[i] * 2^(-24(i+1)).
455  *
456  * External function:
457  *      double scalbn(), floor();
458  *
459  *
460  * Here is the description of some local variables:
461  *
462  *      jk      jk+1 is the initial number of terms of ipio2[] needed
463  *              in the computation. The recommended value is 2,3,4,
464  *              6 for single, double, extended,and quad.
465  *
466  *      jz      local integer variable indicating the number of
467  *              terms of ipio2[] used.
468  *
469  *      jx      nx - 1
470  *
471  *      jv      index for pointing to the suitable ipio2[] for the
472  *              computation. In general, we want
473  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
474  *              is an integer. Thus
475  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
476  *              Hence jv = max(0,(e0-3)/24).
477  *
478  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
479  *
480  *      q[]     double array with integral value, representing the
481  *              24-bits chunk of the product of x and 2/pi.
482  *
483  *      q0      the corresponding exponent of q[0]. Note that the
484  *              exponent for q[i] would be q0-24*i.
485  *
486  *      PIo2[]  double precision array, obtained by cutting pi/2
487  *              into 24 bits chunks.
488  *
489  *      f[]     ipio2[] in floating point
490  *
491  *      iq[]    integer array by breaking up q[] in 24-bits chunk.
492  *
493  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
494  *
495  *      ih      integer. If >0 it indicates q[] is >= 0.5, hence
496  *              it also indicates the *sign* of the result.
497  *
498  */
__kernel_rem_pio2(double * x,double * y,int e0,int nx,int prec,const int32_t * ipio2)499 int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
500                       const int32_t *ipio2) {
501   /* Constants:
502    * The hexadecimal values are the intended ones for the following
503    * constants. The decimal values may be used, provided that the
504    * compiler will convert from decimal to binary accurately enough
505    * to produce the hexadecimal values shown.
506    */
507   static const int init_jk[] = {2, 3, 4, 6}; /* initial value for jk */
508 
509   static const double PIo2[] = {
510       1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
511       7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
512       5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
513       3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
514       1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
515       1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
516       2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
517       2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
518   };
519 
520   static const double
521       zero = 0.0,
522       one = 1.0,
523       two24 = 1.67772160000000000000e+07,  /* 0x41700000, 0x00000000 */
524       twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
525 
526   int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
527   double z, fw, f[20], fq[20], q[20];
528 
529   /* initialize jk*/
530   jk = init_jk[prec];
531   jp = jk;
532 
533   /* determine jx,jv,q0, note that 3>q0 */
534   jx = nx - 1;
535   jv = (e0 - 3) / 24;
536   if (jv < 0) jv = 0;
537   q0 = e0 - 24 * (jv + 1);
538 
539   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
540   j = jv - jx;
541   m = jx + jk;
542   for (i = 0; i <= m; i++, j++) {
543     f[i] = (j < 0) ? zero : static_cast<double>(ipio2[j]);
544   }
545 
546   /* compute q[0],q[1],...q[jk] */
547   for (i = 0; i <= jk; i++) {
548     for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
549     q[i] = fw;
550   }
551 
552   jz = jk;
553 recompute:
554   /* distill q[] into iq[] reversingly */
555   for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
556     fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
557     iq[i] = static_cast<int32_t>(z - two24 * fw);
558     z = q[j - 1] + fw;
559   }
560 
561   /* compute n */
562   z = scalbn(z, q0);           /* actual value of z */
563   z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
564   n = static_cast<int32_t>(z);
565   z -= static_cast<double>(n);
566   ih = 0;
567   if (q0 > 0) { /* need iq[jz-1] to determine n */
568     i = (iq[jz - 1] >> (24 - q0));
569     n += i;
570     iq[jz - 1] -= i << (24 - q0);
571     ih = iq[jz - 1] >> (23 - q0);
572   } else if (q0 == 0) {
573     ih = iq[jz - 1] >> 23;
574   } else if (z >= 0.5) {
575     ih = 2;
576   }
577 
578   if (ih > 0) { /* q > 0.5 */
579     n += 1;
580     carry = 0;
581     for (i = 0; i < jz; i++) { /* compute 1-q */
582       j = iq[i];
583       if (carry == 0) {
584         if (j != 0) {
585           carry = 1;
586           iq[i] = 0x1000000 - j;
587         }
588       } else {
589         iq[i] = 0xFFFFFF - j;
590       }
591     }
592     if (q0 > 0) { /* rare case: chance is 1 in 12 */
593       switch (q0) {
594         case 1:
595           iq[jz - 1] &= 0x7FFFFF;
596           break;
597         case 2:
598           iq[jz - 1] &= 0x3FFFFF;
599           break;
600       }
601     }
602     if (ih == 2) {
603       z = one - z;
604       if (carry != 0) z -= scalbn(one, q0);
605     }
606   }
607 
608   /* check if recomputation is needed */
609   if (z == zero) {
610     j = 0;
611     for (i = jz - 1; i >= jk; i--) j |= iq[i];
612     if (j == 0) { /* need recomputation */
613       for (k = 1; jk >= k && iq[jk - k] == 0; k++) {
614         /* k = no. of terms needed */
615       }
616 
617       for (i = jz + 1; i <= jz + k; i++) { /* add q[jz+1] to q[jz+k] */
618         f[jx + i] = ipio2[jv + i];
619         for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
620         q[i] = fw;
621       }
622       jz += k;
623       goto recompute;
624     }
625   }
626 
627   /* chop off zero terms */
628   if (z == 0.0) {
629     jz -= 1;
630     q0 -= 24;
631     while (iq[jz] == 0) {
632       jz--;
633       q0 -= 24;
634     }
635   } else { /* break z into 24-bit if necessary */
636     z = scalbn(z, -q0);
637     if (z >= two24) {
638       fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
639       iq[jz] = z - two24 * fw;
640       jz += 1;
641       q0 += 24;
642       iq[jz] = fw;
643     } else {
644       iq[jz] = z;
645     }
646   }
647 
648   /* convert integer "bit" chunk to floating-point value */
649   fw = scalbn(one, q0);
650   for (i = jz; i >= 0; i--) {
651     q[i] = fw * iq[i];
652     fw *= twon24;
653   }
654 
655   /* compute PIo2[0,...,jp]*q[jz,...,0] */
656   for (i = jz; i >= 0; i--) {
657     for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) fw += PIo2[k] * q[i + k];
658     fq[jz - i] = fw;
659   }
660 
661   /* compress fq[] into y[] */
662   switch (prec) {
663     case 0:
664       fw = 0.0;
665       for (i = jz; i >= 0; i--) fw += fq[i];
666       y[0] = (ih == 0) ? fw : -fw;
667       break;
668     case 1:
669     case 2:
670       fw = 0.0;
671       for (i = jz; i >= 0; i--) fw += fq[i];
672       y[0] = (ih == 0) ? fw : -fw;
673       fw = fq[0] - fw;
674       for (i = 1; i <= jz; i++) fw += fq[i];
675       y[1] = (ih == 0) ? fw : -fw;
676       break;
677     case 3: /* painful */
678       for (i = jz; i > 0; i--) {
679         fw = fq[i - 1] + fq[i];
680         fq[i] += fq[i - 1] - fw;
681         fq[i - 1] = fw;
682       }
683       for (i = jz; i > 1; i--) {
684         fw = fq[i - 1] + fq[i];
685         fq[i] += fq[i - 1] - fw;
686         fq[i - 1] = fw;
687       }
688       for (fw = 0.0, i = jz; i >= 2; i--) fw += fq[i];
689       if (ih == 0) {
690         y[0] = fq[0];
691         y[1] = fq[1];
692         y[2] = fw;
693       } else {
694         y[0] = -fq[0];
695         y[1] = -fq[1];
696         y[2] = -fw;
697       }
698   }
699   return n & 7;
700 }
701 
702 /* __kernel_sin( x, y, iy)
703  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
704  * Input x is assumed to be bounded by ~pi/4 in magnitude.
705  * Input y is the tail of x.
706  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
707  *
708  * Algorithm
709  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
710  *      2. if x < 2^-27 (hx<0x3E400000 0), return x with inexact if x!=0.
711  *      3. sin(x) is approximated by a polynomial of degree 13 on
712  *         [0,pi/4]
713  *                               3            13
714  *              sin(x) ~ x + S1*x + ... + S6*x
715  *         where
716  *
717  *      |sin(x)         2     4     6     8     10     12  |     -58
718  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
719  *      |  x                                               |
720  *
721  *      4. sin(x+y) = sin(x) + sin'(x')*y
722  *                  ~ sin(x) + (1-x*x/2)*y
723  *         For better accuracy, let
724  *                   3      2      2      2      2
725  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
726  *         then                   3    2
727  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
728  */
__kernel_sin(double x,double y,int iy)729 V8_INLINE double __kernel_sin(double x, double y, int iy) {
730   static const double
731       half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
732       S1 = -1.66666666666666324348e-01,  /* 0xBFC55555, 0x55555549 */
733       S2 = 8.33333333332248946124e-03,   /* 0x3F811111, 0x1110F8A6 */
734       S3 = -1.98412698298579493134e-04,  /* 0xBF2A01A0, 0x19C161D5 */
735       S4 = 2.75573137070700676789e-06,   /* 0x3EC71DE3, 0x57B1FE7D */
736       S5 = -2.50507602534068634195e-08,  /* 0xBE5AE5E6, 0x8A2B9CEB */
737       S6 = 1.58969099521155010221e-10;   /* 0x3DE5D93A, 0x5ACFD57C */
738 
739   double z, r, v;
740   int32_t ix;
741   GET_HIGH_WORD(ix, x);
742   ix &= 0x7FFFFFFF;      /* high word of x */
743   if (ix < 0x3E400000) { /* |x| < 2**-27 */
744     if (static_cast<int>(x) == 0) return x;
745   } /* generate inexact */
746   z = x * x;
747   v = z * x;
748   r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6)));
749   if (iy == 0) {
750     return x + v * (S1 + z * r);
751   } else {
752     return x - ((z * (half * y - v * r) - y) - v * S1);
753   }
754 }
755 
756 /* __kernel_tan( x, y, k )
757  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
758  * Input x is assumed to be bounded by ~pi/4 in magnitude.
759  * Input y is the tail of x.
760  * Input k indicates whether tan (if k=1) or
761  * -1/tan (if k= -1) is returned.
762  *
763  * Algorithm
764  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
765  *      2. if x < 2^-28 (hx<0x3E300000 0), return x with inexact if x!=0.
766  *      3. tan(x) is approximated by a odd polynomial of degree 27 on
767  *         [0,0.67434]
768  *                               3             27
769  *              tan(x) ~ x + T1*x + ... + T13*x
770  *         where
771  *
772  *              |tan(x)         2     4            26   |     -59.2
773  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
774  *              |  x                                    |
775  *
776  *         Note: tan(x+y) = tan(x) + tan'(x)*y
777  *                        ~ tan(x) + (1+x*x)*y
778  *         Therefore, for better accuracy in computing tan(x+y), let
779  *                   3      2      2       2       2
780  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
781  *         then
782  *                                  3    2
783  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
784  *
785  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
786  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
787  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
788  */
__kernel_tan(double x,double y,int iy)789 double __kernel_tan(double x, double y, int iy) {
790   static const double xxx[] = {
791       3.33333333333334091986e-01,             /* 3FD55555, 55555563 */
792       1.33333333333201242699e-01,             /* 3FC11111, 1110FE7A */
793       5.39682539762260521377e-02,             /* 3FABA1BA, 1BB341FE */
794       2.18694882948595424599e-02,             /* 3F9664F4, 8406D637 */
795       8.86323982359930005737e-03,             /* 3F8226E3, E96E8493 */
796       3.59207910759131235356e-03,             /* 3F6D6D22, C9560328 */
797       1.45620945432529025516e-03,             /* 3F57DBC8, FEE08315 */
798       5.88041240820264096874e-04,             /* 3F4344D8, F2F26501 */
799       2.46463134818469906812e-04,             /* 3F3026F7, 1A8D1068 */
800       7.81794442939557092300e-05,             /* 3F147E88, A03792A6 */
801       7.14072491382608190305e-05,             /* 3F12B80F, 32F0A7E9 */
802       -1.85586374855275456654e-05,            /* BEF375CB, DB605373 */
803       2.59073051863633712884e-05,             /* 3EFB2A70, 74BF7AD4 */
804       /* one */ 1.00000000000000000000e+00,   /* 3FF00000, 00000000 */
805       /* pio4 */ 7.85398163397448278999e-01,  /* 3FE921FB, 54442D18 */
806       /* pio4lo */ 3.06161699786838301793e-17 /* 3C81A626, 33145C07 */
807   };
808 #define one xxx[13]
809 #define pio4 xxx[14]
810 #define pio4lo xxx[15]
811 #define T xxx
812 
813   double z, r, v, w, s;
814   int32_t ix, hx;
815 
816   GET_HIGH_WORD(hx, x);             /* high word of x */
817   ix = hx & 0x7FFFFFFF;             /* high word of |x| */
818   if (ix < 0x3E300000) {            /* x < 2**-28 */
819     if (static_cast<int>(x) == 0) { /* generate inexact */
820       uint32_t low;
821       GET_LOW_WORD(low, x);
822       if (((ix | low) | (iy + 1)) == 0) {
823         return one / fabs(x);
824       } else {
825         if (iy == 1) {
826           return x;
827         } else { /* compute -1 / (x+y) carefully */
828           double a, t;
829 
830           z = w = x + y;
831           SET_LOW_WORD(z, 0);
832           v = y - (z - x);
833           t = a = -one / w;
834           SET_LOW_WORD(t, 0);
835           s = one + t * z;
836           return t + a * (s + t * v);
837         }
838       }
839     }
840   }
841   if (ix >= 0x3FE59428) { /* |x| >= 0.6744 */
842     if (hx < 0) {
843       x = -x;
844       y = -y;
845     }
846     z = pio4 - x;
847     w = pio4lo - y;
848     x = z + w;
849     y = 0.0;
850   }
851   z = x * x;
852   w = z * z;
853   /*
854    * Break x^5*(T[1]+x^2*T[2]+...) into
855    * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
856    * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
857    */
858   r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
859   v = z *
860       (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
861   s = z * x;
862   r = y + z * (s * (r + v) + y);
863   r += T[0] * s;
864   w = x + r;
865   if (ix >= 0x3FE59428) {
866     v = iy;
867     return (1 - ((hx >> 30) & 2)) * (v - 2.0 * (x - (w * w / (w + v) - r)));
868   }
869   if (iy == 1) {
870     return w;
871   } else {
872     /*
873      * if allow error up to 2 ulp, simply return
874      * -1.0 / (x+r) here
875      */
876     /* compute -1.0 / (x+r) accurately */
877     double a, t;
878     z = w;
879     SET_LOW_WORD(z, 0);
880     v = r - (z - x);  /* z+v = r+x */
881     t = a = -1.0 / w; /* a = -1.0/w */
882     SET_LOW_WORD(t, 0);
883     s = 1.0 + t * z;
884     return t + a * (s + t * v);
885   }
886 
887 #undef one
888 #undef pio4
889 #undef pio4lo
890 #undef T
891 }
892 
893 }  // namespace
894 
895 /* acos(x)
896  * Method :
897  *      acos(x)  = pi/2 - asin(x)
898  *      acos(-x) = pi/2 + asin(x)
899  * For |x|<=0.5
900  *      acos(x) = pi/2 - (x + x*x^2*R(x^2))     (see asin.c)
901  * For x>0.5
902  *      acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2)))
903  *              = 2asin(sqrt((1-x)/2))
904  *              = 2s + 2s*z*R(z)        ...z=(1-x)/2, s=sqrt(z)
905  *              = 2f + (2c + 2s*z*R(z))
906  *     where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term
907  *     for f so that f+c ~ sqrt(z).
908  * For x<-0.5
909  *      acos(x) = pi - 2asin(sqrt((1-|x|)/2))
910  *              = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z)
911  *
912  * Special cases:
913  *      if x is NaN, return x itself;
914  *      if |x|>1, return NaN with invalid signal.
915  *
916  * Function needed: sqrt
917  */
acos(double x)918 double acos(double x) {
919   static const double
920       one = 1.00000000000000000000e+00,     /* 0x3FF00000, 0x00000000 */
921       pi = 3.14159265358979311600e+00,      /* 0x400921FB, 0x54442D18 */
922       pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
923       pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
924       pS0 = 1.66666666666666657415e-01,     /* 0x3FC55555, 0x55555555 */
925       pS1 = -3.25565818622400915405e-01,    /* 0xBFD4D612, 0x03EB6F7D */
926       pS2 = 2.01212532134862925881e-01,     /* 0x3FC9C155, 0x0E884455 */
927       pS3 = -4.00555345006794114027e-02,    /* 0xBFA48228, 0xB5688F3B */
928       pS4 = 7.91534994289814532176e-04,     /* 0x3F49EFE0, 0x7501B288 */
929       pS5 = 3.47933107596021167570e-05,     /* 0x3F023DE1, 0x0DFDF709 */
930       qS1 = -2.40339491173441421878e+00,    /* 0xC0033A27, 0x1C8A2D4B */
931       qS2 = 2.02094576023350569471e+00,     /* 0x40002AE5, 0x9C598AC8 */
932       qS3 = -6.88283971605453293030e-01,    /* 0xBFE6066C, 0x1B8D0159 */
933       qS4 = 7.70381505559019352791e-02;     /* 0x3FB3B8C5, 0xB12E9282 */
934 
935   double z, p, q, r, w, s, c, df;
936   int32_t hx, ix;
937   GET_HIGH_WORD(hx, x);
938   ix = hx & 0x7FFFFFFF;
939   if (ix >= 0x3FF00000) { /* |x| >= 1 */
940     uint32_t lx;
941     GET_LOW_WORD(lx, x);
942     if (((ix - 0x3FF00000) | lx) == 0) { /* |x|==1 */
943       if (hx > 0)
944         return 0.0; /* acos(1) = 0  */
945       else
946         return pi + 2.0 * pio2_lo; /* acos(-1)= pi */
947     }
948     return (x - x) / (x - x); /* acos(|x|>1) is NaN */
949   }
950   if (ix < 0x3FE00000) {                            /* |x| < 0.5 */
951     if (ix <= 0x3C600000) return pio2_hi + pio2_lo; /*if|x|<2**-57*/
952     z = x * x;
953     p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
954     q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
955     r = p / q;
956     return pio2_hi - (x - (pio2_lo - x * r));
957   } else if (hx < 0) { /* x < -0.5 */
958     z = (one + x) * 0.5;
959     p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
960     q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
961     s = sqrt(z);
962     r = p / q;
963     w = r * s - pio2_lo;
964     return pi - 2.0 * (s + w);
965   } else { /* x > 0.5 */
966     z = (one - x) * 0.5;
967     s = sqrt(z);
968     df = s;
969     SET_LOW_WORD(df, 0);
970     c = (z - df * df) / (s + df);
971     p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
972     q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
973     r = p / q;
974     w = r * s + c;
975     return 2.0 * (df + w);
976   }
977 }
978 
979 /* acosh(x)
980  * Method :
981  *      Based on
982  *              acosh(x) = log [ x + sqrt(x*x-1) ]
983  *      we have
984  *              acosh(x) := log(x)+ln2, if x is large; else
985  *              acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else
986  *              acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1.
987  *
988  * Special cases:
989  *      acosh(x) is NaN with signal if x<1.
990  *      acosh(NaN) is NaN without signal.
991  */
acosh(double x)992 double acosh(double x) {
993   static const double
994       one = 1.0,
995       ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */
996   double t;
997   int32_t hx;
998   uint32_t lx;
999   EXTRACT_WORDS(hx, lx, x);
1000   if (hx < 0x3FF00000) { /* x < 1 */
1001     return (x - x) / (x - x);
1002   } else if (hx >= 0x41B00000) { /* x > 2**28 */
1003     if (hx >= 0x7FF00000) {      /* x is inf of NaN */
1004       return x + x;
1005     } else {
1006       return log(x) + ln2; /* acosh(huge)=log(2x) */
1007     }
1008   } else if (((hx - 0x3FF00000) | lx) == 0) {
1009     return 0.0;                 /* acosh(1) = 0 */
1010   } else if (hx > 0x40000000) { /* 2**28 > x > 2 */
1011     t = x * x;
1012     return log(2.0 * x - one / (x + sqrt(t - one)));
1013   } else { /* 1<x<2 */
1014     t = x - one;
1015     return log1p(t + sqrt(2.0 * t + t * t));
1016   }
1017 }
1018 
1019 /* asin(x)
1020  * Method :
1021  *      Since  asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ...
1022  *      we approximate asin(x) on [0,0.5] by
1023  *              asin(x) = x + x*x^2*R(x^2)
1024  *      where
1025  *              R(x^2) is a rational approximation of (asin(x)-x)/x^3
1026  *      and its remez error is bounded by
1027  *              |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75)
1028  *
1029  *      For x in [0.5,1]
1030  *              asin(x) = pi/2-2*asin(sqrt((1-x)/2))
1031  *      Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2;
1032  *      then for x>0.98
1033  *              asin(x) = pi/2 - 2*(s+s*z*R(z))
1034  *                      = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo)
1035  *      For x<=0.98, let pio4_hi = pio2_hi/2, then
1036  *              f = hi part of s;
1037  *              c = sqrt(z) - f = (z-f*f)/(s+f)         ...f+c=sqrt(z)
1038  *      and
1039  *              asin(x) = pi/2 - 2*(s+s*z*R(z))
1040  *                      = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo)
1041  *                      = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c))
1042  *
1043  * Special cases:
1044  *      if x is NaN, return x itself;
1045  *      if |x|>1, return NaN with invalid signal.
1046  */
asin(double x)1047 double asin(double x) {
1048   static const double
1049       one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
1050       huge = 1.000e+300,
1051       pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
1052       pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
1053       pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
1054                                             /* coefficient for R(x^2) */
1055       pS0 = 1.66666666666666657415e-01,     /* 0x3FC55555, 0x55555555 */
1056       pS1 = -3.25565818622400915405e-01,    /* 0xBFD4D612, 0x03EB6F7D */
1057       pS2 = 2.01212532134862925881e-01,     /* 0x3FC9C155, 0x0E884455 */
1058       pS3 = -4.00555345006794114027e-02,    /* 0xBFA48228, 0xB5688F3B */
1059       pS4 = 7.91534994289814532176e-04,     /* 0x3F49EFE0, 0x7501B288 */
1060       pS5 = 3.47933107596021167570e-05,     /* 0x3F023DE1, 0x0DFDF709 */
1061       qS1 = -2.40339491173441421878e+00,    /* 0xC0033A27, 0x1C8A2D4B */
1062       qS2 = 2.02094576023350569471e+00,     /* 0x40002AE5, 0x9C598AC8 */
1063       qS3 = -6.88283971605453293030e-01,    /* 0xBFE6066C, 0x1B8D0159 */
1064       qS4 = 7.70381505559019352791e-02;     /* 0x3FB3B8C5, 0xB12E9282 */
1065 
1066   double t, w, p, q, c, r, s;
1067   int32_t hx, ix;
1068 
1069   t = 0;
1070   GET_HIGH_WORD(hx, x);
1071   ix = hx & 0x7FFFFFFF;
1072   if (ix >= 0x3FF00000) { /* |x|>= 1 */
1073     uint32_t lx;
1074     GET_LOW_WORD(lx, x);
1075     if (((ix - 0x3FF00000) | lx) == 0) /* asin(1)=+-pi/2 with inexact */
1076       return x * pio2_hi + x * pio2_lo;
1077     return (x - x) / (x - x);       /* asin(|x|>1) is NaN */
1078   } else if (ix < 0x3FE00000) {     /* |x|<0.5 */
1079     if (ix < 0x3E400000) {          /* if |x| < 2**-27 */
1080       if (huge + x > one) return x; /* return x with inexact if x!=0*/
1081     } else {
1082       t = x * x;
1083     }
1084     p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5)))));
1085     q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4)));
1086     w = p / q;
1087     return x + x * w;
1088   }
1089   /* 1> |x|>= 0.5 */
1090   w = one - fabs(x);
1091   t = w * 0.5;
1092   p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5)))));
1093   q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4)));
1094   s = sqrt(t);
1095   if (ix >= 0x3FEF3333) { /* if |x| > 0.975 */
1096     w = p / q;
1097     t = pio2_hi - (2.0 * (s + s * w) - pio2_lo);
1098   } else {
1099     w = s;
1100     SET_LOW_WORD(w, 0);
1101     c = (t - w * w) / (s + w);
1102     r = p / q;
1103     p = 2.0 * s * r - (pio2_lo - 2.0 * c);
1104     q = pio4_hi - 2.0 * w;
1105     t = pio4_hi - (p - q);
1106   }
1107   if (hx > 0)
1108     return t;
1109   else
1110     return -t;
1111 }
1112 /* asinh(x)
1113  * Method :
1114  *      Based on
1115  *              asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ]
1116  *      we have
1117  *      asinh(x) := x  if  1+x*x=1,
1118  *               := sign(x)*(log(x)+ln2)) for large |x|, else
1119  *               := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else
1120  *               := sign(x)*log1p(|x| + x^2/(1 + sqrt(1+x^2)))
1121  */
asinh(double x)1122 double asinh(double x) {
1123   static const double
1124       one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
1125       ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
1126       huge = 1.00000000000000000000e+300;
1127 
1128   double t, w;
1129   int32_t hx, ix;
1130   GET_HIGH_WORD(hx, x);
1131   ix = hx & 0x7FFFFFFF;
1132   if (ix >= 0x7FF00000) return x + x; /* x is inf or NaN */
1133   if (ix < 0x3E300000) {              /* |x|<2**-28 */
1134     if (huge + x > one) return x;     /* return x inexact except 0 */
1135   }
1136   if (ix > 0x41B00000) { /* |x| > 2**28 */
1137     w = log(fabs(x)) + ln2;
1138   } else if (ix > 0x40000000) { /* 2**28 > |x| > 2.0 */
1139     t = fabs(x);
1140     w = log(2.0 * t + one / (sqrt(x * x + one) + t));
1141   } else { /* 2.0 > |x| > 2**-28 */
1142     t = x * x;
1143     w = log1p(fabs(x) + t / (one + sqrt(one + t)));
1144   }
1145   if (hx > 0) {
1146     return w;
1147   } else {
1148     return -w;
1149   }
1150 }
1151 
1152 /* atan(x)
1153  * Method
1154  *   1. Reduce x to positive by atan(x) = -atan(-x).
1155  *   2. According to the integer k=4t+0.25 chopped, t=x, the argument
1156  *      is further reduced to one of the following intervals and the
1157  *      arctangent of t is evaluated by the corresponding formula:
1158  *
1159  *      [0,7/16]      atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
1160  *      [7/16,11/16]  atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) )
1161  *      [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) )
1162  *      [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) )
1163  *      [39/16,INF]   atan(x) = atan(INF) + atan( -1/t )
1164  *
1165  * Constants:
1166  * The hexadecimal values are the intended ones for the following
1167  * constants. The decimal values may be used, provided that the
1168  * compiler will convert from decimal to binary accurately enough
1169  * to produce the hexadecimal values shown.
1170  */
atan(double x)1171 double atan(double x) {
1172   static const double atanhi[] = {
1173       4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
1174       7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
1175       9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
1176       1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
1177   };
1178 
1179   static const double atanlo[] = {
1180       2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
1181       3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
1182       1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
1183       6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */
1184   };
1185 
1186   static const double aT[] = {
1187       3.33333333333329318027e-01,  /* 0x3FD55555, 0x5555550D */
1188       -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */
1189       1.42857142725034663711e-01,  /* 0x3FC24924, 0x920083FF */
1190       -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */
1191       9.09088713343650656196e-02,  /* 0x3FB745CD, 0xC54C206E */
1192       -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */
1193       6.66107313738753120669e-02,  /* 0x3FB10D66, 0xA0D03D51 */
1194       -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */
1195       4.97687799461593236017e-02,  /* 0x3FA97B4B, 0x24760DEB */
1196       -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */
1197       1.62858201153657823623e-02,  /* 0x3F90AD3A, 0xE322DA11 */
1198   };
1199 
1200   static const double one = 1.0, huge = 1.0e300;
1201 
1202   double w, s1, s2, z;
1203   int32_t ix, hx, id;
1204 
1205   GET_HIGH_WORD(hx, x);
1206   ix = hx & 0x7FFFFFFF;
1207   if (ix >= 0x44100000) { /* if |x| >= 2^66 */
1208     uint32_t low;
1209     GET_LOW_WORD(low, x);
1210     if (ix > 0x7FF00000 || (ix == 0x7FF00000 && (low != 0)))
1211       return x + x; /* NaN */
1212     if (hx > 0)
1213       return atanhi[3] + *(volatile double *)&atanlo[3];
1214     else
1215       return -atanhi[3] - *(volatile double *)&atanlo[3];
1216   }
1217   if (ix < 0x3FDC0000) {            /* |x| < 0.4375 */
1218     if (ix < 0x3E400000) {          /* |x| < 2^-27 */
1219       if (huge + x > one) return x; /* raise inexact */
1220     }
1221     id = -1;
1222   } else {
1223     x = fabs(x);
1224     if (ix < 0x3FF30000) {   /* |x| < 1.1875 */
1225       if (ix < 0x3FE60000) { /* 7/16 <=|x|<11/16 */
1226         id = 0;
1227         x = (2.0 * x - one) / (2.0 + x);
1228       } else { /* 11/16<=|x|< 19/16 */
1229         id = 1;
1230         x = (x - one) / (x + one);
1231       }
1232     } else {
1233       if (ix < 0x40038000) { /* |x| < 2.4375 */
1234         id = 2;
1235         x = (x - 1.5) / (one + 1.5 * x);
1236       } else { /* 2.4375 <= |x| < 2^66 */
1237         id = 3;
1238         x = -1.0 / x;
1239       }
1240     }
1241   }
1242   /* end of argument reduction */
1243   z = x * x;
1244   w = z * z;
1245   /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
1246   s1 = z * (aT[0] +
1247             w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
1248   s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
1249   if (id < 0) {
1250     return x - x * (s1 + s2);
1251   } else {
1252     z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
1253     return (hx < 0) ? -z : z;
1254   }
1255 }
1256 
1257 /* atan2(y,x)
1258  * Method :
1259  *  1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
1260  *  2. Reduce x to positive by (if x and y are unexceptional):
1261  *    ARG (x+iy) = arctan(y/x)       ... if x > 0,
1262  *    ARG (x+iy) = pi - arctan[y/(-x)]   ... if x < 0,
1263  *
1264  * Special cases:
1265  *
1266  *  ATAN2((anything), NaN ) is NaN;
1267  *  ATAN2(NAN , (anything) ) is NaN;
1268  *  ATAN2(+-0, +(anything but NaN)) is +-0  ;
1269  *  ATAN2(+-0, -(anything but NaN)) is +-pi ;
1270  *  ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
1271  *  ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
1272  *  ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
1273  *  ATAN2(+-INF,+INF ) is +-pi/4 ;
1274  *  ATAN2(+-INF,-INF ) is +-3pi/4;
1275  *  ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
1276  *
1277  * Constants:
1278  * The hexadecimal values are the intended ones for the following
1279  * constants. The decimal values may be used, provided that the
1280  * compiler will convert from decimal to binary accurately enough
1281  * to produce the hexadecimal values shown.
1282  */
atan2(double y,double x)1283 double atan2(double y, double x) {
1284   static volatile double tiny = 1.0e-300;
1285   static const double
1286       zero = 0.0,
1287       pi_o_4 = 7.8539816339744827900E-01, /* 0x3FE921FB, 0x54442D18 */
1288       pi_o_2 = 1.5707963267948965580E+00, /* 0x3FF921FB, 0x54442D18 */
1289       pi = 3.1415926535897931160E+00;     /* 0x400921FB, 0x54442D18 */
1290   static volatile double pi_lo =
1291       1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
1292 
1293   double z;
1294   int32_t k, m, hx, hy, ix, iy;
1295   uint32_t lx, ly;
1296 
1297   EXTRACT_WORDS(hx, lx, x);
1298   ix = hx & 0x7FFFFFFF;
1299   EXTRACT_WORDS(hy, ly, y);
1300   iy = hy & 0x7FFFFFFF;
1301   if (((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x7FF00000) ||
1302       ((iy | ((ly | -static_cast<int32_t>(ly)) >> 31)) > 0x7FF00000)) {
1303     return x + y; /* x or y is NaN */
1304   }
1305   if (((hx - 0x3FF00000) | lx) == 0) return atan(y); /* x=1.0 */
1306   m = ((hy >> 31) & 1) | ((hx >> 30) & 2);           /* 2*sign(x)+sign(y) */
1307 
1308   /* when y = 0 */
1309   if ((iy | ly) == 0) {
1310     switch (m) {
1311       case 0:
1312       case 1:
1313         return y; /* atan(+-0,+anything)=+-0 */
1314       case 2:
1315         return pi + tiny; /* atan(+0,-anything) = pi */
1316       case 3:
1317         return -pi - tiny; /* atan(-0,-anything) =-pi */
1318     }
1319   }
1320   /* when x = 0 */
1321   if ((ix | lx) == 0) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1322 
1323   /* when x is INF */
1324   if (ix == 0x7FF00000) {
1325     if (iy == 0x7FF00000) {
1326       switch (m) {
1327         case 0:
1328           return pi_o_4 + tiny; /* atan(+INF,+INF) */
1329         case 1:
1330           return -pi_o_4 - tiny; /* atan(-INF,+INF) */
1331         case 2:
1332           return 3.0 * pi_o_4 + tiny; /*atan(+INF,-INF)*/
1333         case 3:
1334           return -3.0 * pi_o_4 - tiny; /*atan(-INF,-INF)*/
1335       }
1336     } else {
1337       switch (m) {
1338         case 0:
1339           return zero; /* atan(+...,+INF) */
1340         case 1:
1341           return -zero; /* atan(-...,+INF) */
1342         case 2:
1343           return pi + tiny; /* atan(+...,-INF) */
1344         case 3:
1345           return -pi - tiny; /* atan(-...,-INF) */
1346       }
1347     }
1348   }
1349   /* when y is INF */
1350   if (iy == 0x7FF00000) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1351 
1352   /* compute y/x */
1353   k = (iy - ix) >> 20;
1354   if (k > 60) { /* |y/x| >  2**60 */
1355     z = pi_o_2 + 0.5 * pi_lo;
1356     m &= 1;
1357   } else if (hx < 0 && k < -60) {
1358     z = 0.0; /* 0 > |y|/x > -2**-60 */
1359   } else {
1360     z = atan(fabs(y / x)); /* safe to do y/x */
1361   }
1362   switch (m) {
1363     case 0:
1364       return z; /* atan(+,+) */
1365     case 1:
1366       return -z; /* atan(-,+) */
1367     case 2:
1368       return pi - (z - pi_lo); /* atan(+,-) */
1369     default:                   /* case 3 */
1370       return (z - pi_lo) - pi; /* atan(-,-) */
1371   }
1372 }
1373 
1374 /* cos(x)
1375  * Return cosine function of x.
1376  *
1377  * kernel function:
1378  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
1379  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
1380  *      __ieee754_rem_pio2      ... argument reduction routine
1381  *
1382  * Method.
1383  *      Let S,C and T denote the sin, cos and tan respectively on
1384  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
1385  *      in [-pi/4 , +pi/4], and let n = k mod 4.
1386  *      We have
1387  *
1388  *          n        sin(x)      cos(x)        tan(x)
1389  *     ----------------------------------------------------------
1390  *          0          S           C             T
1391  *          1          C          -S            -1/T
1392  *          2         -S          -C             T
1393  *          3         -C           S            -1/T
1394  *     ----------------------------------------------------------
1395  *
1396  * Special cases:
1397  *      Let trig be any of sin, cos, or tan.
1398  *      trig(+-INF)  is NaN, with signals;
1399  *      trig(NaN)    is that NaN;
1400  *
1401  * Accuracy:
1402  *      TRIG(x) returns trig(x) nearly rounded
1403  */
cos(double x)1404 double cos(double x) {
1405   double y[2], z = 0.0;
1406   int32_t n, ix;
1407 
1408   /* High word of x. */
1409   GET_HIGH_WORD(ix, x);
1410 
1411   /* |x| ~< pi/4 */
1412   ix &= 0x7FFFFFFF;
1413   if (ix <= 0x3FE921FB) {
1414     return __kernel_cos(x, z);
1415   } else if (ix >= 0x7FF00000) {
1416     /* cos(Inf or NaN) is NaN */
1417     return x - x;
1418   } else {
1419     /* argument reduction needed */
1420     n = __ieee754_rem_pio2(x, y);
1421     switch (n & 3) {
1422       case 0:
1423         return __kernel_cos(y[0], y[1]);
1424       case 1:
1425         return -__kernel_sin(y[0], y[1], 1);
1426       case 2:
1427         return -__kernel_cos(y[0], y[1]);
1428       default:
1429         return __kernel_sin(y[0], y[1], 1);
1430     }
1431   }
1432 }
1433 
1434 /* exp(x)
1435  * Returns the exponential of x.
1436  *
1437  * Method
1438  *   1. Argument reduction:
1439  *      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
1440  *      Given x, find r and integer k such that
1441  *
1442  *               x = k*ln2 + r,  |r| <= 0.5*ln2.
1443  *
1444  *      Here r will be represented as r = hi-lo for better
1445  *      accuracy.
1446  *
1447  *   2. Approximation of exp(r) by a special rational function on
1448  *      the interval [0,0.34658]:
1449  *      Write
1450  *          R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
1451  *      We use a special Remes algorithm on [0,0.34658] to generate
1452  *      a polynomial of degree 5 to approximate R. The maximum error
1453  *      of this polynomial approximation is bounded by 2**-59. In
1454  *      other words,
1455  *          R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
1456  *      (where z=r*r, and the values of P1 to P5 are listed below)
1457  *      and
1458  *          |                  5          |     -59
1459  *          | 2.0+P1*z+...+P5*z   -  R(z) | <= 2
1460  *          |                             |
1461  *      The computation of exp(r) thus becomes
1462  *                             2*r
1463  *              exp(r) = 1 + -------
1464  *                            R - r
1465  *                                 r*R1(r)
1466  *                     = 1 + r + ----------- (for better accuracy)
1467  *                                2 - R1(r)
1468  *      where
1469  *                               2       4             10
1470  *              R1(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
1471  *
1472  *   3. Scale back to obtain exp(x):
1473  *      From step 1, we have
1474  *         exp(x) = 2^k * exp(r)
1475  *
1476  * Special cases:
1477  *      exp(INF) is INF, exp(NaN) is NaN;
1478  *      exp(-INF) is 0, and
1479  *      for finite argument, only exp(0)=1 is exact.
1480  *
1481  * Accuracy:
1482  *      according to an error analysis, the error is always less than
1483  *      1 ulp (unit in the last place).
1484  *
1485  * Misc. info.
1486  *      For IEEE double
1487  *          if x >  7.09782712893383973096e+02 then exp(x) overflow
1488  *          if x < -7.45133219101941108420e+02 then exp(x) underflow
1489  *
1490  * Constants:
1491  * The hexadecimal values are the intended ones for the following
1492  * constants. The decimal values may be used, provided that the
1493  * compiler will convert from decimal to binary accurately enough
1494  * to produce the hexadecimal values shown.
1495  */
exp(double x)1496 double exp(double x) {
1497   static const double
1498       one = 1.0,
1499       halF[2] = {0.5, -0.5},
1500       o_threshold = 7.09782712893383973096e+02,  /* 0x40862E42, 0xFEFA39EF */
1501       u_threshold = -7.45133219101941108420e+02, /* 0xC0874910, 0xD52D3051 */
1502       ln2HI[2] = {6.93147180369123816490e-01,    /* 0x3FE62E42, 0xFEE00000 */
1503                   -6.93147180369123816490e-01},  /* 0xBFE62E42, 0xFEE00000 */
1504       ln2LO[2] = {1.90821492927058770002e-10,    /* 0x3DEA39EF, 0x35793C76 */
1505                   -1.90821492927058770002e-10},  /* 0xBDEA39EF, 0x35793C76 */
1506       invln2 = 1.44269504088896338700e+00,       /* 0x3FF71547, 0x652B82FE */
1507       P1 = 1.66666666666666019037e-01,           /* 0x3FC55555, 0x5555553E */
1508       P2 = -2.77777777770155933842e-03,          /* 0xBF66C16C, 0x16BEBD93 */
1509       P3 = 6.61375632143793436117e-05,           /* 0x3F11566A, 0xAF25DE2C */
1510       P4 = -1.65339022054652515390e-06,          /* 0xBEBBBD41, 0xC5D26BF1 */
1511       P5 = 4.13813679705723846039e-08,           /* 0x3E663769, 0x72BEA4D0 */
1512       E = 2.718281828459045;                     /* 0x4005BF0A, 0x8B145769 */
1513 
1514   static volatile double
1515       huge = 1.0e+300,
1516       twom1000 = 9.33263618503218878990e-302, /* 2**-1000=0x01700000,0*/
1517       two1023 = 8.988465674311579539e307;     /* 0x1p1023 */
1518 
1519   double y, hi = 0.0, lo = 0.0, c, t, twopk;
1520   int32_t k = 0, xsb;
1521   uint32_t hx;
1522 
1523   GET_HIGH_WORD(hx, x);
1524   xsb = (hx >> 31) & 1; /* sign bit of x */
1525   hx &= 0x7FFFFFFF;     /* high word of |x| */
1526 
1527   /* filter out non-finite argument */
1528   if (hx >= 0x40862E42) { /* if |x|>=709.78... */
1529     if (hx >= 0x7FF00000) {
1530       uint32_t lx;
1531       GET_LOW_WORD(lx, x);
1532       if (((hx & 0xFFFFF) | lx) != 0)
1533         return x + x; /* NaN */
1534       else
1535         return (xsb == 0) ? x : 0.0; /* exp(+-inf)={inf,0} */
1536     }
1537     if (x > o_threshold) return huge * huge;         /* overflow */
1538     if (x < u_threshold) return twom1000 * twom1000; /* underflow */
1539   }
1540 
1541   /* argument reduction */
1542   if (hx > 0x3FD62E42) {   /* if  |x| > 0.5 ln2 */
1543     if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
1544       /* TODO(rtoy): We special case exp(1) here to return the correct
1545        * value of E, as the computation below would get the last bit
1546        * wrong. We should probably fix the algorithm instead.
1547        */
1548       if (x == 1.0) return E;
1549       hi = x - ln2HI[xsb];
1550       lo = ln2LO[xsb];
1551       k = 1 - xsb - xsb;
1552     } else {
1553       k = static_cast<int>(invln2 * x + halF[xsb]);
1554       t = k;
1555       hi = x - t * ln2HI[0]; /* t*ln2HI is exact here */
1556       lo = t * ln2LO[0];
1557     }
1558     STRICT_ASSIGN(double, x, hi - lo);
1559   } else if (hx < 0x3E300000) {         /* when |x|<2**-28 */
1560     if (huge + x > one) return one + x; /* trigger inexact */
1561   } else {
1562     k = 0;
1563   }
1564 
1565   /* x is now in primary range */
1566   t = x * x;
1567   if (k >= -1021) {
1568     INSERT_WORDS(twopk, 0x3FF00000 + (k << 20), 0);
1569   } else {
1570     INSERT_WORDS(twopk, 0x3FF00000 + ((k + 1000) << 20), 0);
1571   }
1572   c = x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
1573   if (k == 0) {
1574     return one - ((x * c) / (c - 2.0) - x);
1575   } else {
1576     y = one - ((lo - (x * c) / (2.0 - c)) - hi);
1577   }
1578   if (k >= -1021) {
1579     if (k == 1024) return y * 2.0 * two1023;
1580     return y * twopk;
1581   } else {
1582     return y * twopk * twom1000;
1583   }
1584 }
1585 
1586 /*
1587  * Method :
1588  *    1.Reduced x to positive by atanh(-x) = -atanh(x)
1589  *    2.For x>=0.5
1590  *              1              2x                          x
1591  *  atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
1592  *              2             1 - x                      1 - x
1593  *
1594  *   For x<0.5
1595  *  atanh(x) = 0.5*log1p(2x+2x*x/(1-x))
1596  *
1597  * Special cases:
1598  *  atanh(x) is NaN if |x| > 1 with signal;
1599  *  atanh(NaN) is that NaN with no signal;
1600  *  atanh(+-1) is +-INF with signal.
1601  *
1602  */
atanh(double x)1603 double atanh(double x) {
1604   static const double one = 1.0, huge = 1e300;
1605   static const double zero = 0.0;
1606 
1607   double t;
1608   int32_t hx, ix;
1609   uint32_t lx;
1610   EXTRACT_WORDS(hx, lx, x);
1611   ix = hx & 0x7FFFFFFF;
1612   if ((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x3FF00000) /* |x|>1 */
1613     return (x - x) / (x - x);
1614   if (ix == 0x3FF00000) return x / zero;
1615   if (ix < 0x3E300000 && (huge + x) > zero) return x; /* x<2**-28 */
1616   SET_HIGH_WORD(x, ix);
1617   if (ix < 0x3FE00000) { /* x < 0.5 */
1618     t = x + x;
1619     t = 0.5 * log1p(t + t * x / (one - x));
1620   } else {
1621     t = 0.5 * log1p((x + x) / (one - x));
1622   }
1623   if (hx >= 0)
1624     return t;
1625   else
1626     return -t;
1627 }
1628 
1629 /* log(x)
1630  * Return the logrithm of x
1631  *
1632  * Method :
1633  *   1. Argument Reduction: find k and f such that
1634  *     x = 2^k * (1+f),
1635  *     where  sqrt(2)/2 < 1+f < sqrt(2) .
1636  *
1637  *   2. Approximation of log(1+f).
1638  *  Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1639  *     = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1640  *         = 2s + s*R
1641  *      We use a special Reme algorithm on [0,0.1716] to generate
1642  *  a polynomial of degree 14 to approximate R The maximum error
1643  *  of this polynomial approximation is bounded by 2**-58.45. In
1644  *  other words,
1645  *            2      4      6      8      10      12      14
1646  *      R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
1647  *    (the values of Lg1 to Lg7 are listed in the program)
1648  *  and
1649  *      |      2          14          |     -58.45
1650  *      | Lg1*s +...+Lg7*s    -  R(z) | <= 2
1651  *      |                             |
1652  *  Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1653  *  In order to guarantee error in log below 1ulp, we compute log
1654  *  by
1655  *    log(1+f) = f - s*(f - R)  (if f is not too large)
1656  *    log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1657  *
1658  *  3. Finally,  log(x) = k*ln2 + log(1+f).
1659  *          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1660  *     Here ln2 is split into two floating point number:
1661  *      ln2_hi + ln2_lo,
1662  *     where n*ln2_hi is always exact for |n| < 2000.
1663  *
1664  * Special cases:
1665  *  log(x) is NaN with signal if x < 0 (including -INF) ;
1666  *  log(+INF) is +INF; log(0) is -INF with signal;
1667  *  log(NaN) is that NaN with no signal.
1668  *
1669  * Accuracy:
1670  *  according to an error analysis, the error is always less than
1671  *  1 ulp (unit in the last place).
1672  *
1673  * Constants:
1674  * The hexadecimal values are the intended ones for the following
1675  * constants. The decimal values may be used, provided that the
1676  * compiler will convert from decimal to binary accurately enough
1677  * to produce the hexadecimal values shown.
1678  */
log(double x)1679 double log(double x) {
1680   static const double                      /* -- */
1681       ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1682       ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1683       two54 = 1.80143985094819840000e+16,  /* 43500000 00000000 */
1684       Lg1 = 6.666666666666735130e-01,      /* 3FE55555 55555593 */
1685       Lg2 = 3.999999999940941908e-01,      /* 3FD99999 9997FA04 */
1686       Lg3 = 2.857142874366239149e-01,      /* 3FD24924 94229359 */
1687       Lg4 = 2.222219843214978396e-01,      /* 3FCC71C5 1D8E78AF */
1688       Lg5 = 1.818357216161805012e-01,      /* 3FC74664 96CB03DE */
1689       Lg6 = 1.531383769920937332e-01,      /* 3FC39A09 D078C69F */
1690       Lg7 = 1.479819860511658591e-01;      /* 3FC2F112 DF3E5244 */
1691 
1692   static const double zero = 0.0;
1693   static volatile double vzero = 0.0;
1694 
1695   double hfsq, f, s, z, R, w, t1, t2, dk;
1696   int32_t k, hx, i, j;
1697   uint32_t lx;
1698 
1699   EXTRACT_WORDS(hx, lx, x);
1700 
1701   k = 0;
1702   if (hx < 0x00100000) { /* x < 2**-1022  */
1703     if (((hx & 0x7FFFFFFF) | lx) == 0)
1704       return -two54 / vzero;           /* log(+-0)=-inf */
1705     if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
1706     k -= 54;
1707     x *= two54; /* subnormal number, scale up x */
1708     GET_HIGH_WORD(hx, x);
1709   }
1710   if (hx >= 0x7FF00000) return x + x;
1711   k += (hx >> 20) - 1023;
1712   hx &= 0x000FFFFF;
1713   i = (hx + 0x95F64) & 0x100000;
1714   SET_HIGH_WORD(x, hx | (i ^ 0x3FF00000)); /* normalize x or x/2 */
1715   k += (i >> 20);
1716   f = x - 1.0;
1717   if ((0x000FFFFF & (2 + hx)) < 3) { /* -2**-20 <= f < 2**-20 */
1718     if (f == zero) {
1719       if (k == 0) {
1720         return zero;
1721       } else {
1722         dk = static_cast<double>(k);
1723         return dk * ln2_hi + dk * ln2_lo;
1724       }
1725     }
1726     R = f * f * (0.5 - 0.33333333333333333 * f);
1727     if (k == 0) {
1728       return f - R;
1729     } else {
1730       dk = static_cast<double>(k);
1731       return dk * ln2_hi - ((R - dk * ln2_lo) - f);
1732     }
1733   }
1734   s = f / (2.0 + f);
1735   dk = static_cast<double>(k);
1736   z = s * s;
1737   i = hx - 0x6147A;
1738   w = z * z;
1739   j = 0x6B851 - hx;
1740   t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1741   t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1742   i |= j;
1743   R = t2 + t1;
1744   if (i > 0) {
1745     hfsq = 0.5 * f * f;
1746     if (k == 0)
1747       return f - (hfsq - s * (hfsq + R));
1748     else
1749       return dk * ln2_hi - ((hfsq - (s * (hfsq + R) + dk * ln2_lo)) - f);
1750   } else {
1751     if (k == 0)
1752       return f - s * (f - R);
1753     else
1754       return dk * ln2_hi - ((s * (f - R) - dk * ln2_lo) - f);
1755   }
1756 }
1757 
1758 /* double log1p(double x)
1759  *
1760  * Method :
1761  *   1. Argument Reduction: find k and f such that
1762  *      1+x = 2^k * (1+f),
1763  *     where  sqrt(2)/2 < 1+f < sqrt(2) .
1764  *
1765  *      Note. If k=0, then f=x is exact. However, if k!=0, then f
1766  *  may not be representable exactly. In that case, a correction
1767  *  term is need. Let u=1+x rounded. Let c = (1+x)-u, then
1768  *  log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
1769  *  and add back the correction term c/u.
1770  *  (Note: when x > 2**53, one can simply return log(x))
1771  *
1772  *   2. Approximation of log1p(f).
1773  *  Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1774  *     = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1775  *         = 2s + s*R
1776  *      We use a special Reme algorithm on [0,0.1716] to generate
1777  *  a polynomial of degree 14 to approximate R The maximum error
1778  *  of this polynomial approximation is bounded by 2**-58.45. In
1779  *  other words,
1780  *            2      4      6      8      10      12      14
1781  *      R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s  +Lp6*s  +Lp7*s
1782  *    (the values of Lp1 to Lp7 are listed in the program)
1783  *  and
1784  *      |      2          14          |     -58.45
1785  *      | Lp1*s +...+Lp7*s    -  R(z) | <= 2
1786  *      |                             |
1787  *  Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1788  *  In order to guarantee error in log below 1ulp, we compute log
1789  *  by
1790  *    log1p(f) = f - (hfsq - s*(hfsq+R)).
1791  *
1792  *  3. Finally, log1p(x) = k*ln2 + log1p(f).
1793  *           = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1794  *     Here ln2 is split into two floating point number:
1795  *      ln2_hi + ln2_lo,
1796  *     where n*ln2_hi is always exact for |n| < 2000.
1797  *
1798  * Special cases:
1799  *  log1p(x) is NaN with signal if x < -1 (including -INF) ;
1800  *  log1p(+INF) is +INF; log1p(-1) is -INF with signal;
1801  *  log1p(NaN) is that NaN with no signal.
1802  *
1803  * Accuracy:
1804  *  according to an error analysis, the error is always less than
1805  *  1 ulp (unit in the last place).
1806  *
1807  * Constants:
1808  * The hexadecimal values are the intended ones for the following
1809  * constants. The decimal values may be used, provided that the
1810  * compiler will convert from decimal to binary accurately enough
1811  * to produce the hexadecimal values shown.
1812  *
1813  * Note: Assuming log() return accurate answer, the following
1814  *   algorithm can be used to compute log1p(x) to within a few ULP:
1815  *
1816  *    u = 1+x;
1817  *    if(u==1.0) return x ; else
1818  *         return log(u)*(x/(u-1.0));
1819  *
1820  *   See HP-15C Advanced Functions Handbook, p.193.
1821  */
log1p(double x)1822 double log1p(double x) {
1823   static const double                      /* -- */
1824       ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1825       ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1826       two54 = 1.80143985094819840000e+16,  /* 43500000 00000000 */
1827       Lp1 = 6.666666666666735130e-01,      /* 3FE55555 55555593 */
1828       Lp2 = 3.999999999940941908e-01,      /* 3FD99999 9997FA04 */
1829       Lp3 = 2.857142874366239149e-01,      /* 3FD24924 94229359 */
1830       Lp4 = 2.222219843214978396e-01,      /* 3FCC71C5 1D8E78AF */
1831       Lp5 = 1.818357216161805012e-01,      /* 3FC74664 96CB03DE */
1832       Lp6 = 1.531383769920937332e-01,      /* 3FC39A09 D078C69F */
1833       Lp7 = 1.479819860511658591e-01;      /* 3FC2F112 DF3E5244 */
1834 
1835   static const double zero = 0.0;
1836   static volatile double vzero = 0.0;
1837 
1838   double hfsq, f, c, s, z, R, u;
1839   int32_t k, hx, hu, ax;
1840 
1841   GET_HIGH_WORD(hx, x);
1842   ax = hx & 0x7FFFFFFF;
1843 
1844   k = 1;
1845   if (hx < 0x3FDA827A) {    /* 1+x < sqrt(2)+ */
1846     if (ax >= 0x3FF00000) { /* x <= -1.0 */
1847       if (x == -1.0)
1848         return -two54 / vzero; /* log1p(-1)=+inf */
1849       else
1850         return (x - x) / (x - x); /* log1p(x<-1)=NaN */
1851     }
1852     if (ax < 0x3E200000) {    /* |x| < 2**-29 */
1853       if (two54 + x > zero    /* raise inexact */
1854           && ax < 0x3C900000) /* |x| < 2**-54 */
1855         return x;
1856       else
1857         return x - x * x * 0.5;
1858     }
1859     if (hx > 0 || hx <= static_cast<int32_t>(0xBFD2BEC4)) {
1860       k = 0;
1861       f = x;
1862       hu = 1;
1863     } /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
1864   }
1865   if (hx >= 0x7FF00000) return x + x;
1866   if (k != 0) {
1867     if (hx < 0x43400000) {
1868       STRICT_ASSIGN(double, u, 1.0 + x);
1869       GET_HIGH_WORD(hu, u);
1870       k = (hu >> 20) - 1023;
1871       c = (k > 0) ? 1.0 - (u - x) : x - (u - 1.0); /* correction term */
1872       c /= u;
1873     } else {
1874       u = x;
1875       GET_HIGH_WORD(hu, u);
1876       k = (hu >> 20) - 1023;
1877       c = 0;
1878     }
1879     hu &= 0x000FFFFF;
1880     /*
1881      * The approximation to sqrt(2) used in thresholds is not
1882      * critical.  However, the ones used above must give less
1883      * strict bounds than the one here so that the k==0 case is
1884      * never reached from here, since here we have committed to
1885      * using the correction term but don't use it if k==0.
1886      */
1887     if (hu < 0x6A09E) {                  /* u ~< sqrt(2) */
1888       SET_HIGH_WORD(u, hu | 0x3FF00000); /* normalize u */
1889     } else {
1890       k += 1;
1891       SET_HIGH_WORD(u, hu | 0x3FE00000); /* normalize u/2 */
1892       hu = (0x00100000 - hu) >> 2;
1893     }
1894     f = u - 1.0;
1895   }
1896   hfsq = 0.5 * f * f;
1897   if (hu == 0) { /* |f| < 2**-20 */
1898     if (f == zero) {
1899       if (k == 0) {
1900         return zero;
1901       } else {
1902         c += k * ln2_lo;
1903         return k * ln2_hi + c;
1904       }
1905     }
1906     R = hfsq * (1.0 - 0.66666666666666666 * f);
1907     if (k == 0)
1908       return f - R;
1909     else
1910       return k * ln2_hi - ((R - (k * ln2_lo + c)) - f);
1911   }
1912   s = f / (2.0 + f);
1913   z = s * s;
1914   R = z * (Lp1 +
1915            z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7))))));
1916   if (k == 0)
1917     return f - (hfsq - s * (hfsq + R));
1918   else
1919     return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);
1920 }
1921 
1922 /*
1923  * k_log1p(f):
1924  * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)].
1925  *
1926  * The following describes the overall strategy for computing
1927  * logarithms in base e.  The argument reduction and adding the final
1928  * term of the polynomial are done by the caller for increased accuracy
1929  * when different bases are used.
1930  *
1931  * Method :
1932  *   1. Argument Reduction: find k and f such that
1933  *         x = 2^k * (1+f),
1934  *         where  sqrt(2)/2 < 1+f < sqrt(2) .
1935  *
1936  *   2. Approximation of log(1+f).
1937  *      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1938  *            = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1939  *            = 2s + s*R
1940  *      We use a special Reme algorithm on [0,0.1716] to generate
1941  *      a polynomial of degree 14 to approximate R The maximum error
1942  *      of this polynomial approximation is bounded by 2**-58.45. In
1943  *      other words,
1944  *          2      4      6      8      10      12      14
1945  *          R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
1946  *      (the values of Lg1 to Lg7 are listed in the program)
1947  *      and
1948  *          |      2          14          |     -58.45
1949  *          | Lg1*s +...+Lg7*s    -  R(z) | <= 2
1950  *          |                             |
1951  *      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1952  *      In order to guarantee error in log below 1ulp, we compute log
1953  *      by
1954  *          log(1+f) = f - s*(f - R)            (if f is not too large)
1955  *          log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1956  *
1957  *   3. Finally,  log(x) = k*ln2 + log(1+f).
1958  *          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1959  *      Here ln2 is split into two floating point number:
1960  *          ln2_hi + ln2_lo,
1961  *      where n*ln2_hi is always exact for |n| < 2000.
1962  *
1963  * Special cases:
1964  *      log(x) is NaN with signal if x < 0 (including -INF) ;
1965  *      log(+INF) is +INF; log(0) is -INF with signal;
1966  *      log(NaN) is that NaN with no signal.
1967  *
1968  * Accuracy:
1969  *      according to an error analysis, the error is always less than
1970  *      1 ulp (unit in the last place).
1971  *
1972  * Constants:
1973  * The hexadecimal values are the intended ones for the following
1974  * constants. The decimal values may be used, provided that the
1975  * compiler will convert from decimal to binary accurately enough
1976  * to produce the hexadecimal values shown.
1977  */
1978 
1979 static const double Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
1980     Lg2 = 3.999999999940941908e-01,                 /* 3FD99999 9997FA04 */
1981     Lg3 = 2.857142874366239149e-01,                 /* 3FD24924 94229359 */
1982     Lg4 = 2.222219843214978396e-01,                 /* 3FCC71C5 1D8E78AF */
1983     Lg5 = 1.818357216161805012e-01,                 /* 3FC74664 96CB03DE */
1984     Lg6 = 1.531383769920937332e-01,                 /* 3FC39A09 D078C69F */
1985     Lg7 = 1.479819860511658591e-01;                 /* 3FC2F112 DF3E5244 */
1986 
1987 /*
1988  * We always inline k_log1p(), since doing so produces a
1989  * substantial performance improvement (~40% on amd64).
1990  */
k_log1p(double f)1991 static inline double k_log1p(double f) {
1992   double hfsq, s, z, R, w, t1, t2;
1993 
1994   s = f / (2.0 + f);
1995   z = s * s;
1996   w = z * z;
1997   t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1998   t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1999   R = t2 + t1;
2000   hfsq = 0.5 * f * f;
2001   return s * (hfsq + R);
2002 }
2003 
2004 /*
2005  * Return the base 2 logarithm of x.  See e_log.c and k_log.h for most
2006  * comments.
2007  *
2008  * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,
2009  * then does the combining and scaling steps
2010  *    log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k
2011  * in not-quite-routine extra precision.
2012  */
log2(double x)2013 double log2(double x) {
2014   static const double
2015       two54 = 1.80143985094819840000e+16,   /* 0x43500000, 0x00000000 */
2016       ivln2hi = 1.44269504072144627571e+00, /* 0x3FF71547, 0x65200000 */
2017       ivln2lo = 1.67517131648865118353e-10; /* 0x3DE705FC, 0x2EEFA200 */
2018 
2019   static const double zero = 0.0;
2020   static volatile double vzero = 0.0;
2021 
2022   double f, hfsq, hi, lo, r, val_hi, val_lo, w, y;
2023   int32_t i, k, hx;
2024   uint32_t lx;
2025 
2026   EXTRACT_WORDS(hx, lx, x);
2027 
2028   k = 0;
2029   if (hx < 0x00100000) { /* x < 2**-1022  */
2030     if (((hx & 0x7FFFFFFF) | lx) == 0)
2031       return -two54 / vzero;           /* log(+-0)=-inf */
2032     if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
2033     k -= 54;
2034     x *= two54; /* subnormal number, scale up x */
2035     GET_HIGH_WORD(hx, x);
2036   }
2037   if (hx >= 0x7FF00000) return x + x;
2038   if (hx == 0x3FF00000 && lx == 0) return zero; /* log(1) = +0 */
2039   k += (hx >> 20) - 1023;
2040   hx &= 0x000FFFFF;
2041   i = (hx + 0x95F64) & 0x100000;
2042   SET_HIGH_WORD(x, hx | (i ^ 0x3FF00000)); /* normalize x or x/2 */
2043   k += (i >> 20);
2044   y = static_cast<double>(k);
2045   f = x - 1.0;
2046   hfsq = 0.5 * f * f;
2047   r = k_log1p(f);
2048 
2049   /*
2050    * f-hfsq must (for args near 1) be evaluated in extra precision
2051    * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).
2052    * This is fairly efficient since f-hfsq only depends on f, so can
2053    * be evaluated in parallel with R.  Not combining hfsq with R also
2054    * keeps R small (though not as small as a true `lo' term would be),
2055    * so that extra precision is not needed for terms involving R.
2056    *
2057    * Compiler bugs involving extra precision used to break Dekker's
2058    * theorem for spitting f-hfsq as hi+lo, unless double_t was used
2059    * or the multi-precision calculations were avoided when double_t
2060    * has extra precision.  These problems are now automatically
2061    * avoided as a side effect of the optimization of combining the
2062    * Dekker splitting step with the clear-low-bits step.
2063    *
2064    * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra
2065    * precision to avoid a very large cancellation when x is very near
2066    * these values.  Unlike the above cancellations, this problem is
2067    * specific to base 2.  It is strange that adding +-1 is so much
2068    * harder than adding +-ln2 or +-log10_2.
2069    *
2070    * This uses Dekker's theorem to normalize y+val_hi, so the
2071    * compiler bugs are back in some configurations, sigh.  And I
2072    * don't want to used double_t to avoid them, since that gives a
2073    * pessimization and the support for avoiding the pessimization
2074    * is not yet available.
2075    *
2076    * The multi-precision calculations for the multiplications are
2077    * routine.
2078    */
2079   hi = f - hfsq;
2080   SET_LOW_WORD(hi, 0);
2081   lo = (f - hi) - hfsq + r;
2082   val_hi = hi * ivln2hi;
2083   val_lo = (lo + hi) * ivln2lo + lo * ivln2hi;
2084 
2085   /* spadd(val_hi, val_lo, y), except for not using double_t: */
2086   w = y + val_hi;
2087   val_lo += (y - w) + val_hi;
2088   val_hi = w;
2089 
2090   return val_lo + val_hi;
2091 }
2092 
2093 /*
2094  * Return the base 10 logarithm of x
2095  *
2096  * Method :
2097  *      Let log10_2hi = leading 40 bits of log10(2) and
2098  *          log10_2lo = log10(2) - log10_2hi,
2099  *          ivln10   = 1/log(10) rounded.
2100  *      Then
2101  *              n = ilogb(x),
2102  *              if(n<0)  n = n+1;
2103  *              x = scalbn(x,-n);
2104  *              log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x))
2105  *
2106  *  Note 1:
2107  *     To guarantee log10(10**n)=n, where 10**n is normal, the rounding
2108  *     mode must set to Round-to-Nearest.
2109  *  Note 2:
2110  *      [1/log(10)] rounded to 53 bits has error .198 ulps;
2111  *      log10 is monotonic at all binary break points.
2112  *
2113  *  Special cases:
2114  *      log10(x) is NaN if x < 0;
2115  *      log10(+INF) is +INF; log10(0) is -INF;
2116  *      log10(NaN) is that NaN;
2117  *      log10(10**N) = N  for N=0,1,...,22.
2118  */
log10(double x)2119 double log10(double x) {
2120   static const double
2121       two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
2122       ivln10 = 4.34294481903251816668e-01,
2123       log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
2124       log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
2125 
2126   static const double zero = 0.0;
2127   static volatile double vzero = 0.0;
2128 
2129   double y;
2130   int32_t i, k, hx;
2131   uint32_t lx;
2132 
2133   EXTRACT_WORDS(hx, lx, x);
2134 
2135   k = 0;
2136   if (hx < 0x00100000) { /* x < 2**-1022  */
2137     if (((hx & 0x7FFFFFFF) | lx) == 0)
2138       return -two54 / vzero;           /* log(+-0)=-inf */
2139     if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
2140     k -= 54;
2141     x *= two54; /* subnormal number, scale up x */
2142     GET_HIGH_WORD(hx, x);
2143     GET_LOW_WORD(lx, x);
2144   }
2145   if (hx >= 0x7FF00000) return x + x;
2146   if (hx == 0x3FF00000 && lx == 0) return zero; /* log(1) = +0 */
2147   k += (hx >> 20) - 1023;
2148 
2149   i = (k & 0x80000000) >> 31;
2150   hx = (hx & 0x000FFFFF) | ((0x3FF - i) << 20);
2151   y = k + i;
2152   SET_HIGH_WORD(x, hx);
2153   SET_LOW_WORD(x, lx);
2154 
2155   double z = y * log10_2lo + ivln10 * log(x);
2156   return z + y * log10_2hi;
2157 }
2158 
2159 /* expm1(x)
2160  * Returns exp(x)-1, the exponential of x minus 1.
2161  *
2162  * Method
2163  *   1. Argument reduction:
2164  *  Given x, find r and integer k such that
2165  *
2166  *               x = k*ln2 + r,  |r| <= 0.5*ln2 ~ 0.34658
2167  *
2168  *      Here a correction term c will be computed to compensate
2169  *  the error in r when rounded to a floating-point number.
2170  *
2171  *   2. Approximating expm1(r) by a special rational function on
2172  *  the interval [0,0.34658]:
2173  *  Since
2174  *      r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ...
2175  *  we define R1(r*r) by
2176  *      r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r)
2177  *  That is,
2178  *      R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
2179  *         = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
2180  *         = 1 - r^2/60 + r^4/2520 - r^6/100800 + ...
2181  *      We use a special Reme algorithm on [0,0.347] to generate
2182  *   a polynomial of degree 5 in r*r to approximate R1. The
2183  *  maximum error of this polynomial approximation is bounded
2184  *  by 2**-61. In other words,
2185  *      R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
2186  *  where   Q1  =  -1.6666666666666567384E-2,
2187  *     Q2  =   3.9682539681370365873E-4,
2188  *     Q3  =  -9.9206344733435987357E-6,
2189  *     Q4  =   2.5051361420808517002E-7,
2190  *     Q5  =  -6.2843505682382617102E-9;
2191  *    z   =  r*r,
2192  *  with error bounded by
2193  *      |                  5           |     -61
2194  *      | 1.0+Q1*z+...+Q5*z   -  R1(z) | <= 2
2195  *      |                              |
2196  *
2197  *  expm1(r) = exp(r)-1 is then computed by the following
2198  *   specific way which minimize the accumulation rounding error:
2199  *             2     3
2200  *            r     r    [ 3 - (R1 + R1*r/2)  ]
2201  *        expm1(r) = r + --- + --- * [--------------------]
2202  *                  2     2    [ 6 - r*(3 - R1*r/2) ]
2203  *
2204  *  To compensate the error in the argument reduction, we use
2205  *    expm1(r+c) = expm1(r) + c + expm1(r)*c
2206  *         ~ expm1(r) + c + r*c
2207  *  Thus c+r*c will be added in as the correction terms for
2208  *  expm1(r+c). Now rearrange the term to avoid optimization
2209  *   screw up:
2210  *            (      2                                    2 )
2211  *            ({  ( r    [ R1 -  (3 - R1*r/2) ]  )  }    r  )
2212  *   expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
2213  *                  ({  ( 2    [ 6 - r*(3 - R1*r/2) ]  )  }    2  )
2214  *                      (                                             )
2215  *
2216  *       = r - E
2217  *   3. Scale back to obtain expm1(x):
2218  *  From step 1, we have
2219  *     expm1(x) = either 2^k*[expm1(r)+1] - 1
2220  *        = or     2^k*[expm1(r) + (1-2^-k)]
2221  *   4. Implementation notes:
2222  *  (A). To save one multiplication, we scale the coefficient Qi
2223  *       to Qi*2^i, and replace z by (x^2)/2.
2224  *  (B). To achieve maximum accuracy, we compute expm1(x) by
2225  *    (i)   if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
2226  *    (ii)  if k=0, return r-E
2227  *    (iii) if k=-1, return 0.5*(r-E)-0.5
2228  *        (iv)  if k=1 if r < -0.25, return 2*((r+0.5)- E)
2229  *                  else       return  1.0+2.0*(r-E);
2230  *    (v)   if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1)
2231  *    (vi)  if k <= 20, return 2^k((1-2^-k)-(E-r)), else
2232  *    (vii) return 2^k(1-((E+2^-k)-r))
2233  *
2234  * Special cases:
2235  *  expm1(INF) is INF, expm1(NaN) is NaN;
2236  *  expm1(-INF) is -1, and
2237  *  for finite argument, only expm1(0)=0 is exact.
2238  *
2239  * Accuracy:
2240  *  according to an error analysis, the error is always less than
2241  *  1 ulp (unit in the last place).
2242  *
2243  * Misc. info.
2244  *  For IEEE double
2245  *      if x >  7.09782712893383973096e+02 then expm1(x) overflow
2246  *
2247  * Constants:
2248  * The hexadecimal values are the intended ones for the following
2249  * constants. The decimal values may be used, provided that the
2250  * compiler will convert from decimal to binary accurately enough
2251  * to produce the hexadecimal values shown.
2252  */
expm1(double x)2253 double expm1(double x) {
2254   static const double
2255       one = 1.0,
2256       tiny = 1.0e-300,
2257       o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
2258       ln2_hi = 6.93147180369123816490e-01,      /* 0x3FE62E42, 0xFEE00000 */
2259       ln2_lo = 1.90821492927058770002e-10,      /* 0x3DEA39EF, 0x35793C76 */
2260       invln2 = 1.44269504088896338700e+00,      /* 0x3FF71547, 0x652B82FE */
2261       /* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs =
2262          x*x/2: */
2263       Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */
2264       Q2 = 1.58730158725481460165e-03,  /* 3F5A01A0 19FE5585 */
2265       Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
2266       Q4 = 4.00821782732936239552e-06,  /* 3ED0CFCA 86E65239 */
2267       Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
2268 
2269   static volatile double huge = 1.0e+300;
2270 
2271   double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
2272   int32_t k, xsb;
2273   uint32_t hx;
2274 
2275   GET_HIGH_WORD(hx, x);
2276   xsb = hx & 0x80000000; /* sign bit of x */
2277   hx &= 0x7FFFFFFF;      /* high word of |x| */
2278 
2279   /* filter out huge and non-finite argument */
2280   if (hx >= 0x4043687A) {   /* if |x|>=56*ln2 */
2281     if (hx >= 0x40862E42) { /* if |x|>=709.78... */
2282       if (hx >= 0x7FF00000) {
2283         uint32_t low;
2284         GET_LOW_WORD(low, x);
2285         if (((hx & 0xFFFFF) | low) != 0)
2286           return x + x; /* NaN */
2287         else
2288           return (xsb == 0) ? x : -1.0; /* exp(+-inf)={inf,-1} */
2289       }
2290       if (x > o_threshold) return huge * huge; /* overflow */
2291     }
2292     if (xsb != 0) {        /* x < -56*ln2, return -1.0 with inexact */
2293       if (x + tiny < 0.0)  /* raise inexact */
2294         return tiny - one; /* return -1 */
2295     }
2296   }
2297 
2298   /* argument reduction */
2299   if (hx > 0x3FD62E42) {   /* if  |x| > 0.5 ln2 */
2300     if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
2301       if (xsb == 0) {
2302         hi = x - ln2_hi;
2303         lo = ln2_lo;
2304         k = 1;
2305       } else {
2306         hi = x + ln2_hi;
2307         lo = -ln2_lo;
2308         k = -1;
2309       }
2310     } else {
2311       k = invln2 * x + ((xsb == 0) ? 0.5 : -0.5);
2312       t = k;
2313       hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
2314       lo = t * ln2_lo;
2315     }
2316     STRICT_ASSIGN(double, x, hi - lo);
2317     c = (hi - x) - lo;
2318   } else if (hx < 0x3C900000) { /* when |x|<2**-54, return x */
2319     t = huge + x;               /* return x with inexact flags when x!=0 */
2320     return x - (t - (huge + x));
2321   } else {
2322     k = 0;
2323   }
2324 
2325   /* x is now in primary range */
2326   hfx = 0.5 * x;
2327   hxs = x * hfx;
2328   r1 = one + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
2329   t = 3.0 - r1 * hfx;
2330   e = hxs * ((r1 - t) / (6.0 - x * t));
2331   if (k == 0) {
2332     return x - (x * e - hxs); /* c is 0 */
2333   } else {
2334     INSERT_WORDS(twopk, 0x3FF00000 + (k << 20), 0); /* 2^k */
2335     e = (x * (e - c) - c);
2336     e -= hxs;
2337     if (k == -1) return 0.5 * (x - e) - 0.5;
2338     if (k == 1) {
2339       if (x < -0.25)
2340         return -2.0 * (e - (x + 0.5));
2341       else
2342         return one + 2.0 * (x - e);
2343     }
2344     if (k <= -2 || k > 56) { /* suffice to return exp(x)-1 */
2345       y = one - (e - x);
2346       // TODO(mvstanton): is this replacement for the hex float
2347       // sufficient?
2348       // if (k == 1024) y = y*2.0*0x1p1023;
2349       if (k == 1024)
2350         y = y * 2.0 * 8.98846567431158e+307;
2351       else
2352         y = y * twopk;
2353       return y - one;
2354     }
2355     t = one;
2356     if (k < 20) {
2357       SET_HIGH_WORD(t, 0x3FF00000 - (0x200000 >> k)); /* t=1-2^-k */
2358       y = t - (e - x);
2359       y = y * twopk;
2360     } else {
2361       SET_HIGH_WORD(t, ((0x3FF - k) << 20)); /* 2^-k */
2362       y = x - (e + t);
2363       y += one;
2364       y = y * twopk;
2365     }
2366   }
2367   return y;
2368 }
2369 
cbrt(double x)2370 double cbrt(double x) {
2371   static const uint32_t
2372       B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
2373       B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
2374 
2375   /* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
2376   static const double P0 = 1.87595182427177009643, /* 0x3FFE03E6, 0x0F61E692 */
2377       P1 = -1.88497979543377169875,                /* 0xBFFE28E0, 0x92F02420 */
2378       P2 = 1.621429720105354466140,                /* 0x3FF9F160, 0x4A49D6C2 */
2379       P3 = -0.758397934778766047437,               /* 0xBFE844CB, 0xBEE751D9 */
2380       P4 = 0.145996192886612446982;                /* 0x3FC2B000, 0xD4E4EDD7 */
2381 
2382   int32_t hx;
2383   union {
2384     double value;
2385     uint64_t bits;
2386   } u;
2387   double r, s, t = 0.0, w;
2388   uint32_t sign;
2389   uint32_t high, low;
2390 
2391   EXTRACT_WORDS(hx, low, x);
2392   sign = hx & 0x80000000; /* sign= sign(x) */
2393   hx ^= sign;
2394   if (hx >= 0x7FF00000) return (x + x); /* cbrt(NaN,INF) is itself */
2395 
2396   /*
2397    * Rough cbrt to 5 bits:
2398    *    cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
2399    * where e is integral and >= 0, m is real and in [0, 1), and "/" and
2400    * "%" are integer division and modulus with rounding towards minus
2401    * infinity.  The RHS is always >= the LHS and has a maximum relative
2402    * error of about 1 in 16.  Adding a bias of -0.03306235651 to the
2403    * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
2404    * floating point representation, for finite positive normal values,
2405    * ordinary integer division of the value in bits magically gives
2406    * almost exactly the RHS of the above provided we first subtract the
2407    * exponent bias (1023 for doubles) and later add it back.  We do the
2408    * subtraction virtually to keep e >= 0 so that ordinary integer
2409    * division rounds towards minus infinity; this is also efficient.
2410    */
2411   if (hx < 0x00100000) {             /* zero or subnormal? */
2412     if ((hx | low) == 0) return (x); /* cbrt(0) is itself */
2413     SET_HIGH_WORD(t, 0x43500000);    /* set t= 2**54 */
2414     t *= x;
2415     GET_HIGH_WORD(high, t);
2416     INSERT_WORDS(t, sign | ((high & 0x7FFFFFFF) / 3 + B2), 0);
2417   } else {
2418     INSERT_WORDS(t, sign | (hx / 3 + B1), 0);
2419   }
2420 
2421   /*
2422    * New cbrt to 23 bits:
2423    *    cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
2424    * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
2425    * to within 2**-23.5 when |r - 1| < 1/10.  The rough approximation
2426    * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
2427    * gives us bounds for r = t**3/x.
2428    *
2429    * Try to optimize for parallel evaluation as in k_tanf.c.
2430    */
2431   r = (t * t) * (t / x);
2432   t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
2433 
2434   /*
2435    * Round t away from zero to 23 bits (sloppily except for ensuring that
2436    * the result is larger in magnitude than cbrt(x) but not much more than
2437    * 2 23-bit ulps larger).  With rounding towards zero, the error bound
2438    * would be ~5/6 instead of ~4/6.  With a maximum error of 2 23-bit ulps
2439    * in the rounded t, the infinite-precision error in the Newton
2440    * approximation barely affects third digit in the final error
2441    * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
2442    * before the final error is larger than 0.667 ulps.
2443    */
2444   u.value = t;
2445   u.bits = (u.bits + 0x80000000) & 0xFFFFFFFFC0000000ULL;
2446   t = u.value;
2447 
2448   /* one step Newton iteration to 53 bits with error < 0.667 ulps */
2449   s = t * t;             /* t*t is exact */
2450   r = x / s;             /* error <= 0.5 ulps; |r| < |t| */
2451   w = t + t;             /* t+t is exact */
2452   r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */
2453   t = t + t * r;         /* error <= 0.5 + 0.5/3 + epsilon */
2454 
2455   return (t);
2456 }
2457 
2458 /* sin(x)
2459  * Return sine function of x.
2460  *
2461  * kernel function:
2462  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
2463  *      __kernel_cos            ... cose function on [-pi/4,pi/4]
2464  *      __ieee754_rem_pio2      ... argument reduction routine
2465  *
2466  * Method.
2467  *      Let S,C and T denote the sin, cos and tan respectively on
2468  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2469  *      in [-pi/4 , +pi/4], and let n = k mod 4.
2470  *      We have
2471  *
2472  *          n        sin(x)      cos(x)        tan(x)
2473  *     ----------------------------------------------------------
2474  *          0          S           C             T
2475  *          1          C          -S            -1/T
2476  *          2         -S          -C             T
2477  *          3         -C           S            -1/T
2478  *     ----------------------------------------------------------
2479  *
2480  * Special cases:
2481  *      Let trig be any of sin, cos, or tan.
2482  *      trig(+-INF)  is NaN, with signals;
2483  *      trig(NaN)    is that NaN;
2484  *
2485  * Accuracy:
2486  *      TRIG(x) returns trig(x) nearly rounded
2487  */
sin(double x)2488 double sin(double x) {
2489   double y[2], z = 0.0;
2490   int32_t n, ix;
2491 
2492   /* High word of x. */
2493   GET_HIGH_WORD(ix, x);
2494 
2495   /* |x| ~< pi/4 */
2496   ix &= 0x7FFFFFFF;
2497   if (ix <= 0x3FE921FB) {
2498     return __kernel_sin(x, z, 0);
2499   } else if (ix >= 0x7FF00000) {
2500     /* sin(Inf or NaN) is NaN */
2501     return x - x;
2502   } else {
2503     /* argument reduction needed */
2504     n = __ieee754_rem_pio2(x, y);
2505     switch (n & 3) {
2506       case 0:
2507         return __kernel_sin(y[0], y[1], 1);
2508       case 1:
2509         return __kernel_cos(y[0], y[1]);
2510       case 2:
2511         return -__kernel_sin(y[0], y[1], 1);
2512       default:
2513         return -__kernel_cos(y[0], y[1]);
2514     }
2515   }
2516 }
2517 
2518 /* tan(x)
2519  * Return tangent function of x.
2520  *
2521  * kernel function:
2522  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
2523  *      __ieee754_rem_pio2      ... argument reduction routine
2524  *
2525  * Method.
2526  *      Let S,C and T denote the sin, cos and tan respectively on
2527  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2528  *      in [-pi/4 , +pi/4], and let n = k mod 4.
2529  *      We have
2530  *
2531  *          n        sin(x)      cos(x)        tan(x)
2532  *     ----------------------------------------------------------
2533  *          0          S           C             T
2534  *          1          C          -S            -1/T
2535  *          2         -S          -C             T
2536  *          3         -C           S            -1/T
2537  *     ----------------------------------------------------------
2538  *
2539  * Special cases:
2540  *      Let trig be any of sin, cos, or tan.
2541  *      trig(+-INF)  is NaN, with signals;
2542  *      trig(NaN)    is that NaN;
2543  *
2544  * Accuracy:
2545  *      TRIG(x) returns trig(x) nearly rounded
2546  */
tan(double x)2547 double tan(double x) {
2548   double y[2], z = 0.0;
2549   int32_t n, ix;
2550 
2551   /* High word of x. */
2552   GET_HIGH_WORD(ix, x);
2553 
2554   /* |x| ~< pi/4 */
2555   ix &= 0x7FFFFFFF;
2556   if (ix <= 0x3FE921FB) {
2557     return __kernel_tan(x, z, 1);
2558   } else if (ix >= 0x7FF00000) {
2559     /* tan(Inf or NaN) is NaN */
2560     return x - x; /* NaN */
2561   } else {
2562     /* argument reduction needed */
2563     n = __ieee754_rem_pio2(x, y);
2564     /* 1 -> n even, -1 -> n odd */
2565     return __kernel_tan(y[0], y[1], 1 - ((n & 1) << 1));
2566   }
2567 }
2568 
2569 /*
2570  * ES6 draft 09-27-13, section 20.2.2.12.
2571  * Math.cosh
2572  * Method :
2573  * mathematically cosh(x) if defined to be (exp(x)+exp(-x))/2
2574  *      1. Replace x by |x| (cosh(x) = cosh(-x)).
2575  *      2.
2576  *                                                      [ exp(x) - 1 ]^2
2577  *          0        <= x <= ln2/2  :  cosh(x) := 1 + -------------------
2578  *                                                         2*exp(x)
2579  *
2580  *                                                 exp(x) + 1/exp(x)
2581  *          ln2/2    <= x <= 22     :  cosh(x) := -------------------
2582  *                                                        2
2583  *          22       <= x <= lnovft :  cosh(x) := exp(x)/2
2584  *          lnovft   <= x <= ln2ovft:  cosh(x) := exp(x/2)/2 * exp(x/2)
2585  *          ln2ovft  <  x           :  cosh(x) := huge*huge (overflow)
2586  *
2587  * Special cases:
2588  *      cosh(x) is |x| if x is +INF, -INF, or NaN.
2589  *      only cosh(0)=1 is exact for finite x.
2590  */
cosh(double x)2591 double cosh(double x) {
2592   static const double KCOSH_OVERFLOW = 710.4758600739439;
2593   static const double one = 1.0, half = 0.5;
2594   static volatile double huge = 1.0e+300;
2595 
2596   int32_t ix;
2597 
2598   /* High word of |x|. */
2599   GET_HIGH_WORD(ix, x);
2600   ix &= 0x7FFFFFFF;
2601 
2602   // |x| in [0,0.5*log2], return 1+expm1(|x|)^2/(2*exp(|x|))
2603   if (ix < 0x3FD62E43) {
2604     double t = expm1(fabs(x));
2605     double w = one + t;
2606     // For |x| < 2^-55, cosh(x) = 1
2607     if (ix < 0x3C800000) return w;
2608     return one + (t * t) / (w + w);
2609   }
2610 
2611   // |x| in [0.5*log2, 22], return (exp(|x|)+1/exp(|x|)/2
2612   if (ix < 0x40360000) {
2613     double t = exp(fabs(x));
2614     return half * t + half / t;
2615   }
2616 
2617   // |x| in [22, log(maxdouble)], return half*exp(|x|)
2618   if (ix < 0x40862E42) return half * exp(fabs(x));
2619 
2620   // |x| in [log(maxdouble), overflowthreshold]
2621   if (fabs(x) <= KCOSH_OVERFLOW) {
2622     double w = exp(half * fabs(x));
2623     double t = half * w;
2624     return t * w;
2625   }
2626 
2627   /* x is INF or NaN */
2628   if (ix >= 0x7FF00000) return x * x;
2629 
2630   // |x| > overflowthreshold.
2631   return huge * huge;
2632 }
2633 
2634 /*
2635  * ES6 draft 09-27-13, section 20.2.2.30.
2636  * Math.sinh
2637  * Method :
2638  * mathematically sinh(x) if defined to be (exp(x)-exp(-x))/2
2639  *      1. Replace x by |x| (sinh(-x) = -sinh(x)).
2640  *      2.
2641  *                                                  E + E/(E+1)
2642  *          0        <= x <= 22     :  sinh(x) := --------------, E=expm1(x)
2643  *                                                      2
2644  *
2645  *          22       <= x <= lnovft :  sinh(x) := exp(x)/2
2646  *          lnovft   <= x <= ln2ovft:  sinh(x) := exp(x/2)/2 * exp(x/2)
2647  *          ln2ovft  <  x           :  sinh(x) := x*shuge (overflow)
2648  *
2649  * Special cases:
2650  *      sinh(x) is |x| if x is +Infinity, -Infinity, or NaN.
2651  *      only sinh(0)=0 is exact for finite x.
2652  */
sinh(double x)2653 double sinh(double x) {
2654   static const double KSINH_OVERFLOW = 710.4758600739439,
2655                       TWO_M28 =
2656                           3.725290298461914e-9,  // 2^-28, empty lower half
2657       LOG_MAXD = 709.7822265625;  // 0x40862E42 00000000, empty lower half
2658   static const double shuge = 1.0e307;
2659 
2660   double h = (x < 0) ? -0.5 : 0.5;
2661   // |x| in [0, 22]. return sign(x)*0.5*(E+E/(E+1))
2662   double ax = fabs(x);
2663   if (ax < 22) {
2664     // For |x| < 2^-28, sinh(x) = x
2665     if (ax < TWO_M28) return x;
2666     double t = expm1(ax);
2667     if (ax < 1) {
2668       return h * (2 * t - t * t / (t + 1));
2669     }
2670     return h * (t + t / (t + 1));
2671   }
2672   // |x| in [22, log(maxdouble)], return 0.5 * exp(|x|)
2673   if (ax < LOG_MAXD) return h * exp(ax);
2674   // |x| in [log(maxdouble), overflowthreshold]
2675   // overflowthreshold = 710.4758600739426
2676   if (ax <= KSINH_OVERFLOW) {
2677     double w = exp(0.5 * ax);
2678     double t = h * w;
2679     return t * w;
2680   }
2681   // |x| > overflowthreshold or is NaN.
2682   // Return Infinity of the appropriate sign or NaN.
2683   return x * shuge;
2684 }
2685 
2686 /* Tanh(x)
2687  * Return the Hyperbolic Tangent of x
2688  *
2689  * Method :
2690  *                                 x    -x
2691  *                                e  - e
2692  *  0. tanh(x) is defined to be -----------
2693  *                                 x    -x
2694  *                                e  + e
2695  *  1. reduce x to non-negative by tanh(-x) = -tanh(x).
2696  *  2.  0      <= x <  2**-28 : tanh(x) := x with inexact if x != 0
2697  *                                          -t
2698  *      2**-28 <= x <  1      : tanh(x) := -----; t = expm1(-2x)
2699  *                                         t + 2
2700  *                                               2
2701  *      1      <= x <  22     : tanh(x) := 1 - -----; t = expm1(2x)
2702  *                                             t + 2
2703  *      22     <= x <= INF    : tanh(x) := 1.
2704  *
2705  * Special cases:
2706  *      tanh(NaN) is NaN;
2707  *      only tanh(0)=0 is exact for finite argument.
2708  */
tanh(double x)2709 double tanh(double x) {
2710   static const volatile double tiny = 1.0e-300;
2711   static const double one = 1.0, two = 2.0, huge = 1.0e300;
2712   double t, z;
2713   int32_t jx, ix;
2714 
2715   GET_HIGH_WORD(jx, x);
2716   ix = jx & 0x7FFFFFFF;
2717 
2718   /* x is INF or NaN */
2719   if (ix >= 0x7FF00000) {
2720     if (jx >= 0)
2721       return one / x + one; /* tanh(+-inf)=+-1 */
2722     else
2723       return one / x - one; /* tanh(NaN) = NaN */
2724   }
2725 
2726   /* |x| < 22 */
2727   if (ix < 0x40360000) {            /* |x|<22 */
2728     if (ix < 0x3E300000) {          /* |x|<2**-28 */
2729       if (huge + x > one) return x; /* tanh(tiny) = tiny with inexact */
2730     }
2731     if (ix >= 0x3FF00000) { /* |x|>=1  */
2732       t = expm1(two * fabs(x));
2733       z = one - two / (t + two);
2734     } else {
2735       t = expm1(-two * fabs(x));
2736       z = -t / (t + two);
2737     }
2738     /* |x| >= 22, return +-1 */
2739   } else {
2740     z = one - tiny; /* raise inexact flag */
2741   }
2742   return (jx >= 0) ? z : -z;
2743 }
2744 
2745 }  // namespace ieee754
2746 }  // namespace base
2747 }  // namespace v8
2748