1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).
2 //
3 // ====================================================
4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 //
6 // Developed at SunSoft, a Sun Microsystems, Inc. business.
7 // Permission to use, copy, modify, and distribute this
8 // software is freely granted, provided that this notice
9 // is preserved.
10 // ====================================================
11 //
12 // The original source code covered by the above license above has been
13 // modified significantly by Google Inc.
14 // Copyright 2016 the V8 project authors. All rights reserved.
15 
16 #include "src/base/ieee754.h"
17 
18 #include <cmath>
19 #include <limits>
20 
21 #include "src/base/build_config.h"
22 #include "src/base/macros.h"
23 
24 namespace v8 {
25 namespace base {
26 namespace ieee754 {
27 
28 namespace {
29 
30 /* Disable "potential divide by 0" warning in Visual Studio compiler. */
31 
32 #if V8_CC_MSVC
33 
34 #pragma warning(disable : 4723)
35 
36 #endif
37 
38 /*
39  * The original fdlibm code used statements like:
40  *  n0 = ((*(int*)&one)>>29)^1;   * index of high word *
41  *  ix0 = *(n0+(int*)&x);     * high word of x *
42  *  ix1 = *((1-n0)+(int*)&x);   * low word of x *
43  * to dig two 32 bit words out of the 64 bit IEEE floating point
44  * value.  That is non-ANSI, and, moreover, the gcc instruction
45  * scheduler gets it wrong.  We instead use the following macros.
46  * Unlike the original code, we determine the endianness at compile
47  * time, not at run time; I don't see much benefit to selecting
48  * endianness at run time.
49  */
50 
51 /*
52  * A union which permits us to convert between a double and two 32 bit
53  * ints.
54  */
55 
56 #if V8_TARGET_LITTLE_ENDIAN
57 
58 typedef union {
59   double value;
60   struct {
61     uint32_t lsw;
62     uint32_t msw;
63   } parts;
64   struct {
65     uint64_t w;
66   } xparts;
67 } ieee_double_shape_type;
68 
69 #else
70 
71 typedef union {
72   double value;
73   struct {
74     uint32_t msw;
75     uint32_t lsw;
76   } parts;
77   struct {
78     uint64_t w;
79   } xparts;
80 } ieee_double_shape_type;
81 
82 #endif
83 
84 /* Get two 32 bit ints from a double.  */
85 
86 #define EXTRACT_WORDS(ix0, ix1, d) \
87   do {                             \
88     ieee_double_shape_type ew_u;   \
89     ew_u.value = (d);              \
90     (ix0) = ew_u.parts.msw;        \
91     (ix1) = ew_u.parts.lsw;        \
92   } while (0)
93 
94 /* Get a 64-bit int from a double. */
95 #define EXTRACT_WORD64(ix, d)    \
96   do {                           \
97     ieee_double_shape_type ew_u; \
98     ew_u.value = (d);            \
99     (ix) = ew_u.xparts.w;        \
100   } while (0)
101 
102 /* Get the more significant 32 bit int from a double.  */
103 
104 #define GET_HIGH_WORD(i, d)      \
105   do {                           \
106     ieee_double_shape_type gh_u; \
107     gh_u.value = (d);            \
108     (i) = gh_u.parts.msw;        \
109   } while (0)
110 
111 /* Get the less significant 32 bit int from a double.  */
112 
113 #define GET_LOW_WORD(i, d)       \
114   do {                           \
115     ieee_double_shape_type gl_u; \
116     gl_u.value = (d);            \
117     (i) = gl_u.parts.lsw;        \
118   } while (0)
119 
120 /* Set a double from two 32 bit ints.  */
121 
122 #define INSERT_WORDS(d, ix0, ix1) \
123   do {                            \
124     ieee_double_shape_type iw_u;  \
125     iw_u.parts.msw = (ix0);       \
126     iw_u.parts.lsw = (ix1);       \
127     (d) = iw_u.value;             \
128   } while (0)
129 
130 /* Set a double from a 64-bit int. */
131 #define INSERT_WORD64(d, ix)     \
132   do {                           \
133     ieee_double_shape_type iw_u; \
134     iw_u.xparts.w = (ix);        \
135     (d) = iw_u.value;            \
136   } while (0)
137 
138 /* Set the more significant 32 bits of a double from an int.  */
139 
140 #define SET_HIGH_WORD(d, v)      \
141   do {                           \
142     ieee_double_shape_type sh_u; \
143     sh_u.value = (d);            \
144     sh_u.parts.msw = (v);        \
145     (d) = sh_u.value;            \
146   } while (0)
147 
148 /* Set the less significant 32 bits of a double from an int.  */
149 
150 #define SET_LOW_WORD(d, v)       \
151   do {                           \
152     ieee_double_shape_type sl_u; \
153     sl_u.value = (d);            \
154     sl_u.parts.lsw = (v);        \
155     (d) = sl_u.value;            \
156   } while (0)
157 
158 /* Support macro. */
159 
160 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval))
161 
162 int32_t __ieee754_rem_pio2(double x, double *y) WARN_UNUSED_RESULT;
163 double __kernel_cos(double x, double y) WARN_UNUSED_RESULT;
164 int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
165                       const int32_t *ipio2) WARN_UNUSED_RESULT;
166 double __kernel_sin(double x, double y, int iy) WARN_UNUSED_RESULT;
167 
168 /* __ieee754_rem_pio2(x,y)
169  *
170  * return the remainder of x rem pi/2 in y[0]+y[1]
171  * use __kernel_rem_pio2()
172  */
__ieee754_rem_pio2(double x,double * y)173 int32_t __ieee754_rem_pio2(double x, double *y) {
174   /*
175    * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
176    */
177   static const int32_t two_over_pi[] = {
178       0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C,
179       0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649,
180       0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44,
181       0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B,
182       0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D,
183       0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
184       0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330,
185       0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08,
186       0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA,
187       0x73A8C9, 0x60E27B, 0xC08C6B,
188   };
189 
190   static const int32_t npio2_hw[] = {
191       0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
192       0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
193       0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
194       0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
195       0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
196       0x404858EB, 0x404921FB,
197   };
198 
199   /*
200    * invpio2:  53 bits of 2/pi
201    * pio2_1:   first  33 bit of pi/2
202    * pio2_1t:  pi/2 - pio2_1
203    * pio2_2:   second 33 bit of pi/2
204    * pio2_2t:  pi/2 - (pio2_1+pio2_2)
205    * pio2_3:   third  33 bit of pi/2
206    * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
207    */
208 
209   static const double
210       zero = 0.00000000000000000000e+00,    /* 0x00000000, 0x00000000 */
211       half = 5.00000000000000000000e-01,    /* 0x3FE00000, 0x00000000 */
212       two24 = 1.67772160000000000000e+07,   /* 0x41700000, 0x00000000 */
213       invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
214       pio2_1 = 1.57079632673412561417e+00,  /* 0x3FF921FB, 0x54400000 */
215       pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
216       pio2_2 = 6.07710050630396597660e-11,  /* 0x3DD0B461, 0x1A600000 */
217       pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
218       pio2_3 = 2.02226624871116645580e-21,  /* 0x3BA3198A, 0x2E000000 */
219       pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
220 
221   double z, w, t, r, fn;
222   double tx[3];
223   int32_t e0, i, j, nx, n, ix, hx;
224   uint32_t low;
225 
226   z = 0;
227   GET_HIGH_WORD(hx, x); /* high word of x */
228   ix = hx & 0x7fffffff;
229   if (ix <= 0x3fe921fb) { /* |x| ~<= pi/4 , no need for reduction */
230     y[0] = x;
231     y[1] = 0;
232     return 0;
233   }
234   if (ix < 0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
235     if (hx > 0) {
236       z = x - pio2_1;
237       if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */
238         y[0] = z - pio2_1t;
239         y[1] = (z - y[0]) - pio2_1t;
240       } else { /* near pi/2, use 33+33+53 bit pi */
241         z -= pio2_2;
242         y[0] = z - pio2_2t;
243         y[1] = (z - y[0]) - pio2_2t;
244       }
245       return 1;
246     } else { /* negative x */
247       z = x + pio2_1;
248       if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */
249         y[0] = z + pio2_1t;
250         y[1] = (z - y[0]) + pio2_1t;
251       } else { /* near pi/2, use 33+33+53 bit pi */
252         z += pio2_2;
253         y[0] = z + pio2_2t;
254         y[1] = (z - y[0]) + pio2_2t;
255       }
256       return -1;
257     }
258   }
259   if (ix <= 0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
260     t = fabs(x);
261     n = static_cast<int32_t>(t * invpio2 + half);
262     fn = static_cast<double>(n);
263     r = t - fn * pio2_1;
264     w = fn * pio2_1t; /* 1st round good to 85 bit */
265     if (n < 32 && ix != npio2_hw[n - 1]) {
266       y[0] = r - w; /* quick check no cancellation */
267     } else {
268       uint32_t high;
269       j = ix >> 20;
270       y[0] = r - w;
271       GET_HIGH_WORD(high, y[0]);
272       i = j - ((high >> 20) & 0x7ff);
273       if (i > 16) { /* 2nd iteration needed, good to 118 */
274         t = r;
275         w = fn * pio2_2;
276         r = t - w;
277         w = fn * pio2_2t - ((t - r) - w);
278         y[0] = r - w;
279         GET_HIGH_WORD(high, y[0]);
280         i = j - ((high >> 20) & 0x7ff);
281         if (i > 49) { /* 3rd iteration need, 151 bits acc */
282           t = r;      /* will cover all possible cases */
283           w = fn * pio2_3;
284           r = t - w;
285           w = fn * pio2_3t - ((t - r) - w);
286           y[0] = r - w;
287         }
288       }
289     }
290     y[1] = (r - y[0]) - w;
291     if (hx < 0) {
292       y[0] = -y[0];
293       y[1] = -y[1];
294       return -n;
295     } else {
296       return n;
297     }
298   }
299   /*
300    * all other (large) arguments
301    */
302   if (ix >= 0x7ff00000) { /* x is inf or NaN */
303     y[0] = y[1] = x - x;
304     return 0;
305   }
306   /* set z = scalbn(|x|,ilogb(x)-23) */
307   GET_LOW_WORD(low, x);
308   SET_LOW_WORD(z, low);
309   e0 = (ix >> 20) - 1046; /* e0 = ilogb(z)-23; */
310   SET_HIGH_WORD(z, ix - static_cast<int32_t>(e0 << 20));
311   for (i = 0; i < 2; i++) {
312     tx[i] = static_cast<double>(static_cast<int32_t>(z));
313     z = (z - tx[i]) * two24;
314   }
315   tx[2] = z;
316   nx = 3;
317   while (tx[nx - 1] == zero) nx--; /* skip zero term */
318   n = __kernel_rem_pio2(tx, y, e0, nx, 2, two_over_pi);
319   if (hx < 0) {
320     y[0] = -y[0];
321     y[1] = -y[1];
322     return -n;
323   }
324   return n;
325 }
326 
327 /* __kernel_cos( x,  y )
328  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
329  * Input x is assumed to be bounded by ~pi/4 in magnitude.
330  * Input y is the tail of x.
331  *
332  * Algorithm
333  *      1. Since cos(-x) = cos(x), we need only to consider positive x.
334  *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
335  *      3. cos(x) is approximated by a polynomial of degree 14 on
336  *         [0,pi/4]
337  *                                       4            14
338  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
339  *         where the remez error is
340  *
341  *      |              2     4     6     8     10    12     14 |     -58
342  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
343  *      |                                                      |
344  *
345  *                     4     6     8     10    12     14
346  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
347  *             cos(x) = 1 - x*x/2 + r
348  *         since cos(x+y) ~ cos(x) - sin(x)*y
349  *                        ~ cos(x) - x*y,
350  *         a correction term is necessary in cos(x) and hence
351  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
352  *         For better accuracy when x > 0.3, let qx = |x|/4 with
353  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
354  *         Then
355  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
356  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
357  *         magnitude of the latter is at least a quarter of x*x/2,
358  *         thus, reducing the rounding error in the subtraction.
359  */
__kernel_cos(double x,double y)360 V8_INLINE double __kernel_cos(double x, double y) {
361   static const double
362       one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
363       C1 = 4.16666666666666019037e-02,  /* 0x3FA55555, 0x5555554C */
364       C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
365       C3 = 2.48015872894767294178e-05,  /* 0x3EFA01A0, 0x19CB1590 */
366       C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
367       C5 = 2.08757232129817482790e-09,  /* 0x3E21EE9E, 0xBDB4B1C4 */
368       C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
369 
370   double a, iz, z, r, qx;
371   int32_t ix;
372   GET_HIGH_WORD(ix, x);
373   ix &= 0x7fffffff;                           /* ix = |x|'s high word*/
374   if (ix < 0x3e400000) {                      /* if x < 2**27 */
375     if (static_cast<int>(x) == 0) return one; /* generate inexact */
376   }
377   z = x * x;
378   r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6)))));
379   if (ix < 0x3FD33333) { /* if |x| < 0.3 */
380     return one - (0.5 * z - (z * r - x * y));
381   } else {
382     if (ix > 0x3fe90000) { /* x > 0.78125 */
383       qx = 0.28125;
384     } else {
385       INSERT_WORDS(qx, ix - 0x00200000, 0); /* x/4 */
386     }
387     iz = 0.5 * z - qx;
388     a = one - qx;
389     return a - (iz - (z * r - x * y));
390   }
391 }
392 
393 /* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
394  * double x[],y[]; int e0,nx,prec; int ipio2[];
395  *
396  * __kernel_rem_pio2 return the last three digits of N with
397  *              y = x - N*pi/2
398  * so that |y| < pi/2.
399  *
400  * The method is to compute the integer (mod 8) and fraction parts of
401  * (2/pi)*x without doing the full multiplication. In general we
402  * skip the part of the product that are known to be a huge integer (
403  * more accurately, = 0 mod 8 ). Thus the number of operations are
404  * independent of the exponent of the input.
405  *
406  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
407  *
408  * Input parameters:
409  *      x[]     The input value (must be positive) is broken into nx
410  *              pieces of 24-bit integers in double precision format.
411  *              x[i] will be the i-th 24 bit of x. The scaled exponent
412  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
413  *              match x's up to 24 bits.
414  *
415  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
416  *                      e0 = ilogb(z)-23
417  *                      z  = scalbn(z,-e0)
418  *              for i = 0,1,2
419  *                      x[i] = floor(z)
420  *                      z    = (z-x[i])*2**24
421  *
422  *
423  *      y[]     output result in an array of double precision numbers.
424  *              The dimension of y[] is:
425  *                      24-bit  precision       1
426  *                      53-bit  precision       2
427  *                      64-bit  precision       2
428  *                      113-bit precision       3
429  *              The actual value is the sum of them. Thus for 113-bit
430  *              precison, one may have to do something like:
431  *
432  *              long double t,w,r_head, r_tail;
433  *              t = (long double)y[2] + (long double)y[1];
434  *              w = (long double)y[0];
435  *              r_head = t+w;
436  *              r_tail = w - (r_head - t);
437  *
438  *      e0      The exponent of x[0]
439  *
440  *      nx      dimension of x[]
441  *
442  *      prec    an integer indicating the precision:
443  *                      0       24  bits (single)
444  *                      1       53  bits (double)
445  *                      2       64  bits (extended)
446  *                      3       113 bits (quad)
447  *
448  *      ipio2[]
449  *              integer array, contains the (24*i)-th to (24*i+23)-th
450  *              bit of 2/pi after binary point. The corresponding
451  *              floating value is
452  *
453  *                      ipio2[i] * 2^(-24(i+1)).
454  *
455  * External function:
456  *      double scalbn(), floor();
457  *
458  *
459  * Here is the description of some local variables:
460  *
461  *      jk      jk+1 is the initial number of terms of ipio2[] needed
462  *              in the computation. The recommended value is 2,3,4,
463  *              6 for single, double, extended,and quad.
464  *
465  *      jz      local integer variable indicating the number of
466  *              terms of ipio2[] used.
467  *
468  *      jx      nx - 1
469  *
470  *      jv      index for pointing to the suitable ipio2[] for the
471  *              computation. In general, we want
472  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
473  *              is an integer. Thus
474  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
475  *              Hence jv = max(0,(e0-3)/24).
476  *
477  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
478  *
479  *      q[]     double array with integral value, representing the
480  *              24-bits chunk of the product of x and 2/pi.
481  *
482  *      q0      the corresponding exponent of q[0]. Note that the
483  *              exponent for q[i] would be q0-24*i.
484  *
485  *      PIo2[]  double precision array, obtained by cutting pi/2
486  *              into 24 bits chunks.
487  *
488  *      f[]     ipio2[] in floating point
489  *
490  *      iq[]    integer array by breaking up q[] in 24-bits chunk.
491  *
492  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
493  *
494  *      ih      integer. If >0 it indicates q[] is >= 0.5, hence
495  *              it also indicates the *sign* of the result.
496  *
497  */
__kernel_rem_pio2(double * x,double * y,int e0,int nx,int prec,const int32_t * ipio2)498 int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
499                       const int32_t *ipio2) {
500   /* Constants:
501    * The hexadecimal values are the intended ones for the following
502    * constants. The decimal values may be used, provided that the
503    * compiler will convert from decimal to binary accurately enough
504    * to produce the hexadecimal values shown.
505    */
506   static const int init_jk[] = {2, 3, 4, 6}; /* initial value for jk */
507 
508   static const double PIo2[] = {
509       1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
510       7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
511       5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
512       3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
513       1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
514       1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
515       2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
516       2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
517   };
518 
519   static const double
520       zero = 0.0,
521       one = 1.0,
522       two24 = 1.67772160000000000000e+07,  /* 0x41700000, 0x00000000 */
523       twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
524 
525   int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
526   double z, fw, f[20], fq[20], q[20];
527 
528   /* initialize jk*/
529   jk = init_jk[prec];
530   jp = jk;
531 
532   /* determine jx,jv,q0, note that 3>q0 */
533   jx = nx - 1;
534   jv = (e0 - 3) / 24;
535   if (jv < 0) jv = 0;
536   q0 = e0 - 24 * (jv + 1);
537 
538   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
539   j = jv - jx;
540   m = jx + jk;
541   for (i = 0; i <= m; i++, j++) {
542     f[i] = (j < 0) ? zero : static_cast<double>(ipio2[j]);
543   }
544 
545   /* compute q[0],q[1],...q[jk] */
546   for (i = 0; i <= jk; i++) {
547     for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
548     q[i] = fw;
549   }
550 
551   jz = jk;
552 recompute:
553   /* distill q[] into iq[] reversingly */
554   for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
555     fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
556     iq[i] = static_cast<int32_t>(z - two24 * fw);
557     z = q[j - 1] + fw;
558   }
559 
560   /* compute n */
561   z = scalbn(z, q0);           /* actual value of z */
562   z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
563   n = static_cast<int32_t>(z);
564   z -= static_cast<double>(n);
565   ih = 0;
566   if (q0 > 0) { /* need iq[jz-1] to determine n */
567     i = (iq[jz - 1] >> (24 - q0));
568     n += i;
569     iq[jz - 1] -= i << (24 - q0);
570     ih = iq[jz - 1] >> (23 - q0);
571   } else if (q0 == 0) {
572     ih = iq[jz - 1] >> 23;
573   } else if (z >= 0.5) {
574     ih = 2;
575   }
576 
577   if (ih > 0) { /* q > 0.5 */
578     n += 1;
579     carry = 0;
580     for (i = 0; i < jz; i++) { /* compute 1-q */
581       j = iq[i];
582       if (carry == 0) {
583         if (j != 0) {
584           carry = 1;
585           iq[i] = 0x1000000 - j;
586         }
587       } else {
588         iq[i] = 0xffffff - j;
589       }
590     }
591     if (q0 > 0) { /* rare case: chance is 1 in 12 */
592       switch (q0) {
593         case 1:
594           iq[jz - 1] &= 0x7fffff;
595           break;
596         case 2:
597           iq[jz - 1] &= 0x3fffff;
598           break;
599       }
600     }
601     if (ih == 2) {
602       z = one - z;
603       if (carry != 0) z -= scalbn(one, q0);
604     }
605   }
606 
607   /* check if recomputation is needed */
608   if (z == zero) {
609     j = 0;
610     for (i = jz - 1; i >= jk; i--) j |= iq[i];
611     if (j == 0) { /* need recomputation */
612       for (k = 1; jk >= k && iq[jk - k] == 0; k++) {
613         /* k = no. of terms needed */
614       }
615 
616       for (i = jz + 1; i <= jz + k; i++) { /* add q[jz+1] to q[jz+k] */
617         f[jx + i] = ipio2[jv + i];
618         for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
619         q[i] = fw;
620       }
621       jz += k;
622       goto recompute;
623     }
624   }
625 
626   /* chop off zero terms */
627   if (z == 0.0) {
628     jz -= 1;
629     q0 -= 24;
630     while (iq[jz] == 0) {
631       jz--;
632       q0 -= 24;
633     }
634   } else { /* break z into 24-bit if necessary */
635     z = scalbn(z, -q0);
636     if (z >= two24) {
637       fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
638       iq[jz] = z - two24 * fw;
639       jz += 1;
640       q0 += 24;
641       iq[jz] = fw;
642     } else {
643       iq[jz] = z;
644     }
645   }
646 
647   /* convert integer "bit" chunk to floating-point value */
648   fw = scalbn(one, q0);
649   for (i = jz; i >= 0; i--) {
650     q[i] = fw * iq[i];
651     fw *= twon24;
652   }
653 
654   /* compute PIo2[0,...,jp]*q[jz,...,0] */
655   for (i = jz; i >= 0; i--) {
656     for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) fw += PIo2[k] * q[i + k];
657     fq[jz - i] = fw;
658   }
659 
660   /* compress fq[] into y[] */
661   switch (prec) {
662     case 0:
663       fw = 0.0;
664       for (i = jz; i >= 0; i--) fw += fq[i];
665       y[0] = (ih == 0) ? fw : -fw;
666       break;
667     case 1:
668     case 2:
669       fw = 0.0;
670       for (i = jz; i >= 0; i--) fw += fq[i];
671       y[0] = (ih == 0) ? fw : -fw;
672       fw = fq[0] - fw;
673       for (i = 1; i <= jz; i++) fw += fq[i];
674       y[1] = (ih == 0) ? fw : -fw;
675       break;
676     case 3: /* painful */
677       for (i = jz; i > 0; i--) {
678         fw = fq[i - 1] + fq[i];
679         fq[i] += fq[i - 1] - fw;
680         fq[i - 1] = fw;
681       }
682       for (i = jz; i > 1; i--) {
683         fw = fq[i - 1] + fq[i];
684         fq[i] += fq[i - 1] - fw;
685         fq[i - 1] = fw;
686       }
687       for (fw = 0.0, i = jz; i >= 2; i--) fw += fq[i];
688       if (ih == 0) {
689         y[0] = fq[0];
690         y[1] = fq[1];
691         y[2] = fw;
692       } else {
693         y[0] = -fq[0];
694         y[1] = -fq[1];
695         y[2] = -fw;
696       }
697   }
698   return n & 7;
699 }
700 
701 /* __kernel_sin( x, y, iy)
702  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
703  * Input x is assumed to be bounded by ~pi/4 in magnitude.
704  * Input y is the tail of x.
705  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
706  *
707  * Algorithm
708  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
709  *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
710  *      3. sin(x) is approximated by a polynomial of degree 13 on
711  *         [0,pi/4]
712  *                               3            13
713  *              sin(x) ~ x + S1*x + ... + S6*x
714  *         where
715  *
716  *      |sin(x)         2     4     6     8     10     12  |     -58
717  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
718  *      |  x                                               |
719  *
720  *      4. sin(x+y) = sin(x) + sin'(x')*y
721  *                  ~ sin(x) + (1-x*x/2)*y
722  *         For better accuracy, let
723  *                   3      2      2      2      2
724  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
725  *         then                   3    2
726  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
727  */
__kernel_sin(double x,double y,int iy)728 V8_INLINE double __kernel_sin(double x, double y, int iy) {
729   static const double
730       half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
731       S1 = -1.66666666666666324348e-01,  /* 0xBFC55555, 0x55555549 */
732       S2 = 8.33333333332248946124e-03,   /* 0x3F811111, 0x1110F8A6 */
733       S3 = -1.98412698298579493134e-04,  /* 0xBF2A01A0, 0x19C161D5 */
734       S4 = 2.75573137070700676789e-06,   /* 0x3EC71DE3, 0x57B1FE7D */
735       S5 = -2.50507602534068634195e-08,  /* 0xBE5AE5E6, 0x8A2B9CEB */
736       S6 = 1.58969099521155010221e-10;   /* 0x3DE5D93A, 0x5ACFD57C */
737 
738   double z, r, v;
739   int32_t ix;
740   GET_HIGH_WORD(ix, x);
741   ix &= 0x7fffffff;      /* high word of x */
742   if (ix < 0x3e400000) { /* |x| < 2**-27 */
743     if (static_cast<int>(x) == 0) return x;
744   } /* generate inexact */
745   z = x * x;
746   v = z * x;
747   r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6)));
748   if (iy == 0) {
749     return x + v * (S1 + z * r);
750   } else {
751     return x - ((z * (half * y - v * r) - y) - v * S1);
752   }
753 }
754 
755 /* __kernel_tan( x, y, k )
756  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
757  * Input x is assumed to be bounded by ~pi/4 in magnitude.
758  * Input y is the tail of x.
759  * Input k indicates whether tan (if k=1) or
760  * -1/tan (if k= -1) is returned.
761  *
762  * Algorithm
763  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
764  *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
765  *      3. tan(x) is approximated by a odd polynomial of degree 27 on
766  *         [0,0.67434]
767  *                               3             27
768  *              tan(x) ~ x + T1*x + ... + T13*x
769  *         where
770  *
771  *              |tan(x)         2     4            26   |     -59.2
772  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
773  *              |  x                                    |
774  *
775  *         Note: tan(x+y) = tan(x) + tan'(x)*y
776  *                        ~ tan(x) + (1+x*x)*y
777  *         Therefore, for better accuracy in computing tan(x+y), let
778  *                   3      2      2       2       2
779  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
780  *         then
781  *                                  3    2
782  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
783  *
784  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
785  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
786  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
787  */
__kernel_tan(double x,double y,int iy)788 double __kernel_tan(double x, double y, int iy) {
789   static const double xxx[] = {
790       3.33333333333334091986e-01,             /* 3FD55555, 55555563 */
791       1.33333333333201242699e-01,             /* 3FC11111, 1110FE7A */
792       5.39682539762260521377e-02,             /* 3FABA1BA, 1BB341FE */
793       2.18694882948595424599e-02,             /* 3F9664F4, 8406D637 */
794       8.86323982359930005737e-03,             /* 3F8226E3, E96E8493 */
795       3.59207910759131235356e-03,             /* 3F6D6D22, C9560328 */
796       1.45620945432529025516e-03,             /* 3F57DBC8, FEE08315 */
797       5.88041240820264096874e-04,             /* 3F4344D8, F2F26501 */
798       2.46463134818469906812e-04,             /* 3F3026F7, 1A8D1068 */
799       7.81794442939557092300e-05,             /* 3F147E88, A03792A6 */
800       7.14072491382608190305e-05,             /* 3F12B80F, 32F0A7E9 */
801       -1.85586374855275456654e-05,            /* BEF375CB, DB605373 */
802       2.59073051863633712884e-05,             /* 3EFB2A70, 74BF7AD4 */
803       /* one */ 1.00000000000000000000e+00,   /* 3FF00000, 00000000 */
804       /* pio4 */ 7.85398163397448278999e-01,  /* 3FE921FB, 54442D18 */
805       /* pio4lo */ 3.06161699786838301793e-17 /* 3C81A626, 33145C07 */
806   };
807 #define one xxx[13]
808 #define pio4 xxx[14]
809 #define pio4lo xxx[15]
810 #define T xxx
811 
812   double z, r, v, w, s;
813   int32_t ix, hx;
814 
815   GET_HIGH_WORD(hx, x);             /* high word of x */
816   ix = hx & 0x7fffffff;             /* high word of |x| */
817   if (ix < 0x3e300000) {            /* x < 2**-28 */
818     if (static_cast<int>(x) == 0) { /* generate inexact */
819       uint32_t low;
820       GET_LOW_WORD(low, x);
821       if (((ix | low) | (iy + 1)) == 0) {
822         return one / fabs(x);
823       } else {
824         if (iy == 1) {
825           return x;
826         } else { /* compute -1 / (x+y) carefully */
827           double a, t;
828 
829           z = w = x + y;
830           SET_LOW_WORD(z, 0);
831           v = y - (z - x);
832           t = a = -one / w;
833           SET_LOW_WORD(t, 0);
834           s = one + t * z;
835           return t + a * (s + t * v);
836         }
837       }
838     }
839   }
840   if (ix >= 0x3FE59428) { /* |x| >= 0.6744 */
841     if (hx < 0) {
842       x = -x;
843       y = -y;
844     }
845     z = pio4 - x;
846     w = pio4lo - y;
847     x = z + w;
848     y = 0.0;
849   }
850   z = x * x;
851   w = z * z;
852   /*
853    * Break x^5*(T[1]+x^2*T[2]+...) into
854    * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
855    * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
856    */
857   r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
858   v = z *
859       (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
860   s = z * x;
861   r = y + z * (s * (r + v) + y);
862   r += T[0] * s;
863   w = x + r;
864   if (ix >= 0x3FE59428) {
865     v = iy;
866     return (1 - ((hx >> 30) & 2)) * (v - 2.0 * (x - (w * w / (w + v) - r)));
867   }
868   if (iy == 1) {
869     return w;
870   } else {
871     /*
872      * if allow error up to 2 ulp, simply return
873      * -1.0 / (x+r) here
874      */
875     /* compute -1.0 / (x+r) accurately */
876     double a, t;
877     z = w;
878     SET_LOW_WORD(z, 0);
879     v = r - (z - x);  /* z+v = r+x */
880     t = a = -1.0 / w; /* a = -1.0/w */
881     SET_LOW_WORD(t, 0);
882     s = 1.0 + t * z;
883     return t + a * (s + t * v);
884   }
885 
886 #undef one
887 #undef pio4
888 #undef pio4lo
889 #undef T
890 }
891 
892 }  // namespace
893 
894 /* acos(x)
895  * Method :
896  *      acos(x)  = pi/2 - asin(x)
897  *      acos(-x) = pi/2 + asin(x)
898  * For |x|<=0.5
899  *      acos(x) = pi/2 - (x + x*x^2*R(x^2))     (see asin.c)
900  * For x>0.5
901  *      acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2)))
902  *              = 2asin(sqrt((1-x)/2))
903  *              = 2s + 2s*z*R(z)        ...z=(1-x)/2, s=sqrt(z)
904  *              = 2f + (2c + 2s*z*R(z))
905  *     where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term
906  *     for f so that f+c ~ sqrt(z).
907  * For x<-0.5
908  *      acos(x) = pi - 2asin(sqrt((1-|x|)/2))
909  *              = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z)
910  *
911  * Special cases:
912  *      if x is NaN, return x itself;
913  *      if |x|>1, return NaN with invalid signal.
914  *
915  * Function needed: sqrt
916  */
acos(double x)917 double acos(double x) {
918   static const double
919       one = 1.00000000000000000000e+00,     /* 0x3FF00000, 0x00000000 */
920       pi = 3.14159265358979311600e+00,      /* 0x400921FB, 0x54442D18 */
921       pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
922       pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
923       pS0 = 1.66666666666666657415e-01,     /* 0x3FC55555, 0x55555555 */
924       pS1 = -3.25565818622400915405e-01,    /* 0xBFD4D612, 0x03EB6F7D */
925       pS2 = 2.01212532134862925881e-01,     /* 0x3FC9C155, 0x0E884455 */
926       pS3 = -4.00555345006794114027e-02,    /* 0xBFA48228, 0xB5688F3B */
927       pS4 = 7.91534994289814532176e-04,     /* 0x3F49EFE0, 0x7501B288 */
928       pS5 = 3.47933107596021167570e-05,     /* 0x3F023DE1, 0x0DFDF709 */
929       qS1 = -2.40339491173441421878e+00,    /* 0xC0033A27, 0x1C8A2D4B */
930       qS2 = 2.02094576023350569471e+00,     /* 0x40002AE5, 0x9C598AC8 */
931       qS3 = -6.88283971605453293030e-01,    /* 0xBFE6066C, 0x1B8D0159 */
932       qS4 = 7.70381505559019352791e-02;     /* 0x3FB3B8C5, 0xB12E9282 */
933 
934   double z, p, q, r, w, s, c, df;
935   int32_t hx, ix;
936   GET_HIGH_WORD(hx, x);
937   ix = hx & 0x7fffffff;
938   if (ix >= 0x3ff00000) { /* |x| >= 1 */
939     uint32_t lx;
940     GET_LOW_WORD(lx, x);
941     if (((ix - 0x3ff00000) | lx) == 0) { /* |x|==1 */
942       if (hx > 0)
943         return 0.0; /* acos(1) = 0  */
944       else
945         return pi + 2.0 * pio2_lo; /* acos(-1)= pi */
946     }
947     return (x - x) / (x - x); /* acos(|x|>1) is NaN */
948   }
949   if (ix < 0x3fe00000) {                            /* |x| < 0.5 */
950     if (ix <= 0x3c600000) return pio2_hi + pio2_lo; /*if|x|<2**-57*/
951     z = x * x;
952     p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
953     q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
954     r = p / q;
955     return pio2_hi - (x - (pio2_lo - x * r));
956   } else if (hx < 0) { /* x < -0.5 */
957     z = (one + x) * 0.5;
958     p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
959     q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
960     s = sqrt(z);
961     r = p / q;
962     w = r * s - pio2_lo;
963     return pi - 2.0 * (s + w);
964   } else { /* x > 0.5 */
965     z = (one - x) * 0.5;
966     s = sqrt(z);
967     df = s;
968     SET_LOW_WORD(df, 0);
969     c = (z - df * df) / (s + df);
970     p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
971     q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
972     r = p / q;
973     w = r * s + c;
974     return 2.0 * (df + w);
975   }
976 }
977 
978 /* acosh(x)
979  * Method :
980  *      Based on
981  *              acosh(x) = log [ x + sqrt(x*x-1) ]
982  *      we have
983  *              acosh(x) := log(x)+ln2, if x is large; else
984  *              acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else
985  *              acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1.
986  *
987  * Special cases:
988  *      acosh(x) is NaN with signal if x<1.
989  *      acosh(NaN) is NaN without signal.
990  */
acosh(double x)991 double acosh(double x) {
992   static const double
993       one = 1.0,
994       ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */
995   double t;
996   int32_t hx;
997   uint32_t lx;
998   EXTRACT_WORDS(hx, lx, x);
999   if (hx < 0x3ff00000) { /* x < 1 */
1000     return (x - x) / (x - x);
1001   } else if (hx >= 0x41b00000) { /* x > 2**28 */
1002     if (hx >= 0x7ff00000) {      /* x is inf of NaN */
1003       return x + x;
1004     } else {
1005       return log(x) + ln2; /* acosh(huge)=log(2x) */
1006     }
1007   } else if (((hx - 0x3ff00000) | lx) == 0) {
1008     return 0.0;                 /* acosh(1) = 0 */
1009   } else if (hx > 0x40000000) { /* 2**28 > x > 2 */
1010     t = x * x;
1011     return log(2.0 * x - one / (x + sqrt(t - one)));
1012   } else { /* 1<x<2 */
1013     t = x - one;
1014     return log1p(t + sqrt(2.0 * t + t * t));
1015   }
1016 }
1017 
1018 /* asin(x)
1019  * Method :
1020  *      Since  asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ...
1021  *      we approximate asin(x) on [0,0.5] by
1022  *              asin(x) = x + x*x^2*R(x^2)
1023  *      where
1024  *              R(x^2) is a rational approximation of (asin(x)-x)/x^3
1025  *      and its remez error is bounded by
1026  *              |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75)
1027  *
1028  *      For x in [0.5,1]
1029  *              asin(x) = pi/2-2*asin(sqrt((1-x)/2))
1030  *      Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2;
1031  *      then for x>0.98
1032  *              asin(x) = pi/2 - 2*(s+s*z*R(z))
1033  *                      = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo)
1034  *      For x<=0.98, let pio4_hi = pio2_hi/2, then
1035  *              f = hi part of s;
1036  *              c = sqrt(z) - f = (z-f*f)/(s+f)         ...f+c=sqrt(z)
1037  *      and
1038  *              asin(x) = pi/2 - 2*(s+s*z*R(z))
1039  *                      = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo)
1040  *                      = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c))
1041  *
1042  * Special cases:
1043  *      if x is NaN, return x itself;
1044  *      if |x|>1, return NaN with invalid signal.
1045  */
asin(double x)1046 double asin(double x) {
1047   static const double
1048       one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
1049       huge = 1.000e+300,
1050       pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
1051       pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
1052       pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
1053                                             /* coefficient for R(x^2) */
1054       pS0 = 1.66666666666666657415e-01,     /* 0x3FC55555, 0x55555555 */
1055       pS1 = -3.25565818622400915405e-01,    /* 0xBFD4D612, 0x03EB6F7D */
1056       pS2 = 2.01212532134862925881e-01,     /* 0x3FC9C155, 0x0E884455 */
1057       pS3 = -4.00555345006794114027e-02,    /* 0xBFA48228, 0xB5688F3B */
1058       pS4 = 7.91534994289814532176e-04,     /* 0x3F49EFE0, 0x7501B288 */
1059       pS5 = 3.47933107596021167570e-05,     /* 0x3F023DE1, 0x0DFDF709 */
1060       qS1 = -2.40339491173441421878e+00,    /* 0xC0033A27, 0x1C8A2D4B */
1061       qS2 = 2.02094576023350569471e+00,     /* 0x40002AE5, 0x9C598AC8 */
1062       qS3 = -6.88283971605453293030e-01,    /* 0xBFE6066C, 0x1B8D0159 */
1063       qS4 = 7.70381505559019352791e-02;     /* 0x3FB3B8C5, 0xB12E9282 */
1064 
1065   double t, w, p, q, c, r, s;
1066   int32_t hx, ix;
1067 
1068   t = 0;
1069   GET_HIGH_WORD(hx, x);
1070   ix = hx & 0x7fffffff;
1071   if (ix >= 0x3ff00000) { /* |x|>= 1 */
1072     uint32_t lx;
1073     GET_LOW_WORD(lx, x);
1074     if (((ix - 0x3ff00000) | lx) == 0) /* asin(1)=+-pi/2 with inexact */
1075       return x * pio2_hi + x * pio2_lo;
1076     return (x - x) / (x - x);       /* asin(|x|>1) is NaN */
1077   } else if (ix < 0x3fe00000) {     /* |x|<0.5 */
1078     if (ix < 0x3e400000) {          /* if |x| < 2**-27 */
1079       if (huge + x > one) return x; /* return x with inexact if x!=0*/
1080     } else {
1081       t = x * x;
1082     }
1083     p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5)))));
1084     q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4)));
1085     w = p / q;
1086     return x + x * w;
1087   }
1088   /* 1> |x|>= 0.5 */
1089   w = one - fabs(x);
1090   t = w * 0.5;
1091   p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5)))));
1092   q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4)));
1093   s = sqrt(t);
1094   if (ix >= 0x3FEF3333) { /* if |x| > 0.975 */
1095     w = p / q;
1096     t = pio2_hi - (2.0 * (s + s * w) - pio2_lo);
1097   } else {
1098     w = s;
1099     SET_LOW_WORD(w, 0);
1100     c = (t - w * w) / (s + w);
1101     r = p / q;
1102     p = 2.0 * s * r - (pio2_lo - 2.0 * c);
1103     q = pio4_hi - 2.0 * w;
1104     t = pio4_hi - (p - q);
1105   }
1106   if (hx > 0)
1107     return t;
1108   else
1109     return -t;
1110 }
1111 /* asinh(x)
1112  * Method :
1113  *      Based on
1114  *              asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ]
1115  *      we have
1116  *      asinh(x) := x  if  1+x*x=1,
1117  *               := sign(x)*(log(x)+ln2)) for large |x|, else
1118  *               := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else
1119  *               := sign(x)*log1p(|x| + x^2/(1 + sqrt(1+x^2)))
1120  */
asinh(double x)1121 double asinh(double x) {
1122   static const double
1123       one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
1124       ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
1125       huge = 1.00000000000000000000e+300;
1126 
1127   double t, w;
1128   int32_t hx, ix;
1129   GET_HIGH_WORD(hx, x);
1130   ix = hx & 0x7fffffff;
1131   if (ix >= 0x7ff00000) return x + x; /* x is inf or NaN */
1132   if (ix < 0x3e300000) {              /* |x|<2**-28 */
1133     if (huge + x > one) return x;     /* return x inexact except 0 */
1134   }
1135   if (ix > 0x41b00000) { /* |x| > 2**28 */
1136     w = log(fabs(x)) + ln2;
1137   } else if (ix > 0x40000000) { /* 2**28 > |x| > 2.0 */
1138     t = fabs(x);
1139     w = log(2.0 * t + one / (sqrt(x * x + one) + t));
1140   } else { /* 2.0 > |x| > 2**-28 */
1141     t = x * x;
1142     w = log1p(fabs(x) + t / (one + sqrt(one + t)));
1143   }
1144   if (hx > 0) {
1145     return w;
1146   } else {
1147     return -w;
1148   }
1149 }
1150 
1151 /* atan(x)
1152  * Method
1153  *   1. Reduce x to positive by atan(x) = -atan(-x).
1154  *   2. According to the integer k=4t+0.25 chopped, t=x, the argument
1155  *      is further reduced to one of the following intervals and the
1156  *      arctangent of t is evaluated by the corresponding formula:
1157  *
1158  *      [0,7/16]      atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
1159  *      [7/16,11/16]  atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) )
1160  *      [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) )
1161  *      [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) )
1162  *      [39/16,INF]   atan(x) = atan(INF) + atan( -1/t )
1163  *
1164  * Constants:
1165  * The hexadecimal values are the intended ones for the following
1166  * constants. The decimal values may be used, provided that the
1167  * compiler will convert from decimal to binary accurately enough
1168  * to produce the hexadecimal values shown.
1169  */
atan(double x)1170 double atan(double x) {
1171   static const double atanhi[] = {
1172       4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
1173       7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
1174       9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
1175       1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
1176   };
1177 
1178   static const double atanlo[] = {
1179       2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
1180       3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
1181       1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
1182       6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */
1183   };
1184 
1185   static const double aT[] = {
1186       3.33333333333329318027e-01,  /* 0x3FD55555, 0x5555550D */
1187       -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */
1188       1.42857142725034663711e-01,  /* 0x3FC24924, 0x920083FF */
1189       -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */
1190       9.09088713343650656196e-02,  /* 0x3FB745CD, 0xC54C206E */
1191       -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */
1192       6.66107313738753120669e-02,  /* 0x3FB10D66, 0xA0D03D51 */
1193       -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */
1194       4.97687799461593236017e-02,  /* 0x3FA97B4B, 0x24760DEB */
1195       -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */
1196       1.62858201153657823623e-02,  /* 0x3F90AD3A, 0xE322DA11 */
1197   };
1198 
1199   static const double one = 1.0, huge = 1.0e300;
1200 
1201   double w, s1, s2, z;
1202   int32_t ix, hx, id;
1203 
1204   GET_HIGH_WORD(hx, x);
1205   ix = hx & 0x7fffffff;
1206   if (ix >= 0x44100000) { /* if |x| >= 2^66 */
1207     uint32_t low;
1208     GET_LOW_WORD(low, x);
1209     if (ix > 0x7ff00000 || (ix == 0x7ff00000 && (low != 0)))
1210       return x + x; /* NaN */
1211     if (hx > 0)
1212       return atanhi[3] + *(volatile double *)&atanlo[3];
1213     else
1214       return -atanhi[3] - *(volatile double *)&atanlo[3];
1215   }
1216   if (ix < 0x3fdc0000) {            /* |x| < 0.4375 */
1217     if (ix < 0x3e400000) {          /* |x| < 2^-27 */
1218       if (huge + x > one) return x; /* raise inexact */
1219     }
1220     id = -1;
1221   } else {
1222     x = fabs(x);
1223     if (ix < 0x3ff30000) {   /* |x| < 1.1875 */
1224       if (ix < 0x3fe60000) { /* 7/16 <=|x|<11/16 */
1225         id = 0;
1226         x = (2.0 * x - one) / (2.0 + x);
1227       } else { /* 11/16<=|x|< 19/16 */
1228         id = 1;
1229         x = (x - one) / (x + one);
1230       }
1231     } else {
1232       if (ix < 0x40038000) { /* |x| < 2.4375 */
1233         id = 2;
1234         x = (x - 1.5) / (one + 1.5 * x);
1235       } else { /* 2.4375 <= |x| < 2^66 */
1236         id = 3;
1237         x = -1.0 / x;
1238       }
1239     }
1240   }
1241   /* end of argument reduction */
1242   z = x * x;
1243   w = z * z;
1244   /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
1245   s1 = z * (aT[0] +
1246             w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
1247   s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
1248   if (id < 0) {
1249     return x - x * (s1 + s2);
1250   } else {
1251     z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
1252     return (hx < 0) ? -z : z;
1253   }
1254 }
1255 
1256 /* atan2(y,x)
1257  * Method :
1258  *  1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
1259  *  2. Reduce x to positive by (if x and y are unexceptional):
1260  *    ARG (x+iy) = arctan(y/x)       ... if x > 0,
1261  *    ARG (x+iy) = pi - arctan[y/(-x)]   ... if x < 0,
1262  *
1263  * Special cases:
1264  *
1265  *  ATAN2((anything), NaN ) is NaN;
1266  *  ATAN2(NAN , (anything) ) is NaN;
1267  *  ATAN2(+-0, +(anything but NaN)) is +-0  ;
1268  *  ATAN2(+-0, -(anything but NaN)) is +-pi ;
1269  *  ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
1270  *  ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
1271  *  ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
1272  *  ATAN2(+-INF,+INF ) is +-pi/4 ;
1273  *  ATAN2(+-INF,-INF ) is +-3pi/4;
1274  *  ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
1275  *
1276  * Constants:
1277  * The hexadecimal values are the intended ones for the following
1278  * constants. The decimal values may be used, provided that the
1279  * compiler will convert from decimal to binary accurately enough
1280  * to produce the hexadecimal values shown.
1281  */
atan2(double y,double x)1282 double atan2(double y, double x) {
1283   static volatile double tiny = 1.0e-300;
1284   static const double
1285       zero = 0.0,
1286       pi_o_4 = 7.8539816339744827900E-01, /* 0x3FE921FB, 0x54442D18 */
1287       pi_o_2 = 1.5707963267948965580E+00, /* 0x3FF921FB, 0x54442D18 */
1288       pi = 3.1415926535897931160E+00;     /* 0x400921FB, 0x54442D18 */
1289   static volatile double pi_lo =
1290       1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
1291 
1292   double z;
1293   int32_t k, m, hx, hy, ix, iy;
1294   uint32_t lx, ly;
1295 
1296   EXTRACT_WORDS(hx, lx, x);
1297   ix = hx & 0x7fffffff;
1298   EXTRACT_WORDS(hy, ly, y);
1299   iy = hy & 0x7fffffff;
1300   if (((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x7ff00000) ||
1301       ((iy | ((ly | -static_cast<int32_t>(ly)) >> 31)) > 0x7ff00000)) {
1302     return x + y; /* x or y is NaN */
1303   }
1304   if (((hx - 0x3ff00000) | lx) == 0) return atan(y); /* x=1.0 */
1305   m = ((hy >> 31) & 1) | ((hx >> 30) & 2);           /* 2*sign(x)+sign(y) */
1306 
1307   /* when y = 0 */
1308   if ((iy | ly) == 0) {
1309     switch (m) {
1310       case 0:
1311       case 1:
1312         return y; /* atan(+-0,+anything)=+-0 */
1313       case 2:
1314         return pi + tiny; /* atan(+0,-anything) = pi */
1315       case 3:
1316         return -pi - tiny; /* atan(-0,-anything) =-pi */
1317     }
1318   }
1319   /* when x = 0 */
1320   if ((ix | lx) == 0) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1321 
1322   /* when x is INF */
1323   if (ix == 0x7ff00000) {
1324     if (iy == 0x7ff00000) {
1325       switch (m) {
1326         case 0:
1327           return pi_o_4 + tiny; /* atan(+INF,+INF) */
1328         case 1:
1329           return -pi_o_4 - tiny; /* atan(-INF,+INF) */
1330         case 2:
1331           return 3.0 * pi_o_4 + tiny; /*atan(+INF,-INF)*/
1332         case 3:
1333           return -3.0 * pi_o_4 - tiny; /*atan(-INF,-INF)*/
1334       }
1335     } else {
1336       switch (m) {
1337         case 0:
1338           return zero; /* atan(+...,+INF) */
1339         case 1:
1340           return -zero; /* atan(-...,+INF) */
1341         case 2:
1342           return pi + tiny; /* atan(+...,-INF) */
1343         case 3:
1344           return -pi - tiny; /* atan(-...,-INF) */
1345       }
1346     }
1347   }
1348   /* when y is INF */
1349   if (iy == 0x7ff00000) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1350 
1351   /* compute y/x */
1352   k = (iy - ix) >> 20;
1353   if (k > 60) { /* |y/x| >  2**60 */
1354     z = pi_o_2 + 0.5 * pi_lo;
1355     m &= 1;
1356   } else if (hx < 0 && k < -60) {
1357     z = 0.0; /* 0 > |y|/x > -2**-60 */
1358   } else {
1359     z = atan(fabs(y / x)); /* safe to do y/x */
1360   }
1361   switch (m) {
1362     case 0:
1363       return z; /* atan(+,+) */
1364     case 1:
1365       return -z; /* atan(-,+) */
1366     case 2:
1367       return pi - (z - pi_lo); /* atan(+,-) */
1368     default:                   /* case 3 */
1369       return (z - pi_lo) - pi; /* atan(-,-) */
1370   }
1371 }
1372 
1373 /* cos(x)
1374  * Return cosine function of x.
1375  *
1376  * kernel function:
1377  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
1378  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
1379  *      __ieee754_rem_pio2      ... argument reduction routine
1380  *
1381  * Method.
1382  *      Let S,C and T denote the sin, cos and tan respectively on
1383  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
1384  *      in [-pi/4 , +pi/4], and let n = k mod 4.
1385  *      We have
1386  *
1387  *          n        sin(x)      cos(x)        tan(x)
1388  *     ----------------------------------------------------------
1389  *          0          S           C             T
1390  *          1          C          -S            -1/T
1391  *          2         -S          -C             T
1392  *          3         -C           S            -1/T
1393  *     ----------------------------------------------------------
1394  *
1395  * Special cases:
1396  *      Let trig be any of sin, cos, or tan.
1397  *      trig(+-INF)  is NaN, with signals;
1398  *      trig(NaN)    is that NaN;
1399  *
1400  * Accuracy:
1401  *      TRIG(x) returns trig(x) nearly rounded
1402  */
cos(double x)1403 double cos(double x) {
1404   double y[2], z = 0.0;
1405   int32_t n, ix;
1406 
1407   /* High word of x. */
1408   GET_HIGH_WORD(ix, x);
1409 
1410   /* |x| ~< pi/4 */
1411   ix &= 0x7fffffff;
1412   if (ix <= 0x3fe921fb) {
1413     return __kernel_cos(x, z);
1414   } else if (ix >= 0x7ff00000) {
1415     /* cos(Inf or NaN) is NaN */
1416     return x - x;
1417   } else {
1418     /* argument reduction needed */
1419     n = __ieee754_rem_pio2(x, y);
1420     switch (n & 3) {
1421       case 0:
1422         return __kernel_cos(y[0], y[1]);
1423       case 1:
1424         return -__kernel_sin(y[0], y[1], 1);
1425       case 2:
1426         return -__kernel_cos(y[0], y[1]);
1427       default:
1428         return __kernel_sin(y[0], y[1], 1);
1429     }
1430   }
1431 }
1432 
1433 /* exp(x)
1434  * Returns the exponential of x.
1435  *
1436  * Method
1437  *   1. Argument reduction:
1438  *      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
1439  *      Given x, find r and integer k such that
1440  *
1441  *               x = k*ln2 + r,  |r| <= 0.5*ln2.
1442  *
1443  *      Here r will be represented as r = hi-lo for better
1444  *      accuracy.
1445  *
1446  *   2. Approximation of exp(r) by a special rational function on
1447  *      the interval [0,0.34658]:
1448  *      Write
1449  *          R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
1450  *      We use a special Remes algorithm on [0,0.34658] to generate
1451  *      a polynomial of degree 5 to approximate R. The maximum error
1452  *      of this polynomial approximation is bounded by 2**-59. In
1453  *      other words,
1454  *          R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
1455  *      (where z=r*r, and the values of P1 to P5 are listed below)
1456  *      and
1457  *          |                  5          |     -59
1458  *          | 2.0+P1*z+...+P5*z   -  R(z) | <= 2
1459  *          |                             |
1460  *      The computation of exp(r) thus becomes
1461  *                             2*r
1462  *              exp(r) = 1 + -------
1463  *                            R - r
1464  *                                 r*R1(r)
1465  *                     = 1 + r + ----------- (for better accuracy)
1466  *                                2 - R1(r)
1467  *      where
1468  *                               2       4             10
1469  *              R1(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
1470  *
1471  *   3. Scale back to obtain exp(x):
1472  *      From step 1, we have
1473  *         exp(x) = 2^k * exp(r)
1474  *
1475  * Special cases:
1476  *      exp(INF) is INF, exp(NaN) is NaN;
1477  *      exp(-INF) is 0, and
1478  *      for finite argument, only exp(0)=1 is exact.
1479  *
1480  * Accuracy:
1481  *      according to an error analysis, the error is always less than
1482  *      1 ulp (unit in the last place).
1483  *
1484  * Misc. info.
1485  *      For IEEE double
1486  *          if x >  7.09782712893383973096e+02 then exp(x) overflow
1487  *          if x < -7.45133219101941108420e+02 then exp(x) underflow
1488  *
1489  * Constants:
1490  * The hexadecimal values are the intended ones for the following
1491  * constants. The decimal values may be used, provided that the
1492  * compiler will convert from decimal to binary accurately enough
1493  * to produce the hexadecimal values shown.
1494  */
exp(double x)1495 double exp(double x) {
1496   static const double
1497       one = 1.0,
1498       halF[2] = {0.5, -0.5},
1499       o_threshold = 7.09782712893383973096e+02,  /* 0x40862E42, 0xFEFA39EF */
1500       u_threshold = -7.45133219101941108420e+02, /* 0xc0874910, 0xD52D3051 */
1501       ln2HI[2] = {6.93147180369123816490e-01,    /* 0x3fe62e42, 0xfee00000 */
1502                   -6.93147180369123816490e-01},  /* 0xbfe62e42, 0xfee00000 */
1503       ln2LO[2] = {1.90821492927058770002e-10,    /* 0x3dea39ef, 0x35793c76 */
1504                   -1.90821492927058770002e-10},  /* 0xbdea39ef, 0x35793c76 */
1505       invln2 = 1.44269504088896338700e+00,       /* 0x3ff71547, 0x652b82fe */
1506       P1 = 1.66666666666666019037e-01,           /* 0x3FC55555, 0x5555553E */
1507       P2 = -2.77777777770155933842e-03,          /* 0xBF66C16C, 0x16BEBD93 */
1508       P3 = 6.61375632143793436117e-05,           /* 0x3F11566A, 0xAF25DE2C */
1509       P4 = -1.65339022054652515390e-06,          /* 0xBEBBBD41, 0xC5D26BF1 */
1510       P5 = 4.13813679705723846039e-08,           /* 0x3E663769, 0x72BEA4D0 */
1511       E = 2.718281828459045;                     /* 0x4005bf0a, 0x8b145769 */
1512 
1513   static volatile double
1514       huge = 1.0e+300,
1515       twom1000 = 9.33263618503218878990e-302, /* 2**-1000=0x01700000,0*/
1516       two1023 = 8.988465674311579539e307;     /* 0x1p1023 */
1517 
1518   double y, hi = 0.0, lo = 0.0, c, t, twopk;
1519   int32_t k = 0, xsb;
1520   uint32_t hx;
1521 
1522   GET_HIGH_WORD(hx, x);
1523   xsb = (hx >> 31) & 1; /* sign bit of x */
1524   hx &= 0x7fffffff;     /* high word of |x| */
1525 
1526   /* filter out non-finite argument */
1527   if (hx >= 0x40862E42) { /* if |x|>=709.78... */
1528     if (hx >= 0x7ff00000) {
1529       uint32_t lx;
1530       GET_LOW_WORD(lx, x);
1531       if (((hx & 0xfffff) | lx) != 0)
1532         return x + x; /* NaN */
1533       else
1534         return (xsb == 0) ? x : 0.0; /* exp(+-inf)={inf,0} */
1535     }
1536     if (x > o_threshold) return huge * huge;         /* overflow */
1537     if (x < u_threshold) return twom1000 * twom1000; /* underflow */
1538   }
1539 
1540   /* argument reduction */
1541   if (hx > 0x3fd62e42) {   /* if  |x| > 0.5 ln2 */
1542     if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
1543       /* TODO(rtoy): We special case exp(1) here to return the correct
1544        * value of E, as the computation below would get the last bit
1545        * wrong. We should probably fix the algorithm instead.
1546        */
1547       if (x == 1.0) return E;
1548       hi = x - ln2HI[xsb];
1549       lo = ln2LO[xsb];
1550       k = 1 - xsb - xsb;
1551     } else {
1552       k = static_cast<int>(invln2 * x + halF[xsb]);
1553       t = k;
1554       hi = x - t * ln2HI[0]; /* t*ln2HI is exact here */
1555       lo = t * ln2LO[0];
1556     }
1557     STRICT_ASSIGN(double, x, hi - lo);
1558   } else if (hx < 0x3e300000) {         /* when |x|<2**-28 */
1559     if (huge + x > one) return one + x; /* trigger inexact */
1560   } else {
1561     k = 0;
1562   }
1563 
1564   /* x is now in primary range */
1565   t = x * x;
1566   if (k >= -1021) {
1567     INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0);
1568   } else {
1569     INSERT_WORDS(twopk, 0x3ff00000 + ((k + 1000) << 20), 0);
1570   }
1571   c = x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
1572   if (k == 0) {
1573     return one - ((x * c) / (c - 2.0) - x);
1574   } else {
1575     y = one - ((lo - (x * c) / (2.0 - c)) - hi);
1576   }
1577   if (k >= -1021) {
1578     if (k == 1024) return y * 2.0 * two1023;
1579     return y * twopk;
1580   } else {
1581     return y * twopk * twom1000;
1582   }
1583 }
1584 
1585 /*
1586  * Method :
1587  *    1.Reduced x to positive by atanh(-x) = -atanh(x)
1588  *    2.For x>=0.5
1589  *              1              2x                          x
1590  *  atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
1591  *              2             1 - x                      1 - x
1592  *
1593  *   For x<0.5
1594  *  atanh(x) = 0.5*log1p(2x+2x*x/(1-x))
1595  *
1596  * Special cases:
1597  *  atanh(x) is NaN if |x| > 1 with signal;
1598  *  atanh(NaN) is that NaN with no signal;
1599  *  atanh(+-1) is +-INF with signal.
1600  *
1601  */
atanh(double x)1602 double atanh(double x) {
1603   static const double one = 1.0, huge = 1e300;
1604   static const double zero = 0.0;
1605 
1606   double t;
1607   int32_t hx, ix;
1608   uint32_t lx;
1609   EXTRACT_WORDS(hx, lx, x);
1610   ix = hx & 0x7fffffff;
1611   if ((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x3ff00000) /* |x|>1 */
1612     return (x - x) / (x - x);
1613   if (ix == 0x3ff00000) return x / zero;
1614   if (ix < 0x3e300000 && (huge + x) > zero) return x; /* x<2**-28 */
1615   SET_HIGH_WORD(x, ix);
1616   if (ix < 0x3fe00000) { /* x < 0.5 */
1617     t = x + x;
1618     t = 0.5 * log1p(t + t * x / (one - x));
1619   } else {
1620     t = 0.5 * log1p((x + x) / (one - x));
1621   }
1622   if (hx >= 0)
1623     return t;
1624   else
1625     return -t;
1626 }
1627 
1628 /* log(x)
1629  * Return the logrithm of x
1630  *
1631  * Method :
1632  *   1. Argument Reduction: find k and f such that
1633  *     x = 2^k * (1+f),
1634  *     where  sqrt(2)/2 < 1+f < sqrt(2) .
1635  *
1636  *   2. Approximation of log(1+f).
1637  *  Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1638  *     = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1639  *         = 2s + s*R
1640  *      We use a special Reme algorithm on [0,0.1716] to generate
1641  *  a polynomial of degree 14 to approximate R The maximum error
1642  *  of this polynomial approximation is bounded by 2**-58.45. In
1643  *  other words,
1644  *            2      4      6      8      10      12      14
1645  *      R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
1646  *    (the values of Lg1 to Lg7 are listed in the program)
1647  *  and
1648  *      |      2          14          |     -58.45
1649  *      | Lg1*s +...+Lg7*s    -  R(z) | <= 2
1650  *      |                             |
1651  *  Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1652  *  In order to guarantee error in log below 1ulp, we compute log
1653  *  by
1654  *    log(1+f) = f - s*(f - R)  (if f is not too large)
1655  *    log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1656  *
1657  *  3. Finally,  log(x) = k*ln2 + log(1+f).
1658  *          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1659  *     Here ln2 is split into two floating point number:
1660  *      ln2_hi + ln2_lo,
1661  *     where n*ln2_hi is always exact for |n| < 2000.
1662  *
1663  * Special cases:
1664  *  log(x) is NaN with signal if x < 0 (including -INF) ;
1665  *  log(+INF) is +INF; log(0) is -INF with signal;
1666  *  log(NaN) is that NaN with no signal.
1667  *
1668  * Accuracy:
1669  *  according to an error analysis, the error is always less than
1670  *  1 ulp (unit in the last place).
1671  *
1672  * Constants:
1673  * The hexadecimal values are the intended ones for the following
1674  * constants. The decimal values may be used, provided that the
1675  * compiler will convert from decimal to binary accurately enough
1676  * to produce the hexadecimal values shown.
1677  */
log(double x)1678 double log(double x) {
1679   static const double                      /* -- */
1680       ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1681       ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1682       two54 = 1.80143985094819840000e+16,  /* 43500000 00000000 */
1683       Lg1 = 6.666666666666735130e-01,      /* 3FE55555 55555593 */
1684       Lg2 = 3.999999999940941908e-01,      /* 3FD99999 9997FA04 */
1685       Lg3 = 2.857142874366239149e-01,      /* 3FD24924 94229359 */
1686       Lg4 = 2.222219843214978396e-01,      /* 3FCC71C5 1D8E78AF */
1687       Lg5 = 1.818357216161805012e-01,      /* 3FC74664 96CB03DE */
1688       Lg6 = 1.531383769920937332e-01,      /* 3FC39A09 D078C69F */
1689       Lg7 = 1.479819860511658591e-01;      /* 3FC2F112 DF3E5244 */
1690 
1691   static const double zero = 0.0;
1692   static volatile double vzero = 0.0;
1693 
1694   double hfsq, f, s, z, R, w, t1, t2, dk;
1695   int32_t k, hx, i, j;
1696   uint32_t lx;
1697 
1698   EXTRACT_WORDS(hx, lx, x);
1699 
1700   k = 0;
1701   if (hx < 0x00100000) { /* x < 2**-1022  */
1702     if (((hx & 0x7fffffff) | lx) == 0)
1703       return -two54 / vzero;           /* log(+-0)=-inf */
1704     if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
1705     k -= 54;
1706     x *= two54; /* subnormal number, scale up x */
1707     GET_HIGH_WORD(hx, x);
1708   }
1709   if (hx >= 0x7ff00000) return x + x;
1710   k += (hx >> 20) - 1023;
1711   hx &= 0x000fffff;
1712   i = (hx + 0x95f64) & 0x100000;
1713   SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
1714   k += (i >> 20);
1715   f = x - 1.0;
1716   if ((0x000fffff & (2 + hx)) < 3) { /* -2**-20 <= f < 2**-20 */
1717     if (f == zero) {
1718       if (k == 0) {
1719         return zero;
1720       } else {
1721         dk = static_cast<double>(k);
1722         return dk * ln2_hi + dk * ln2_lo;
1723       }
1724     }
1725     R = f * f * (0.5 - 0.33333333333333333 * f);
1726     if (k == 0) {
1727       return f - R;
1728     } else {
1729       dk = static_cast<double>(k);
1730       return dk * ln2_hi - ((R - dk * ln2_lo) - f);
1731     }
1732   }
1733   s = f / (2.0 + f);
1734   dk = static_cast<double>(k);
1735   z = s * s;
1736   i = hx - 0x6147a;
1737   w = z * z;
1738   j = 0x6b851 - hx;
1739   t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1740   t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1741   i |= j;
1742   R = t2 + t1;
1743   if (i > 0) {
1744     hfsq = 0.5 * f * f;
1745     if (k == 0)
1746       return f - (hfsq - s * (hfsq + R));
1747     else
1748       return dk * ln2_hi - ((hfsq - (s * (hfsq + R) + dk * ln2_lo)) - f);
1749   } else {
1750     if (k == 0)
1751       return f - s * (f - R);
1752     else
1753       return dk * ln2_hi - ((s * (f - R) - dk * ln2_lo) - f);
1754   }
1755 }
1756 
1757 /* double log1p(double x)
1758  *
1759  * Method :
1760  *   1. Argument Reduction: find k and f such that
1761  *      1+x = 2^k * (1+f),
1762  *     where  sqrt(2)/2 < 1+f < sqrt(2) .
1763  *
1764  *      Note. If k=0, then f=x is exact. However, if k!=0, then f
1765  *  may not be representable exactly. In that case, a correction
1766  *  term is need. Let u=1+x rounded. Let c = (1+x)-u, then
1767  *  log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
1768  *  and add back the correction term c/u.
1769  *  (Note: when x > 2**53, one can simply return log(x))
1770  *
1771  *   2. Approximation of log1p(f).
1772  *  Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1773  *     = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1774  *         = 2s + s*R
1775  *      We use a special Reme algorithm on [0,0.1716] to generate
1776  *  a polynomial of degree 14 to approximate R The maximum error
1777  *  of this polynomial approximation is bounded by 2**-58.45. In
1778  *  other words,
1779  *            2      4      6      8      10      12      14
1780  *      R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s  +Lp6*s  +Lp7*s
1781  *    (the values of Lp1 to Lp7 are listed in the program)
1782  *  and
1783  *      |      2          14          |     -58.45
1784  *      | Lp1*s +...+Lp7*s    -  R(z) | <= 2
1785  *      |                             |
1786  *  Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1787  *  In order to guarantee error in log below 1ulp, we compute log
1788  *  by
1789  *    log1p(f) = f - (hfsq - s*(hfsq+R)).
1790  *
1791  *  3. Finally, log1p(x) = k*ln2 + log1p(f).
1792  *           = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1793  *     Here ln2 is split into two floating point number:
1794  *      ln2_hi + ln2_lo,
1795  *     where n*ln2_hi is always exact for |n| < 2000.
1796  *
1797  * Special cases:
1798  *  log1p(x) is NaN with signal if x < -1 (including -INF) ;
1799  *  log1p(+INF) is +INF; log1p(-1) is -INF with signal;
1800  *  log1p(NaN) is that NaN with no signal.
1801  *
1802  * Accuracy:
1803  *  according to an error analysis, the error is always less than
1804  *  1 ulp (unit in the last place).
1805  *
1806  * Constants:
1807  * The hexadecimal values are the intended ones for the following
1808  * constants. The decimal values may be used, provided that the
1809  * compiler will convert from decimal to binary accurately enough
1810  * to produce the hexadecimal values shown.
1811  *
1812  * Note: Assuming log() return accurate answer, the following
1813  *   algorithm can be used to compute log1p(x) to within a few ULP:
1814  *
1815  *    u = 1+x;
1816  *    if(u==1.0) return x ; else
1817  *         return log(u)*(x/(u-1.0));
1818  *
1819  *   See HP-15C Advanced Functions Handbook, p.193.
1820  */
log1p(double x)1821 double log1p(double x) {
1822   static const double                      /* -- */
1823       ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1824       ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1825       two54 = 1.80143985094819840000e+16,  /* 43500000 00000000 */
1826       Lp1 = 6.666666666666735130e-01,      /* 3FE55555 55555593 */
1827       Lp2 = 3.999999999940941908e-01,      /* 3FD99999 9997FA04 */
1828       Lp3 = 2.857142874366239149e-01,      /* 3FD24924 94229359 */
1829       Lp4 = 2.222219843214978396e-01,      /* 3FCC71C5 1D8E78AF */
1830       Lp5 = 1.818357216161805012e-01,      /* 3FC74664 96CB03DE */
1831       Lp6 = 1.531383769920937332e-01,      /* 3FC39A09 D078C69F */
1832       Lp7 = 1.479819860511658591e-01;      /* 3FC2F112 DF3E5244 */
1833 
1834   static const double zero = 0.0;
1835   static volatile double vzero = 0.0;
1836 
1837   double hfsq, f, c, s, z, R, u;
1838   int32_t k, hx, hu, ax;
1839 
1840   GET_HIGH_WORD(hx, x);
1841   ax = hx & 0x7fffffff;
1842 
1843   k = 1;
1844   if (hx < 0x3FDA827A) {    /* 1+x < sqrt(2)+ */
1845     if (ax >= 0x3ff00000) { /* x <= -1.0 */
1846       if (x == -1.0)
1847         return -two54 / vzero; /* log1p(-1)=+inf */
1848       else
1849         return (x - x) / (x - x); /* log1p(x<-1)=NaN */
1850     }
1851     if (ax < 0x3e200000) {    /* |x| < 2**-29 */
1852       if (two54 + x > zero    /* raise inexact */
1853           && ax < 0x3c900000) /* |x| < 2**-54 */
1854         return x;
1855       else
1856         return x - x * x * 0.5;
1857     }
1858     if (hx > 0 || hx <= static_cast<int32_t>(0xbfd2bec4)) {
1859       k = 0;
1860       f = x;
1861       hu = 1;
1862     } /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
1863   }
1864   if (hx >= 0x7ff00000) return x + x;
1865   if (k != 0) {
1866     if (hx < 0x43400000) {
1867       STRICT_ASSIGN(double, u, 1.0 + x);
1868       GET_HIGH_WORD(hu, u);
1869       k = (hu >> 20) - 1023;
1870       c = (k > 0) ? 1.0 - (u - x) : x - (u - 1.0); /* correction term */
1871       c /= u;
1872     } else {
1873       u = x;
1874       GET_HIGH_WORD(hu, u);
1875       k = (hu >> 20) - 1023;
1876       c = 0;
1877     }
1878     hu &= 0x000fffff;
1879     /*
1880      * The approximation to sqrt(2) used in thresholds is not
1881      * critical.  However, the ones used above must give less
1882      * strict bounds than the one here so that the k==0 case is
1883      * never reached from here, since here we have committed to
1884      * using the correction term but don't use it if k==0.
1885      */
1886     if (hu < 0x6a09e) {                  /* u ~< sqrt(2) */
1887       SET_HIGH_WORD(u, hu | 0x3ff00000); /* normalize u */
1888     } else {
1889       k += 1;
1890       SET_HIGH_WORD(u, hu | 0x3fe00000); /* normalize u/2 */
1891       hu = (0x00100000 - hu) >> 2;
1892     }
1893     f = u - 1.0;
1894   }
1895   hfsq = 0.5 * f * f;
1896   if (hu == 0) { /* |f| < 2**-20 */
1897     if (f == zero) {
1898       if (k == 0) {
1899         return zero;
1900       } else {
1901         c += k * ln2_lo;
1902         return k * ln2_hi + c;
1903       }
1904     }
1905     R = hfsq * (1.0 - 0.66666666666666666 * f);
1906     if (k == 0)
1907       return f - R;
1908     else
1909       return k * ln2_hi - ((R - (k * ln2_lo + c)) - f);
1910   }
1911   s = f / (2.0 + f);
1912   z = s * s;
1913   R = z * (Lp1 +
1914            z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7))))));
1915   if (k == 0)
1916     return f - (hfsq - s * (hfsq + R));
1917   else
1918     return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);
1919 }
1920 
1921 /*
1922  * k_log1p(f):
1923  * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)].
1924  *
1925  * The following describes the overall strategy for computing
1926  * logarithms in base e.  The argument reduction and adding the final
1927  * term of the polynomial are done by the caller for increased accuracy
1928  * when different bases are used.
1929  *
1930  * Method :
1931  *   1. Argument Reduction: find k and f such that
1932  *         x = 2^k * (1+f),
1933  *         where  sqrt(2)/2 < 1+f < sqrt(2) .
1934  *
1935  *   2. Approximation of log(1+f).
1936  *      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1937  *            = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1938  *            = 2s + s*R
1939  *      We use a special Reme algorithm on [0,0.1716] to generate
1940  *      a polynomial of degree 14 to approximate R The maximum error
1941  *      of this polynomial approximation is bounded by 2**-58.45. In
1942  *      other words,
1943  *          2      4      6      8      10      12      14
1944  *          R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
1945  *      (the values of Lg1 to Lg7 are listed in the program)
1946  *      and
1947  *          |      2          14          |     -58.45
1948  *          | Lg1*s +...+Lg7*s    -  R(z) | <= 2
1949  *          |                             |
1950  *      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1951  *      In order to guarantee error in log below 1ulp, we compute log
1952  *      by
1953  *          log(1+f) = f - s*(f - R)            (if f is not too large)
1954  *          log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1955  *
1956  *   3. Finally,  log(x) = k*ln2 + log(1+f).
1957  *          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1958  *      Here ln2 is split into two floating point number:
1959  *          ln2_hi + ln2_lo,
1960  *      where n*ln2_hi is always exact for |n| < 2000.
1961  *
1962  * Special cases:
1963  *      log(x) is NaN with signal if x < 0 (including -INF) ;
1964  *      log(+INF) is +INF; log(0) is -INF with signal;
1965  *      log(NaN) is that NaN with no signal.
1966  *
1967  * Accuracy:
1968  *      according to an error analysis, the error is always less than
1969  *      1 ulp (unit in the last place).
1970  *
1971  * Constants:
1972  * The hexadecimal values are the intended ones for the following
1973  * constants. The decimal values may be used, provided that the
1974  * compiler will convert from decimal to binary accurately enough
1975  * to produce the hexadecimal values shown.
1976  */
1977 
1978 static const double Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
1979     Lg2 = 3.999999999940941908e-01,                 /* 3FD99999 9997FA04 */
1980     Lg3 = 2.857142874366239149e-01,                 /* 3FD24924 94229359 */
1981     Lg4 = 2.222219843214978396e-01,                 /* 3FCC71C5 1D8E78AF */
1982     Lg5 = 1.818357216161805012e-01,                 /* 3FC74664 96CB03DE */
1983     Lg6 = 1.531383769920937332e-01,                 /* 3FC39A09 D078C69F */
1984     Lg7 = 1.479819860511658591e-01;                 /* 3FC2F112 DF3E5244 */
1985 
1986 /*
1987  * We always inline k_log1p(), since doing so produces a
1988  * substantial performance improvement (~40% on amd64).
1989  */
k_log1p(double f)1990 static inline double k_log1p(double f) {
1991   double hfsq, s, z, R, w, t1, t2;
1992 
1993   s = f / (2.0 + f);
1994   z = s * s;
1995   w = z * z;
1996   t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1997   t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1998   R = t2 + t1;
1999   hfsq = 0.5 * f * f;
2000   return s * (hfsq + R);
2001 }
2002 
2003 /*
2004  * Return the base 2 logarithm of x.  See e_log.c and k_log.h for most
2005  * comments.
2006  *
2007  * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,
2008  * then does the combining and scaling steps
2009  *    log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k
2010  * in not-quite-routine extra precision.
2011  */
log2(double x)2012 double log2(double x) {
2013   static const double
2014       two54 = 1.80143985094819840000e+16,   /* 0x43500000, 0x00000000 */
2015       ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */
2016       ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */
2017 
2018   static const double zero = 0.0;
2019   static volatile double vzero = 0.0;
2020 
2021   double f, hfsq, hi, lo, r, val_hi, val_lo, w, y;
2022   int32_t i, k, hx;
2023   uint32_t lx;
2024 
2025   EXTRACT_WORDS(hx, lx, x);
2026 
2027   k = 0;
2028   if (hx < 0x00100000) { /* x < 2**-1022  */
2029     if (((hx & 0x7fffffff) | lx) == 0)
2030       return -two54 / vzero;           /* log(+-0)=-inf */
2031     if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
2032     k -= 54;
2033     x *= two54; /* subnormal number, scale up x */
2034     GET_HIGH_WORD(hx, x);
2035   }
2036   if (hx >= 0x7ff00000) return x + x;
2037   if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */
2038   k += (hx >> 20) - 1023;
2039   hx &= 0x000fffff;
2040   i = (hx + 0x95f64) & 0x100000;
2041   SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
2042   k += (i >> 20);
2043   y = static_cast<double>(k);
2044   f = x - 1.0;
2045   hfsq = 0.5 * f * f;
2046   r = k_log1p(f);
2047 
2048   /*
2049    * f-hfsq must (for args near 1) be evaluated in extra precision
2050    * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).
2051    * This is fairly efficient since f-hfsq only depends on f, so can
2052    * be evaluated in parallel with R.  Not combining hfsq with R also
2053    * keeps R small (though not as small as a true `lo' term would be),
2054    * so that extra precision is not needed for terms involving R.
2055    *
2056    * Compiler bugs involving extra precision used to break Dekker's
2057    * theorem for spitting f-hfsq as hi+lo, unless double_t was used
2058    * or the multi-precision calculations were avoided when double_t
2059    * has extra precision.  These problems are now automatically
2060    * avoided as a side effect of the optimization of combining the
2061    * Dekker splitting step with the clear-low-bits step.
2062    *
2063    * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra
2064    * precision to avoid a very large cancellation when x is very near
2065    * these values.  Unlike the above cancellations, this problem is
2066    * specific to base 2.  It is strange that adding +-1 is so much
2067    * harder than adding +-ln2 or +-log10_2.
2068    *
2069    * This uses Dekker's theorem to normalize y+val_hi, so the
2070    * compiler bugs are back in some configurations, sigh.  And I
2071    * don't want to used double_t to avoid them, since that gives a
2072    * pessimization and the support for avoiding the pessimization
2073    * is not yet available.
2074    *
2075    * The multi-precision calculations for the multiplications are
2076    * routine.
2077    */
2078   hi = f - hfsq;
2079   SET_LOW_WORD(hi, 0);
2080   lo = (f - hi) - hfsq + r;
2081   val_hi = hi * ivln2hi;
2082   val_lo = (lo + hi) * ivln2lo + lo * ivln2hi;
2083 
2084   /* spadd(val_hi, val_lo, y), except for not using double_t: */
2085   w = y + val_hi;
2086   val_lo += (y - w) + val_hi;
2087   val_hi = w;
2088 
2089   return val_lo + val_hi;
2090 }
2091 
2092 /*
2093  * Return the base 10 logarithm of x
2094  *
2095  * Method :
2096  *      Let log10_2hi = leading 40 bits of log10(2) and
2097  *          log10_2lo = log10(2) - log10_2hi,
2098  *          ivln10   = 1/log(10) rounded.
2099  *      Then
2100  *              n = ilogb(x),
2101  *              if(n<0)  n = n+1;
2102  *              x = scalbn(x,-n);
2103  *              log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x))
2104  *
2105  *  Note 1:
2106  *     To guarantee log10(10**n)=n, where 10**n is normal, the rounding
2107  *     mode must set to Round-to-Nearest.
2108  *  Note 2:
2109  *      [1/log(10)] rounded to 53 bits has error .198 ulps;
2110  *      log10 is monotonic at all binary break points.
2111  *
2112  *  Special cases:
2113  *      log10(x) is NaN if x < 0;
2114  *      log10(+INF) is +INF; log10(0) is -INF;
2115  *      log10(NaN) is that NaN;
2116  *      log10(10**N) = N  for N=0,1,...,22.
2117  */
log10(double x)2118 double log10(double x) {
2119   static const double
2120       two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
2121       ivln10 = 4.34294481903251816668e-01,
2122       log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
2123       log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
2124 
2125   static const double zero = 0.0;
2126   static volatile double vzero = 0.0;
2127 
2128   double y;
2129   int32_t i, k, hx;
2130   uint32_t lx;
2131 
2132   EXTRACT_WORDS(hx, lx, x);
2133 
2134   k = 0;
2135   if (hx < 0x00100000) { /* x < 2**-1022  */
2136     if (((hx & 0x7fffffff) | lx) == 0)
2137       return -two54 / vzero;           /* log(+-0)=-inf */
2138     if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
2139     k -= 54;
2140     x *= two54; /* subnormal number, scale up x */
2141     GET_HIGH_WORD(hx, x);
2142     GET_LOW_WORD(lx, x);
2143   }
2144   if (hx >= 0x7ff00000) return x + x;
2145   if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */
2146   k += (hx >> 20) - 1023;
2147 
2148   i = (k & 0x80000000) >> 31;
2149   hx = (hx & 0x000fffff) | ((0x3ff - i) << 20);
2150   y = k + i;
2151   SET_HIGH_WORD(x, hx);
2152   SET_LOW_WORD(x, lx);
2153 
2154   double z = y * log10_2lo + ivln10 * log(x);
2155   return z + y * log10_2hi;
2156 }
2157 
2158 /* expm1(x)
2159  * Returns exp(x)-1, the exponential of x minus 1.
2160  *
2161  * Method
2162  *   1. Argument reduction:
2163  *  Given x, find r and integer k such that
2164  *
2165  *               x = k*ln2 + r,  |r| <= 0.5*ln2 ~ 0.34658
2166  *
2167  *      Here a correction term c will be computed to compensate
2168  *  the error in r when rounded to a floating-point number.
2169  *
2170  *   2. Approximating expm1(r) by a special rational function on
2171  *  the interval [0,0.34658]:
2172  *  Since
2173  *      r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ...
2174  *  we define R1(r*r) by
2175  *      r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r)
2176  *  That is,
2177  *      R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
2178  *         = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
2179  *         = 1 - r^2/60 + r^4/2520 - r^6/100800 + ...
2180  *      We use a special Reme algorithm on [0,0.347] to generate
2181  *   a polynomial of degree 5 in r*r to approximate R1. The
2182  *  maximum error of this polynomial approximation is bounded
2183  *  by 2**-61. In other words,
2184  *      R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
2185  *  where   Q1  =  -1.6666666666666567384E-2,
2186  *     Q2  =   3.9682539681370365873E-4,
2187  *     Q3  =  -9.9206344733435987357E-6,
2188  *     Q4  =   2.5051361420808517002E-7,
2189  *     Q5  =  -6.2843505682382617102E-9;
2190  *    z   =  r*r,
2191  *  with error bounded by
2192  *      |                  5           |     -61
2193  *      | 1.0+Q1*z+...+Q5*z   -  R1(z) | <= 2
2194  *      |                              |
2195  *
2196  *  expm1(r) = exp(r)-1 is then computed by the following
2197  *   specific way which minimize the accumulation rounding error:
2198  *             2     3
2199  *            r     r    [ 3 - (R1 + R1*r/2)  ]
2200  *        expm1(r) = r + --- + --- * [--------------------]
2201  *                  2     2    [ 6 - r*(3 - R1*r/2) ]
2202  *
2203  *  To compensate the error in the argument reduction, we use
2204  *    expm1(r+c) = expm1(r) + c + expm1(r)*c
2205  *         ~ expm1(r) + c + r*c
2206  *  Thus c+r*c will be added in as the correction terms for
2207  *  expm1(r+c). Now rearrange the term to avoid optimization
2208  *   screw up:
2209  *            (      2                                    2 )
2210  *            ({  ( r    [ R1 -  (3 - R1*r/2) ]  )  }    r  )
2211  *   expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
2212  *                  ({  ( 2    [ 6 - r*(3 - R1*r/2) ]  )  }    2  )
2213  *                      (                                             )
2214  *
2215  *       = r - E
2216  *   3. Scale back to obtain expm1(x):
2217  *  From step 1, we have
2218  *     expm1(x) = either 2^k*[expm1(r)+1] - 1
2219  *        = or     2^k*[expm1(r) + (1-2^-k)]
2220  *   4. Implementation notes:
2221  *  (A). To save one multiplication, we scale the coefficient Qi
2222  *       to Qi*2^i, and replace z by (x^2)/2.
2223  *  (B). To achieve maximum accuracy, we compute expm1(x) by
2224  *    (i)   if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
2225  *    (ii)  if k=0, return r-E
2226  *    (iii) if k=-1, return 0.5*(r-E)-0.5
2227  *        (iv)  if k=1 if r < -0.25, return 2*((r+0.5)- E)
2228  *                  else       return  1.0+2.0*(r-E);
2229  *    (v)   if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1)
2230  *    (vi)  if k <= 20, return 2^k((1-2^-k)-(E-r)), else
2231  *    (vii) return 2^k(1-((E+2^-k)-r))
2232  *
2233  * Special cases:
2234  *  expm1(INF) is INF, expm1(NaN) is NaN;
2235  *  expm1(-INF) is -1, and
2236  *  for finite argument, only expm1(0)=0 is exact.
2237  *
2238  * Accuracy:
2239  *  according to an error analysis, the error is always less than
2240  *  1 ulp (unit in the last place).
2241  *
2242  * Misc. info.
2243  *  For IEEE double
2244  *      if x >  7.09782712893383973096e+02 then expm1(x) overflow
2245  *
2246  * Constants:
2247  * The hexadecimal values are the intended ones for the following
2248  * constants. The decimal values may be used, provided that the
2249  * compiler will convert from decimal to binary accurately enough
2250  * to produce the hexadecimal values shown.
2251  */
expm1(double x)2252 double expm1(double x) {
2253   static const double
2254       one = 1.0,
2255       tiny = 1.0e-300,
2256       o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
2257       ln2_hi = 6.93147180369123816490e-01,      /* 0x3fe62e42, 0xfee00000 */
2258       ln2_lo = 1.90821492927058770002e-10,      /* 0x3dea39ef, 0x35793c76 */
2259       invln2 = 1.44269504088896338700e+00,      /* 0x3ff71547, 0x652b82fe */
2260       /* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs =
2261          x*x/2: */
2262       Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */
2263       Q2 = 1.58730158725481460165e-03,  /* 3F5A01A0 19FE5585 */
2264       Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
2265       Q4 = 4.00821782732936239552e-06,  /* 3ED0CFCA 86E65239 */
2266       Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
2267 
2268   static volatile double huge = 1.0e+300;
2269 
2270   double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
2271   int32_t k, xsb;
2272   uint32_t hx;
2273 
2274   GET_HIGH_WORD(hx, x);
2275   xsb = hx & 0x80000000; /* sign bit of x */
2276   hx &= 0x7fffffff;      /* high word of |x| */
2277 
2278   /* filter out huge and non-finite argument */
2279   if (hx >= 0x4043687A) {   /* if |x|>=56*ln2 */
2280     if (hx >= 0x40862E42) { /* if |x|>=709.78... */
2281       if (hx >= 0x7ff00000) {
2282         uint32_t low;
2283         GET_LOW_WORD(low, x);
2284         if (((hx & 0xfffff) | low) != 0)
2285           return x + x; /* NaN */
2286         else
2287           return (xsb == 0) ? x : -1.0; /* exp(+-inf)={inf,-1} */
2288       }
2289       if (x > o_threshold) return huge * huge; /* overflow */
2290     }
2291     if (xsb != 0) {        /* x < -56*ln2, return -1.0 with inexact */
2292       if (x + tiny < 0.0)  /* raise inexact */
2293         return tiny - one; /* return -1 */
2294     }
2295   }
2296 
2297   /* argument reduction */
2298   if (hx > 0x3fd62e42) {   /* if  |x| > 0.5 ln2 */
2299     if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
2300       if (xsb == 0) {
2301         hi = x - ln2_hi;
2302         lo = ln2_lo;
2303         k = 1;
2304       } else {
2305         hi = x + ln2_hi;
2306         lo = -ln2_lo;
2307         k = -1;
2308       }
2309     } else {
2310       k = invln2 * x + ((xsb == 0) ? 0.5 : -0.5);
2311       t = k;
2312       hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
2313       lo = t * ln2_lo;
2314     }
2315     STRICT_ASSIGN(double, x, hi - lo);
2316     c = (hi - x) - lo;
2317   } else if (hx < 0x3c900000) { /* when |x|<2**-54, return x */
2318     t = huge + x;               /* return x with inexact flags when x!=0 */
2319     return x - (t - (huge + x));
2320   } else {
2321     k = 0;
2322   }
2323 
2324   /* x is now in primary range */
2325   hfx = 0.5 * x;
2326   hxs = x * hfx;
2327   r1 = one + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
2328   t = 3.0 - r1 * hfx;
2329   e = hxs * ((r1 - t) / (6.0 - x * t));
2330   if (k == 0) {
2331     return x - (x * e - hxs); /* c is 0 */
2332   } else {
2333     INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0); /* 2^k */
2334     e = (x * (e - c) - c);
2335     e -= hxs;
2336     if (k == -1) return 0.5 * (x - e) - 0.5;
2337     if (k == 1) {
2338       if (x < -0.25)
2339         return -2.0 * (e - (x + 0.5));
2340       else
2341         return one + 2.0 * (x - e);
2342     }
2343     if (k <= -2 || k > 56) { /* suffice to return exp(x)-1 */
2344       y = one - (e - x);
2345       // TODO(mvstanton): is this replacement for the hex float
2346       // sufficient?
2347       // if (k == 1024) y = y*2.0*0x1p1023;
2348       if (k == 1024)
2349         y = y * 2.0 * 8.98846567431158e+307;
2350       else
2351         y = y * twopk;
2352       return y - one;
2353     }
2354     t = one;
2355     if (k < 20) {
2356       SET_HIGH_WORD(t, 0x3ff00000 - (0x200000 >> k)); /* t=1-2^-k */
2357       y = t - (e - x);
2358       y = y * twopk;
2359     } else {
2360       SET_HIGH_WORD(t, ((0x3ff - k) << 20)); /* 2^-k */
2361       y = x - (e + t);
2362       y += one;
2363       y = y * twopk;
2364     }
2365   }
2366   return y;
2367 }
2368 
cbrt(double x)2369 double cbrt(double x) {
2370   static const uint32_t
2371       B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
2372       B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
2373 
2374   /* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
2375   static const double P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */
2376       P1 = -1.88497979543377169875,                /* 0xbffe28e0, 0x92f02420 */
2377       P2 = 1.621429720105354466140,                /* 0x3ff9f160, 0x4a49d6c2 */
2378       P3 = -0.758397934778766047437,               /* 0xbfe844cb, 0xbee751d9 */
2379       P4 = 0.145996192886612446982;                /* 0x3fc2b000, 0xd4e4edd7 */
2380 
2381   int32_t hx;
2382   union {
2383     double value;
2384     uint64_t bits;
2385   } u;
2386   double r, s, t = 0.0, w;
2387   uint32_t sign;
2388   uint32_t high, low;
2389 
2390   EXTRACT_WORDS(hx, low, x);
2391   sign = hx & 0x80000000; /* sign= sign(x) */
2392   hx ^= sign;
2393   if (hx >= 0x7ff00000) return (x + x); /* cbrt(NaN,INF) is itself */
2394 
2395   /*
2396    * Rough cbrt to 5 bits:
2397    *    cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
2398    * where e is integral and >= 0, m is real and in [0, 1), and "/" and
2399    * "%" are integer division and modulus with rounding towards minus
2400    * infinity.  The RHS is always >= the LHS and has a maximum relative
2401    * error of about 1 in 16.  Adding a bias of -0.03306235651 to the
2402    * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
2403    * floating point representation, for finite positive normal values,
2404    * ordinary integer divison of the value in bits magically gives
2405    * almost exactly the RHS of the above provided we first subtract the
2406    * exponent bias (1023 for doubles) and later add it back.  We do the
2407    * subtraction virtually to keep e >= 0 so that ordinary integer
2408    * division rounds towards minus infinity; this is also efficient.
2409    */
2410   if (hx < 0x00100000) {             /* zero or subnormal? */
2411     if ((hx | low) == 0) return (x); /* cbrt(0) is itself */
2412     SET_HIGH_WORD(t, 0x43500000);    /* set t= 2**54 */
2413     t *= x;
2414     GET_HIGH_WORD(high, t);
2415     INSERT_WORDS(t, sign | ((high & 0x7fffffff) / 3 + B2), 0);
2416   } else {
2417     INSERT_WORDS(t, sign | (hx / 3 + B1), 0);
2418   }
2419 
2420   /*
2421    * New cbrt to 23 bits:
2422    *    cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
2423    * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
2424    * to within 2**-23.5 when |r - 1| < 1/10.  The rough approximation
2425    * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
2426    * gives us bounds for r = t**3/x.
2427    *
2428    * Try to optimize for parallel evaluation as in k_tanf.c.
2429    */
2430   r = (t * t) * (t / x);
2431   t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
2432 
2433   /*
2434    * Round t away from zero to 23 bits (sloppily except for ensuring that
2435    * the result is larger in magnitude than cbrt(x) but not much more than
2436    * 2 23-bit ulps larger).  With rounding towards zero, the error bound
2437    * would be ~5/6 instead of ~4/6.  With a maximum error of 2 23-bit ulps
2438    * in the rounded t, the infinite-precision error in the Newton
2439    * approximation barely affects third digit in the final error
2440    * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
2441    * before the final error is larger than 0.667 ulps.
2442    */
2443   u.value = t;
2444   u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL;
2445   t = u.value;
2446 
2447   /* one step Newton iteration to 53 bits with error < 0.667 ulps */
2448   s = t * t;             /* t*t is exact */
2449   r = x / s;             /* error <= 0.5 ulps; |r| < |t| */
2450   w = t + t;             /* t+t is exact */
2451   r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */
2452   t = t + t * r;         /* error <= 0.5 + 0.5/3 + epsilon */
2453 
2454   return (t);
2455 }
2456 
2457 /* sin(x)
2458  * Return sine function of x.
2459  *
2460  * kernel function:
2461  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
2462  *      __kernel_cos            ... cose function on [-pi/4,pi/4]
2463  *      __ieee754_rem_pio2      ... argument reduction routine
2464  *
2465  * Method.
2466  *      Let S,C and T denote the sin, cos and tan respectively on
2467  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2468  *      in [-pi/4 , +pi/4], and let n = k mod 4.
2469  *      We have
2470  *
2471  *          n        sin(x)      cos(x)        tan(x)
2472  *     ----------------------------------------------------------
2473  *          0          S           C             T
2474  *          1          C          -S            -1/T
2475  *          2         -S          -C             T
2476  *          3         -C           S            -1/T
2477  *     ----------------------------------------------------------
2478  *
2479  * Special cases:
2480  *      Let trig be any of sin, cos, or tan.
2481  *      trig(+-INF)  is NaN, with signals;
2482  *      trig(NaN)    is that NaN;
2483  *
2484  * Accuracy:
2485  *      TRIG(x) returns trig(x) nearly rounded
2486  */
sin(double x)2487 double sin(double x) {
2488   double y[2], z = 0.0;
2489   int32_t n, ix;
2490 
2491   /* High word of x. */
2492   GET_HIGH_WORD(ix, x);
2493 
2494   /* |x| ~< pi/4 */
2495   ix &= 0x7fffffff;
2496   if (ix <= 0x3fe921fb) {
2497     return __kernel_sin(x, z, 0);
2498   } else if (ix >= 0x7ff00000) {
2499     /* sin(Inf or NaN) is NaN */
2500     return x - x;
2501   } else {
2502     /* argument reduction needed */
2503     n = __ieee754_rem_pio2(x, y);
2504     switch (n & 3) {
2505       case 0:
2506         return __kernel_sin(y[0], y[1], 1);
2507       case 1:
2508         return __kernel_cos(y[0], y[1]);
2509       case 2:
2510         return -__kernel_sin(y[0], y[1], 1);
2511       default:
2512         return -__kernel_cos(y[0], y[1]);
2513     }
2514   }
2515 }
2516 
2517 /* tan(x)
2518  * Return tangent function of x.
2519  *
2520  * kernel function:
2521  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
2522  *      __ieee754_rem_pio2      ... argument reduction routine
2523  *
2524  * Method.
2525  *      Let S,C and T denote the sin, cos and tan respectively on
2526  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2527  *      in [-pi/4 , +pi/4], and let n = k mod 4.
2528  *      We have
2529  *
2530  *          n        sin(x)      cos(x)        tan(x)
2531  *     ----------------------------------------------------------
2532  *          0          S           C             T
2533  *          1          C          -S            -1/T
2534  *          2         -S          -C             T
2535  *          3         -C           S            -1/T
2536  *     ----------------------------------------------------------
2537  *
2538  * Special cases:
2539  *      Let trig be any of sin, cos, or tan.
2540  *      trig(+-INF)  is NaN, with signals;
2541  *      trig(NaN)    is that NaN;
2542  *
2543  * Accuracy:
2544  *      TRIG(x) returns trig(x) nearly rounded
2545  */
tan(double x)2546 double tan(double x) {
2547   double y[2], z = 0.0;
2548   int32_t n, ix;
2549 
2550   /* High word of x. */
2551   GET_HIGH_WORD(ix, x);
2552 
2553   /* |x| ~< pi/4 */
2554   ix &= 0x7fffffff;
2555   if (ix <= 0x3fe921fb) {
2556     return __kernel_tan(x, z, 1);
2557   } else if (ix >= 0x7ff00000) {
2558     /* tan(Inf or NaN) is NaN */
2559     return x - x; /* NaN */
2560   } else {
2561     /* argument reduction needed */
2562     n = __ieee754_rem_pio2(x, y);
2563     /* 1 -> n even, -1 -> n odd */
2564     return __kernel_tan(y[0], y[1], 1 - ((n & 1) << 1));
2565   }
2566 }
2567 
2568 /*
2569  * ES6 draft 09-27-13, section 20.2.2.12.
2570  * Math.cosh
2571  * Method :
2572  * mathematically cosh(x) if defined to be (exp(x)+exp(-x))/2
2573  *      1. Replace x by |x| (cosh(x) = cosh(-x)).
2574  *      2.
2575  *                                                      [ exp(x) - 1 ]^2
2576  *          0        <= x <= ln2/2  :  cosh(x) := 1 + -------------------
2577  *                                                         2*exp(x)
2578  *
2579  *                                                 exp(x) + 1/exp(x)
2580  *          ln2/2    <= x <= 22     :  cosh(x) := -------------------
2581  *                                                        2
2582  *          22       <= x <= lnovft :  cosh(x) := exp(x)/2
2583  *          lnovft   <= x <= ln2ovft:  cosh(x) := exp(x/2)/2 * exp(x/2)
2584  *          ln2ovft  <  x           :  cosh(x) := huge*huge (overflow)
2585  *
2586  * Special cases:
2587  *      cosh(x) is |x| if x is +INF, -INF, or NaN.
2588  *      only cosh(0)=1 is exact for finite x.
2589  */
cosh(double x)2590 double cosh(double x) {
2591   static const double KCOSH_OVERFLOW = 710.4758600739439;
2592   static const double one = 1.0, half = 0.5;
2593   static volatile double huge = 1.0e+300;
2594 
2595   int32_t ix;
2596 
2597   /* High word of |x|. */
2598   GET_HIGH_WORD(ix, x);
2599   ix &= 0x7fffffff;
2600 
2601   // |x| in [0,0.5*log2], return 1+expm1(|x|)^2/(2*exp(|x|))
2602   if (ix < 0x3fd62e43) {
2603     double t = expm1(fabs(x));
2604     double w = one + t;
2605     // For |x| < 2^-55, cosh(x) = 1
2606     if (ix < 0x3c800000) return w;
2607     return one + (t * t) / (w + w);
2608   }
2609 
2610   // |x| in [0.5*log2, 22], return (exp(|x|)+1/exp(|x|)/2
2611   if (ix < 0x40360000) {
2612     double t = exp(fabs(x));
2613     return half * t + half / t;
2614   }
2615 
2616   // |x| in [22, log(maxdouble)], return half*exp(|x|)
2617   if (ix < 0x40862e42) return half * exp(fabs(x));
2618 
2619   // |x| in [log(maxdouble), overflowthreshold]
2620   if (fabs(x) <= KCOSH_OVERFLOW) {
2621     double w = exp(half * fabs(x));
2622     double t = half * w;
2623     return t * w;
2624   }
2625 
2626   /* x is INF or NaN */
2627   if (ix >= 0x7ff00000) return x * x;
2628 
2629   // |x| > overflowthreshold.
2630   return huge * huge;
2631 }
2632 
2633 /*
2634  * ES6 draft 09-27-13, section 20.2.2.30.
2635  * Math.sinh
2636  * Method :
2637  * mathematically sinh(x) if defined to be (exp(x)-exp(-x))/2
2638  *      1. Replace x by |x| (sinh(-x) = -sinh(x)).
2639  *      2.
2640  *                                                  E + E/(E+1)
2641  *          0        <= x <= 22     :  sinh(x) := --------------, E=expm1(x)
2642  *                                                      2
2643  *
2644  *          22       <= x <= lnovft :  sinh(x) := exp(x)/2
2645  *          lnovft   <= x <= ln2ovft:  sinh(x) := exp(x/2)/2 * exp(x/2)
2646  *          ln2ovft  <  x           :  sinh(x) := x*shuge (overflow)
2647  *
2648  * Special cases:
2649  *      sinh(x) is |x| if x is +Infinity, -Infinity, or NaN.
2650  *      only sinh(0)=0 is exact for finite x.
2651  */
sinh(double x)2652 double sinh(double x) {
2653   static const double KSINH_OVERFLOW = 710.4758600739439,
2654                       TWO_M28 =
2655                           3.725290298461914e-9,  // 2^-28, empty lower half
2656       LOG_MAXD = 709.7822265625;  // 0x40862e42 00000000, empty lower half
2657   static const double shuge = 1.0e307;
2658 
2659   double h = (x < 0) ? -0.5 : 0.5;
2660   // |x| in [0, 22]. return sign(x)*0.5*(E+E/(E+1))
2661   double ax = fabs(x);
2662   if (ax < 22) {
2663     // For |x| < 2^-28, sinh(x) = x
2664     if (ax < TWO_M28) return x;
2665     double t = expm1(ax);
2666     if (ax < 1) {
2667       return h * (2 * t - t * t / (t + 1));
2668     }
2669     return h * (t + t / (t + 1));
2670   }
2671   // |x| in [22, log(maxdouble)], return 0.5 * exp(|x|)
2672   if (ax < LOG_MAXD) return h * exp(ax);
2673   // |x| in [log(maxdouble), overflowthreshold]
2674   // overflowthreshold = 710.4758600739426
2675   if (ax <= KSINH_OVERFLOW) {
2676     double w = exp(0.5 * ax);
2677     double t = h * w;
2678     return t * w;
2679   }
2680   // |x| > overflowthreshold or is NaN.
2681   // Return Infinity of the appropriate sign or NaN.
2682   return x * shuge;
2683 }
2684 
2685 /* Tanh(x)
2686  * Return the Hyperbolic Tangent of x
2687  *
2688  * Method :
2689  *                                 x    -x
2690  *                                e  - e
2691  *  0. tanh(x) is defined to be -----------
2692  *                                 x    -x
2693  *                                e  + e
2694  *  1. reduce x to non-negative by tanh(-x) = -tanh(x).
2695  *  2.  0      <= x <  2**-28 : tanh(x) := x with inexact if x != 0
2696  *                                          -t
2697  *      2**-28 <= x <  1      : tanh(x) := -----; t = expm1(-2x)
2698  *                                         t + 2
2699  *                                               2
2700  *      1      <= x <  22     : tanh(x) := 1 - -----; t = expm1(2x)
2701  *                                             t + 2
2702  *      22     <= x <= INF    : tanh(x) := 1.
2703  *
2704  * Special cases:
2705  *      tanh(NaN) is NaN;
2706  *      only tanh(0)=0 is exact for finite argument.
2707  */
tanh(double x)2708 double tanh(double x) {
2709   static const volatile double tiny = 1.0e-300;
2710   static const double one = 1.0, two = 2.0, huge = 1.0e300;
2711   double t, z;
2712   int32_t jx, ix;
2713 
2714   GET_HIGH_WORD(jx, x);
2715   ix = jx & 0x7fffffff;
2716 
2717   /* x is INF or NaN */
2718   if (ix >= 0x7ff00000) {
2719     if (jx >= 0)
2720       return one / x + one; /* tanh(+-inf)=+-1 */
2721     else
2722       return one / x - one; /* tanh(NaN) = NaN */
2723   }
2724 
2725   /* |x| < 22 */
2726   if (ix < 0x40360000) {            /* |x|<22 */
2727     if (ix < 0x3e300000) {          /* |x|<2**-28 */
2728       if (huge + x > one) return x; /* tanh(tiny) = tiny with inexact */
2729     }
2730     if (ix >= 0x3ff00000) { /* |x|>=1  */
2731       t = expm1(two * fabs(x));
2732       z = one - two / (t + two);
2733     } else {
2734       t = expm1(-two * fabs(x));
2735       z = -t / (t + two);
2736     }
2737     /* |x| >= 22, return +-1 */
2738   } else {
2739     z = one - tiny; /* raise inexact flag */
2740   }
2741   return (jx >= 0) ? z : -z;
2742 }
2743 
2744 }  // namespace ieee754
2745 }  // namespace base
2746 }  // namespace v8
2747