1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).
2 //
3 // ====================================================
4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 //
6 // Developed at SunSoft, a Sun Microsystems, Inc. business.
7 // Permission to use, copy, modify, and distribute this
8 // software is freely granted, provided that this notice
9 // is preserved.
10 // ====================================================
11 //
12 // The original source code covered by the above license above has been
13 // modified significantly by Google Inc.
14 // Copyright 2016 the V8 project authors. All rights reserved.
15
16 #include "src/base/ieee754.h"
17
18 #include <cmath>
19 #include <limits>
20
21 #include "src/base/build_config.h"
22 #include "src/base/macros.h"
23
24 namespace v8 {
25 namespace base {
26 namespace ieee754 {
27
28 namespace {
29
30 /* Disable "potential divide by 0" warning in Visual Studio compiler. */
31
32 #if V8_CC_MSVC
33
34 #pragma warning(disable : 4723)
35
36 #endif
37
38 /*
39 * The original fdlibm code used statements like:
40 * n0 = ((*(int*)&one)>>29)^1; * index of high word *
41 * ix0 = *(n0+(int*)&x); * high word of x *
42 * ix1 = *((1-n0)+(int*)&x); * low word of x *
43 * to dig two 32 bit words out of the 64 bit IEEE floating point
44 * value. That is non-ANSI, and, moreover, the gcc instruction
45 * scheduler gets it wrong. We instead use the following macros.
46 * Unlike the original code, we determine the endianness at compile
47 * time, not at run time; I don't see much benefit to selecting
48 * endianness at run time.
49 */
50
51 /*
52 * A union which permits us to convert between a double and two 32 bit
53 * ints.
54 */
55
56 #if V8_TARGET_LITTLE_ENDIAN
57
58 typedef union {
59 double value;
60 struct {
61 uint32_t lsw;
62 uint32_t msw;
63 } parts;
64 struct {
65 uint64_t w;
66 } xparts;
67 } ieee_double_shape_type;
68
69 #else
70
71 typedef union {
72 double value;
73 struct {
74 uint32_t msw;
75 uint32_t lsw;
76 } parts;
77 struct {
78 uint64_t w;
79 } xparts;
80 } ieee_double_shape_type;
81
82 #endif
83
84 /* Get two 32 bit ints from a double. */
85
86 #define EXTRACT_WORDS(ix0, ix1, d) \
87 do { \
88 ieee_double_shape_type ew_u; \
89 ew_u.value = (d); \
90 (ix0) = ew_u.parts.msw; \
91 (ix1) = ew_u.parts.lsw; \
92 } while (0)
93
94 /* Get a 64-bit int from a double. */
95 #define EXTRACT_WORD64(ix, d) \
96 do { \
97 ieee_double_shape_type ew_u; \
98 ew_u.value = (d); \
99 (ix) = ew_u.xparts.w; \
100 } while (0)
101
102 /* Get the more significant 32 bit int from a double. */
103
104 #define GET_HIGH_WORD(i, d) \
105 do { \
106 ieee_double_shape_type gh_u; \
107 gh_u.value = (d); \
108 (i) = gh_u.parts.msw; \
109 } while (0)
110
111 /* Get the less significant 32 bit int from a double. */
112
113 #define GET_LOW_WORD(i, d) \
114 do { \
115 ieee_double_shape_type gl_u; \
116 gl_u.value = (d); \
117 (i) = gl_u.parts.lsw; \
118 } while (0)
119
120 /* Set a double from two 32 bit ints. */
121
122 #define INSERT_WORDS(d, ix0, ix1) \
123 do { \
124 ieee_double_shape_type iw_u; \
125 iw_u.parts.msw = (ix0); \
126 iw_u.parts.lsw = (ix1); \
127 (d) = iw_u.value; \
128 } while (0)
129
130 /* Set a double from a 64-bit int. */
131 #define INSERT_WORD64(d, ix) \
132 do { \
133 ieee_double_shape_type iw_u; \
134 iw_u.xparts.w = (ix); \
135 (d) = iw_u.value; \
136 } while (0)
137
138 /* Set the more significant 32 bits of a double from an int. */
139
140 #define SET_HIGH_WORD(d, v) \
141 do { \
142 ieee_double_shape_type sh_u; \
143 sh_u.value = (d); \
144 sh_u.parts.msw = (v); \
145 (d) = sh_u.value; \
146 } while (0)
147
148 /* Set the less significant 32 bits of a double from an int. */
149
150 #define SET_LOW_WORD(d, v) \
151 do { \
152 ieee_double_shape_type sl_u; \
153 sl_u.value = (d); \
154 sl_u.parts.lsw = (v); \
155 (d) = sl_u.value; \
156 } while (0)
157
158 /* Support macro. */
159
160 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval))
161
162 int32_t __ieee754_rem_pio2(double x, double *y) WARN_UNUSED_RESULT;
163 double __kernel_cos(double x, double y) WARN_UNUSED_RESULT;
164 int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
165 const int32_t *ipio2) WARN_UNUSED_RESULT;
166 double __kernel_sin(double x, double y, int iy) WARN_UNUSED_RESULT;
167
168 /* __ieee754_rem_pio2(x,y)
169 *
170 * return the remainder of x rem pi/2 in y[0]+y[1]
171 * use __kernel_rem_pio2()
172 */
__ieee754_rem_pio2(double x,double * y)173 int32_t __ieee754_rem_pio2(double x, double *y) {
174 /*
175 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
176 */
177 static const int32_t two_over_pi[] = {
178 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C,
179 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649,
180 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44,
181 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B,
182 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D,
183 0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
184 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330,
185 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08,
186 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA,
187 0x73A8C9, 0x60E27B, 0xC08C6B,
188 };
189
190 static const int32_t npio2_hw[] = {
191 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
192 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
193 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
194 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
195 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
196 0x404858EB, 0x404921FB,
197 };
198
199 /*
200 * invpio2: 53 bits of 2/pi
201 * pio2_1: first 33 bit of pi/2
202 * pio2_1t: pi/2 - pio2_1
203 * pio2_2: second 33 bit of pi/2
204 * pio2_2t: pi/2 - (pio2_1+pio2_2)
205 * pio2_3: third 33 bit of pi/2
206 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
207 */
208
209 static const double
210 zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
211 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
212 two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
213 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
214 pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
215 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
216 pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
217 pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
218 pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
219 pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
220
221 double z, w, t, r, fn;
222 double tx[3];
223 int32_t e0, i, j, nx, n, ix, hx;
224 uint32_t low;
225
226 z = 0;
227 GET_HIGH_WORD(hx, x); /* high word of x */
228 ix = hx & 0x7fffffff;
229 if (ix <= 0x3fe921fb) { /* |x| ~<= pi/4 , no need for reduction */
230 y[0] = x;
231 y[1] = 0;
232 return 0;
233 }
234 if (ix < 0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
235 if (hx > 0) {
236 z = x - pio2_1;
237 if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */
238 y[0] = z - pio2_1t;
239 y[1] = (z - y[0]) - pio2_1t;
240 } else { /* near pi/2, use 33+33+53 bit pi */
241 z -= pio2_2;
242 y[0] = z - pio2_2t;
243 y[1] = (z - y[0]) - pio2_2t;
244 }
245 return 1;
246 } else { /* negative x */
247 z = x + pio2_1;
248 if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */
249 y[0] = z + pio2_1t;
250 y[1] = (z - y[0]) + pio2_1t;
251 } else { /* near pi/2, use 33+33+53 bit pi */
252 z += pio2_2;
253 y[0] = z + pio2_2t;
254 y[1] = (z - y[0]) + pio2_2t;
255 }
256 return -1;
257 }
258 }
259 if (ix <= 0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
260 t = fabs(x);
261 n = static_cast<int32_t>(t * invpio2 + half);
262 fn = static_cast<double>(n);
263 r = t - fn * pio2_1;
264 w = fn * pio2_1t; /* 1st round good to 85 bit */
265 if (n < 32 && ix != npio2_hw[n - 1]) {
266 y[0] = r - w; /* quick check no cancellation */
267 } else {
268 uint32_t high;
269 j = ix >> 20;
270 y[0] = r - w;
271 GET_HIGH_WORD(high, y[0]);
272 i = j - ((high >> 20) & 0x7ff);
273 if (i > 16) { /* 2nd iteration needed, good to 118 */
274 t = r;
275 w = fn * pio2_2;
276 r = t - w;
277 w = fn * pio2_2t - ((t - r) - w);
278 y[0] = r - w;
279 GET_HIGH_WORD(high, y[0]);
280 i = j - ((high >> 20) & 0x7ff);
281 if (i > 49) { /* 3rd iteration need, 151 bits acc */
282 t = r; /* will cover all possible cases */
283 w = fn * pio2_3;
284 r = t - w;
285 w = fn * pio2_3t - ((t - r) - w);
286 y[0] = r - w;
287 }
288 }
289 }
290 y[1] = (r - y[0]) - w;
291 if (hx < 0) {
292 y[0] = -y[0];
293 y[1] = -y[1];
294 return -n;
295 } else {
296 return n;
297 }
298 }
299 /*
300 * all other (large) arguments
301 */
302 if (ix >= 0x7ff00000) { /* x is inf or NaN */
303 y[0] = y[1] = x - x;
304 return 0;
305 }
306 /* set z = scalbn(|x|,ilogb(x)-23) */
307 GET_LOW_WORD(low, x);
308 SET_LOW_WORD(z, low);
309 e0 = (ix >> 20) - 1046; /* e0 = ilogb(z)-23; */
310 SET_HIGH_WORD(z, ix - static_cast<int32_t>(e0 << 20));
311 for (i = 0; i < 2; i++) {
312 tx[i] = static_cast<double>(static_cast<int32_t>(z));
313 z = (z - tx[i]) * two24;
314 }
315 tx[2] = z;
316 nx = 3;
317 while (tx[nx - 1] == zero) nx--; /* skip zero term */
318 n = __kernel_rem_pio2(tx, y, e0, nx, 2, two_over_pi);
319 if (hx < 0) {
320 y[0] = -y[0];
321 y[1] = -y[1];
322 return -n;
323 }
324 return n;
325 }
326
327 /* __kernel_cos( x, y )
328 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
329 * Input x is assumed to be bounded by ~pi/4 in magnitude.
330 * Input y is the tail of x.
331 *
332 * Algorithm
333 * 1. Since cos(-x) = cos(x), we need only to consider positive x.
334 * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
335 * 3. cos(x) is approximated by a polynomial of degree 14 on
336 * [0,pi/4]
337 * 4 14
338 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
339 * where the remez error is
340 *
341 * | 2 4 6 8 10 12 14 | -58
342 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
343 * | |
344 *
345 * 4 6 8 10 12 14
346 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
347 * cos(x) = 1 - x*x/2 + r
348 * since cos(x+y) ~ cos(x) - sin(x)*y
349 * ~ cos(x) - x*y,
350 * a correction term is necessary in cos(x) and hence
351 * cos(x+y) = 1 - (x*x/2 - (r - x*y))
352 * For better accuracy when x > 0.3, let qx = |x|/4 with
353 * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
354 * Then
355 * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
356 * Note that 1-qx and (x*x/2-qx) is EXACT here, and the
357 * magnitude of the latter is at least a quarter of x*x/2,
358 * thus, reducing the rounding error in the subtraction.
359 */
__kernel_cos(double x,double y)360 V8_INLINE double __kernel_cos(double x, double y) {
361 static const double
362 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
363 C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
364 C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
365 C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
366 C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
367 C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
368 C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
369
370 double a, iz, z, r, qx;
371 int32_t ix;
372 GET_HIGH_WORD(ix, x);
373 ix &= 0x7fffffff; /* ix = |x|'s high word*/
374 if (ix < 0x3e400000) { /* if x < 2**27 */
375 if (static_cast<int>(x) == 0) return one; /* generate inexact */
376 }
377 z = x * x;
378 r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6)))));
379 if (ix < 0x3FD33333) { /* if |x| < 0.3 */
380 return one - (0.5 * z - (z * r - x * y));
381 } else {
382 if (ix > 0x3fe90000) { /* x > 0.78125 */
383 qx = 0.28125;
384 } else {
385 INSERT_WORDS(qx, ix - 0x00200000, 0); /* x/4 */
386 }
387 iz = 0.5 * z - qx;
388 a = one - qx;
389 return a - (iz - (z * r - x * y));
390 }
391 }
392
393 /* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
394 * double x[],y[]; int e0,nx,prec; int ipio2[];
395 *
396 * __kernel_rem_pio2 return the last three digits of N with
397 * y = x - N*pi/2
398 * so that |y| < pi/2.
399 *
400 * The method is to compute the integer (mod 8) and fraction parts of
401 * (2/pi)*x without doing the full multiplication. In general we
402 * skip the part of the product that are known to be a huge integer (
403 * more accurately, = 0 mod 8 ). Thus the number of operations are
404 * independent of the exponent of the input.
405 *
406 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
407 *
408 * Input parameters:
409 * x[] The input value (must be positive) is broken into nx
410 * pieces of 24-bit integers in double precision format.
411 * x[i] will be the i-th 24 bit of x. The scaled exponent
412 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
413 * match x's up to 24 bits.
414 *
415 * Example of breaking a double positive z into x[0]+x[1]+x[2]:
416 * e0 = ilogb(z)-23
417 * z = scalbn(z,-e0)
418 * for i = 0,1,2
419 * x[i] = floor(z)
420 * z = (z-x[i])*2**24
421 *
422 *
423 * y[] output result in an array of double precision numbers.
424 * The dimension of y[] is:
425 * 24-bit precision 1
426 * 53-bit precision 2
427 * 64-bit precision 2
428 * 113-bit precision 3
429 * The actual value is the sum of them. Thus for 113-bit
430 * precison, one may have to do something like:
431 *
432 * long double t,w,r_head, r_tail;
433 * t = (long double)y[2] + (long double)y[1];
434 * w = (long double)y[0];
435 * r_head = t+w;
436 * r_tail = w - (r_head - t);
437 *
438 * e0 The exponent of x[0]
439 *
440 * nx dimension of x[]
441 *
442 * prec an integer indicating the precision:
443 * 0 24 bits (single)
444 * 1 53 bits (double)
445 * 2 64 bits (extended)
446 * 3 113 bits (quad)
447 *
448 * ipio2[]
449 * integer array, contains the (24*i)-th to (24*i+23)-th
450 * bit of 2/pi after binary point. The corresponding
451 * floating value is
452 *
453 * ipio2[i] * 2^(-24(i+1)).
454 *
455 * External function:
456 * double scalbn(), floor();
457 *
458 *
459 * Here is the description of some local variables:
460 *
461 * jk jk+1 is the initial number of terms of ipio2[] needed
462 * in the computation. The recommended value is 2,3,4,
463 * 6 for single, double, extended,and quad.
464 *
465 * jz local integer variable indicating the number of
466 * terms of ipio2[] used.
467 *
468 * jx nx - 1
469 *
470 * jv index for pointing to the suitable ipio2[] for the
471 * computation. In general, we want
472 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
473 * is an integer. Thus
474 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
475 * Hence jv = max(0,(e0-3)/24).
476 *
477 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
478 *
479 * q[] double array with integral value, representing the
480 * 24-bits chunk of the product of x and 2/pi.
481 *
482 * q0 the corresponding exponent of q[0]. Note that the
483 * exponent for q[i] would be q0-24*i.
484 *
485 * PIo2[] double precision array, obtained by cutting pi/2
486 * into 24 bits chunks.
487 *
488 * f[] ipio2[] in floating point
489 *
490 * iq[] integer array by breaking up q[] in 24-bits chunk.
491 *
492 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
493 *
494 * ih integer. If >0 it indicates q[] is >= 0.5, hence
495 * it also indicates the *sign* of the result.
496 *
497 */
__kernel_rem_pio2(double * x,double * y,int e0,int nx,int prec,const int32_t * ipio2)498 int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
499 const int32_t *ipio2) {
500 /* Constants:
501 * The hexadecimal values are the intended ones for the following
502 * constants. The decimal values may be used, provided that the
503 * compiler will convert from decimal to binary accurately enough
504 * to produce the hexadecimal values shown.
505 */
506 static const int init_jk[] = {2, 3, 4, 6}; /* initial value for jk */
507
508 static const double PIo2[] = {
509 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
510 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
511 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
512 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
513 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
514 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
515 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
516 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
517 };
518
519 static const double
520 zero = 0.0,
521 one = 1.0,
522 two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
523 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
524
525 int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
526 double z, fw, f[20], fq[20], q[20];
527
528 /* initialize jk*/
529 jk = init_jk[prec];
530 jp = jk;
531
532 /* determine jx,jv,q0, note that 3>q0 */
533 jx = nx - 1;
534 jv = (e0 - 3) / 24;
535 if (jv < 0) jv = 0;
536 q0 = e0 - 24 * (jv + 1);
537
538 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
539 j = jv - jx;
540 m = jx + jk;
541 for (i = 0; i <= m; i++, j++) {
542 f[i] = (j < 0) ? zero : static_cast<double>(ipio2[j]);
543 }
544
545 /* compute q[0],q[1],...q[jk] */
546 for (i = 0; i <= jk; i++) {
547 for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
548 q[i] = fw;
549 }
550
551 jz = jk;
552 recompute:
553 /* distill q[] into iq[] reversingly */
554 for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
555 fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
556 iq[i] = static_cast<int32_t>(z - two24 * fw);
557 z = q[j - 1] + fw;
558 }
559
560 /* compute n */
561 z = scalbn(z, q0); /* actual value of z */
562 z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
563 n = static_cast<int32_t>(z);
564 z -= static_cast<double>(n);
565 ih = 0;
566 if (q0 > 0) { /* need iq[jz-1] to determine n */
567 i = (iq[jz - 1] >> (24 - q0));
568 n += i;
569 iq[jz - 1] -= i << (24 - q0);
570 ih = iq[jz - 1] >> (23 - q0);
571 } else if (q0 == 0) {
572 ih = iq[jz - 1] >> 23;
573 } else if (z >= 0.5) {
574 ih = 2;
575 }
576
577 if (ih > 0) { /* q > 0.5 */
578 n += 1;
579 carry = 0;
580 for (i = 0; i < jz; i++) { /* compute 1-q */
581 j = iq[i];
582 if (carry == 0) {
583 if (j != 0) {
584 carry = 1;
585 iq[i] = 0x1000000 - j;
586 }
587 } else {
588 iq[i] = 0xffffff - j;
589 }
590 }
591 if (q0 > 0) { /* rare case: chance is 1 in 12 */
592 switch (q0) {
593 case 1:
594 iq[jz - 1] &= 0x7fffff;
595 break;
596 case 2:
597 iq[jz - 1] &= 0x3fffff;
598 break;
599 }
600 }
601 if (ih == 2) {
602 z = one - z;
603 if (carry != 0) z -= scalbn(one, q0);
604 }
605 }
606
607 /* check if recomputation is needed */
608 if (z == zero) {
609 j = 0;
610 for (i = jz - 1; i >= jk; i--) j |= iq[i];
611 if (j == 0) { /* need recomputation */
612 for (k = 1; jk >= k && iq[jk - k] == 0; k++) {
613 /* k = no. of terms needed */
614 }
615
616 for (i = jz + 1; i <= jz + k; i++) { /* add q[jz+1] to q[jz+k] */
617 f[jx + i] = ipio2[jv + i];
618 for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
619 q[i] = fw;
620 }
621 jz += k;
622 goto recompute;
623 }
624 }
625
626 /* chop off zero terms */
627 if (z == 0.0) {
628 jz -= 1;
629 q0 -= 24;
630 while (iq[jz] == 0) {
631 jz--;
632 q0 -= 24;
633 }
634 } else { /* break z into 24-bit if necessary */
635 z = scalbn(z, -q0);
636 if (z >= two24) {
637 fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
638 iq[jz] = z - two24 * fw;
639 jz += 1;
640 q0 += 24;
641 iq[jz] = fw;
642 } else {
643 iq[jz] = z;
644 }
645 }
646
647 /* convert integer "bit" chunk to floating-point value */
648 fw = scalbn(one, q0);
649 for (i = jz; i >= 0; i--) {
650 q[i] = fw * iq[i];
651 fw *= twon24;
652 }
653
654 /* compute PIo2[0,...,jp]*q[jz,...,0] */
655 for (i = jz; i >= 0; i--) {
656 for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) fw += PIo2[k] * q[i + k];
657 fq[jz - i] = fw;
658 }
659
660 /* compress fq[] into y[] */
661 switch (prec) {
662 case 0:
663 fw = 0.0;
664 for (i = jz; i >= 0; i--) fw += fq[i];
665 y[0] = (ih == 0) ? fw : -fw;
666 break;
667 case 1:
668 case 2:
669 fw = 0.0;
670 for (i = jz; i >= 0; i--) fw += fq[i];
671 y[0] = (ih == 0) ? fw : -fw;
672 fw = fq[0] - fw;
673 for (i = 1; i <= jz; i++) fw += fq[i];
674 y[1] = (ih == 0) ? fw : -fw;
675 break;
676 case 3: /* painful */
677 for (i = jz; i > 0; i--) {
678 fw = fq[i - 1] + fq[i];
679 fq[i] += fq[i - 1] - fw;
680 fq[i - 1] = fw;
681 }
682 for (i = jz; i > 1; i--) {
683 fw = fq[i - 1] + fq[i];
684 fq[i] += fq[i - 1] - fw;
685 fq[i - 1] = fw;
686 }
687 for (fw = 0.0, i = jz; i >= 2; i--) fw += fq[i];
688 if (ih == 0) {
689 y[0] = fq[0];
690 y[1] = fq[1];
691 y[2] = fw;
692 } else {
693 y[0] = -fq[0];
694 y[1] = -fq[1];
695 y[2] = -fw;
696 }
697 }
698 return n & 7;
699 }
700
701 /* __kernel_sin( x, y, iy)
702 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
703 * Input x is assumed to be bounded by ~pi/4 in magnitude.
704 * Input y is the tail of x.
705 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
706 *
707 * Algorithm
708 * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
709 * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
710 * 3. sin(x) is approximated by a polynomial of degree 13 on
711 * [0,pi/4]
712 * 3 13
713 * sin(x) ~ x + S1*x + ... + S6*x
714 * where
715 *
716 * |sin(x) 2 4 6 8 10 12 | -58
717 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
718 * | x |
719 *
720 * 4. sin(x+y) = sin(x) + sin'(x')*y
721 * ~ sin(x) + (1-x*x/2)*y
722 * For better accuracy, let
723 * 3 2 2 2 2
724 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
725 * then 3 2
726 * sin(x) = x + (S1*x + (x *(r-y/2)+y))
727 */
__kernel_sin(double x,double y,int iy)728 V8_INLINE double __kernel_sin(double x, double y, int iy) {
729 static const double
730 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
731 S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
732 S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
733 S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
734 S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
735 S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
736 S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
737
738 double z, r, v;
739 int32_t ix;
740 GET_HIGH_WORD(ix, x);
741 ix &= 0x7fffffff; /* high word of x */
742 if (ix < 0x3e400000) { /* |x| < 2**-27 */
743 if (static_cast<int>(x) == 0) return x;
744 } /* generate inexact */
745 z = x * x;
746 v = z * x;
747 r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6)));
748 if (iy == 0) {
749 return x + v * (S1 + z * r);
750 } else {
751 return x - ((z * (half * y - v * r) - y) - v * S1);
752 }
753 }
754
755 /* __kernel_tan( x, y, k )
756 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
757 * Input x is assumed to be bounded by ~pi/4 in magnitude.
758 * Input y is the tail of x.
759 * Input k indicates whether tan (if k=1) or
760 * -1/tan (if k= -1) is returned.
761 *
762 * Algorithm
763 * 1. Since tan(-x) = -tan(x), we need only to consider positive x.
764 * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
765 * 3. tan(x) is approximated by a odd polynomial of degree 27 on
766 * [0,0.67434]
767 * 3 27
768 * tan(x) ~ x + T1*x + ... + T13*x
769 * where
770 *
771 * |tan(x) 2 4 26 | -59.2
772 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
773 * | x |
774 *
775 * Note: tan(x+y) = tan(x) + tan'(x)*y
776 * ~ tan(x) + (1+x*x)*y
777 * Therefore, for better accuracy in computing tan(x+y), let
778 * 3 2 2 2 2
779 * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
780 * then
781 * 3 2
782 * tan(x+y) = x + (T1*x + (x *(r+y)+y))
783 *
784 * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then
785 * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
786 * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
787 */
__kernel_tan(double x,double y,int iy)788 double __kernel_tan(double x, double y, int iy) {
789 static const double xxx[] = {
790 3.33333333333334091986e-01, /* 3FD55555, 55555563 */
791 1.33333333333201242699e-01, /* 3FC11111, 1110FE7A */
792 5.39682539762260521377e-02, /* 3FABA1BA, 1BB341FE */
793 2.18694882948595424599e-02, /* 3F9664F4, 8406D637 */
794 8.86323982359930005737e-03, /* 3F8226E3, E96E8493 */
795 3.59207910759131235356e-03, /* 3F6D6D22, C9560328 */
796 1.45620945432529025516e-03, /* 3F57DBC8, FEE08315 */
797 5.88041240820264096874e-04, /* 3F4344D8, F2F26501 */
798 2.46463134818469906812e-04, /* 3F3026F7, 1A8D1068 */
799 7.81794442939557092300e-05, /* 3F147E88, A03792A6 */
800 7.14072491382608190305e-05, /* 3F12B80F, 32F0A7E9 */
801 -1.85586374855275456654e-05, /* BEF375CB, DB605373 */
802 2.59073051863633712884e-05, /* 3EFB2A70, 74BF7AD4 */
803 /* one */ 1.00000000000000000000e+00, /* 3FF00000, 00000000 */
804 /* pio4 */ 7.85398163397448278999e-01, /* 3FE921FB, 54442D18 */
805 /* pio4lo */ 3.06161699786838301793e-17 /* 3C81A626, 33145C07 */
806 };
807 #define one xxx[13]
808 #define pio4 xxx[14]
809 #define pio4lo xxx[15]
810 #define T xxx
811
812 double z, r, v, w, s;
813 int32_t ix, hx;
814
815 GET_HIGH_WORD(hx, x); /* high word of x */
816 ix = hx & 0x7fffffff; /* high word of |x| */
817 if (ix < 0x3e300000) { /* x < 2**-28 */
818 if (static_cast<int>(x) == 0) { /* generate inexact */
819 uint32_t low;
820 GET_LOW_WORD(low, x);
821 if (((ix | low) | (iy + 1)) == 0) {
822 return one / fabs(x);
823 } else {
824 if (iy == 1) {
825 return x;
826 } else { /* compute -1 / (x+y) carefully */
827 double a, t;
828
829 z = w = x + y;
830 SET_LOW_WORD(z, 0);
831 v = y - (z - x);
832 t = a = -one / w;
833 SET_LOW_WORD(t, 0);
834 s = one + t * z;
835 return t + a * (s + t * v);
836 }
837 }
838 }
839 }
840 if (ix >= 0x3FE59428) { /* |x| >= 0.6744 */
841 if (hx < 0) {
842 x = -x;
843 y = -y;
844 }
845 z = pio4 - x;
846 w = pio4lo - y;
847 x = z + w;
848 y = 0.0;
849 }
850 z = x * x;
851 w = z * z;
852 /*
853 * Break x^5*(T[1]+x^2*T[2]+...) into
854 * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
855 * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
856 */
857 r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
858 v = z *
859 (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
860 s = z * x;
861 r = y + z * (s * (r + v) + y);
862 r += T[0] * s;
863 w = x + r;
864 if (ix >= 0x3FE59428) {
865 v = iy;
866 return (1 - ((hx >> 30) & 2)) * (v - 2.0 * (x - (w * w / (w + v) - r)));
867 }
868 if (iy == 1) {
869 return w;
870 } else {
871 /*
872 * if allow error up to 2 ulp, simply return
873 * -1.0 / (x+r) here
874 */
875 /* compute -1.0 / (x+r) accurately */
876 double a, t;
877 z = w;
878 SET_LOW_WORD(z, 0);
879 v = r - (z - x); /* z+v = r+x */
880 t = a = -1.0 / w; /* a = -1.0/w */
881 SET_LOW_WORD(t, 0);
882 s = 1.0 + t * z;
883 return t + a * (s + t * v);
884 }
885
886 #undef one
887 #undef pio4
888 #undef pio4lo
889 #undef T
890 }
891
892 } // namespace
893
894 /* acos(x)
895 * Method :
896 * acos(x) = pi/2 - asin(x)
897 * acos(-x) = pi/2 + asin(x)
898 * For |x|<=0.5
899 * acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c)
900 * For x>0.5
901 * acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2)))
902 * = 2asin(sqrt((1-x)/2))
903 * = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z)
904 * = 2f + (2c + 2s*z*R(z))
905 * where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term
906 * for f so that f+c ~ sqrt(z).
907 * For x<-0.5
908 * acos(x) = pi - 2asin(sqrt((1-|x|)/2))
909 * = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z)
910 *
911 * Special cases:
912 * if x is NaN, return x itself;
913 * if |x|>1, return NaN with invalid signal.
914 *
915 * Function needed: sqrt
916 */
acos(double x)917 double acos(double x) {
918 static const double
919 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
920 pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
921 pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
922 pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
923 pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
924 pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
925 pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
926 pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
927 pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
928 pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
929 qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
930 qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
931 qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
932 qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
933
934 double z, p, q, r, w, s, c, df;
935 int32_t hx, ix;
936 GET_HIGH_WORD(hx, x);
937 ix = hx & 0x7fffffff;
938 if (ix >= 0x3ff00000) { /* |x| >= 1 */
939 uint32_t lx;
940 GET_LOW_WORD(lx, x);
941 if (((ix - 0x3ff00000) | lx) == 0) { /* |x|==1 */
942 if (hx > 0)
943 return 0.0; /* acos(1) = 0 */
944 else
945 return pi + 2.0 * pio2_lo; /* acos(-1)= pi */
946 }
947 return (x - x) / (x - x); /* acos(|x|>1) is NaN */
948 }
949 if (ix < 0x3fe00000) { /* |x| < 0.5 */
950 if (ix <= 0x3c600000) return pio2_hi + pio2_lo; /*if|x|<2**-57*/
951 z = x * x;
952 p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
953 q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
954 r = p / q;
955 return pio2_hi - (x - (pio2_lo - x * r));
956 } else if (hx < 0) { /* x < -0.5 */
957 z = (one + x) * 0.5;
958 p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
959 q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
960 s = sqrt(z);
961 r = p / q;
962 w = r * s - pio2_lo;
963 return pi - 2.0 * (s + w);
964 } else { /* x > 0.5 */
965 z = (one - x) * 0.5;
966 s = sqrt(z);
967 df = s;
968 SET_LOW_WORD(df, 0);
969 c = (z - df * df) / (s + df);
970 p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
971 q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
972 r = p / q;
973 w = r * s + c;
974 return 2.0 * (df + w);
975 }
976 }
977
978 /* acosh(x)
979 * Method :
980 * Based on
981 * acosh(x) = log [ x + sqrt(x*x-1) ]
982 * we have
983 * acosh(x) := log(x)+ln2, if x is large; else
984 * acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else
985 * acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1.
986 *
987 * Special cases:
988 * acosh(x) is NaN with signal if x<1.
989 * acosh(NaN) is NaN without signal.
990 */
acosh(double x)991 double acosh(double x) {
992 static const double
993 one = 1.0,
994 ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */
995 double t;
996 int32_t hx;
997 uint32_t lx;
998 EXTRACT_WORDS(hx, lx, x);
999 if (hx < 0x3ff00000) { /* x < 1 */
1000 return (x - x) / (x - x);
1001 } else if (hx >= 0x41b00000) { /* x > 2**28 */
1002 if (hx >= 0x7ff00000) { /* x is inf of NaN */
1003 return x + x;
1004 } else {
1005 return log(x) + ln2; /* acosh(huge)=log(2x) */
1006 }
1007 } else if (((hx - 0x3ff00000) | lx) == 0) {
1008 return 0.0; /* acosh(1) = 0 */
1009 } else if (hx > 0x40000000) { /* 2**28 > x > 2 */
1010 t = x * x;
1011 return log(2.0 * x - one / (x + sqrt(t - one)));
1012 } else { /* 1<x<2 */
1013 t = x - one;
1014 return log1p(t + sqrt(2.0 * t + t * t));
1015 }
1016 }
1017
1018 /* asin(x)
1019 * Method :
1020 * Since asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ...
1021 * we approximate asin(x) on [0,0.5] by
1022 * asin(x) = x + x*x^2*R(x^2)
1023 * where
1024 * R(x^2) is a rational approximation of (asin(x)-x)/x^3
1025 * and its remez error is bounded by
1026 * |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75)
1027 *
1028 * For x in [0.5,1]
1029 * asin(x) = pi/2-2*asin(sqrt((1-x)/2))
1030 * Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2;
1031 * then for x>0.98
1032 * asin(x) = pi/2 - 2*(s+s*z*R(z))
1033 * = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo)
1034 * For x<=0.98, let pio4_hi = pio2_hi/2, then
1035 * f = hi part of s;
1036 * c = sqrt(z) - f = (z-f*f)/(s+f) ...f+c=sqrt(z)
1037 * and
1038 * asin(x) = pi/2 - 2*(s+s*z*R(z))
1039 * = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo)
1040 * = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c))
1041 *
1042 * Special cases:
1043 * if x is NaN, return x itself;
1044 * if |x|>1, return NaN with invalid signal.
1045 */
asin(double x)1046 double asin(double x) {
1047 static const double
1048 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
1049 huge = 1.000e+300,
1050 pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
1051 pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
1052 pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
1053 /* coefficient for R(x^2) */
1054 pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
1055 pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
1056 pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
1057 pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
1058 pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
1059 pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
1060 qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
1061 qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
1062 qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
1063 qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
1064
1065 double t, w, p, q, c, r, s;
1066 int32_t hx, ix;
1067
1068 t = 0;
1069 GET_HIGH_WORD(hx, x);
1070 ix = hx & 0x7fffffff;
1071 if (ix >= 0x3ff00000) { /* |x|>= 1 */
1072 uint32_t lx;
1073 GET_LOW_WORD(lx, x);
1074 if (((ix - 0x3ff00000) | lx) == 0) /* asin(1)=+-pi/2 with inexact */
1075 return x * pio2_hi + x * pio2_lo;
1076 return (x - x) / (x - x); /* asin(|x|>1) is NaN */
1077 } else if (ix < 0x3fe00000) { /* |x|<0.5 */
1078 if (ix < 0x3e400000) { /* if |x| < 2**-27 */
1079 if (huge + x > one) return x; /* return x with inexact if x!=0*/
1080 } else {
1081 t = x * x;
1082 }
1083 p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5)))));
1084 q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4)));
1085 w = p / q;
1086 return x + x * w;
1087 }
1088 /* 1> |x|>= 0.5 */
1089 w = one - fabs(x);
1090 t = w * 0.5;
1091 p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5)))));
1092 q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4)));
1093 s = sqrt(t);
1094 if (ix >= 0x3FEF3333) { /* if |x| > 0.975 */
1095 w = p / q;
1096 t = pio2_hi - (2.0 * (s + s * w) - pio2_lo);
1097 } else {
1098 w = s;
1099 SET_LOW_WORD(w, 0);
1100 c = (t - w * w) / (s + w);
1101 r = p / q;
1102 p = 2.0 * s * r - (pio2_lo - 2.0 * c);
1103 q = pio4_hi - 2.0 * w;
1104 t = pio4_hi - (p - q);
1105 }
1106 if (hx > 0)
1107 return t;
1108 else
1109 return -t;
1110 }
1111 /* asinh(x)
1112 * Method :
1113 * Based on
1114 * asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ]
1115 * we have
1116 * asinh(x) := x if 1+x*x=1,
1117 * := sign(x)*(log(x)+ln2)) for large |x|, else
1118 * := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else
1119 * := sign(x)*log1p(|x| + x^2/(1 + sqrt(1+x^2)))
1120 */
asinh(double x)1121 double asinh(double x) {
1122 static const double
1123 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
1124 ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
1125 huge = 1.00000000000000000000e+300;
1126
1127 double t, w;
1128 int32_t hx, ix;
1129 GET_HIGH_WORD(hx, x);
1130 ix = hx & 0x7fffffff;
1131 if (ix >= 0x7ff00000) return x + x; /* x is inf or NaN */
1132 if (ix < 0x3e300000) { /* |x|<2**-28 */
1133 if (huge + x > one) return x; /* return x inexact except 0 */
1134 }
1135 if (ix > 0x41b00000) { /* |x| > 2**28 */
1136 w = log(fabs(x)) + ln2;
1137 } else if (ix > 0x40000000) { /* 2**28 > |x| > 2.0 */
1138 t = fabs(x);
1139 w = log(2.0 * t + one / (sqrt(x * x + one) + t));
1140 } else { /* 2.0 > |x| > 2**-28 */
1141 t = x * x;
1142 w = log1p(fabs(x) + t / (one + sqrt(one + t)));
1143 }
1144 if (hx > 0) {
1145 return w;
1146 } else {
1147 return -w;
1148 }
1149 }
1150
1151 /* atan(x)
1152 * Method
1153 * 1. Reduce x to positive by atan(x) = -atan(-x).
1154 * 2. According to the integer k=4t+0.25 chopped, t=x, the argument
1155 * is further reduced to one of the following intervals and the
1156 * arctangent of t is evaluated by the corresponding formula:
1157 *
1158 * [0,7/16] atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
1159 * [7/16,11/16] atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) )
1160 * [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) )
1161 * [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) )
1162 * [39/16,INF] atan(x) = atan(INF) + atan( -1/t )
1163 *
1164 * Constants:
1165 * The hexadecimal values are the intended ones for the following
1166 * constants. The decimal values may be used, provided that the
1167 * compiler will convert from decimal to binary accurately enough
1168 * to produce the hexadecimal values shown.
1169 */
atan(double x)1170 double atan(double x) {
1171 static const double atanhi[] = {
1172 4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
1173 7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
1174 9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
1175 1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
1176 };
1177
1178 static const double atanlo[] = {
1179 2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
1180 3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
1181 1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
1182 6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */
1183 };
1184
1185 static const double aT[] = {
1186 3.33333333333329318027e-01, /* 0x3FD55555, 0x5555550D */
1187 -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */
1188 1.42857142725034663711e-01, /* 0x3FC24924, 0x920083FF */
1189 -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */
1190 9.09088713343650656196e-02, /* 0x3FB745CD, 0xC54C206E */
1191 -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */
1192 6.66107313738753120669e-02, /* 0x3FB10D66, 0xA0D03D51 */
1193 -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */
1194 4.97687799461593236017e-02, /* 0x3FA97B4B, 0x24760DEB */
1195 -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */
1196 1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */
1197 };
1198
1199 static const double one = 1.0, huge = 1.0e300;
1200
1201 double w, s1, s2, z;
1202 int32_t ix, hx, id;
1203
1204 GET_HIGH_WORD(hx, x);
1205 ix = hx & 0x7fffffff;
1206 if (ix >= 0x44100000) { /* if |x| >= 2^66 */
1207 uint32_t low;
1208 GET_LOW_WORD(low, x);
1209 if (ix > 0x7ff00000 || (ix == 0x7ff00000 && (low != 0)))
1210 return x + x; /* NaN */
1211 if (hx > 0)
1212 return atanhi[3] + *(volatile double *)&atanlo[3];
1213 else
1214 return -atanhi[3] - *(volatile double *)&atanlo[3];
1215 }
1216 if (ix < 0x3fdc0000) { /* |x| < 0.4375 */
1217 if (ix < 0x3e400000) { /* |x| < 2^-27 */
1218 if (huge + x > one) return x; /* raise inexact */
1219 }
1220 id = -1;
1221 } else {
1222 x = fabs(x);
1223 if (ix < 0x3ff30000) { /* |x| < 1.1875 */
1224 if (ix < 0x3fe60000) { /* 7/16 <=|x|<11/16 */
1225 id = 0;
1226 x = (2.0 * x - one) / (2.0 + x);
1227 } else { /* 11/16<=|x|< 19/16 */
1228 id = 1;
1229 x = (x - one) / (x + one);
1230 }
1231 } else {
1232 if (ix < 0x40038000) { /* |x| < 2.4375 */
1233 id = 2;
1234 x = (x - 1.5) / (one + 1.5 * x);
1235 } else { /* 2.4375 <= |x| < 2^66 */
1236 id = 3;
1237 x = -1.0 / x;
1238 }
1239 }
1240 }
1241 /* end of argument reduction */
1242 z = x * x;
1243 w = z * z;
1244 /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
1245 s1 = z * (aT[0] +
1246 w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
1247 s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
1248 if (id < 0) {
1249 return x - x * (s1 + s2);
1250 } else {
1251 z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
1252 return (hx < 0) ? -z : z;
1253 }
1254 }
1255
1256 /* atan2(y,x)
1257 * Method :
1258 * 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
1259 * 2. Reduce x to positive by (if x and y are unexceptional):
1260 * ARG (x+iy) = arctan(y/x) ... if x > 0,
1261 * ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0,
1262 *
1263 * Special cases:
1264 *
1265 * ATAN2((anything), NaN ) is NaN;
1266 * ATAN2(NAN , (anything) ) is NaN;
1267 * ATAN2(+-0, +(anything but NaN)) is +-0 ;
1268 * ATAN2(+-0, -(anything but NaN)) is +-pi ;
1269 * ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
1270 * ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
1271 * ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
1272 * ATAN2(+-INF,+INF ) is +-pi/4 ;
1273 * ATAN2(+-INF,-INF ) is +-3pi/4;
1274 * ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
1275 *
1276 * Constants:
1277 * The hexadecimal values are the intended ones for the following
1278 * constants. The decimal values may be used, provided that the
1279 * compiler will convert from decimal to binary accurately enough
1280 * to produce the hexadecimal values shown.
1281 */
atan2(double y,double x)1282 double atan2(double y, double x) {
1283 static volatile double tiny = 1.0e-300;
1284 static const double
1285 zero = 0.0,
1286 pi_o_4 = 7.8539816339744827900E-01, /* 0x3FE921FB, 0x54442D18 */
1287 pi_o_2 = 1.5707963267948965580E+00, /* 0x3FF921FB, 0x54442D18 */
1288 pi = 3.1415926535897931160E+00; /* 0x400921FB, 0x54442D18 */
1289 static volatile double pi_lo =
1290 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
1291
1292 double z;
1293 int32_t k, m, hx, hy, ix, iy;
1294 uint32_t lx, ly;
1295
1296 EXTRACT_WORDS(hx, lx, x);
1297 ix = hx & 0x7fffffff;
1298 EXTRACT_WORDS(hy, ly, y);
1299 iy = hy & 0x7fffffff;
1300 if (((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x7ff00000) ||
1301 ((iy | ((ly | -static_cast<int32_t>(ly)) >> 31)) > 0x7ff00000)) {
1302 return x + y; /* x or y is NaN */
1303 }
1304 if (((hx - 0x3ff00000) | lx) == 0) return atan(y); /* x=1.0 */
1305 m = ((hy >> 31) & 1) | ((hx >> 30) & 2); /* 2*sign(x)+sign(y) */
1306
1307 /* when y = 0 */
1308 if ((iy | ly) == 0) {
1309 switch (m) {
1310 case 0:
1311 case 1:
1312 return y; /* atan(+-0,+anything)=+-0 */
1313 case 2:
1314 return pi + tiny; /* atan(+0,-anything) = pi */
1315 case 3:
1316 return -pi - tiny; /* atan(-0,-anything) =-pi */
1317 }
1318 }
1319 /* when x = 0 */
1320 if ((ix | lx) == 0) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1321
1322 /* when x is INF */
1323 if (ix == 0x7ff00000) {
1324 if (iy == 0x7ff00000) {
1325 switch (m) {
1326 case 0:
1327 return pi_o_4 + tiny; /* atan(+INF,+INF) */
1328 case 1:
1329 return -pi_o_4 - tiny; /* atan(-INF,+INF) */
1330 case 2:
1331 return 3.0 * pi_o_4 + tiny; /*atan(+INF,-INF)*/
1332 case 3:
1333 return -3.0 * pi_o_4 - tiny; /*atan(-INF,-INF)*/
1334 }
1335 } else {
1336 switch (m) {
1337 case 0:
1338 return zero; /* atan(+...,+INF) */
1339 case 1:
1340 return -zero; /* atan(-...,+INF) */
1341 case 2:
1342 return pi + tiny; /* atan(+...,-INF) */
1343 case 3:
1344 return -pi - tiny; /* atan(-...,-INF) */
1345 }
1346 }
1347 }
1348 /* when y is INF */
1349 if (iy == 0x7ff00000) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1350
1351 /* compute y/x */
1352 k = (iy - ix) >> 20;
1353 if (k > 60) { /* |y/x| > 2**60 */
1354 z = pi_o_2 + 0.5 * pi_lo;
1355 m &= 1;
1356 } else if (hx < 0 && k < -60) {
1357 z = 0.0; /* 0 > |y|/x > -2**-60 */
1358 } else {
1359 z = atan(fabs(y / x)); /* safe to do y/x */
1360 }
1361 switch (m) {
1362 case 0:
1363 return z; /* atan(+,+) */
1364 case 1:
1365 return -z; /* atan(-,+) */
1366 case 2:
1367 return pi - (z - pi_lo); /* atan(+,-) */
1368 default: /* case 3 */
1369 return (z - pi_lo) - pi; /* atan(-,-) */
1370 }
1371 }
1372
1373 /* cos(x)
1374 * Return cosine function of x.
1375 *
1376 * kernel function:
1377 * __kernel_sin ... sine function on [-pi/4,pi/4]
1378 * __kernel_cos ... cosine function on [-pi/4,pi/4]
1379 * __ieee754_rem_pio2 ... argument reduction routine
1380 *
1381 * Method.
1382 * Let S,C and T denote the sin, cos and tan respectively on
1383 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
1384 * in [-pi/4 , +pi/4], and let n = k mod 4.
1385 * We have
1386 *
1387 * n sin(x) cos(x) tan(x)
1388 * ----------------------------------------------------------
1389 * 0 S C T
1390 * 1 C -S -1/T
1391 * 2 -S -C T
1392 * 3 -C S -1/T
1393 * ----------------------------------------------------------
1394 *
1395 * Special cases:
1396 * Let trig be any of sin, cos, or tan.
1397 * trig(+-INF) is NaN, with signals;
1398 * trig(NaN) is that NaN;
1399 *
1400 * Accuracy:
1401 * TRIG(x) returns trig(x) nearly rounded
1402 */
cos(double x)1403 double cos(double x) {
1404 double y[2], z = 0.0;
1405 int32_t n, ix;
1406
1407 /* High word of x. */
1408 GET_HIGH_WORD(ix, x);
1409
1410 /* |x| ~< pi/4 */
1411 ix &= 0x7fffffff;
1412 if (ix <= 0x3fe921fb) {
1413 return __kernel_cos(x, z);
1414 } else if (ix >= 0x7ff00000) {
1415 /* cos(Inf or NaN) is NaN */
1416 return x - x;
1417 } else {
1418 /* argument reduction needed */
1419 n = __ieee754_rem_pio2(x, y);
1420 switch (n & 3) {
1421 case 0:
1422 return __kernel_cos(y[0], y[1]);
1423 case 1:
1424 return -__kernel_sin(y[0], y[1], 1);
1425 case 2:
1426 return -__kernel_cos(y[0], y[1]);
1427 default:
1428 return __kernel_sin(y[0], y[1], 1);
1429 }
1430 }
1431 }
1432
1433 /* exp(x)
1434 * Returns the exponential of x.
1435 *
1436 * Method
1437 * 1. Argument reduction:
1438 * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
1439 * Given x, find r and integer k such that
1440 *
1441 * x = k*ln2 + r, |r| <= 0.5*ln2.
1442 *
1443 * Here r will be represented as r = hi-lo for better
1444 * accuracy.
1445 *
1446 * 2. Approximation of exp(r) by a special rational function on
1447 * the interval [0,0.34658]:
1448 * Write
1449 * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
1450 * We use a special Remes algorithm on [0,0.34658] to generate
1451 * a polynomial of degree 5 to approximate R. The maximum error
1452 * of this polynomial approximation is bounded by 2**-59. In
1453 * other words,
1454 * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
1455 * (where z=r*r, and the values of P1 to P5 are listed below)
1456 * and
1457 * | 5 | -59
1458 * | 2.0+P1*z+...+P5*z - R(z) | <= 2
1459 * | |
1460 * The computation of exp(r) thus becomes
1461 * 2*r
1462 * exp(r) = 1 + -------
1463 * R - r
1464 * r*R1(r)
1465 * = 1 + r + ----------- (for better accuracy)
1466 * 2 - R1(r)
1467 * where
1468 * 2 4 10
1469 * R1(r) = r - (P1*r + P2*r + ... + P5*r ).
1470 *
1471 * 3. Scale back to obtain exp(x):
1472 * From step 1, we have
1473 * exp(x) = 2^k * exp(r)
1474 *
1475 * Special cases:
1476 * exp(INF) is INF, exp(NaN) is NaN;
1477 * exp(-INF) is 0, and
1478 * for finite argument, only exp(0)=1 is exact.
1479 *
1480 * Accuracy:
1481 * according to an error analysis, the error is always less than
1482 * 1 ulp (unit in the last place).
1483 *
1484 * Misc. info.
1485 * For IEEE double
1486 * if x > 7.09782712893383973096e+02 then exp(x) overflow
1487 * if x < -7.45133219101941108420e+02 then exp(x) underflow
1488 *
1489 * Constants:
1490 * The hexadecimal values are the intended ones for the following
1491 * constants. The decimal values may be used, provided that the
1492 * compiler will convert from decimal to binary accurately enough
1493 * to produce the hexadecimal values shown.
1494 */
exp(double x)1495 double exp(double x) {
1496 static const double
1497 one = 1.0,
1498 halF[2] = {0.5, -0.5},
1499 o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
1500 u_threshold = -7.45133219101941108420e+02, /* 0xc0874910, 0xD52D3051 */
1501 ln2HI[2] = {6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
1502 -6.93147180369123816490e-01}, /* 0xbfe62e42, 0xfee00000 */
1503 ln2LO[2] = {1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
1504 -1.90821492927058770002e-10}, /* 0xbdea39ef, 0x35793c76 */
1505 invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */
1506 P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
1507 P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
1508 P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
1509 P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
1510 P5 = 4.13813679705723846039e-08, /* 0x3E663769, 0x72BEA4D0 */
1511 E = 2.718281828459045; /* 0x4005bf0a, 0x8b145769 */
1512
1513 static volatile double
1514 huge = 1.0e+300,
1515 twom1000 = 9.33263618503218878990e-302, /* 2**-1000=0x01700000,0*/
1516 two1023 = 8.988465674311579539e307; /* 0x1p1023 */
1517
1518 double y, hi = 0.0, lo = 0.0, c, t, twopk;
1519 int32_t k = 0, xsb;
1520 uint32_t hx;
1521
1522 GET_HIGH_WORD(hx, x);
1523 xsb = (hx >> 31) & 1; /* sign bit of x */
1524 hx &= 0x7fffffff; /* high word of |x| */
1525
1526 /* filter out non-finite argument */
1527 if (hx >= 0x40862E42) { /* if |x|>=709.78... */
1528 if (hx >= 0x7ff00000) {
1529 uint32_t lx;
1530 GET_LOW_WORD(lx, x);
1531 if (((hx & 0xfffff) | lx) != 0)
1532 return x + x; /* NaN */
1533 else
1534 return (xsb == 0) ? x : 0.0; /* exp(+-inf)={inf,0} */
1535 }
1536 if (x > o_threshold) return huge * huge; /* overflow */
1537 if (x < u_threshold) return twom1000 * twom1000; /* underflow */
1538 }
1539
1540 /* argument reduction */
1541 if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
1542 if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
1543 /* TODO(rtoy): We special case exp(1) here to return the correct
1544 * value of E, as the computation below would get the last bit
1545 * wrong. We should probably fix the algorithm instead.
1546 */
1547 if (x == 1.0) return E;
1548 hi = x - ln2HI[xsb];
1549 lo = ln2LO[xsb];
1550 k = 1 - xsb - xsb;
1551 } else {
1552 k = static_cast<int>(invln2 * x + halF[xsb]);
1553 t = k;
1554 hi = x - t * ln2HI[0]; /* t*ln2HI is exact here */
1555 lo = t * ln2LO[0];
1556 }
1557 STRICT_ASSIGN(double, x, hi - lo);
1558 } else if (hx < 0x3e300000) { /* when |x|<2**-28 */
1559 if (huge + x > one) return one + x; /* trigger inexact */
1560 } else {
1561 k = 0;
1562 }
1563
1564 /* x is now in primary range */
1565 t = x * x;
1566 if (k >= -1021) {
1567 INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0);
1568 } else {
1569 INSERT_WORDS(twopk, 0x3ff00000 + ((k + 1000) << 20), 0);
1570 }
1571 c = x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
1572 if (k == 0) {
1573 return one - ((x * c) / (c - 2.0) - x);
1574 } else {
1575 y = one - ((lo - (x * c) / (2.0 - c)) - hi);
1576 }
1577 if (k >= -1021) {
1578 if (k == 1024) return y * 2.0 * two1023;
1579 return y * twopk;
1580 } else {
1581 return y * twopk * twom1000;
1582 }
1583 }
1584
1585 /*
1586 * Method :
1587 * 1.Reduced x to positive by atanh(-x) = -atanh(x)
1588 * 2.For x>=0.5
1589 * 1 2x x
1590 * atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
1591 * 2 1 - x 1 - x
1592 *
1593 * For x<0.5
1594 * atanh(x) = 0.5*log1p(2x+2x*x/(1-x))
1595 *
1596 * Special cases:
1597 * atanh(x) is NaN if |x| > 1 with signal;
1598 * atanh(NaN) is that NaN with no signal;
1599 * atanh(+-1) is +-INF with signal.
1600 *
1601 */
atanh(double x)1602 double atanh(double x) {
1603 static const double one = 1.0, huge = 1e300;
1604 static const double zero = 0.0;
1605
1606 double t;
1607 int32_t hx, ix;
1608 uint32_t lx;
1609 EXTRACT_WORDS(hx, lx, x);
1610 ix = hx & 0x7fffffff;
1611 if ((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x3ff00000) /* |x|>1 */
1612 return (x - x) / (x - x);
1613 if (ix == 0x3ff00000) return x / zero;
1614 if (ix < 0x3e300000 && (huge + x) > zero) return x; /* x<2**-28 */
1615 SET_HIGH_WORD(x, ix);
1616 if (ix < 0x3fe00000) { /* x < 0.5 */
1617 t = x + x;
1618 t = 0.5 * log1p(t + t * x / (one - x));
1619 } else {
1620 t = 0.5 * log1p((x + x) / (one - x));
1621 }
1622 if (hx >= 0)
1623 return t;
1624 else
1625 return -t;
1626 }
1627
1628 /* log(x)
1629 * Return the logrithm of x
1630 *
1631 * Method :
1632 * 1. Argument Reduction: find k and f such that
1633 * x = 2^k * (1+f),
1634 * where sqrt(2)/2 < 1+f < sqrt(2) .
1635 *
1636 * 2. Approximation of log(1+f).
1637 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1638 * = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1639 * = 2s + s*R
1640 * We use a special Reme algorithm on [0,0.1716] to generate
1641 * a polynomial of degree 14 to approximate R The maximum error
1642 * of this polynomial approximation is bounded by 2**-58.45. In
1643 * other words,
1644 * 2 4 6 8 10 12 14
1645 * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s
1646 * (the values of Lg1 to Lg7 are listed in the program)
1647 * and
1648 * | 2 14 | -58.45
1649 * | Lg1*s +...+Lg7*s - R(z) | <= 2
1650 * | |
1651 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1652 * In order to guarantee error in log below 1ulp, we compute log
1653 * by
1654 * log(1+f) = f - s*(f - R) (if f is not too large)
1655 * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1656 *
1657 * 3. Finally, log(x) = k*ln2 + log(1+f).
1658 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1659 * Here ln2 is split into two floating point number:
1660 * ln2_hi + ln2_lo,
1661 * where n*ln2_hi is always exact for |n| < 2000.
1662 *
1663 * Special cases:
1664 * log(x) is NaN with signal if x < 0 (including -INF) ;
1665 * log(+INF) is +INF; log(0) is -INF with signal;
1666 * log(NaN) is that NaN with no signal.
1667 *
1668 * Accuracy:
1669 * according to an error analysis, the error is always less than
1670 * 1 ulp (unit in the last place).
1671 *
1672 * Constants:
1673 * The hexadecimal values are the intended ones for the following
1674 * constants. The decimal values may be used, provided that the
1675 * compiler will convert from decimal to binary accurately enough
1676 * to produce the hexadecimal values shown.
1677 */
log(double x)1678 double log(double x) {
1679 static const double /* -- */
1680 ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1681 ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1682 two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
1683 Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
1684 Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
1685 Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
1686 Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
1687 Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
1688 Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
1689 Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
1690
1691 static const double zero = 0.0;
1692 static volatile double vzero = 0.0;
1693
1694 double hfsq, f, s, z, R, w, t1, t2, dk;
1695 int32_t k, hx, i, j;
1696 uint32_t lx;
1697
1698 EXTRACT_WORDS(hx, lx, x);
1699
1700 k = 0;
1701 if (hx < 0x00100000) { /* x < 2**-1022 */
1702 if (((hx & 0x7fffffff) | lx) == 0)
1703 return -two54 / vzero; /* log(+-0)=-inf */
1704 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
1705 k -= 54;
1706 x *= two54; /* subnormal number, scale up x */
1707 GET_HIGH_WORD(hx, x);
1708 }
1709 if (hx >= 0x7ff00000) return x + x;
1710 k += (hx >> 20) - 1023;
1711 hx &= 0x000fffff;
1712 i = (hx + 0x95f64) & 0x100000;
1713 SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
1714 k += (i >> 20);
1715 f = x - 1.0;
1716 if ((0x000fffff & (2 + hx)) < 3) { /* -2**-20 <= f < 2**-20 */
1717 if (f == zero) {
1718 if (k == 0) {
1719 return zero;
1720 } else {
1721 dk = static_cast<double>(k);
1722 return dk * ln2_hi + dk * ln2_lo;
1723 }
1724 }
1725 R = f * f * (0.5 - 0.33333333333333333 * f);
1726 if (k == 0) {
1727 return f - R;
1728 } else {
1729 dk = static_cast<double>(k);
1730 return dk * ln2_hi - ((R - dk * ln2_lo) - f);
1731 }
1732 }
1733 s = f / (2.0 + f);
1734 dk = static_cast<double>(k);
1735 z = s * s;
1736 i = hx - 0x6147a;
1737 w = z * z;
1738 j = 0x6b851 - hx;
1739 t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1740 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1741 i |= j;
1742 R = t2 + t1;
1743 if (i > 0) {
1744 hfsq = 0.5 * f * f;
1745 if (k == 0)
1746 return f - (hfsq - s * (hfsq + R));
1747 else
1748 return dk * ln2_hi - ((hfsq - (s * (hfsq + R) + dk * ln2_lo)) - f);
1749 } else {
1750 if (k == 0)
1751 return f - s * (f - R);
1752 else
1753 return dk * ln2_hi - ((s * (f - R) - dk * ln2_lo) - f);
1754 }
1755 }
1756
1757 /* double log1p(double x)
1758 *
1759 * Method :
1760 * 1. Argument Reduction: find k and f such that
1761 * 1+x = 2^k * (1+f),
1762 * where sqrt(2)/2 < 1+f < sqrt(2) .
1763 *
1764 * Note. If k=0, then f=x is exact. However, if k!=0, then f
1765 * may not be representable exactly. In that case, a correction
1766 * term is need. Let u=1+x rounded. Let c = (1+x)-u, then
1767 * log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
1768 * and add back the correction term c/u.
1769 * (Note: when x > 2**53, one can simply return log(x))
1770 *
1771 * 2. Approximation of log1p(f).
1772 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1773 * = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1774 * = 2s + s*R
1775 * We use a special Reme algorithm on [0,0.1716] to generate
1776 * a polynomial of degree 14 to approximate R The maximum error
1777 * of this polynomial approximation is bounded by 2**-58.45. In
1778 * other words,
1779 * 2 4 6 8 10 12 14
1780 * R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s +Lp6*s +Lp7*s
1781 * (the values of Lp1 to Lp7 are listed in the program)
1782 * and
1783 * | 2 14 | -58.45
1784 * | Lp1*s +...+Lp7*s - R(z) | <= 2
1785 * | |
1786 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1787 * In order to guarantee error in log below 1ulp, we compute log
1788 * by
1789 * log1p(f) = f - (hfsq - s*(hfsq+R)).
1790 *
1791 * 3. Finally, log1p(x) = k*ln2 + log1p(f).
1792 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1793 * Here ln2 is split into two floating point number:
1794 * ln2_hi + ln2_lo,
1795 * where n*ln2_hi is always exact for |n| < 2000.
1796 *
1797 * Special cases:
1798 * log1p(x) is NaN with signal if x < -1 (including -INF) ;
1799 * log1p(+INF) is +INF; log1p(-1) is -INF with signal;
1800 * log1p(NaN) is that NaN with no signal.
1801 *
1802 * Accuracy:
1803 * according to an error analysis, the error is always less than
1804 * 1 ulp (unit in the last place).
1805 *
1806 * Constants:
1807 * The hexadecimal values are the intended ones for the following
1808 * constants. The decimal values may be used, provided that the
1809 * compiler will convert from decimal to binary accurately enough
1810 * to produce the hexadecimal values shown.
1811 *
1812 * Note: Assuming log() return accurate answer, the following
1813 * algorithm can be used to compute log1p(x) to within a few ULP:
1814 *
1815 * u = 1+x;
1816 * if(u==1.0) return x ; else
1817 * return log(u)*(x/(u-1.0));
1818 *
1819 * See HP-15C Advanced Functions Handbook, p.193.
1820 */
log1p(double x)1821 double log1p(double x) {
1822 static const double /* -- */
1823 ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1824 ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1825 two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
1826 Lp1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
1827 Lp2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
1828 Lp3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
1829 Lp4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
1830 Lp5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
1831 Lp6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
1832 Lp7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
1833
1834 static const double zero = 0.0;
1835 static volatile double vzero = 0.0;
1836
1837 double hfsq, f, c, s, z, R, u;
1838 int32_t k, hx, hu, ax;
1839
1840 GET_HIGH_WORD(hx, x);
1841 ax = hx & 0x7fffffff;
1842
1843 k = 1;
1844 if (hx < 0x3FDA827A) { /* 1+x < sqrt(2)+ */
1845 if (ax >= 0x3ff00000) { /* x <= -1.0 */
1846 if (x == -1.0)
1847 return -two54 / vzero; /* log1p(-1)=+inf */
1848 else
1849 return (x - x) / (x - x); /* log1p(x<-1)=NaN */
1850 }
1851 if (ax < 0x3e200000) { /* |x| < 2**-29 */
1852 if (two54 + x > zero /* raise inexact */
1853 && ax < 0x3c900000) /* |x| < 2**-54 */
1854 return x;
1855 else
1856 return x - x * x * 0.5;
1857 }
1858 if (hx > 0 || hx <= static_cast<int32_t>(0xbfd2bec4)) {
1859 k = 0;
1860 f = x;
1861 hu = 1;
1862 } /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
1863 }
1864 if (hx >= 0x7ff00000) return x + x;
1865 if (k != 0) {
1866 if (hx < 0x43400000) {
1867 STRICT_ASSIGN(double, u, 1.0 + x);
1868 GET_HIGH_WORD(hu, u);
1869 k = (hu >> 20) - 1023;
1870 c = (k > 0) ? 1.0 - (u - x) : x - (u - 1.0); /* correction term */
1871 c /= u;
1872 } else {
1873 u = x;
1874 GET_HIGH_WORD(hu, u);
1875 k = (hu >> 20) - 1023;
1876 c = 0;
1877 }
1878 hu &= 0x000fffff;
1879 /*
1880 * The approximation to sqrt(2) used in thresholds is not
1881 * critical. However, the ones used above must give less
1882 * strict bounds than the one here so that the k==0 case is
1883 * never reached from here, since here we have committed to
1884 * using the correction term but don't use it if k==0.
1885 */
1886 if (hu < 0x6a09e) { /* u ~< sqrt(2) */
1887 SET_HIGH_WORD(u, hu | 0x3ff00000); /* normalize u */
1888 } else {
1889 k += 1;
1890 SET_HIGH_WORD(u, hu | 0x3fe00000); /* normalize u/2 */
1891 hu = (0x00100000 - hu) >> 2;
1892 }
1893 f = u - 1.0;
1894 }
1895 hfsq = 0.5 * f * f;
1896 if (hu == 0) { /* |f| < 2**-20 */
1897 if (f == zero) {
1898 if (k == 0) {
1899 return zero;
1900 } else {
1901 c += k * ln2_lo;
1902 return k * ln2_hi + c;
1903 }
1904 }
1905 R = hfsq * (1.0 - 0.66666666666666666 * f);
1906 if (k == 0)
1907 return f - R;
1908 else
1909 return k * ln2_hi - ((R - (k * ln2_lo + c)) - f);
1910 }
1911 s = f / (2.0 + f);
1912 z = s * s;
1913 R = z * (Lp1 +
1914 z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7))))));
1915 if (k == 0)
1916 return f - (hfsq - s * (hfsq + R));
1917 else
1918 return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);
1919 }
1920
1921 /*
1922 * k_log1p(f):
1923 * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)].
1924 *
1925 * The following describes the overall strategy for computing
1926 * logarithms in base e. The argument reduction and adding the final
1927 * term of the polynomial are done by the caller for increased accuracy
1928 * when different bases are used.
1929 *
1930 * Method :
1931 * 1. Argument Reduction: find k and f such that
1932 * x = 2^k * (1+f),
1933 * where sqrt(2)/2 < 1+f < sqrt(2) .
1934 *
1935 * 2. Approximation of log(1+f).
1936 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1937 * = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1938 * = 2s + s*R
1939 * We use a special Reme algorithm on [0,0.1716] to generate
1940 * a polynomial of degree 14 to approximate R The maximum error
1941 * of this polynomial approximation is bounded by 2**-58.45. In
1942 * other words,
1943 * 2 4 6 8 10 12 14
1944 * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s
1945 * (the values of Lg1 to Lg7 are listed in the program)
1946 * and
1947 * | 2 14 | -58.45
1948 * | Lg1*s +...+Lg7*s - R(z) | <= 2
1949 * | |
1950 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1951 * In order to guarantee error in log below 1ulp, we compute log
1952 * by
1953 * log(1+f) = f - s*(f - R) (if f is not too large)
1954 * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1955 *
1956 * 3. Finally, log(x) = k*ln2 + log(1+f).
1957 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1958 * Here ln2 is split into two floating point number:
1959 * ln2_hi + ln2_lo,
1960 * where n*ln2_hi is always exact for |n| < 2000.
1961 *
1962 * Special cases:
1963 * log(x) is NaN with signal if x < 0 (including -INF) ;
1964 * log(+INF) is +INF; log(0) is -INF with signal;
1965 * log(NaN) is that NaN with no signal.
1966 *
1967 * Accuracy:
1968 * according to an error analysis, the error is always less than
1969 * 1 ulp (unit in the last place).
1970 *
1971 * Constants:
1972 * The hexadecimal values are the intended ones for the following
1973 * constants. The decimal values may be used, provided that the
1974 * compiler will convert from decimal to binary accurately enough
1975 * to produce the hexadecimal values shown.
1976 */
1977
1978 static const double Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
1979 Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
1980 Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
1981 Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
1982 Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
1983 Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
1984 Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
1985
1986 /*
1987 * We always inline k_log1p(), since doing so produces a
1988 * substantial performance improvement (~40% on amd64).
1989 */
k_log1p(double f)1990 static inline double k_log1p(double f) {
1991 double hfsq, s, z, R, w, t1, t2;
1992
1993 s = f / (2.0 + f);
1994 z = s * s;
1995 w = z * z;
1996 t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1997 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1998 R = t2 + t1;
1999 hfsq = 0.5 * f * f;
2000 return s * (hfsq + R);
2001 }
2002
2003 /*
2004 * Return the base 2 logarithm of x. See e_log.c and k_log.h for most
2005 * comments.
2006 *
2007 * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,
2008 * then does the combining and scaling steps
2009 * log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k
2010 * in not-quite-routine extra precision.
2011 */
log2(double x)2012 double log2(double x) {
2013 static const double
2014 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
2015 ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */
2016 ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */
2017
2018 static const double zero = 0.0;
2019 static volatile double vzero = 0.0;
2020
2021 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y;
2022 int32_t i, k, hx;
2023 uint32_t lx;
2024
2025 EXTRACT_WORDS(hx, lx, x);
2026
2027 k = 0;
2028 if (hx < 0x00100000) { /* x < 2**-1022 */
2029 if (((hx & 0x7fffffff) | lx) == 0)
2030 return -two54 / vzero; /* log(+-0)=-inf */
2031 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
2032 k -= 54;
2033 x *= two54; /* subnormal number, scale up x */
2034 GET_HIGH_WORD(hx, x);
2035 }
2036 if (hx >= 0x7ff00000) return x + x;
2037 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */
2038 k += (hx >> 20) - 1023;
2039 hx &= 0x000fffff;
2040 i = (hx + 0x95f64) & 0x100000;
2041 SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
2042 k += (i >> 20);
2043 y = static_cast<double>(k);
2044 f = x - 1.0;
2045 hfsq = 0.5 * f * f;
2046 r = k_log1p(f);
2047
2048 /*
2049 * f-hfsq must (for args near 1) be evaluated in extra precision
2050 * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).
2051 * This is fairly efficient since f-hfsq only depends on f, so can
2052 * be evaluated in parallel with R. Not combining hfsq with R also
2053 * keeps R small (though not as small as a true `lo' term would be),
2054 * so that extra precision is not needed for terms involving R.
2055 *
2056 * Compiler bugs involving extra precision used to break Dekker's
2057 * theorem for spitting f-hfsq as hi+lo, unless double_t was used
2058 * or the multi-precision calculations were avoided when double_t
2059 * has extra precision. These problems are now automatically
2060 * avoided as a side effect of the optimization of combining the
2061 * Dekker splitting step with the clear-low-bits step.
2062 *
2063 * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra
2064 * precision to avoid a very large cancellation when x is very near
2065 * these values. Unlike the above cancellations, this problem is
2066 * specific to base 2. It is strange that adding +-1 is so much
2067 * harder than adding +-ln2 or +-log10_2.
2068 *
2069 * This uses Dekker's theorem to normalize y+val_hi, so the
2070 * compiler bugs are back in some configurations, sigh. And I
2071 * don't want to used double_t to avoid them, since that gives a
2072 * pessimization and the support for avoiding the pessimization
2073 * is not yet available.
2074 *
2075 * The multi-precision calculations for the multiplications are
2076 * routine.
2077 */
2078 hi = f - hfsq;
2079 SET_LOW_WORD(hi, 0);
2080 lo = (f - hi) - hfsq + r;
2081 val_hi = hi * ivln2hi;
2082 val_lo = (lo + hi) * ivln2lo + lo * ivln2hi;
2083
2084 /* spadd(val_hi, val_lo, y), except for not using double_t: */
2085 w = y + val_hi;
2086 val_lo += (y - w) + val_hi;
2087 val_hi = w;
2088
2089 return val_lo + val_hi;
2090 }
2091
2092 /*
2093 * Return the base 10 logarithm of x
2094 *
2095 * Method :
2096 * Let log10_2hi = leading 40 bits of log10(2) and
2097 * log10_2lo = log10(2) - log10_2hi,
2098 * ivln10 = 1/log(10) rounded.
2099 * Then
2100 * n = ilogb(x),
2101 * if(n<0) n = n+1;
2102 * x = scalbn(x,-n);
2103 * log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x))
2104 *
2105 * Note 1:
2106 * To guarantee log10(10**n)=n, where 10**n is normal, the rounding
2107 * mode must set to Round-to-Nearest.
2108 * Note 2:
2109 * [1/log(10)] rounded to 53 bits has error .198 ulps;
2110 * log10 is monotonic at all binary break points.
2111 *
2112 * Special cases:
2113 * log10(x) is NaN if x < 0;
2114 * log10(+INF) is +INF; log10(0) is -INF;
2115 * log10(NaN) is that NaN;
2116 * log10(10**N) = N for N=0,1,...,22.
2117 */
log10(double x)2118 double log10(double x) {
2119 static const double
2120 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
2121 ivln10 = 4.34294481903251816668e-01,
2122 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
2123 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
2124
2125 static const double zero = 0.0;
2126 static volatile double vzero = 0.0;
2127
2128 double y;
2129 int32_t i, k, hx;
2130 uint32_t lx;
2131
2132 EXTRACT_WORDS(hx, lx, x);
2133
2134 k = 0;
2135 if (hx < 0x00100000) { /* x < 2**-1022 */
2136 if (((hx & 0x7fffffff) | lx) == 0)
2137 return -two54 / vzero; /* log(+-0)=-inf */
2138 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
2139 k -= 54;
2140 x *= two54; /* subnormal number, scale up x */
2141 GET_HIGH_WORD(hx, x);
2142 GET_LOW_WORD(lx, x);
2143 }
2144 if (hx >= 0x7ff00000) return x + x;
2145 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */
2146 k += (hx >> 20) - 1023;
2147
2148 i = (k & 0x80000000) >> 31;
2149 hx = (hx & 0x000fffff) | ((0x3ff - i) << 20);
2150 y = k + i;
2151 SET_HIGH_WORD(x, hx);
2152 SET_LOW_WORD(x, lx);
2153
2154 double z = y * log10_2lo + ivln10 * log(x);
2155 return z + y * log10_2hi;
2156 }
2157
2158 /* expm1(x)
2159 * Returns exp(x)-1, the exponential of x minus 1.
2160 *
2161 * Method
2162 * 1. Argument reduction:
2163 * Given x, find r and integer k such that
2164 *
2165 * x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658
2166 *
2167 * Here a correction term c will be computed to compensate
2168 * the error in r when rounded to a floating-point number.
2169 *
2170 * 2. Approximating expm1(r) by a special rational function on
2171 * the interval [0,0.34658]:
2172 * Since
2173 * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ...
2174 * we define R1(r*r) by
2175 * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r)
2176 * That is,
2177 * R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
2178 * = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
2179 * = 1 - r^2/60 + r^4/2520 - r^6/100800 + ...
2180 * We use a special Reme algorithm on [0,0.347] to generate
2181 * a polynomial of degree 5 in r*r to approximate R1. The
2182 * maximum error of this polynomial approximation is bounded
2183 * by 2**-61. In other words,
2184 * R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
2185 * where Q1 = -1.6666666666666567384E-2,
2186 * Q2 = 3.9682539681370365873E-4,
2187 * Q3 = -9.9206344733435987357E-6,
2188 * Q4 = 2.5051361420808517002E-7,
2189 * Q5 = -6.2843505682382617102E-9;
2190 * z = r*r,
2191 * with error bounded by
2192 * | 5 | -61
2193 * | 1.0+Q1*z+...+Q5*z - R1(z) | <= 2
2194 * | |
2195 *
2196 * expm1(r) = exp(r)-1 is then computed by the following
2197 * specific way which minimize the accumulation rounding error:
2198 * 2 3
2199 * r r [ 3 - (R1 + R1*r/2) ]
2200 * expm1(r) = r + --- + --- * [--------------------]
2201 * 2 2 [ 6 - r*(3 - R1*r/2) ]
2202 *
2203 * To compensate the error in the argument reduction, we use
2204 * expm1(r+c) = expm1(r) + c + expm1(r)*c
2205 * ~ expm1(r) + c + r*c
2206 * Thus c+r*c will be added in as the correction terms for
2207 * expm1(r+c). Now rearrange the term to avoid optimization
2208 * screw up:
2209 * ( 2 2 )
2210 * ({ ( r [ R1 - (3 - R1*r/2) ] ) } r )
2211 * expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
2212 * ({ ( 2 [ 6 - r*(3 - R1*r/2) ] ) } 2 )
2213 * ( )
2214 *
2215 * = r - E
2216 * 3. Scale back to obtain expm1(x):
2217 * From step 1, we have
2218 * expm1(x) = either 2^k*[expm1(r)+1] - 1
2219 * = or 2^k*[expm1(r) + (1-2^-k)]
2220 * 4. Implementation notes:
2221 * (A). To save one multiplication, we scale the coefficient Qi
2222 * to Qi*2^i, and replace z by (x^2)/2.
2223 * (B). To achieve maximum accuracy, we compute expm1(x) by
2224 * (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
2225 * (ii) if k=0, return r-E
2226 * (iii) if k=-1, return 0.5*(r-E)-0.5
2227 * (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E)
2228 * else return 1.0+2.0*(r-E);
2229 * (v) if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1)
2230 * (vi) if k <= 20, return 2^k((1-2^-k)-(E-r)), else
2231 * (vii) return 2^k(1-((E+2^-k)-r))
2232 *
2233 * Special cases:
2234 * expm1(INF) is INF, expm1(NaN) is NaN;
2235 * expm1(-INF) is -1, and
2236 * for finite argument, only expm1(0)=0 is exact.
2237 *
2238 * Accuracy:
2239 * according to an error analysis, the error is always less than
2240 * 1 ulp (unit in the last place).
2241 *
2242 * Misc. info.
2243 * For IEEE double
2244 * if x > 7.09782712893383973096e+02 then expm1(x) overflow
2245 *
2246 * Constants:
2247 * The hexadecimal values are the intended ones for the following
2248 * constants. The decimal values may be used, provided that the
2249 * compiler will convert from decimal to binary accurately enough
2250 * to produce the hexadecimal values shown.
2251 */
expm1(double x)2252 double expm1(double x) {
2253 static const double
2254 one = 1.0,
2255 tiny = 1.0e-300,
2256 o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
2257 ln2_hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
2258 ln2_lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
2259 invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */
2260 /* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs =
2261 x*x/2: */
2262 Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */
2263 Q2 = 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */
2264 Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
2265 Q4 = 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */
2266 Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
2267
2268 static volatile double huge = 1.0e+300;
2269
2270 double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
2271 int32_t k, xsb;
2272 uint32_t hx;
2273
2274 GET_HIGH_WORD(hx, x);
2275 xsb = hx & 0x80000000; /* sign bit of x */
2276 hx &= 0x7fffffff; /* high word of |x| */
2277
2278 /* filter out huge and non-finite argument */
2279 if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */
2280 if (hx >= 0x40862E42) { /* if |x|>=709.78... */
2281 if (hx >= 0x7ff00000) {
2282 uint32_t low;
2283 GET_LOW_WORD(low, x);
2284 if (((hx & 0xfffff) | low) != 0)
2285 return x + x; /* NaN */
2286 else
2287 return (xsb == 0) ? x : -1.0; /* exp(+-inf)={inf,-1} */
2288 }
2289 if (x > o_threshold) return huge * huge; /* overflow */
2290 }
2291 if (xsb != 0) { /* x < -56*ln2, return -1.0 with inexact */
2292 if (x + tiny < 0.0) /* raise inexact */
2293 return tiny - one; /* return -1 */
2294 }
2295 }
2296
2297 /* argument reduction */
2298 if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
2299 if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
2300 if (xsb == 0) {
2301 hi = x - ln2_hi;
2302 lo = ln2_lo;
2303 k = 1;
2304 } else {
2305 hi = x + ln2_hi;
2306 lo = -ln2_lo;
2307 k = -1;
2308 }
2309 } else {
2310 k = invln2 * x + ((xsb == 0) ? 0.5 : -0.5);
2311 t = k;
2312 hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
2313 lo = t * ln2_lo;
2314 }
2315 STRICT_ASSIGN(double, x, hi - lo);
2316 c = (hi - x) - lo;
2317 } else if (hx < 0x3c900000) { /* when |x|<2**-54, return x */
2318 t = huge + x; /* return x with inexact flags when x!=0 */
2319 return x - (t - (huge + x));
2320 } else {
2321 k = 0;
2322 }
2323
2324 /* x is now in primary range */
2325 hfx = 0.5 * x;
2326 hxs = x * hfx;
2327 r1 = one + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
2328 t = 3.0 - r1 * hfx;
2329 e = hxs * ((r1 - t) / (6.0 - x * t));
2330 if (k == 0) {
2331 return x - (x * e - hxs); /* c is 0 */
2332 } else {
2333 INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0); /* 2^k */
2334 e = (x * (e - c) - c);
2335 e -= hxs;
2336 if (k == -1) return 0.5 * (x - e) - 0.5;
2337 if (k == 1) {
2338 if (x < -0.25)
2339 return -2.0 * (e - (x + 0.5));
2340 else
2341 return one + 2.0 * (x - e);
2342 }
2343 if (k <= -2 || k > 56) { /* suffice to return exp(x)-1 */
2344 y = one - (e - x);
2345 // TODO(mvstanton): is this replacement for the hex float
2346 // sufficient?
2347 // if (k == 1024) y = y*2.0*0x1p1023;
2348 if (k == 1024)
2349 y = y * 2.0 * 8.98846567431158e+307;
2350 else
2351 y = y * twopk;
2352 return y - one;
2353 }
2354 t = one;
2355 if (k < 20) {
2356 SET_HIGH_WORD(t, 0x3ff00000 - (0x200000 >> k)); /* t=1-2^-k */
2357 y = t - (e - x);
2358 y = y * twopk;
2359 } else {
2360 SET_HIGH_WORD(t, ((0x3ff - k) << 20)); /* 2^-k */
2361 y = x - (e + t);
2362 y += one;
2363 y = y * twopk;
2364 }
2365 }
2366 return y;
2367 }
2368
cbrt(double x)2369 double cbrt(double x) {
2370 static const uint32_t
2371 B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
2372 B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
2373
2374 /* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
2375 static const double P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */
2376 P1 = -1.88497979543377169875, /* 0xbffe28e0, 0x92f02420 */
2377 P2 = 1.621429720105354466140, /* 0x3ff9f160, 0x4a49d6c2 */
2378 P3 = -0.758397934778766047437, /* 0xbfe844cb, 0xbee751d9 */
2379 P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
2380
2381 int32_t hx;
2382 union {
2383 double value;
2384 uint64_t bits;
2385 } u;
2386 double r, s, t = 0.0, w;
2387 uint32_t sign;
2388 uint32_t high, low;
2389
2390 EXTRACT_WORDS(hx, low, x);
2391 sign = hx & 0x80000000; /* sign= sign(x) */
2392 hx ^= sign;
2393 if (hx >= 0x7ff00000) return (x + x); /* cbrt(NaN,INF) is itself */
2394
2395 /*
2396 * Rough cbrt to 5 bits:
2397 * cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
2398 * where e is integral and >= 0, m is real and in [0, 1), and "/" and
2399 * "%" are integer division and modulus with rounding towards minus
2400 * infinity. The RHS is always >= the LHS and has a maximum relative
2401 * error of about 1 in 16. Adding a bias of -0.03306235651 to the
2402 * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
2403 * floating point representation, for finite positive normal values,
2404 * ordinary integer divison of the value in bits magically gives
2405 * almost exactly the RHS of the above provided we first subtract the
2406 * exponent bias (1023 for doubles) and later add it back. We do the
2407 * subtraction virtually to keep e >= 0 so that ordinary integer
2408 * division rounds towards minus infinity; this is also efficient.
2409 */
2410 if (hx < 0x00100000) { /* zero or subnormal? */
2411 if ((hx | low) == 0) return (x); /* cbrt(0) is itself */
2412 SET_HIGH_WORD(t, 0x43500000); /* set t= 2**54 */
2413 t *= x;
2414 GET_HIGH_WORD(high, t);
2415 INSERT_WORDS(t, sign | ((high & 0x7fffffff) / 3 + B2), 0);
2416 } else {
2417 INSERT_WORDS(t, sign | (hx / 3 + B1), 0);
2418 }
2419
2420 /*
2421 * New cbrt to 23 bits:
2422 * cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
2423 * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
2424 * to within 2**-23.5 when |r - 1| < 1/10. The rough approximation
2425 * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
2426 * gives us bounds for r = t**3/x.
2427 *
2428 * Try to optimize for parallel evaluation as in k_tanf.c.
2429 */
2430 r = (t * t) * (t / x);
2431 t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
2432
2433 /*
2434 * Round t away from zero to 23 bits (sloppily except for ensuring that
2435 * the result is larger in magnitude than cbrt(x) but not much more than
2436 * 2 23-bit ulps larger). With rounding towards zero, the error bound
2437 * would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps
2438 * in the rounded t, the infinite-precision error in the Newton
2439 * approximation barely affects third digit in the final error
2440 * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
2441 * before the final error is larger than 0.667 ulps.
2442 */
2443 u.value = t;
2444 u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL;
2445 t = u.value;
2446
2447 /* one step Newton iteration to 53 bits with error < 0.667 ulps */
2448 s = t * t; /* t*t is exact */
2449 r = x / s; /* error <= 0.5 ulps; |r| < |t| */
2450 w = t + t; /* t+t is exact */
2451 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */
2452 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */
2453
2454 return (t);
2455 }
2456
2457 /* sin(x)
2458 * Return sine function of x.
2459 *
2460 * kernel function:
2461 * __kernel_sin ... sine function on [-pi/4,pi/4]
2462 * __kernel_cos ... cose function on [-pi/4,pi/4]
2463 * __ieee754_rem_pio2 ... argument reduction routine
2464 *
2465 * Method.
2466 * Let S,C and T denote the sin, cos and tan respectively on
2467 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2468 * in [-pi/4 , +pi/4], and let n = k mod 4.
2469 * We have
2470 *
2471 * n sin(x) cos(x) tan(x)
2472 * ----------------------------------------------------------
2473 * 0 S C T
2474 * 1 C -S -1/T
2475 * 2 -S -C T
2476 * 3 -C S -1/T
2477 * ----------------------------------------------------------
2478 *
2479 * Special cases:
2480 * Let trig be any of sin, cos, or tan.
2481 * trig(+-INF) is NaN, with signals;
2482 * trig(NaN) is that NaN;
2483 *
2484 * Accuracy:
2485 * TRIG(x) returns trig(x) nearly rounded
2486 */
sin(double x)2487 double sin(double x) {
2488 double y[2], z = 0.0;
2489 int32_t n, ix;
2490
2491 /* High word of x. */
2492 GET_HIGH_WORD(ix, x);
2493
2494 /* |x| ~< pi/4 */
2495 ix &= 0x7fffffff;
2496 if (ix <= 0x3fe921fb) {
2497 return __kernel_sin(x, z, 0);
2498 } else if (ix >= 0x7ff00000) {
2499 /* sin(Inf or NaN) is NaN */
2500 return x - x;
2501 } else {
2502 /* argument reduction needed */
2503 n = __ieee754_rem_pio2(x, y);
2504 switch (n & 3) {
2505 case 0:
2506 return __kernel_sin(y[0], y[1], 1);
2507 case 1:
2508 return __kernel_cos(y[0], y[1]);
2509 case 2:
2510 return -__kernel_sin(y[0], y[1], 1);
2511 default:
2512 return -__kernel_cos(y[0], y[1]);
2513 }
2514 }
2515 }
2516
2517 /* tan(x)
2518 * Return tangent function of x.
2519 *
2520 * kernel function:
2521 * __kernel_tan ... tangent function on [-pi/4,pi/4]
2522 * __ieee754_rem_pio2 ... argument reduction routine
2523 *
2524 * Method.
2525 * Let S,C and T denote the sin, cos and tan respectively on
2526 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2527 * in [-pi/4 , +pi/4], and let n = k mod 4.
2528 * We have
2529 *
2530 * n sin(x) cos(x) tan(x)
2531 * ----------------------------------------------------------
2532 * 0 S C T
2533 * 1 C -S -1/T
2534 * 2 -S -C T
2535 * 3 -C S -1/T
2536 * ----------------------------------------------------------
2537 *
2538 * Special cases:
2539 * Let trig be any of sin, cos, or tan.
2540 * trig(+-INF) is NaN, with signals;
2541 * trig(NaN) is that NaN;
2542 *
2543 * Accuracy:
2544 * TRIG(x) returns trig(x) nearly rounded
2545 */
tan(double x)2546 double tan(double x) {
2547 double y[2], z = 0.0;
2548 int32_t n, ix;
2549
2550 /* High word of x. */
2551 GET_HIGH_WORD(ix, x);
2552
2553 /* |x| ~< pi/4 */
2554 ix &= 0x7fffffff;
2555 if (ix <= 0x3fe921fb) {
2556 return __kernel_tan(x, z, 1);
2557 } else if (ix >= 0x7ff00000) {
2558 /* tan(Inf or NaN) is NaN */
2559 return x - x; /* NaN */
2560 } else {
2561 /* argument reduction needed */
2562 n = __ieee754_rem_pio2(x, y);
2563 /* 1 -> n even, -1 -> n odd */
2564 return __kernel_tan(y[0], y[1], 1 - ((n & 1) << 1));
2565 }
2566 }
2567
2568 /*
2569 * ES6 draft 09-27-13, section 20.2.2.12.
2570 * Math.cosh
2571 * Method :
2572 * mathematically cosh(x) if defined to be (exp(x)+exp(-x))/2
2573 * 1. Replace x by |x| (cosh(x) = cosh(-x)).
2574 * 2.
2575 * [ exp(x) - 1 ]^2
2576 * 0 <= x <= ln2/2 : cosh(x) := 1 + -------------------
2577 * 2*exp(x)
2578 *
2579 * exp(x) + 1/exp(x)
2580 * ln2/2 <= x <= 22 : cosh(x) := -------------------
2581 * 2
2582 * 22 <= x <= lnovft : cosh(x) := exp(x)/2
2583 * lnovft <= x <= ln2ovft: cosh(x) := exp(x/2)/2 * exp(x/2)
2584 * ln2ovft < x : cosh(x) := huge*huge (overflow)
2585 *
2586 * Special cases:
2587 * cosh(x) is |x| if x is +INF, -INF, or NaN.
2588 * only cosh(0)=1 is exact for finite x.
2589 */
cosh(double x)2590 double cosh(double x) {
2591 static const double KCOSH_OVERFLOW = 710.4758600739439;
2592 static const double one = 1.0, half = 0.5;
2593 static volatile double huge = 1.0e+300;
2594
2595 int32_t ix;
2596
2597 /* High word of |x|. */
2598 GET_HIGH_WORD(ix, x);
2599 ix &= 0x7fffffff;
2600
2601 // |x| in [0,0.5*log2], return 1+expm1(|x|)^2/(2*exp(|x|))
2602 if (ix < 0x3fd62e43) {
2603 double t = expm1(fabs(x));
2604 double w = one + t;
2605 // For |x| < 2^-55, cosh(x) = 1
2606 if (ix < 0x3c800000) return w;
2607 return one + (t * t) / (w + w);
2608 }
2609
2610 // |x| in [0.5*log2, 22], return (exp(|x|)+1/exp(|x|)/2
2611 if (ix < 0x40360000) {
2612 double t = exp(fabs(x));
2613 return half * t + half / t;
2614 }
2615
2616 // |x| in [22, log(maxdouble)], return half*exp(|x|)
2617 if (ix < 0x40862e42) return half * exp(fabs(x));
2618
2619 // |x| in [log(maxdouble), overflowthreshold]
2620 if (fabs(x) <= KCOSH_OVERFLOW) {
2621 double w = exp(half * fabs(x));
2622 double t = half * w;
2623 return t * w;
2624 }
2625
2626 /* x is INF or NaN */
2627 if (ix >= 0x7ff00000) return x * x;
2628
2629 // |x| > overflowthreshold.
2630 return huge * huge;
2631 }
2632
2633 /*
2634 * ES6 draft 09-27-13, section 20.2.2.30.
2635 * Math.sinh
2636 * Method :
2637 * mathematically sinh(x) if defined to be (exp(x)-exp(-x))/2
2638 * 1. Replace x by |x| (sinh(-x) = -sinh(x)).
2639 * 2.
2640 * E + E/(E+1)
2641 * 0 <= x <= 22 : sinh(x) := --------------, E=expm1(x)
2642 * 2
2643 *
2644 * 22 <= x <= lnovft : sinh(x) := exp(x)/2
2645 * lnovft <= x <= ln2ovft: sinh(x) := exp(x/2)/2 * exp(x/2)
2646 * ln2ovft < x : sinh(x) := x*shuge (overflow)
2647 *
2648 * Special cases:
2649 * sinh(x) is |x| if x is +Infinity, -Infinity, or NaN.
2650 * only sinh(0)=0 is exact for finite x.
2651 */
sinh(double x)2652 double sinh(double x) {
2653 static const double KSINH_OVERFLOW = 710.4758600739439,
2654 TWO_M28 =
2655 3.725290298461914e-9, // 2^-28, empty lower half
2656 LOG_MAXD = 709.7822265625; // 0x40862e42 00000000, empty lower half
2657 static const double shuge = 1.0e307;
2658
2659 double h = (x < 0) ? -0.5 : 0.5;
2660 // |x| in [0, 22]. return sign(x)*0.5*(E+E/(E+1))
2661 double ax = fabs(x);
2662 if (ax < 22) {
2663 // For |x| < 2^-28, sinh(x) = x
2664 if (ax < TWO_M28) return x;
2665 double t = expm1(ax);
2666 if (ax < 1) {
2667 return h * (2 * t - t * t / (t + 1));
2668 }
2669 return h * (t + t / (t + 1));
2670 }
2671 // |x| in [22, log(maxdouble)], return 0.5 * exp(|x|)
2672 if (ax < LOG_MAXD) return h * exp(ax);
2673 // |x| in [log(maxdouble), overflowthreshold]
2674 // overflowthreshold = 710.4758600739426
2675 if (ax <= KSINH_OVERFLOW) {
2676 double w = exp(0.5 * ax);
2677 double t = h * w;
2678 return t * w;
2679 }
2680 // |x| > overflowthreshold or is NaN.
2681 // Return Infinity of the appropriate sign or NaN.
2682 return x * shuge;
2683 }
2684
2685 /* Tanh(x)
2686 * Return the Hyperbolic Tangent of x
2687 *
2688 * Method :
2689 * x -x
2690 * e - e
2691 * 0. tanh(x) is defined to be -----------
2692 * x -x
2693 * e + e
2694 * 1. reduce x to non-negative by tanh(-x) = -tanh(x).
2695 * 2. 0 <= x < 2**-28 : tanh(x) := x with inexact if x != 0
2696 * -t
2697 * 2**-28 <= x < 1 : tanh(x) := -----; t = expm1(-2x)
2698 * t + 2
2699 * 2
2700 * 1 <= x < 22 : tanh(x) := 1 - -----; t = expm1(2x)
2701 * t + 2
2702 * 22 <= x <= INF : tanh(x) := 1.
2703 *
2704 * Special cases:
2705 * tanh(NaN) is NaN;
2706 * only tanh(0)=0 is exact for finite argument.
2707 */
tanh(double x)2708 double tanh(double x) {
2709 static const volatile double tiny = 1.0e-300;
2710 static const double one = 1.0, two = 2.0, huge = 1.0e300;
2711 double t, z;
2712 int32_t jx, ix;
2713
2714 GET_HIGH_WORD(jx, x);
2715 ix = jx & 0x7fffffff;
2716
2717 /* x is INF or NaN */
2718 if (ix >= 0x7ff00000) {
2719 if (jx >= 0)
2720 return one / x + one; /* tanh(+-inf)=+-1 */
2721 else
2722 return one / x - one; /* tanh(NaN) = NaN */
2723 }
2724
2725 /* |x| < 22 */
2726 if (ix < 0x40360000) { /* |x|<22 */
2727 if (ix < 0x3e300000) { /* |x|<2**-28 */
2728 if (huge + x > one) return x; /* tanh(tiny) = tiny with inexact */
2729 }
2730 if (ix >= 0x3ff00000) { /* |x|>=1 */
2731 t = expm1(two * fabs(x));
2732 z = one - two / (t + two);
2733 } else {
2734 t = expm1(-two * fabs(x));
2735 z = -t / (t + two);
2736 }
2737 /* |x| >= 22, return +-1 */
2738 } else {
2739 z = one - tiny; /* raise inexact flag */
2740 }
2741 return (jx >= 0) ? z : -z;
2742 }
2743
2744 } // namespace ieee754
2745 } // namespace base
2746 } // namespace v8
2747