1 /*
2 * e_rem_pio2.c
3 *
4 * Copyright (c) 1999-2018, Arm Limited.
5 * SPDX-License-Identifier: MIT
6 */
7
8 #include <math.h>
9 #include "math_private.h"
10
__ieee754_rem_pio2(double x,double * y)11 int __ieee754_rem_pio2(double x, double *y) {
12 int q;
13
14 y[1] = 0.0; /* default */
15
16 /*
17 * Simple cases: all nicked from the fdlibm version for speed.
18 */
19 {
20 static const double invpio2 = 0x1.45f306dc9c883p-1;
21 static const double pio2s[] = {
22 0x1.921fb544p+0, /* 1.57079632673412561417e+00 */
23 0x1.0b4611a626331p-34, /* 6.07710050650619224932e-11 */
24 0x1.0b4611a6p-34, /* 6.07710050630396597660e-11 */
25 0x1.3198a2e037073p-69, /* 2.02226624879595063154e-21 */
26 0x1.3198a2ep-69, /* 2.02226624871116645580e-21 */
27 0x1.b839a252049c1p-104, /* 8.47842766036889956997e-32 */
28 };
29
30 double z,w,t,r,fn;
31 int i,j,idx,n,ix,hx;
32
33 hx = __HI(x); /* high word of x */
34 ix = hx&0x7fffffff;
35 if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
36 {y[0] = x; y[1] = 0; return 0;}
37 if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
38 if(hx>0) {
39 z = x - pio2s[0];
40 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
41 y[0] = z - pio2s[1];
42 #ifdef bottomhalf
43 y[1] = (z-y[0])-pio2s[1];
44 #endif
45 } else { /* near pi/2, use 33+33+53 bit pi */
46 z -= pio2s[2];
47 y[0] = z - pio2s[3];
48 #ifdef bottomhalf
49 y[1] = (z-y[0])-pio2s[3];
50 #endif
51 }
52 return 1;
53 } else { /* negative x */
54 z = x + pio2s[0];
55 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
56 y[0] = z + pio2s[1];
57 #ifdef bottomhalf
58 y[1] = (z-y[0])+pio2s[1];
59 #endif
60 } else { /* near pi/2, use 33+33+53 bit pi */
61 z += pio2s[2];
62 y[0] = z + pio2s[3];
63 #ifdef bottomhalf
64 y[1] = (z-y[0])+pio2s[3];
65 #endif
66 }
67 return -1;
68 }
69 }
70 if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
71 t = fabs(x);
72 n = (int) (t*invpio2+0.5);
73 fn = (double)n;
74 r = t-fn*pio2s[0];
75 w = fn*pio2s[1]; /* 1st round good to 85 bit */
76 j = ix>>20;
77 idx = 1;
78 while (1) {
79 y[0] = r-w;
80 if (idx == 3)
81 break;
82 i = j-(((__HI(y[0]))>>20)&0x7ff);
83 if(i <= 33*idx-17)
84 break;
85 t = r;
86 w = fn*pio2s[2*idx];
87 r = t-w;
88 w = fn*pio2s[2*idx+1]-((t-r)-w);
89 idx++;
90 }
91 #ifdef bottomhalf
92 y[1] = (r-y[0])-w;
93 #endif
94 if(hx<0) {
95 y[0] = -y[0];
96 #ifdef bottomhalf
97 y[1] = -y[1];
98 #endif
99 return -n;
100 } else {
101 return n;
102 }
103 }
104 }
105
106 {
107 static const unsigned twooverpi[] = {
108 /* We start with two zero words, because they take up less
109 * space than the array bounds checking and special-case
110 * handling that would have to occur in their absence. */
111 0, 0,
112 /* 2/pi in hex is 0.a2f9836e... */
113 0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599,
114 0x3c439041, 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0,
115 0x06492eea, 0x09d1921c, 0xfe1deb1c, 0xb129a73e, 0xe88235f5,
116 0x2ebb4484, 0xe99c7026, 0xb45f7e41, 0x3991d639, 0x835339f4,
117 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f, 0xef2f118b,
118 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
119 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1,
120 0x1f8d5d08,
121 };
122
123 /*
124 * Multiprecision multiplication of this nature is more
125 * readily done in integers than in VFP, since we can use
126 * UMULL (on CPUs that support it) to multiply 32 by 32 bits
127 * at a time whereas the VFP would only be able to do 12x12
128 * without losing accuracy.
129 *
130 * So extract the mantissa of the input number as two 32-bit
131 * integers.
132 */
133 unsigned mant1 = 0x00100000 | (__HI(x) & 0xFFFFF);
134 unsigned mant2 = __LO(x);
135 /*
136 * Now work out which part of our stored value of 2/pi we're
137 * supposed to be multiplying by.
138 *
139 * Let the IEEE exponent field of x be e. With its bias
140 * removed, (e-1023) is the index of the set bit in bit 20
141 * of 'mant1' (i.e. that set bit has real place value
142 * 2^(e-1023)). So the lowest set bit in 'mant1', 52 bits
143 * further down, must have place value 2^(e-1075).
144 *
145 * We begin taking an interest in the value of 2/pi at the
146 * bit which multiplies by _that_ to give something with
147 * place value at most 2. In other words, the highest bit of
148 * 2/pi we're interested in is the one with place value
149 * 2/(2^(e-1075)) = 2^(1076-e).
150 *
151 * The bit at the top of the first zero word of the above
152 * array has place value 2^63. Hence, the bit we want to put
153 * at the top of the first word we extract from that array
154 * is the one at bit index n, where 63-n = 1076-e and hence
155 * n=e-1013.
156 */
157 int topbitindex = ((__HI(x) >> 20) & 0x7FF) - 1013;
158 int wordindex = topbitindex >> 5;
159 int shiftup = topbitindex & 31;
160 int shiftdown = 32 - shiftup;
161 unsigned scaled[8];
162 int i;
163
164 scaled[7] = scaled[6] = 0;
165
166 for (i = 6; i-- > 0 ;) {
167 /*
168 * Extract a word from our representation of 2/pi.
169 */
170 unsigned word;
171 if (shiftup)
172 word = (twooverpi[wordindex + i] << shiftup) | (twooverpi[wordindex + i + 1] >> shiftdown);
173 else
174 word = twooverpi[wordindex + i];
175 /*
176 * Multiply it by both words of our mantissa. (Should
177 * generate UMULLs where available.)
178 */
179 unsigned long long mult1 = (unsigned long long)word * mant1;
180 unsigned long long mult2 = (unsigned long long)word * mant2;
181 /*
182 * Split those up into 32-bit chunks.
183 */
184 unsigned bottom1 = (unsigned)mult1, top1 = (unsigned)(mult1 >> 32);
185 unsigned bottom2 = (unsigned)mult2, top2 = (unsigned)(mult2 >> 32);
186 /*
187 * Add those two chunks together.
188 */
189 unsigned out1, out2, out3;
190
191 out3 = bottom2;
192 out2 = top2 + bottom1;
193 out1 = top1 + (out2 < top2);
194 /*
195 * And finally add them to our 'scaled' array.
196 */
197 unsigned s3 = scaled[i+2], s2 = scaled[i+1], s1;
198 unsigned carry;
199 s3 = out3 + s3; carry = (s3 < out3);
200 s2 = out2 + s2 + carry; carry = carry ? (s2 <= out2) : (s2 < out2);
201 s1 = out1 + carry;
202
203 scaled[i+2] = s3;
204 scaled[i+1] = s2;
205 scaled[i] = s1;
206 }
207
208
209 /*
210 * The topmost set bit in mant1 is bit 20, and that has
211 * place value 2^(e-1023). The topmost bit (bit 31) of the
212 * most significant word we extracted from our twooverpi
213 * array had place value 2^(1076-e). So the product of those
214 * two bits must have place value 2^53; and that bit will
215 * have ended up as bit 19 of scaled[0]. Hence, the integer
216 * quadrant value we want to output, consisting of the bits
217 * with place values 2^1 and 2^0, must be 52 and 53 bits
218 * below that, i.e. precisely the topmost two bits of
219 * scaled[2].
220 *
221 * Or, at least, it will be once we add 1/2, to round to the
222 * _nearest_ multiple of pi/2 rather than the next one down.
223 */
224 q = (scaled[2] + (1<<29)) >> 30;
225
226 /*
227 * Now we construct the output fraction, which is most
228 * simply done in the VFP. We just extract four consecutive
229 * bit strings from our chunk of binary data, convert them
230 * to doubles, equip each with an appropriate FP exponent,
231 * add them together, and (don't forget) multiply back up by
232 * pi/2. That way we don't have to work out ourselves where
233 * the highest fraction bit ended up.
234 *
235 * Since our displacement from the nearest multiple of pi/2
236 * can be positive or negative, the topmost of these four
237 * values must be arranged with its 2^-1 bit at the very top
238 * of the word, and then treated as a _signed_ integer.
239 */
240 int i1 = (scaled[2] << 2);
241 unsigned i2 = scaled[3];
242 unsigned i3 = scaled[4];
243 unsigned i4 = scaled[5];
244 double f1 = i1, f2 = i2 * (0x1.0p-30), f3 = i3 * (0x1.0p-62), f4 = i4 * (0x1.0p-94);
245
246 /*
247 * Now f1+f2+f3+f4 is a representation, potentially to
248 * twice double precision, of 2^32 times ((2/pi)*x minus
249 * some integer). So our remaining job is to multiply
250 * back down by (pi/2)*2^-32, and convert back to one
251 * single-precision output number.
252 */
253 double ftop = f1 + (f2 + (f3 + f4));
254 ftop = __SET_LO(ftop, 0);
255 double fbot = f4 - (((ftop-f1)-f2)-f3);
256
257 /* ... and multiply by a prec-and-a-half value of (pi/2)*2^-32. */
258 double ret = (ftop * 0x1.921fb54p-32) + (ftop * 0x1.10b4611a62633p-62 + fbot * 0x1.921fb54442d18p-32);
259
260 /* Just before we return, take the input sign into account. */
261 if (__HI(x) & 0x80000000) {
262 q = -q;
263 ret = -ret;
264 }
265 y[0] = ret;
266 return q;
267 }
268 }
269