1/*
2Copyright (c) 2014, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8    * Redistributions of source code must retain the above copyright notice,
9    * this list of conditions and the following disclaimer.
10
11    * Redistributions in binary form must reproduce the above copyright notice,
12    * this list of conditions and the following disclaimer in the documentation
13    * and/or other materials provided with the distribution.
14
15    * Neither the name of Intel Corporation nor the names of its contributors
16    * may be used to endorse or promote products derived from this software
17    * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31/******************************************************************************/
32//                     ALGORITHM DESCRIPTION
33//                     ---------------------
34//
35// Description:
36//  Let K = 64 (table size).
37//
38//  Four sub-domains:
39//    1. |x| < 1/(2*K)
40//      expm1(x) ~ P(x)
41//    2. 1/(2*K) <= |x| <= 56*log(2)
42//       x       x/log(2)    n
43//      e - 1 = 2         = 2 * T[j] * (1 + P(y)) - 1
44//    3. 56*log(2) < x < MAX_LOG
45//       x       x   x/log(2)    n
46//      e - 1 ~ e = 2         = 2 * T[j] * (1 + P(y))
47//    4. x < -56*log(2)
48//       x            x
49//      e - 1 = -1 + e ~ -1
50//    where
51//       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
52//       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
53//                  j/K
54//       values of 2   are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
55//
56//       P(y) is a minimax polynomial approximation of exp(y)-1
57//       on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated with Maple V).
58//
59//    In case 3, to avoid problems with arithmetic overflow and underflow,
60//              n                        n1  n2
61//    value of 2  is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2]
62//    and BIAS is a value of exponent bias.
63//
64// Special cases:
65//  expm1(NaN) is NaN
66//  expm1(+INF) is +INF
67//  expm1(-INF) is -1
68//  expm1(x) is x for subnormals
69//  for finite argument, only expm1(0)=0 is exact.
70//  For IEEE double
71//    if x > 709.782712893383973096 then expm1(x) overflow
72//
73/******************************************************************************/
74
75#include <private/bionic_asm.h>
76# -- Begin  static_func
# Position-independent helper: returns the address of static_const_table
# in %eax. Takes no arguments. The call/pop pair and the two lea
# instructions leave the other general registers and EFLAGS untouched.
77        .text
78        .align __bionic_asm_align
79        .type static_func, @function
80static_func:
81..B1.1:
82        call      ..L2  # pushes the address of ..L2 as a return address
83..L2:
84        popl      %eax  # %eax = run-time address of ..L2
85        lea       _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax  # %eax = GOT base
86        lea       static_const_table@GOTOFF(%eax), %eax  # %eax = GOT base + GOT-relative offset of the table
87        ret
88        .size   static_func,.-static_func
89# -- End  static_func
90
91# -- Begin  expm1
# double expm1(double x) for 32-bit x86 (AT&T syntax, SSE2 plus x87).
#
# In:    x at 8(%ebp), which is 128(%esp) once the 120-byte frame exists.
# Out:   result in x87 st(0); every exit path ends with an fldl.
# Saves: %ebp, and %ebx (spilled to 64(%esp), reloaded in the epilogue).
# Uses:  %eax, %ecx, %edx, %xmm0-%xmm7 and EFLAGS. The x87 control word is
#        changed only on the .L_2TAG_PACKET_1.0.2 path and restored there.
# %ebx holds the address of static_const_table for the whole body; all
# N(%ebx) operands below are byte offsets into that table.
#
# Dispatch on hi16, the top 16 bits of x with the sign bit cleared:
#   hi16 in [16304, 16527]  table-driven main path
#   hi16 below 16304        .L_2TAG_PACKET_14.0.2 (direct polynomial or return x)
#   hi16 above 16527        large magnitudes, infinities and NaNs
92ENTRY(expm1)
93# parameter 1: 8 + %ebp
94..B2.1:
95..B2.2:
96        pushl     %ebp
97        movl      %esp, %ebp
98        subl      $120, %esp  # frame: 0..28 temporaries, 48 result, 64 saved %ebx
99        movl      %ebx, 64(%esp)
100        call      static_func  # %eax = address of static_const_table
101        movl      %eax, %ebx
102        movsd     128(%esp), %xmm0  # xmm0 = x
103        unpcklpd  %xmm0, %xmm0  # x duplicated into both lanes
104        movapd    64(%ebx), %xmm1  # scale factor applied before rounding
105        movapd    48(%ebx), %xmm6  # rounding shifter
106        movapd    80(%ebx), %xmm2  # first part of the reduction constant
107        movapd    96(%ebx), %xmm3  # second part of the reduction constant
108        pextrw    $3, %xmm0, %eax  # top 16 bits of x
109        andl      $32767, %eax  # hi16: drop the sign bit
110        movl      $16527, %edx
111        subl      %eax, %edx  # negative when hi16 is above 16527
112        subl      $16304, %eax  # negative when hi16 is below 16304
113        orl       %eax, %edx  # sign bit set when hi16 is outside [16304, 16527]
114        cmpl      $-2147483648, %edx
115        jae       .L_2TAG_PACKET_0.0.2  # unsigned test of that sign bit
# Main path: m = round(x * scale), reduced argument = x - m*c1 - m*c2.
116        mulpd     %xmm0, %xmm1
117        addpd     %xmm6, %xmm1  # adding the shifter leaves m in the low bits
118        movapd    %xmm1, %xmm7  # keep the shifted value for its integer bits
119        subpd     %xmm6, %xmm1  # xmm1 = m as a double
120        mulpd     %xmm1, %xmm2
121        movapd    112(%ebx), %xmm4
122        mulpd     %xmm1, %xmm3
123        movapd    128(%ebx), %xmm5
124        subpd     %xmm2, %xmm0
125        movd      %xmm7, %eax  # %eax = m as a 32-bit integer
126        movl      %eax, %ecx
127        andl      $63, %ecx  # j = low six bits of m
128        shll      $4, %ecx  # byte offset of the 16-byte table entry j
129        sarl      $6, %eax  # n = m >> 6 (arithmetic shift)
130        movl      %eax, %edx
131        subpd     %xmm3, %xmm0  # xmm0 = reduced argument in both lanes
132        movapd    160(%ebx,%ecx), %xmm2  # table entry j (two doubles)
133        movsd     144(%ebx), %xmm3
# Polynomial in the reduced argument, interleaved with building 2^n.
134        mulpd     %xmm0, %xmm4
135        movapd    %xmm0, %xmm1  # xmm1 keeps the reduced argument itself
136        mulpd     %xmm0, %xmm0
137        mulsd     %xmm0, %xmm3
138        addpd     %xmm4, %xmm5
139        mulsd     %xmm0, %xmm0
140        movapd    %xmm2, %xmm4  # low lane of xmm4 = first double of the entry
141        unpckhpd  %xmm2, %xmm2  # xmm2 = second quadword of the entry in both lanes
142        movdqa    16(%ebx), %xmm6
143        pand      %xmm6, %xmm7  # keep 64*n: clears the index bits and the upper dword
144        movdqa    32(%ebx), %xmm6
145        paddq     %xmm6, %xmm7  # add 1023*64
146        psllq     $46, %xmm7  # (n + 1023) << 52: the bit pattern of 2^n
147        mulsd     %xmm0, %xmm3
148        mulpd     %xmm5, %xmm0
149        addl      $894, %edx
150        cmpl      $1916, %edx
151        ja        .L_2TAG_PACKET_1.0.2  # n outside [-894, 1022]
152        addsd     %xmm3, %xmm0
153        xorpd     %xmm3, %xmm3
154        movl      $16368, %eax
155        pinsrw    $3, %eax, %xmm3  # xmm3 = 1.0
156        orpd      %xmm7, %xmm2  # merge the 2^n exponent into the table fraction bits
157        mulsd     %xmm4, %xmm7  # 2^n times the first double of the entry
158        movapd    %xmm3, %xmm6
159        addsd     %xmm1, %xmm3  # 1 + reduced argument
160        pextrw    $3, %xmm2, %edx  # top 16 bits of the scaled table value
161        pshufd    $238, %xmm0, %xmm5
162        psrlq     $38, %xmm3
163        psllq     $38, %xmm3  # clear the low 38 bits of that sum
164        movapd    %xmm2, %xmm4
165        subsd     %xmm3, %xmm6
166        addsd     %xmm5, %xmm0
167        addsd     %xmm6, %xmm1  # remainder lost by the truncation above
168        addsd     %xmm7, %xmm4
169        mulsd     %xmm3, %xmm7
170        mulsd     %xmm2, %xmm3
171        xorpd     %xmm5, %xmm5
172        movl      $16368, %eax
173        pinsrw    $3, %eax, %xmm5  # xmm5 = 1.0, subtracted below
174        addsd     %xmm1, %xmm0
# The three variants below differ only in where the 1.0 is subtracted; the
# choice depends on the top 16 bits of the scaled table value in %edx.
175        movl      $17184, %ecx
176        subl      %edx, %ecx  # negative when %edx is above 17184
177        subl      $16256, %edx  # negative when %edx is below 16256
178        orl       %edx, %ecx
179        jl        .L_2TAG_PACKET_2.0.2  # %edx outside [16256, 17184]
180        mulsd     %xmm4, %xmm0
181        subsd     %xmm5, %xmm3  # subtract 1.0 from the leading product
182        addsd     %xmm7, %xmm0
183        addsd     %xmm3, %xmm0
184.L_2TAG_PACKET_3.0.2:
185        jmp       .L_2TAG_PACKET_4.0.2  # common exit: return xmm0
186.L_2TAG_PACKET_2.0.2:
187        cmpl      $0, %edx
188        jl        .L_2TAG_PACKET_5.0.2  # %edx was below 16256
189        mulsd     %xmm4, %xmm0
190        subsd     %xmm5, %xmm7  # subtract 1.0 from the smaller product instead
191        addsd     %xmm7, %xmm0
192        addsd     %xmm3, %xmm0
193        jmp       .L_2TAG_PACKET_3.0.2
194.L_2TAG_PACKET_5.0.2:
195        mulsd     %xmm4, %xmm0
196        addsd     %xmm7, %xmm0
197        addsd     %xmm3, %xmm0
198        subsd     %xmm5, %xmm0  # subtract 1.0 last
199        jmp       .L_2TAG_PACKET_3.0.2
# n is outside [-894, 1022]. Negative x returns -1.0. For positive x the
# result is assembled on the x87 stack with 2^n split into two factors,
# and no 1.0 is subtracted on this path.
200.L_2TAG_PACKET_1.0.2:
201        movl      132(%esp), %ecx  # high dword of x
202        addsd     %xmm0, %xmm1
203        unpckhpd  %xmm0, %xmm0
204        addsd     %xmm1, %xmm0  # xmm0 = combined polynomial value
205        cmpl      $0, %ecx
206        jl        .L_2TAG_PACKET_6.0.2  # x negative: return -1.0
207        fstcw     24(%esp)  # save the caller x87 control word
208        movzwl    24(%esp), %edx
209        orl       $768, %edx  # set both precision-control bits
210        movw      %dx, 28(%esp)
211        fldcw     28(%esp)
212        movl      %eax, %edx  # %eax still holds n here
213        sarl      $1, %eax  # first factor exponent: n >> 1
214        subl      %eax, %edx  # second factor exponent: n - (n >> 1)
215        movdqa    (%ebx), %xmm6
216        pandn     %xmm2, %xmm6  # keep only the 52 fraction bits of the table value
217        addl      $1023, %eax
218        movd      %eax, %xmm3
219        psllq     $52, %xmm3  # xmm3 = 2^(n >> 1)
220        orpd      %xmm3, %xmm6  # table fraction with the exponent of the first factor
221        mulsd     %xmm3, %xmm4  # first double of the entry scaled by the first factor
222        movsd     %xmm0, 8(%esp)
223        fldl      8(%esp)  # push the polynomial value
224        movsd     %xmm6, 16(%esp)
225        fldl      16(%esp)  # push the scaled table value
226        movsd     %xmm4, 16(%esp)
227        fldl      16(%esp)  # push the scaled product from the first double
228        addl      $1023, %edx
229        movd      %edx, %xmm4
230        psllq     $52, %xmm4  # xmm4 = second power-of-two factor
231        faddp     %st, %st(1)  # t = sum of the two table parts
232        fmul      %st, %st(1)  # polynomial value times t
233        faddp     %st, %st(1)  # t + t * polynomial value
234        movsd     %xmm4, 8(%esp)
235        fldl      8(%esp)
236        fmulp     %st, %st(1)  # apply the second factor
237        fstpl     8(%esp)  # round to double
238        movsd     8(%esp), %xmm0
239        fldcw     24(%esp)  # restore the caller control word
240        pextrw    $3, %xmm0, %ecx
241        andl      $32752, %ecx  # exponent field of the result
242        cmpl      $32752, %ecx
243        jae       .L_2TAG_PACKET_7.0.2  # all ones: the result overflowed
244        jmp       .L_2TAG_PACKET_4.0.2
# %edx is loaded with 41 or 42 before control reaches .L_2TAG_PACKET_8.0.2,
# but nothing in this function reads it afterwards.
248.L_2TAG_PACKET_7.0.2:
249        movl      $41, %edx
250.L_2TAG_PACKET_8.0.2:
251        movsd     %xmm0, (%esp)  # value to return
252        movsd     128(%esp), %xmm0  # reload x into xmm0
253        fldl      (%esp)  # return value in st(0)
254        jmp       .L_2TAG_PACKET_9.0.2
# Positive x with high dword at or above 1083179008 (the high dword of 1024.0).
255.L_2TAG_PACKET_10.0.2:
256        cmpl      $2146435072, %eax
257        jae       .L_2TAG_PACKET_11.0.2  # exponent all ones: infinity or NaN
258        movsd     1272(%ebx), %xmm0  # largest finite double
259        mulsd     %xmm0, %xmm0  # squaring it overflows
260        movl      $41, %edx
261        jmp       .L_2TAG_PACKET_8.0.2
# Infinity or NaN: re-read both halves of x and classify.
262.L_2TAG_PACKET_11.0.2:
263        movl      132(%esp), %eax  # high dword
264        movl      128(%esp), %edx  # low dword
265        movl      %eax, %ecx  # keep the signed copy
266        andl      $2147483647, %eax
267        cmpl      $2146435072, %eax
268        ja        .L_2TAG_PACKET_12.0.2  # fraction bits set in the high dword: NaN
269        cmpl      $0, %edx
270        jne       .L_2TAG_PACKET_12.0.2  # fraction bits set in the low dword: NaN
271        cmpl      $0, %ecx
272        jl        .L_2TAG_PACKET_13.0.2  # negative infinity
273        movsd     1256(%ebx), %xmm0  # positive infinity
274        jmp       .L_2TAG_PACKET_4.0.2
275.L_2TAG_PACKET_13.0.2:
276        jmp       .L_2TAG_PACKET_6.0.2  # return -1.0
277.L_2TAG_PACKET_12.0.2:
278        movsd     128(%esp), %xmm0
279        addsd     %xmm0, %xmm0  # NaN in, NaN out
280        jmp       .L_2TAG_PACKET_4.0.2
# hi16 below 16304: evaluate a polynomial directly in x, with no table lookup.
281.L_2TAG_PACKET_14.0.2:
282        addl      $16304, %eax  # undo the earlier subtraction: %eax = hi16
283        cmpl      $15504, %eax
284        jb        .L_2TAG_PACKET_15.0.2  # even smaller: the result is x itself
285        movapd    1184(%ebx), %xmm2
286        pshufd    $68, %xmm0, %xmm1
287        movapd    1200(%ebx), %xmm3
288        movapd    1216(%ebx), %xmm4
289        movsd     1232(%ebx), %xmm5
290        mulsd     %xmm1, %xmm1
291        xorpd     %xmm6, %xmm6
292        movl      $16352, %eax
293        pinsrw    $3, %eax, %xmm6  # xmm6 = 0.5
294        mulpd     %xmm0, %xmm2
295        xorpd     %xmm7, %xmm7
296        movl      $16368, %edx
297        pinsrw    $3, %edx, %xmm7  # xmm7 = 1.0
298        addpd     %xmm3, %xmm2
299        mulsd     %xmm1, %xmm5
300        pshufd    $228, %xmm1, %xmm3
301        mulpd     %xmm1, %xmm1
302        mulsd     %xmm0, %xmm6  # x / 2
303        mulpd     %xmm0, %xmm2
304        addpd     %xmm4, %xmm2
305        movapd    %xmm7, %xmm4
306        addsd     %xmm6, %xmm7  # 1 + x/2
307        mulpd     %xmm3, %xmm1
308        psrlq     $27, %xmm7
309        psllq     $27, %xmm7  # clear the low 27 bits of that sum
310        movsd     1288(%ebx), %xmm3  # mask that clears the low 26 bits
311        subsd     %xmm7, %xmm4
312        mulpd     %xmm1, %xmm2
313        addsd     %xmm4, %xmm6
314        pshufd    $238, %xmm2, %xmm1
315        addsd     %xmm2, %xmm6
316        andpd     %xmm0, %xmm3  # high part of x
317        movapd    %xmm0, %xmm4
318        addsd     %xmm6, %xmm1
319        subsd     %xmm3, %xmm0  # low part of x
320        addsd     %xmm5, %xmm1
321        mulsd     %xmm7, %xmm3
322        mulsd     %xmm7, %xmm0
323        mulsd     %xmm1, %xmm4
324        addsd     %xmm4, %xmm0
325        addsd     %xmm3, %xmm0
326        jmp       .L_2TAG_PACKET_4.0.2
# hi16 below 15504: x is returned unchanged.
327.L_2TAG_PACKET_15.0.2:
328        cmpl      $16, %eax
329        jae       .L_2TAG_PACKET_3.0.2  # exponent field nonzero: return x
330        movapd    %xmm0, %xmm2
331        movd      %xmm0, %eax  # bits 0..31 of x
332        psrlq     $31, %xmm2
333        movd      %xmm2, %ecx  # bits 31..62 of x
334        orl       %ecx, %eax
335        je        .L_2TAG_PACKET_3.0.2  # every bit except the sign is clear: return x
336        movl      $16, %edx
337        xorpd     %xmm1, %xmm1
338        pinsrw    $3, %edx, %xmm1  # smallest normal double
339        mulsd     %xmm1, %xmm1  # squaring it underflows; the product is discarded
340        movl      $42, %edx
341        jmp       .L_2TAG_PACKET_8.0.2  # return x
# hi16 outside [16304, 16527]; %eax still holds hi16 - 16304.
342.L_2TAG_PACKET_0.0.2:
343        cmpl      $0, %eax
344        jl        .L_2TAG_PACKET_14.0.2  # hi16 below 16304
345        movl      132(%esp), %eax  # high dword of x, sign included
346        cmpl      $1083179008, %eax
347        jge       .L_2TAG_PACKET_10.0.2  # signed compare: large positive, +infinity, NaN
348        cmpl      $-1048576, %eax
349        jae       .L_2TAG_PACKET_11.0.2  # unsigned compare: -infinity or NaN with the sign set
350.L_2TAG_PACKET_6.0.2:
351        xorpd     %xmm0, %xmm0
352        movl      $49136, %eax
353        pinsrw    $3, %eax, %xmm0  # xmm0 = -1.0
354        jmp       .L_2TAG_PACKET_4.0.2
# Common exit: move the SSE result to st(0) through the stack.
355.L_2TAG_PACKET_4.0.2:
356        movsd     %xmm0, 48(%esp)
357        fldl      48(%esp)
358.L_2TAG_PACKET_9.0.2:
359        movl      64(%esp), %ebx  # restore the callee-saved register
360        movl      %ebp, %esp
361        popl      %ebp
362        ret
363..B2.3:
364END(expm1)
365# -- End  expm1
366
367# Start file scope ASM
# Publishes expm1l as an alias of expm1 through the ALIAS_SYMBOL macro
# (presumably supplied by private/bionic_asm.h; confirm against that header).
368ALIAS_SYMBOL(expm1l, expm1);
369# End file scope ASM
# Read-only constants for expm1: 324 longs (1296 bytes), 16-byte aligned.
# Each double is stored as two longs, low dword first. The code addresses
# the table by byte offset from its start; offsets are listed below.
370	.section .rodata, "a"
371	.align 16
372	.align 16
373static_const_table:
# Offset 0: mask 0xFFF0000000000000 in both lanes; used with pandn to keep
# only the 52 fraction bits of a double.
374	.long	0
375	.long	4293918720
376	.long	0
377	.long	4293918720
# Offset 16: mask 0x00000000FFFFFFC0 in both lanes; clears the six index
# bits and the upper dword of the rounded integer.
378	.long	4294967232
379	.long	0
380	.long	4294967232
381	.long	0
# Offset 32: 65472 (1023 * 64) in both lanes; added before the shift that
# turns the integer into an exponent field.
382	.long	65472
383	.long	0
384	.long	65472
385	.long	0
# Offset 48: 0x4338000000000000 in both lanes; added to and then subtracted
# from the scaled argument to round it to an integer.
386	.long	0
387	.long	1127743488
388	.long	0
389	.long	1127743488
# Offset 64: scale factor multiplied into x before rounding (bit pattern
# 0x40571547652B82FE; presumably 64/ln(2), confirm).
390	.long	1697350398
391	.long	1079448903
392	.long	1697350398
393	.long	1079448903
# Offsets 80 and 96: two constants multiplied by the rounded integer and
# subtracted from x in turn (presumably high and low parts of ln(2)/64,
# confirm).
394	.long	4277796864
395	.long	1065758274
396	.long	4277796864
397	.long	1065758274
398	.long	3164486458
399	.long	1025308570
400	.long	3164486458
401	.long	1025308570
# Offsets 112, 128 and 144: polynomial coefficients used on the main path.
402	.long	1963358694
403	.long	1065423121
404	.long	1431655765
405	.long	1069897045
406	.long	1431655765
407	.long	1067799893
408	.long	0
409	.long	1071644672
410	.long	381774871
411	.long	1062650220
412	.long	381774871
413	.long	1062650220
# Offset 160: lookup table of 64 entries, 16 bytes each, indexed by the low
# six bits of the rounded integer. The first quadword of an entry is used
# as a double; the second quadword is combined with an exponent field by
# the code before use.
414	.long	0
415	.long	0
416	.long	0
417	.long	0
418	.long	1000070955
419	.long	1042145304
420	.long	1040187392
421	.long	11418
422	.long	988267849
423	.long	1039500660
424	.long	3539992576
425	.long	22960
426	.long	36755401
427	.long	1042114290
428	.long	402653184
429	.long	34629
430	.long	3634769483
431	.long	1042178627
432	.long	1820327936
433	.long	46424
434	.long	2155991225
435	.long	1041560680
436	.long	847249408
437	.long	58348
438	.long	2766913307
439	.long	1039293264
440	.long	3489660928
441	.long	70401
442	.long	3651174602
443	.long	1040488175
444	.long	2927624192
445	.long	82586
446	.long	3073892131
447	.long	1042240606
448	.long	1006632960
449	.long	94904
450	.long	1328391742
451	.long	1042019037
452	.long	3942645760
453	.long	107355
454	.long	2650893825
455	.long	1041903210
456	.long	822083584
457	.long	119943
458	.long	2397289153
459	.long	1041802037
460	.long	2281701376
461	.long	132667
462	.long	430997175
463	.long	1042110606
464	.long	1845493760
465	.long	145530
466	.long	1230936525
467	.long	1041801015
468	.long	1702887424
469	.long	158533
470	.long	740675935
471	.long	1040178913
472	.long	4110417920
473	.long	171677
474	.long	3489810261
475	.long	1041825986
476	.long	2793406464
477	.long	184965
478	.long	2532600530
479	.long	1040767882
480	.long	167772160
481	.long	198398
482	.long	3542557060
483	.long	1041827263
484	.long	2986344448
485	.long	211976
486	.long	1401563777
487	.long	1041061093
488	.long	922746880
489	.long	225703
490	.long	3129406026
491	.long	1041852413
492	.long	880803840
493	.long	239579
494	.long	900993572
495	.long	1039283234
496	.long	1275068416
497	.long	253606
498	.long	2115029358
499	.long	1042140042
500	.long	562036736
501	.long	267786
502	.long	1086643152
503	.long	1041785419
504	.long	1610612736
505	.long	282120
506	.long	82864366
507	.long	1041256244
508	.long	3045064704
509	.long	296610
510	.long	2392968152
511	.long	1040913683
512	.long	3573547008
513	.long	311258
514	.long	2905856183
515	.long	1040002214
516	.long	1988100096
517	.long	326066
518	.long	3742008261
519	.long	1040011137
520	.long	1451229184
521	.long	341035
522	.long	863393794
523	.long	1040880621
524	.long	914358272
525	.long	356167
526	.long	1446136837
527	.long	1041372426
528	.long	3707764736
529	.long	371463
530	.long	927855201
531	.long	1040617636
532	.long	360710144
533	.long	386927
534	.long	1492679939
535	.long	1041050306
536	.long	2952790016
537	.long	402558
538	.long	608827001
539	.long	1041582217
540	.long	2181038080
541	.long	418360
542	.long	606260204
543	.long	1042271987
544	.long	1711276032
545	.long	434334
546	.long	3163044019
547	.long	1041843851
548	.long	1006632960
549	.long	450482
550	.long	4148747325
551	.long	1041962972
552	.long	3900702720
553	.long	466805
554	.long	802924201
555	.long	1041275378
556	.long	1442840576
557	.long	483307
558	.long	3052749833
559	.long	1041940577
560	.long	1937768448
561	.long	499988
562	.long	2216116399
563	.long	1041486744
564	.long	914358272
565	.long	516851
566	.long	2729697836
567	.long	1041445764
568	.long	2566914048
569	.long	533897
570	.long	540608356
571	.long	1041310907
572	.long	2600468480
573	.long	551129
574	.long	2916344493
575	.long	1040535661
576	.long	1107296256
577	.long	568549
578	.long	731391814
579	.long	1039497014
580	.long	2566914048
581	.long	586158
582	.long	1024722704
583	.long	1041461625
584	.long	2961178624
585	.long	603959
586	.long	3806831748
587	.long	1041732499
588	.long	2675965952
589	.long	621954
590	.long	238953304
591	.long	1040316488
592	.long	2189426688
593	.long	640145
594	.long	749123235
595	.long	1041725785
596	.long	2063597568
597	.long	658534
598	.long	1168187977
599	.long	1041175214
600	.long	2986344448
601	.long	677123
602	.long	3506096399
603	.long	1042186095
604	.long	1426063360
605	.long	695915
606	.long	1470221620
607	.long	1041675499
608	.long	2566914048
609	.long	714911
610	.long	3182425146
611	.long	1041483134
612	.long	3087007744
613	.long	734114
614	.long	3131698208
615	.long	1042208657
616	.long	4068474880
617	.long	753526
618	.long	2300504125
619	.long	1041428596
620	.long	2415919104
621	.long	773150
622	.long	2290297931
623	.long	1037388400
624	.long	3716153344
625	.long	792987
626	.long	3532148223
627	.long	1041626194
628	.long	771751936
629	.long	813041
630	.long	1161884404
631	.long	1042015258
632	.long	3699376128
633	.long	833312
634	.long	876383176
635	.long	1037968878
636	.long	1241513984
637	.long	853805
638	.long	3379986796
639	.long	1042213153
640	.long	3699376128
641	.long	874520
642	.long	1545797737
643	.long	1041681569
644	.long	58720256
645	.long	895462
646	.long	2925146801
647	.long	1042212567
648	.long	855638016
649	.long	916631
650	.long	1316627971
651	.long	1038516204
652	.long	3883925504
653	.long	938030
654	.long	3267869137
655	.long	1040337004
656	.long	2726297600
657	.long	959663
658	.long	3720868999
659	.long	1041782409
660	.long	3992977408
661	.long	981531
662	.long	433316142
663	.long	1041994064
664	.long	1526726656
665	.long	1003638
666	.long	781232103
667	.long	1040093400
668	.long	2172649472
669	.long	1025985
# Offsets 1184, 1200, 1216 and 1232: polynomial coefficients used on the
# small-argument path.
670	.long	2773927732
671	.long	1053236707
672	.long	381774871
673	.long	1062650220
674	.long	379653899
675	.long	1056571845
676	.long	286331153
677	.long	1065423121
678	.long	436314138
679	.long	1059717536
680	.long	1431655765
681	.long	1067799893
682	.long	1431655765
683	.long	1069897045
# Offsets 1240 (0.5) and 1248 (1.0): no reference to them is visible in
# this file.
684	.long	0
685	.long	1071644672
686	.long	0
687	.long	1072693248
# Offset 1256: positive infinity, returned for an input of +infinity.
688	.long	0
689	.long	2146435072
# Offset 1264: zero; no reference to it is visible in this file.
690	.long	0
691	.long	0
# Offset 1272: largest finite double; squared by the code to produce an
# overflowed result.
692	.long	4294967295
693	.long	2146435071
# Offset 1280: smallest normal double; no reference to it is visible in
# this file.
694	.long	0
695	.long	1048576
# Offset 1288: mask 0xFFFFFFFFFC000000; clears the low 26 bits of x on the
# small-argument path.
696	.long	4227858432
697	.long	4294967295
698	.type	static_const_table,@object
699	.size	static_const_table,1296
700	.data
# Empty .note.GNU-stack section: the GNU toolchain convention for declaring
# that this object does not need an executable stack.
701	.section .note.GNU-stack, ""
702# End
703