1/*
2Copyright (c) 2014, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8    * Redistributions of source code must retain the above copyright notice,
9    * this list of conditions and the following disclaimer.
10
11    * Redistributions in binary form must reproduce the above copyright notice,
12    * this list of conditions and the following disclaimer in the documentation
13    * and/or other materials provided with the distribution.
14
15    * Neither the name of Intel Corporation nor the names of its contributors
16    * may be used to endorse or promote products derived from this software
17    * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31/******************************************************************************/
32//                     ALGORITHM DESCRIPTION
33//                     ---------------------
34//
35// Description:
36//  Let K = 64 (table size).
37//
38//  Four sub-domains:
39//    1. |x| < 1/(2*K)
40//      expm1(x) ~ P(x)
41//    2. 1/(2*K) <= |x| <= 56*log(2)
42//       x       x/log(2)    n
43//      e - 1 = 2         = 2 * T[j] * (1 + P(y)) - 1
44//    3. 56*log(2) < x < MAX_LOG
45//       x       x   x/log(2)    n
46//      e - 1 ~ e = 2         = 2 * T[j] * (1 + P(y))
47//    4. x < -56*log(2)
48//       x            x
49//      e - 1 = -1 + e ~ -1
50//    where
51//       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
52//       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
53//                  j/K
54//       values of 2   are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
55//
56//       P(y) is a minimax polynomial approximation of exp(x)-1
57//       on small interval [-log(2)/K..log(2)/K] (were calculated by Maple V).
58//
59//    In case 3, to avoid problems with arithmetic overflow and underflow,
60//              n                        n1  n2
61//    value of 2  is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2]
62//    and BIAS is a value of exponent bias.
63//
64// Special cases:
65//  expm1(NaN) is NaN
66//  expm1(+INF) is +INF
67//  expm1(-INF) is -1
68//  expm1(x) is x for subnormals
69//  for finite argument, only expm1(0)=0 is exact.
70//  For IEEE double
71//    if x > 709.782712893383973096 then expm1(x) overflow
72//
73/******************************************************************************/
74
75#include <private/bionic_asm.h>
76# -- Begin  expm1
77ENTRY(expm1)
78# parameter 1: %xmm0
79..B1.1:
80..___tag_value_expm1.1:
81        subq      $56, %rsp
82..___tag_value_expm1.3:
83        movsd     %xmm0, 32(%rsp)
84..B1.2:
85        unpcklpd  %xmm0, %xmm0
86        movapd    cv(%rip), %xmm1
87        movapd    Shifter(%rip), %xmm6
88        movapd    16+cv(%rip), %xmm2
89        movapd    32+cv(%rip), %xmm3
90        pextrw    $3, %xmm0, %eax
91        andl      $32767, %eax
92        movl      $16527, %edx
93        subl      %eax, %edx
94        subl      $16304, %eax
95        orl       %eax, %edx
96        cmpl      $-2147483648, %edx
97        jae       .L_2TAG_PACKET_0.0.2
98        mulpd     %xmm0, %xmm1
99        addpd     %xmm6, %xmm1
100        movapd    %xmm1, %xmm7
101        subpd     %xmm6, %xmm1
102        mulpd     %xmm1, %xmm2
103        movapd    48+cv(%rip), %xmm4
104        mulpd     %xmm1, %xmm3
105        movapd    64+cv(%rip), %xmm5
106        subpd     %xmm2, %xmm0
107        movd      %xmm7, %eax
108        movl      %eax, %ecx
109        andl      $63, %ecx
110        shll      $4, %ecx
111        sarl      $6, %eax
112        movl      %eax, %edx
113        subpd     %xmm3, %xmm0
114        lea       Tbl_addr(%rip), %r11
115        movapd    (%rcx,%r11), %xmm2
116        movq      80+cv(%rip), %xmm3
117        mulpd     %xmm0, %xmm4
118        movapd    %xmm0, %xmm1
119        mulpd     %xmm0, %xmm0
120        mulsd     %xmm0, %xmm3
121        addpd     %xmm4, %xmm5
122        mulsd     %xmm0, %xmm0
123        movq      %xmm2, %xmm4
124        unpckhpd  %xmm2, %xmm2
125        movdqa    mmask(%rip), %xmm6
126        pand      %xmm6, %xmm7
127        movdqa    bias(%rip), %xmm6
128        paddq     %xmm6, %xmm7
129        psllq     $46, %xmm7
130        mulsd     %xmm0, %xmm3
131        mulpd     %xmm5, %xmm0
132        addl      $894, %edx
133        cmpl      $1916, %edx
134        ja        .L_2TAG_PACKET_1.0.2
135        addsd     %xmm3, %xmm0
136        xorpd     %xmm3, %xmm3
137        movl      $16368, %eax
138        pinsrw    $3, %eax, %xmm3
139        orpd      %xmm7, %xmm2
140        mulsd     %xmm4, %xmm7
141        movq      %xmm3, %xmm6
142        addsd     %xmm1, %xmm3
143        pextrw    $3, %xmm2, %edx
144        pshufd    $238, %xmm0, %xmm5
145        psrlq     $38, %xmm3
146        psllq     $38, %xmm3
147        movq      %xmm2, %xmm4
148        subsd     %xmm3, %xmm6
149        addsd     %xmm5, %xmm0
150        addsd     %xmm6, %xmm1
151        addsd     %xmm7, %xmm4
152        mulsd     %xmm3, %xmm7
153        mulsd     %xmm2, %xmm3
154        xorpd     %xmm5, %xmm5
155        movl      $16368, %eax
156        pinsrw    $3, %eax, %xmm5
157        addsd     %xmm1, %xmm0
158        movl      $17184, %ecx
159        subl      %edx, %ecx
160        subl      $16256, %edx
161        orl       %edx, %ecx
162        jl        .L_2TAG_PACKET_2.0.2
163        mulsd     %xmm4, %xmm0
164        subsd     %xmm5, %xmm3
165        addsd     %xmm7, %xmm0
166        addsd     %xmm3, %xmm0
167.L_2TAG_PACKET_3.0.2:
168        jmp       ..B1.5
169.L_2TAG_PACKET_2.0.2:
170        cmpl      $0, %edx
171        jl        .L_2TAG_PACKET_4.0.2
172        mulsd     %xmm4, %xmm0
173        subsd     %xmm5, %xmm7
174        addsd     %xmm7, %xmm0
175        addsd     %xmm3, %xmm0
176        jmp       ..B1.5
177.L_2TAG_PACKET_4.0.2:
178        mulsd     %xmm4, %xmm0
179        addsd     %xmm7, %xmm0
180        addsd     %xmm3, %xmm0
181        subsd     %xmm5, %xmm0
182        jmp       ..B1.5
183.L_2TAG_PACKET_1.0.2:
184        movl      36(%rsp), %ecx
185        addsd     %xmm0, %xmm1
186        unpckhpd  %xmm0, %xmm0
187        addsd     %xmm1, %xmm0
188        cmpl      $0, %ecx
189        jl        .L_2TAG_PACKET_5.0.2
190        fstcw     (%rsp)
191        movw      (%rsp), %dx
192        orw       $768, %dx
193        movw      %dx, 4(%rsp)
194        fldcw     4(%rsp)
195        movl      %eax, %edx
196        sarl      $1, %eax
197        subl      %eax, %edx
198        movdqa    emask(%rip), %xmm6
199        pandn     %xmm2, %xmm6
200        addl      $1023, %eax
201        movd      %eax, %xmm3
202        psllq     $52, %xmm3
203        orpd      %xmm3, %xmm6
204        mulsd     %xmm3, %xmm4
205        movsd     %xmm0, 16(%rsp)
206        fldl      16(%rsp)
207        movsd     %xmm6, 24(%rsp)
208        fldl      24(%rsp)
209        movsd     %xmm4, 16(%rsp)
210        fldl      16(%rsp)
211        addl      $1023, %edx
212        movd      %edx, %xmm4
213        psllq     $52, %xmm4
214        faddp     %st, %st(1)
215        fmul      %st, %st(1)
216        faddp     %st, %st(1)
217        movsd     %xmm4, 24(%rsp)
218        fldl      24(%rsp)
219        fmulp     %st, %st(1)
220        fstpl     16(%rsp)
221        movsd     16(%rsp), %xmm0
222        fldcw     (%rsp)
223        pextrw    $3, %xmm0, %ecx
224        andl      $32752, %ecx
225        cmpl      $32752, %ecx
226        jae       .L_2TAG_PACKET_6.0.2
227        jmp       ..B1.5
228        cmpl      $-2147483648, %ecx
229        jb        .L_2TAG_PACKET_6.0.2
230        jmp       ..B1.5
231.L_2TAG_PACKET_6.0.2:
232        movl      $41, 8(%rsp)
233        jmp       .L_2TAG_PACKET_7.0.2
234.L_2TAG_PACKET_8.0.2:
235        cmpl      $2146435072, %eax
236        jae       .L_2TAG_PACKET_9.0.2
237        movsd     XMAX(%rip), %xmm0
238        mulsd     %xmm0, %xmm0
239        movl      $41, 8(%rsp)
240        jmp       .L_2TAG_PACKET_7.0.2
241.L_2TAG_PACKET_9.0.2:
242        movl      36(%rsp), %eax
243        movl      32(%rsp), %edx
244        movl      %eax, %ecx
245        andl      $2147483647, %eax
246        cmpl      $2146435072, %eax
247        ja        .L_2TAG_PACKET_10.0.2
248        cmpl      $0, %edx
249        jne       .L_2TAG_PACKET_10.0.2
250        cmpl      $0, %ecx
251        jl        .L_2TAG_PACKET_11.0.2
252        movq      INF(%rip), %xmm0
253        jmp       ..B1.5
254.L_2TAG_PACKET_11.0.2:
255        jmp       .L_2TAG_PACKET_5.0.2
256.L_2TAG_PACKET_10.0.2:
257        movsd     32(%rsp), %xmm0
258        addsd     %xmm0, %xmm0
259        jmp       ..B1.5
260.L_2TAG_PACKET_12.0.2:
261        addl      $16304, %eax
262        cmpl      $15504, %eax
263        jb        .L_2TAG_PACKET_13.0.2
264        movapd    cvl(%rip), %xmm2
265        pshufd    $68, %xmm0, %xmm1
266        movapd    16+cvl(%rip), %xmm3
267        movapd    32+cvl(%rip), %xmm4
268        movq      48+cvl(%rip), %xmm5
269        mulsd     %xmm1, %xmm1
270        xorpd     %xmm6, %xmm6
271        movl      $16352, %eax
272        pinsrw    $3, %eax, %xmm6
273        mulpd     %xmm0, %xmm2
274        xorpd     %xmm7, %xmm7
275        movl      $16368, %edx
276        pinsrw    $3, %edx, %xmm7
277        addpd     %xmm3, %xmm2
278        mulsd     %xmm1, %xmm5
279        pshufd    $228, %xmm1, %xmm3
280        mulpd     %xmm1, %xmm1
281        mulsd     %xmm0, %xmm6
282        mulpd     %xmm0, %xmm2
283        addpd     %xmm4, %xmm2
284        movq      %xmm7, %xmm4
285        addsd     %xmm6, %xmm7
286        mulpd     %xmm3, %xmm1
287        psrlq     $27, %xmm7
288        psllq     $27, %xmm7
289        movq      HIGHMASK(%rip), %xmm3
290        subsd     %xmm7, %xmm4
291        mulpd     %xmm1, %xmm2
292        addsd     %xmm4, %xmm6
293        pshufd    $238, %xmm2, %xmm1
294        addsd     %xmm2, %xmm6
295        andpd     %xmm0, %xmm3
296        movq      %xmm0, %xmm4
297        addsd     %xmm6, %xmm1
298        subsd     %xmm3, %xmm0
299        addsd     %xmm5, %xmm1
300        mulsd     %xmm7, %xmm3
301        mulsd     %xmm7, %xmm0
302        mulsd     %xmm1, %xmm4
303        addsd     %xmm4, %xmm0
304        addsd     %xmm3, %xmm0
305        jmp       ..B1.5
306.L_2TAG_PACKET_13.0.2:
307        cmpl      $16, %eax
308        jae       .L_2TAG_PACKET_3.0.2
309        movq      %xmm0, %xmm2
310        movd      %xmm0, %eax
311        psrlq     $31, %xmm2
312        movd      %xmm2, %ecx
313        orl       %ecx, %eax
314        je        .L_2TAG_PACKET_3.0.2
315        movl      $16, %edx
316        xorpd     %xmm1, %xmm1
317        pinsrw    $3, %edx, %xmm1
318        mulsd     %xmm1, %xmm1
319        movl      $42, 8(%rsp)
320        jmp       .L_2TAG_PACKET_7.0.2
321.L_2TAG_PACKET_0.0.2:
322        cmpl      $0, %eax
323        jl        .L_2TAG_PACKET_12.0.2
324        movl      36(%rsp), %eax
325        cmpl      $1083179008, %eax
326        jge       .L_2TAG_PACKET_8.0.2
327        cmpl      $-1048576, %eax
328        jae       .L_2TAG_PACKET_9.0.2
329.L_2TAG_PACKET_5.0.2:
330        xorpd     %xmm0, %xmm0
331        movl      $49136, %eax
332        pinsrw    $3, %eax, %xmm0
333        jmp       ..B1.5
334.L_2TAG_PACKET_7.0.2:
335        movq      %xmm0, 40(%rsp)
336..B1.3:
337        movq      40(%rsp), %xmm0
338.L_2TAG_PACKET_14.0.2:
339..B1.5:
340        addq      $56, %rsp
341..___tag_value_expm1.4:
342        ret
343..___tag_value_expm1.5:
344END(expm1)
345# -- End  expm1
346	.section .rodata, "a"
347	.align 16
348	.align 16
349cv:
350	.long	1697350398
351	.long	1079448903
352	.long	1697350398
353	.long	1079448903
354	.long	4277796864
355	.long	1065758274
356	.long	4277796864
357	.long	1065758274
358	.long	3164486458
359	.long	1025308570
360	.long	3164486458
361	.long	1025308570
362	.long	1963358694
363	.long	1065423121
364	.long	1431655765
365	.long	1069897045
366	.long	1431655765
367	.long	1067799893
368	.long	0
369	.long	1071644672
370	.long	381774871
371	.long	1062650220
372	.long	381774871
373	.long	1062650220
374	.type	cv,@object
375	.size	cv,96
376	.align 16
377Shifter:
378	.long	0
379	.long	1127743488
380	.long	0
381	.long	1127743488
382	.type	Shifter,@object
383	.size	Shifter,16
384	.align 16
385Tbl_addr:
386	.long	0
387	.long	0
388	.long	0
389	.long	0
390	.long	1000070955
391	.long	1042145304
392	.long	1040187392
393	.long	11418
394	.long	988267849
395	.long	1039500660
396	.long	3539992576
397	.long	22960
398	.long	36755401
399	.long	1042114290
400	.long	402653184
401	.long	34629
402	.long	3634769483
403	.long	1042178627
404	.long	1820327936
405	.long	46424
406	.long	2155991225
407	.long	1041560680
408	.long	847249408
409	.long	58348
410	.long	2766913307
411	.long	1039293264
412	.long	3489660928
413	.long	70401
414	.long	3651174602
415	.long	1040488175
416	.long	2927624192
417	.long	82586
418	.long	3073892131
419	.long	1042240606
420	.long	1006632960
421	.long	94904
422	.long	1328391742
423	.long	1042019037
424	.long	3942645760
425	.long	107355
426	.long	2650893825
427	.long	1041903210
428	.long	822083584
429	.long	119943
430	.long	2397289153
431	.long	1041802037
432	.long	2281701376
433	.long	132667
434	.long	430997175
435	.long	1042110606
436	.long	1845493760
437	.long	145530
438	.long	1230936525
439	.long	1041801015
440	.long	1702887424
441	.long	158533
442	.long	740675935
443	.long	1040178913
444	.long	4110417920
445	.long	171677
446	.long	3489810261
447	.long	1041825986
448	.long	2793406464
449	.long	184965
450	.long	2532600530
451	.long	1040767882
452	.long	167772160
453	.long	198398
454	.long	3542557060
455	.long	1041827263
456	.long	2986344448
457	.long	211976
458	.long	1401563777
459	.long	1041061093
460	.long	922746880
461	.long	225703
462	.long	3129406026
463	.long	1041852413
464	.long	880803840
465	.long	239579
466	.long	900993572
467	.long	1039283234
468	.long	1275068416
469	.long	253606
470	.long	2115029358
471	.long	1042140042
472	.long	562036736
473	.long	267786
474	.long	1086643152
475	.long	1041785419
476	.long	1610612736
477	.long	282120
478	.long	82864366
479	.long	1041256244
480	.long	3045064704
481	.long	296610
482	.long	2392968152
483	.long	1040913683
484	.long	3573547008
485	.long	311258
486	.long	2905856183
487	.long	1040002214
488	.long	1988100096
489	.long	326066
490	.long	3742008261
491	.long	1040011137
492	.long	1451229184
493	.long	341035
494	.long	863393794
495	.long	1040880621
496	.long	914358272
497	.long	356167
498	.long	1446136837
499	.long	1041372426
500	.long	3707764736
501	.long	371463
502	.long	927855201
503	.long	1040617636
504	.long	360710144
505	.long	386927
506	.long	1492679939
507	.long	1041050306
508	.long	2952790016
509	.long	402558
510	.long	608827001
511	.long	1041582217
512	.long	2181038080
513	.long	418360
514	.long	606260204
515	.long	1042271987
516	.long	1711276032
517	.long	434334
518	.long	3163044019
519	.long	1041843851
520	.long	1006632960
521	.long	450482
522	.long	4148747325
523	.long	1041962972
524	.long	3900702720
525	.long	466805
526	.long	802924201
527	.long	1041275378
528	.long	1442840576
529	.long	483307
530	.long	3052749833
531	.long	1041940577
532	.long	1937768448
533	.long	499988
534	.long	2216116399
535	.long	1041486744
536	.long	914358272
537	.long	516851
538	.long	2729697836
539	.long	1041445764
540	.long	2566914048
541	.long	533897
542	.long	540608356
543	.long	1041310907
544	.long	2600468480
545	.long	551129
546	.long	2916344493
547	.long	1040535661
548	.long	1107296256
549	.long	568549
550	.long	731391814
551	.long	1039497014
552	.long	2566914048
553	.long	586158
554	.long	1024722704
555	.long	1041461625
556	.long	2961178624
557	.long	603959
558	.long	3806831748
559	.long	1041732499
560	.long	2675965952
561	.long	621954
562	.long	238953304
563	.long	1040316488
564	.long	2189426688
565	.long	640145
566	.long	749123235
567	.long	1041725785
568	.long	2063597568
569	.long	658534
570	.long	1168187977
571	.long	1041175214
572	.long	2986344448
573	.long	677123
574	.long	3506096399
575	.long	1042186095
576	.long	1426063360
577	.long	695915
578	.long	1470221620
579	.long	1041675499
580	.long	2566914048
581	.long	714911
582	.long	3182425146
583	.long	1041483134
584	.long	3087007744
585	.long	734114
586	.long	3131698208
587	.long	1042208657
588	.long	4068474880
589	.long	753526
590	.long	2300504125
591	.long	1041428596
592	.long	2415919104
593	.long	773150
594	.long	2290297931
595	.long	1037388400
596	.long	3716153344
597	.long	792987
598	.long	3532148223
599	.long	1041626194
600	.long	771751936
601	.long	813041
602	.long	1161884404
603	.long	1042015258
604	.long	3699376128
605	.long	833312
606	.long	876383176
607	.long	1037968878
608	.long	1241513984
609	.long	853805
610	.long	3379986796
611	.long	1042213153
612	.long	3699376128
613	.long	874520
614	.long	1545797737
615	.long	1041681569
616	.long	58720256
617	.long	895462
618	.long	2925146801
619	.long	1042212567
620	.long	855638016
621	.long	916631
622	.long	1316627971
623	.long	1038516204
624	.long	3883925504
625	.long	938030
626	.long	3267869137
627	.long	1040337004
628	.long	2726297600
629	.long	959663
630	.long	3720868999
631	.long	1041782409
632	.long	3992977408
633	.long	981531
634	.long	433316142
635	.long	1041994064
636	.long	1526726656
637	.long	1003638
638	.long	781232103
639	.long	1040093400
640	.long	2172649472
641	.long	1025985
642	.type	Tbl_addr,@object
643	.size	Tbl_addr,1024
644	.align 16
645mmask:
646	.long	4294967232
647	.long	0
648	.long	4294967232
649	.long	0
650	.type	mmask,@object
651	.size	mmask,16
652	.align 16
653bias:
654	.long	65472
655	.long	0
656	.long	65472
657	.long	0
658	.type	bias,@object
659	.size	bias,16
660	.align 16
661emask:
662	.long	0
663	.long	4293918720
664	.long	0
665	.long	4293918720
666	.type	emask,@object
667	.size	emask,16
668	.align 16
669cvl:
670	.long	2773927732
671	.long	1053236707
672	.long	381774871
673	.long	1062650220
674	.long	379653899
675	.long	1056571845
676	.long	286331153
677	.long	1065423121
678	.long	436314138
679	.long	1059717536
680	.long	1431655765
681	.long	1067799893
682	.long	1431655765
683	.long	1069897045
684	.long	0
685	.long	1071644672
686	.type	cvl,@object
687	.size	cvl,64
688	.align 8
689XMAX:
690	.long	4294967295
691	.long	2146435071
692	.type	XMAX,@object
693	.size	XMAX,8
694	.align 8
695INF:
696	.long	0
697	.long	2146435072
698	.type	INF,@object
699	.size	INF,8
700	.align 8
701HIGHMASK:
702	.long	4227858432
703	.long	4294967295
704	.type	HIGHMASK,@object
705	.size	HIGHMASK,8
706	.data
707	.section .note.GNU-stack, ""
708// -- Begin DWARF2 SEGMENT .eh_frame
709	.section .eh_frame,"a",@progbits
710.eh_frame_seg:
711	.align 1
712	.4byte 0x00000014
713	.8byte 0x00527a0100000000
714	.8byte 0x08070c1b01107801
715	.4byte 0x00000190
716	.4byte 0x0000001c
717	.4byte 0x0000001c
718	.4byte ..___tag_value_expm1.1-.
719	.4byte ..___tag_value_expm1.5-..___tag_value_expm1.1
720	.2byte 0x0400
721	.4byte ..___tag_value_expm1.3-..___tag_value_expm1.1
722	.2byte 0x400e
723	.byte 0x04
724	.4byte ..___tag_value_expm1.4-..___tag_value_expm1.3
725	.2byte 0x080e
726	.byte 0x00
727# End
728