1/*
2Copyright (c) 2014, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8    * Redistributions of source code must retain the above copyright notice,
9    * this list of conditions and the following disclaimer.
10
11    * Redistributions in binary form must reproduce the above copyright notice,
12    * this list of conditions and the following disclaimer in the documentation
13    * and/or other materials provided with the distribution.
14
15    * Neither the name of Intel Corporation nor the names of its contributors
16    * may be used to endorse or promote products derived from this software
17    * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31/******************************************************************************/
32//                     ALGORITHM DESCRIPTION
33//                     ---------------------
34//
35// X87 version:
36// Use 80-bit FPU precision fmul, fsqrt to compute square and sqrt.
37//
38// SSE version:
39// Swap x, y if |x|<|y|
40// For x=2^k*x, get y=y*2^(-k)
41// Get S ~ sqrt(x^2+y^2)  (leading 1 + leading 25 mantissa bits)
42//
43// Get D = ( RN(x^2+y^2) - S^2 ) + ( x^2 - RN(x^2) ) +
44//                               + ( y^2 - ((RN(x^2+y^2)-RN(x^2)) )
45//
46// Result is 2^k*(S + Se),  where Se = S*e
47//        S*e is approximated as (D/2S)*( 1 - (D/2S)^2*1.0/S )
48//
49// Return 2^k*(S+Se)
50//
51// For |y/x|<2^(-64), return x
52//
53// For cases where maximum biased exponent is either greater than 7fdh or
54// below 32, take a special path to check for special cases (0, NaN, Inf),
55// possible overflow, and more accurate computation for denormal results
56//
57// Special cases:
58//  hypot(x,y), hypot(y,x), and hypot(x,-y) are equivalent
59//  hypot(x,+-0) is equivalent to fabs(x)
60//  hypot(x,y) = y if (x==NaN or x==INF) and y==INF
61//  hypot(x,y) = x if (x==NaN or x==INF) and y!=INF (even if y==NaN!)
62//  hypot(x,y) = y if (x!=NaN and x!=INF) and (y==NaN or y==INF)
63//
64/******************************************************************************/
65
66#include <private/bionic_asm.h>
67# -- Begin  static_func
68        .text
69        .align __bionic_asm_align
70        .type static_func, @function
71static_func:
72..B1.1:
73        call      ..L2
74..L2:
75        popl      %eax
76        lea       _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax
77        lea       static_const_table@GOTOFF(%eax), %eax
78        ret
79        .size   static_func,.-static_func
80# -- End  static_func
81
82# -- Begin  hypot
83ENTRY(hypot)
84# parameter 1: 8 + %ebp
85# parameter 2: 16 + %ebp
86..B2.1:
87..B2.2:
88        pushl     %ebp
89        movl      %esp, %ebp
90        subl      $152, %esp
91        movl      %ebx, 96(%esp)
92        call      static_func
93        movl      %eax, %ebx
94        movapd    (%ebx), %xmm3
95        movsd     160(%esp), %xmm0
96        movsd     168(%esp), %xmm1
97        andpd     %xmm3, %xmm0
98        andpd     %xmm3, %xmm1
99        pextrw    $3, %xmm0, %eax
100        pextrw    $3, %xmm1, %edx
101        cmpl      $24528, %eax
102        ja        .L_2TAG_PACKET_0.0.2
103        cmpl      $24528, %edx
104        ja        .L_2TAG_PACKET_0.0.2
105.L_2TAG_PACKET_1.0.2:
106        fldl      160(%esp)
107        fldl      168(%esp)
108        fxch      %st(1)
109        fmul      %st(0), %st
110        fxch      %st(1)
111        nop
112        fmul      %st(0), %st
113        faddp     %st, %st(1)
114        fsqrt
115        jmp       .L_2TAG_PACKET_2.0.2
116.L_2TAG_PACKET_0.0.2:
117        cmpl      $32752, %eax
118        movl      %eax, %ecx
119        jae       .L_2TAG_PACKET_3.0.2
120        subl      %edx, %ecx
121        cmpl      $32752, %edx
122        jae       .L_2TAG_PACKET_3.0.2
123        addl      $928, %ecx
124        addl      %edx, %eax
125        cmpl      $1856, %ecx
126        ja        .L_2TAG_PACKET_4.0.2
127        cmpl      $49056, %eax
128        jb        .L_2TAG_PACKET_1.0.2
129        fldl      160(%esp)
130        fldl      168(%esp)
131        fxch      %st(1)
132        fmul      %st(0), %st
133        fxch      %st(1)
134        nop
135        fmul      %st(0), %st
136        faddp     %st, %st(1)
137        fsqrt
138.L_2TAG_PACKET_5.0.2:
139        fstl      (%esp)
140        fstpt     16(%esp)
141        xorl      %eax, %eax
142        movw      24(%esp), %ax
143        cmpl      $17407, %eax
144        jae       .L_2TAG_PACKET_6.0.2
145        fldl      (%esp)
146        jmp       .L_2TAG_PACKET_7.0.2
147.L_2TAG_PACKET_4.0.2:
148        movsd     %xmm0, 32(%esp)
149        movsd     %xmm1, 40(%esp)
150        fldl      32(%esp)
151        faddl     40(%esp)
152        jmp       .L_2TAG_PACKET_5.0.2
153.L_2TAG_PACKET_6.0.2:
154        movl      $46, %edx
155.L_2TAG_PACKET_8.0.2:
156        movsd     160(%esp), %xmm0
157        movsd     168(%esp), %xmm1
158        fldl      (%esp)
159        jmp       .L_2TAG_PACKET_7.0.2
160.L_2TAG_PACKET_3.0.2:
161        shufpd    $0, %xmm1, %xmm0
162        movdqa    %xmm0, %xmm2
163        movdqa    16(%ebx), %xmm3
164        movsd     %xmm0, 32(%esp)
165        movsd     %xmm1, 40(%esp)
166        cmppd     $3, %xmm0, %xmm2
167        cmppd     $0, %xmm0, %xmm3
168        movmskpd  %xmm2, %edx
169        movmskpd  %xmm3, %eax
170        testl     %edx, %edx
171        je        .L_2TAG_PACKET_9.0.2
172        fldl      32(%esp)
173        fmull     40(%esp)
174        testl     $1, %eax
175        jne       .L_2TAG_PACKET_10.0.2
176        testl     $2, %eax
177        jne       .L_2TAG_PACKET_11.0.2
178        jmp       .L_2TAG_PACKET_2.0.2
179.L_2TAG_PACKET_9.0.2:
180        fldl      32(%esp)
181        faddl     40(%esp)
182        jmp       .L_2TAG_PACKET_2.0.2
183.L_2TAG_PACKET_10.0.2:
184        fstpl     40(%esp)
185        fldl      32(%esp)
186        jmp       .L_2TAG_PACKET_7.0.2
187.L_2TAG_PACKET_11.0.2:
188        fstpl     32(%esp)
189        fldl      40(%esp)
190        jmp       .L_2TAG_PACKET_7.0.2
191.L_2TAG_PACKET_2.0.2:
192.L_2TAG_PACKET_7.0.2:
193        movl      96(%esp), %ebx
194        movl      %ebp, %esp
195        popl      %ebp
196        ret
197..B2.3:
198END(hypot)
199# -- End  hypot
200
201# Start file scope ASM
202ALIAS_SYMBOL(hypotl, hypot);
203# End file scope ASM
204	.section .rodata, "a"
205	.align 16
206	.align 16
207static_const_table:
208	.long	4294967295
209	.long	2147483647
210	.long	4294967295
211	.long	2147483647
212	.long	0
213	.long	2146435072
214	.long	0
215	.long	2146435072
216	.type	static_const_table,@object
217	.size	static_const_table,32
218	.data
219	.section .note.GNU-stack, ""
220# End
221