1 /*
2 * Written by Wilco Dijkstra, 1996. The following email exchange establishes the
3 * license.
4 *
5 * From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
6 * Date: Fri, Jun 24, 2011 at 3:20 AM
7 * Subject: Re: sqrt routine
8 * To: Kevin Ma <kma@google.com>
9 * Hi Kevin,
10 * Thanks for asking. Those routines are public domain (originally posted to
11 * comp.sys.arm a long time ago), so you can use them freely for any purpose.
12 * Cheers,
13 * Wilco
14 *
15 * ----- Original Message -----
16 * From: "Kevin Ma" <kma@google.com>
17 * To: <Wilco.Dijkstra@ntlworld.com>
18 * Sent: Thursday, June 23, 2011 11:44 PM
19 * Subject: Fwd: sqrt routine
20 * Hi Wilco,
21 * I saw your sqrt routine from several web sites, including
22 * http://www.finesse.demon.co.uk/steven/sqrt.html.
23 * Just wonder if there's any copyright information with your Successive
24 * approximation routines, or if I can freely use it for any purpose.
25 * Thanks.
26 * Kevin
27 */
28
29 // Minor modifications in code style for WebRTC, 2012.
30 // Code optimizations for MIPS, 2013.
31
32 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
33
34 /*
35 * Algorithm:
36 * Successive approximation of the equation (root + delta) ^ 2 = N
37 * until delta < 1. If delta < 1 we have the integer part of SQRT (N).
38 * Use delta = 2^i for i = 15 .. 0.
39 *
40 * Output precision is 16 bits. Note for large input values (close to
41 * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
42 * contains the MSB information (a non-sign value). Do with caution
43 * if you need to cast the output to int16_t type.
44 *
45 * If the input value is negative, it returns 0.
46 */
47
48
WebRtcSpl_SqrtFloor(int32_t value)49 int32_t WebRtcSpl_SqrtFloor(int32_t value)
50 {
51 int32_t root = 0, tmp1, tmp2, tmp3, tmp4;
52
53 __asm __volatile(
54 ".set push \n\t"
55 ".set noreorder \n\t"
56
57 "lui %[tmp1], 0x4000 \n\t"
58 "slt %[tmp2], %[value], %[tmp1] \n\t"
59 "sub %[tmp3], %[value], %[tmp1] \n\t"
60 "lui %[tmp1], 0x1 \n\t"
61 "or %[tmp4], %[root], %[tmp1] \n\t"
62 "movz %[value], %[tmp3], %[tmp2] \n\t"
63 "movz %[root], %[tmp4], %[tmp2] \n\t"
64
65 "addiu %[tmp1], $0, 0x4000 \n\t"
66 "addu %[tmp1], %[tmp1], %[root] \n\t"
67 "sll %[tmp1], 14 \n\t"
68 "slt %[tmp2], %[value], %[tmp1] \n\t"
69 "subu %[tmp3], %[value], %[tmp1] \n\t"
70 "ori %[tmp4], %[root], 0x8000 \n\t"
71 "movz %[value], %[tmp3], %[tmp2] \n\t"
72 "movz %[root], %[tmp4], %[tmp2] \n\t"
73
74 "addiu %[tmp1], $0, 0x2000 \n\t"
75 "addu %[tmp1], %[tmp1], %[root] \n\t"
76 "sll %[tmp1], 13 \n\t"
77 "slt %[tmp2], %[value], %[tmp1] \n\t"
78 "subu %[tmp3], %[value], %[tmp1] \n\t"
79 "ori %[tmp4], %[root], 0x4000 \n\t"
80 "movz %[value], %[tmp3], %[tmp2] \n\t"
81 "movz %[root], %[tmp4], %[tmp2] \n\t"
82
83 "addiu %[tmp1], $0, 0x1000 \n\t"
84 "addu %[tmp1], %[tmp1], %[root] \n\t"
85 "sll %[tmp1], 12 \n\t"
86 "slt %[tmp2], %[value], %[tmp1] \n\t"
87 "subu %[tmp3], %[value], %[tmp1] \n\t"
88 "ori %[tmp4], %[root], 0x2000 \n\t"
89 "movz %[value], %[tmp3], %[tmp2] \n\t"
90 "movz %[root], %[tmp4], %[tmp2] \n\t"
91
92 "addiu %[tmp1], $0, 0x800 \n\t"
93 "addu %[tmp1], %[tmp1], %[root] \n\t"
94 "sll %[tmp1], 11 \n\t"
95 "slt %[tmp2], %[value], %[tmp1] \n\t"
96 "subu %[tmp3], %[value], %[tmp1] \n\t"
97 "ori %[tmp4], %[root], 0x1000 \n\t"
98 "movz %[value], %[tmp3], %[tmp2] \n\t"
99 "movz %[root], %[tmp4], %[tmp2] \n\t"
100
101 "addiu %[tmp1], $0, 0x400 \n\t"
102 "addu %[tmp1], %[tmp1], %[root] \n\t"
103 "sll %[tmp1], 10 \n\t"
104 "slt %[tmp2], %[value], %[tmp1] \n\t"
105 "subu %[tmp3], %[value], %[tmp1] \n\t"
106 "ori %[tmp4], %[root], 0x800 \n\t"
107 "movz %[value], %[tmp3], %[tmp2] \n\t"
108 "movz %[root], %[tmp4], %[tmp2] \n\t"
109
110 "addiu %[tmp1], $0, 0x200 \n\t"
111 "addu %[tmp1], %[tmp1], %[root] \n\t"
112 "sll %[tmp1], 9 \n\t"
113 "slt %[tmp2], %[value], %[tmp1] \n\t"
114 "subu %[tmp3], %[value], %[tmp1] \n\t"
115 "ori %[tmp4], %[root], 0x400 \n\t"
116 "movz %[value], %[tmp3], %[tmp2] \n\t"
117 "movz %[root], %[tmp4], %[tmp2] \n\t"
118
119 "addiu %[tmp1], $0, 0x100 \n\t"
120 "addu %[tmp1], %[tmp1], %[root] \n\t"
121 "sll %[tmp1], 8 \n\t"
122 "slt %[tmp2], %[value], %[tmp1] \n\t"
123 "subu %[tmp3], %[value], %[tmp1] \n\t"
124 "ori %[tmp4], %[root], 0x200 \n\t"
125 "movz %[value], %[tmp3], %[tmp2] \n\t"
126 "movz %[root], %[tmp4], %[tmp2] \n\t"
127
128 "addiu %[tmp1], $0, 0x80 \n\t"
129 "addu %[tmp1], %[tmp1], %[root] \n\t"
130 "sll %[tmp1], 7 \n\t"
131 "slt %[tmp2], %[value], %[tmp1] \n\t"
132 "subu %[tmp3], %[value], %[tmp1] \n\t"
133 "ori %[tmp4], %[root], 0x100 \n\t"
134 "movz %[value], %[tmp3], %[tmp2] \n\t"
135 "movz %[root], %[tmp4], %[tmp2] \n\t"
136
137 "addiu %[tmp1], $0, 0x40 \n\t"
138 "addu %[tmp1], %[tmp1], %[root] \n\t"
139 "sll %[tmp1], 6 \n\t"
140 "slt %[tmp2], %[value], %[tmp1] \n\t"
141 "subu %[tmp3], %[value], %[tmp1] \n\t"
142 "ori %[tmp4], %[root], 0x80 \n\t"
143 "movz %[value], %[tmp3], %[tmp2] \n\t"
144 "movz %[root], %[tmp4], %[tmp2] \n\t"
145
146 "addiu %[tmp1], $0, 0x20 \n\t"
147 "addu %[tmp1], %[tmp1], %[root] \n\t"
148 "sll %[tmp1], 5 \n\t"
149 "slt %[tmp2], %[value], %[tmp1] \n\t"
150 "subu %[tmp3], %[value], %[tmp1] \n\t"
151 "ori %[tmp4], %[root], 0x40 \n\t"
152 "movz %[value], %[tmp3], %[tmp2] \n\t"
153 "movz %[root], %[tmp4], %[tmp2] \n\t"
154
155 "addiu %[tmp1], $0, 0x10 \n\t"
156 "addu %[tmp1], %[tmp1], %[root] \n\t"
157 "sll %[tmp1], 4 \n\t"
158 "slt %[tmp2], %[value], %[tmp1] \n\t"
159 "subu %[tmp3], %[value], %[tmp1] \n\t"
160 "ori %[tmp4], %[root], 0x20 \n\t"
161 "movz %[value], %[tmp3], %[tmp2] \n\t"
162 "movz %[root], %[tmp4], %[tmp2] \n\t"
163
164 "addiu %[tmp1], $0, 0x8 \n\t"
165 "addu %[tmp1], %[tmp1], %[root] \n\t"
166 "sll %[tmp1], 3 \n\t"
167 "slt %[tmp2], %[value], %[tmp1] \n\t"
168 "subu %[tmp3], %[value], %[tmp1] \n\t"
169 "ori %[tmp4], %[root], 0x10 \n\t"
170 "movz %[value], %[tmp3], %[tmp2] \n\t"
171 "movz %[root], %[tmp4], %[tmp2] \n\t"
172
173 "addiu %[tmp1], $0, 0x4 \n\t"
174 "addu %[tmp1], %[tmp1], %[root] \n\t"
175 "sll %[tmp1], 2 \n\t"
176 "slt %[tmp2], %[value], %[tmp1] \n\t"
177 "subu %[tmp3], %[value], %[tmp1] \n\t"
178 "ori %[tmp4], %[root], 0x8 \n\t"
179 "movz %[value], %[tmp3], %[tmp2] \n\t"
180 "movz %[root], %[tmp4], %[tmp2] \n\t"
181
182 "addiu %[tmp1], $0, 0x2 \n\t"
183 "addu %[tmp1], %[tmp1], %[root] \n\t"
184 "sll %[tmp1], 1 \n\t"
185 "slt %[tmp2], %[value], %[tmp1] \n\t"
186 "subu %[tmp3], %[value], %[tmp1] \n\t"
187 "ori %[tmp4], %[root], 0x4 \n\t"
188 "movz %[value], %[tmp3], %[tmp2] \n\t"
189 "movz %[root], %[tmp4], %[tmp2] \n\t"
190
191 "addiu %[tmp1], $0, 0x1 \n\t"
192 "addu %[tmp1], %[tmp1], %[root] \n\t"
193 "slt %[tmp2], %[value], %[tmp1] \n\t"
194 "ori %[tmp4], %[root], 0x2 \n\t"
195 "movz %[root], %[tmp4], %[tmp2] \n\t"
196
197 ".set pop \n\t"
198
199 : [root] "+r" (root), [value] "+r" (value),
200 [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2),
201 [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4)
202 :
203 );
204
205 return root >> 1;
206 }
207
208