@
@ Written by Wilco Dijkstra, 1996. The following email exchange establishes the
@ license.
@
@ From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
@ Date: Fri, Jun 24, 2011 at 3:20 AM
@ Subject: Re: sqrt routine
@ To: Kevin Ma <kma@google.com>
@ Hi Kevin,
@ Thanks for asking. Those routines are public domain (originally posted to
@ comp.sys.arm a long time ago), so you can use them freely for any purpose.
@ Cheers,
@ Wilco
@
@ ----- Original Message -----
@ From: "Kevin Ma" <kma@google.com>
@ To: <Wilco.Dijkstra@ntlworld.com>
@ Sent: Thursday, June 23, 2011 11:44 PM
@ Subject: Fwd: sqrt routine
@ Hi Wilco,
@ I saw your sqrt routine from several web sites, including
@ http://www.finesse.demon.co.uk/steven/sqrt.html.
@ Just wonder if there's any copyright information with your Successive
@ approximation routines, or if I can freely use it for any purpose.
@ Thanks.
@ Kevin
@
@ Minor modifications in code style for WebRTC, 2012.
@ Output is bit-exact with the reference C code in spl_sqrt_floor.c.
@
@ Input :             r0 = 32-bit unsigned integer
@ Output:             r0 = INT (SQRT (r0)); the result is 16 bits wide
@ Registers touched:  r1, r2 (the condition flags are modified as well)
@
#include "webrtc/system_wrappers/include/asm_defines.h"

@ WebRtcSpl_SqrtFloor: integer square root by successive approximation,
@ fully unrolled into 16 steps. Each step decides one bit of the root,
@ starting with bit 15 and ending with bit 0.
@
@ In:       r0 = 32-bit unsigned value
@ Out:      r0 = INT (SQRT (r0)), held in bits 15..0 with the upper bits clear
@ Clobbers: r1, r2 and the condition flags (written by cmp)
@
@ Register roles inside the routine:
@   r0  running remainder; reduced by the trial value when a step succeeds
@   r1  constant 3 << 30; added in every adc so that the marker bit of r2
@       is back at bit 30 after the left shift (the sum wraps modulo 2^32)
@   r2  marker bit at bit 30 plus the root bits decided so far in the low
@       bits; rotating r2 right by 2 * i gives the trial value of step i
GLOBAL_FUNCTION WebRtcSpl_SqrtFloor
.align  2
DEFINE_FUNCTION WebRtcSpl_SqrtFloor
  mov    r1, #3 << 30
  mov    r2, #1 << 30

  @ unroll for i = 0 .. 15
  @
  @ Every step is the same three instructions:
  @   cmp    sets carry when r0 >= trial value (unsigned compare)
  @   subhs  subtracts the trial value only in that case; it has no S
  @          suffix, so the carry produced by cmp is still intact afterwards
  @   adc    r2 = r1 + (r2 << 1) + carry, which shifts the new root bit
  @          into bit 0 of r2
  @ Keep the order, and keep flag-setting instructions out of the gap
  @ between cmp and adc: adc consumes the carry that cmp produced.

  cmp    r0, r2, ror #2 * 0
  subhs  r0, r0, r2, ror #2 * 0
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 1
  subhs  r0, r0, r2, ror #2 * 1
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 2
  subhs  r0, r0, r2, ror #2 * 2
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 3
  subhs  r0, r0, r2, ror #2 * 3
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 4
  subhs  r0, r0, r2, ror #2 * 4
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 5
  subhs  r0, r0, r2, ror #2 * 5
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 6
  subhs  r0, r0, r2, ror #2 * 6
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 7
  subhs  r0, r0, r2, ror #2 * 7
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 8
  subhs  r0, r0, r2, ror #2 * 8
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 9
  subhs  r0, r0, r2, ror #2 * 9
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 10
  subhs  r0, r0, r2, ror #2 * 10
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 11
  subhs  r0, r0, r2, ror #2 * 11
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 12
  subhs  r0, r0, r2, ror #2 * 12
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 13
  subhs  r0, r0, r2, ror #2 * 13
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 14
  subhs  r0, r0, r2, ror #2 * 14
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 15
  subhs  r0, r0, r2, ror #2 * 15
  adc    r2, r1, r2, lsl #1

  @ Clear bits 31:30 of r2 to drop the marker bit; the 16 root bits that
  @ remain are the result, returned in r0.
  bic    r0, r2, #3 << 30  @ for rounding add: cmp r0, r2  adc r2, #1
  bx lr