/************************************************************************
 * Copyright (C) 2002-2009, Xiph.org Foundation
 * Copyright (C) 2010, Robin Watts for Pinknoise Productions Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the names of the Xiph.org Foundation nor Pinknoise
 * Productions Ltd nor the names of its contributors may be used to
 * endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ************************************************************************

 function: arm7 and later wide math functions

 ************************************************************************/

#ifdef _ARM_ASSEM_

#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
#define _V_WIDE_MATH

/*
 * MULT32: signed 32x32 -> 64-bit multiply, returning the upper 32 bits of
 * the product, i.e. (x * y) >> 32.
 *
 * SMULL places the low word of the product in `lo` and the high word in
 * `hi`; only `hi` is returned.  Both outputs are early-clobber ("=&r") so
 * they never share a register with an input, and the "%" on x tells the
 * compiler that x and y may be swapped.
 */
static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
  int lo, hi;
  __asm__ __volatile__("smull\t%0, %1, %2, %3"
                       : "=&r"(lo), "=&r"(hi)
                       : "%r"(x), "r"(y)
                       : "cc");
  return hi;
}

/*
 * MULT31: ((x * y) >> 32) << 1, i.e. the product scaled down by 2^31 with
 * the least significant bit always zero.
 *
 * The doubling is performed on the unsigned representation because
 * left-shifting a negative signed value is undefined behaviour in C; the
 * resulting bit pattern is the same two's-complement value.
 */
static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
  return (ogg_int32_t)((ogg_uint32_t)MULT32(x, y) << 1);
}

/*
 * MULT31_SHIFT15: (x * y) >> 15, rounded, truncated to 32 bits.
 *
 *   smull  lo:hi = x * y                      (64-bit signed product)
 *   movs   lo  = lo >> 15 (logical)           carry <- bit 14 of the product
 *   adc    hi  = lo + (hi << 17) + carry      bits 46..15, plus rounding bit
 *
 * The carry produced by the shift is the last bit shifted out, so adding
 * it rounds the result to nearest instead of truncating.
 */
static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
  int lo, hi;
  __asm__ __volatile__("smull\t%0, %1, %2, %3\n\t"
                       "movs\t%0, %0, lsr #15\n\t"
                       "adc\t%1, %0, %1, lsl #17"
                       : "=&r"(lo), "=&r"(hi)
                       : "%r"(x), "r"(y)
                       : "cc");
  return hi;
}

/*
 * MB(): compiler-level memory barrier.  The empty asm statement with a
 * "memory" clobber emits no instructions; it only prevents the compiler
 * from keeping memory values cached in registers across it or from moving
 * loads/stores past it.  Spelled with __asm__/__volatile__ so it is also
 * accepted in strict ISO C modes.
 */
#define MB() __asm__ __volatile__("" : : : "memory")

/*
 * XPROD32: cross product keeping the high 32 bits of each 64-bit sum.
 *
 *   *x = (a*t + b*v) >> 32
 *   *y = (b*t - a*v) >> 32
 *
 * Operand 3 (the register holding `a`) is negated in place half-way
 * through the sequence, while b, t and v are still needed afterwards.  It
 * is therefore declared early-clobber ("=&r") so the compiler can never
 * place one of those inputs in the same register.
 *
 * MB() is a compiler barrier that keeps the store to *x ahead of the
 * store to *y.
 */
static inline void XPROD32(ogg_int32_t  a, ogg_int32_t  b,
                           ogg_int32_t  t, ogg_int32_t  v,
                           ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  __asm__("smull\t%0, %1, %4, %6\n\t"   /* l:x1  = a * t    */
          "smlal\t%0, %1, %5, %7\n\t"   /* l:x1 += b * v    */
          "rsb\t%3, %4, #0\n\t"         /* a     = -a       */
          "smull\t%0, %2, %5, %6\n\t"   /* l:y1  = b * t    */
          "smlal\t%0, %2, %3, %7"       /* l:y1 += (-a) * v */
          : "=&r" (l), "=&r" (x1), "=&r" (y1), "=&r" (a)
          : "3" (a), "r" (b), "r" (t), "r" (v)
          : "cc" );
  *x = x1;
  MB();
  *y = y1;
}

87 /* x = (a*t + b*v)>>31,    y = (b*t - a*v)>>31 */
XPROD31(ogg_int32_t a,ogg_int32_t b,ogg_int32_t t,ogg_int32_t v,ogg_int32_t * x,ogg_int32_t * y)88 static inline void XPROD31(ogg_int32_t  a, ogg_int32_t  b,
89 			   ogg_int32_t  t, ogg_int32_t  v,
90 			   ogg_int32_t *x, ogg_int32_t *y)
91 {
92   int x1, y1, l;
93   asm(	"smull	%0, %1, %4, %6\n\t"
94 	"smlal	%0, %1, %5, %7\n\t"
95 	"rsb	%3, %4, #0\n\t"
96 	"smull	%0, %2, %5, %6\n\t"
97 	"smlal	%0, %2, %3, %7"
98 	: "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
99 	: "3" (a), "r" (b), "r" (t), "r" (v)
100 	: "cc" );
101   *x = x1 << 1;
102   MB();
103   *y = y1 << 1;
104 }
105 
106 /* x = (a*t - b*v)>>31,     y = (b*t + a*v)>>31 */
XNPROD31(ogg_int32_t a,ogg_int32_t b,ogg_int32_t t,ogg_int32_t v,ogg_int32_t * x,ogg_int32_t * y)107 static inline void XNPROD31(ogg_int32_t  a, ogg_int32_t  b,
108 			    ogg_int32_t  t, ogg_int32_t  v,
109 			    ogg_int32_t *x, ogg_int32_t *y)
110 {
111   int x1, y1, l;
112   asm(	"rsb	%2, %4, #0\n\t"
113 	"smull	%0, %1, %3, %5\n\t"
114 	"smlal	%0, %1, %2, %6\n\t"
115 	"smull	%0, %2, %4, %5\n\t"
116 	"smlal	%0, %2, %3, %6"
117 	: "=&r" (l), "=&r" (x1), "=&r" (y1)
118 	: "r" (a), "r" (b), "r" (t), "r" (v)
119 	: "cc" );
120   *x = x1 << 1;
121   MB();
122   *y = y1 << 1;
123 }
124 
#endif

#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH

/*
 * CLIP_TO_15: saturate x to the signed 16-bit range [-32768, 32767].
 *
 *   x >=  32768  ->  0x00007fff ( 32767)
 *   x <  -32768  ->  0xffff8000 (-32768)
 *   otherwise    ->  x unchanged
 *
 * The comparisons use the signed conditions GE/LT (N == V / N != V)
 * rather than PL/MI (N only), so inputs close to INT_MIN, where the
 * subtraction overflows, are still classified correctly.  0x7fff and
 * 0xffff8000 are not encodable as single ARM immediates, hence the
 * two-instruction mov/orr and mvn/bic pairs.  `tmp` only receives the
 * throw-away results of the flag-setting subs/adds.
 */
static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) {
  int tmp;
  __asm__ __volatile__("subs\t%1, %0, #32768\n\t"  /* flags <- x - 32768   */
                       "movge\t%0, #0x7f00\n\t"
                       "orrge\t%0, %0, #0xff\n\t"  /* x = 0x7fff           */
                       "adds\t%1, %0, #32768\n\t"  /* flags <- x + 32768   */
                       "mvnlt\t%0, #0x7f00\n\t"    /* x = 0xffff80ff       */
                       "biclt\t%0, %0, #0xff"      /* x = 0xffff8000       */
                       : "+r"(x), "=r"(tmp)
                       :
                       : "cc");
  return x;
}

#endif

#ifndef _V_LSP_MATH_ASM
#define _V_LSP_MATH_ASM

/*
 * lsp_loop_asm: accumulate two running products over the ilsp[] array.
 *
 *   qip, pip  in/out  unsigned running products qi and pi
 *   qexpp     in/out  exponent; increased by every right shift applied to
 *                     qi/pi so that value * 2^qexp is preserved
 *   ilsp      in      array of m 32-bit values, read only
 *   wi        in      value subtracted from every ilsp[] entry
 *   m         in      number of entries in ilsp[]
 *
 * Pair loop (walks from the last full pair down to ilsp[0]):
 *     qi *= |ilsp[j]   - wi|
 *     pi *= |ilsp[j+1] - wi|
 *   whenever either 64-bit product spills into its high word, both are
 *   shifted right by 16 and qexp is increased by 16.
 * Odd m: the remaining ilsp[m-1] is folded into qi and pi is multiplied
 *   by 0x4000, with the same overflow handling.
 * Finally qi and pi are shifted right by a common amount chosen from
 *   (qi | pi) so that the larger one fits in 16 bits; qexp is increased by
 *   that amount.
 *
 * NOTE(review): the pair loop body runs before its termination test, so
 * it always loads at least one pair; this appears to assume m >= 2 --
 * confirm against callers.
 *
 * Constraint notes:
 *   - qi/pi/qexp are written inside the loop while ilsp, wi and m are
 *     still being read, so they are early-clobber read/write operands
 *     ("+&r") to keep the inputs in separate registers.
 *   - the asm loads from ilsp[] through a pointer the compiler only sees
 *     as a register value, so "memory" is listed as a clobber to force
 *     pending stores to that array to be completed first.
 *   - r0-r3 are used as hard-coded scratch registers and are clobbered.
 */
static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip,
                                ogg_int32_t *qexpp,
                                ogg_int32_t *ilsp,ogg_int32_t wi,
                                ogg_int32_t m){

  ogg_uint32_t qi=*qip,pi=*pip;
  ogg_int32_t qexp=*qexpp;

  __asm__("mov     r0,%3;"
      "mov     r1,%5,asr#1;"
      "add     r0,r0,r1,lsl#3;"    /* r0 = one past the last full pair */
      "1:"

      "ldmdb   r0!,{r1,r3};"       /* r1=ilsp[j], r3=ilsp[j+1]; r0 -= 8 */
      "subs    r1,r1,%4;"          /* ilsp[j]-wi */
      "rsbmi   r1,r1,#0;"          /* labs(ilsp[j]-wi) */
      "umull   %0,r2,r1,%0;"       /* qi*=labs(ilsp[j]-wi), high word in r2 */

      "subs    r1,r3,%4;"          /* ilsp[j+1]-wi */
      "rsbmi   r1,r1,#0;"          /* labs(ilsp[j+1]-wi) */
      "umull   %1,r3,r1,%1;"       /* pi*=labs(ilsp[j+1]-wi), high word in r3 */

      "cmn     r2,r3;"             /* shift down 16? (skip if r2+r3 == 0) */
      "beq     0f;"
      "add     %2,%2,#16;"
      "mov     %0,%0,lsr #16;"
      "orr     %0,%0,r2,lsl #16;"
      "mov     %1,%1,lsr #16;"
      "orr     %1,%1,r3,lsl #16;"
      "0:"
      "cmp     r0,%3;\n"           /* more pairs above ilsp[0]? */
      "bhi     1b;\n"

      /* odd filter asymmetry */
      "ands    r0,%5,#1;\n"
      "beq     2f;\n"
      "add     r0,%3,%5,lsl#2;\n"

      "ldr     r1,[r0,#-4];\n"       /* r1 = ilsp[m-1] */
      "mov     r0,#0x4000;\n"

      "subs    r1,r1,%4;\n"          /* ilsp[m-1]-wi */
      "rsbmi   r1,r1,#0;\n"          /* labs(ilsp[m-1]-wi) */
      "umull   %0,r2,r1,%0;\n"       /* qi*=labs(ilsp[m-1]-wi) */
      "umull   %1,r3,r0,%1;\n"       /* pi*=0x4000 */

      "cmn     r2,r3;\n"             /* shift down 16? */
      "beq     2f;\n"
      "add     %2,%2,#16;\n"
      "mov     %0,%0,lsr #16;\n"
      "orr     %0,%0,r2,lsl #16;\n"
      "mov     %1,%1,lsr #16;\n"
      "orr     %1,%1,r3,lsl #16;\n"

      /* normalize to max 16 sig figs: r2 = shift derived from qi|pi */
      "2:"
      "mov     r2,#0;"
      "orr     r1,%0,%1;"
      "tst     r1,#0xff000000;"
      "addne   r2,r2,#8;"
      "movne   r1,r1,lsr #8;"
      "tst     r1,#0x00f00000;"
      "addne   r2,r2,#4;"
      "movne   r1,r1,lsr #4;"
      "tst     r1,#0x000c0000;"
      "addne   r2,r2,#2;"
      "movne   r1,r1,lsr #2;"
      "tst     r1,#0x00020000;"
      "addne   r2,r2,#1;"
      "movne   r1,r1,lsr #1;"
      "tst     r1,#0x00010000;"
      "addne   r2,r2,#1;"
      "mov     %0,%0,lsr r2;"
      "mov     %1,%1,lsr r2;"
      "add     %2,%2,r2;"

      : "+&r"(qi),"+&r"(pi),"+&r"(qexp)
      : "r"(ilsp),"r"(wi),"r"(m)
      : "r0","r1","r2","r3","cc","memory");

  *qip=qi;
  *pip=pi;
  *qexpp=qexp;
}

/*
 * lsp_norm_asm: shift *qip left until bit 15 is set, decreasing *qexpp by
 * the number of positions shifted.
 *
 * A four-step binary search over bits 15..8, 15..12, 15..14 and 15: each
 * step shifts left by 8/4/2/1 when the tested bits are all zero and
 * subtracts the same amount from the exponent (at most 15 in total).
 * Only bits 8..15 are ever examined.
 *
 * NOTE(review): this presumably expects the incoming value to already fit
 * in 16 bits -- confirm against callers.
 */
static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){

  ogg_uint32_t qi=*qip;
  ogg_int32_t qexp=*qexpp;

  __asm__("tst     %0,#0x0000ff00;"   /* bits 15..8 all clear? */
      "moveq   %0,%0,lsl #8;"
      "subeq   %1,%1,#8;"
      "tst     %0,#0x0000f000;"       /* bits 15..12 all clear? */
      "moveq   %0,%0,lsl #4;"
      "subeq   %1,%1,#4;"
      "tst     %0,#0x0000c000;"       /* bits 15..14 all clear? */
      "moveq   %0,%0,lsl #2;"
      "subeq   %1,%1,#2;"
      "tst     %0,#0x00008000;"       /* bit 15 clear? */
      "moveq   %0,%0,lsl #1;"
      "subeq   %1,%1,#1;"
      : "+r"(qi),"+r"(qexp)
      :
      : "cc");
  *qip=qi;
  *qexpp=qexp;
}

#endif
#endif
