/*
 * Copyright (C) 2005 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_GGL_FIXED_H
#define ANDROID_GGL_FIXED_H

#include <math.h>
#include <stdint.h>

#include <pixelflinger/pixelflinger.h>

23 // ----------------------------------------------------------------------------
24 
25 #define CONST           __attribute__((const))
26 #define ALWAYS_INLINE   __attribute__((always_inline))
27 
28 const GGLfixed FIXED_BITS = 16;
29 const GGLfixed FIXED_EPSILON  = 1;
30 const GGLfixed FIXED_ONE  = 1L<<FIXED_BITS;
31 const GGLfixed FIXED_HALF = 1L<<(FIXED_BITS-1);
32 const GGLfixed FIXED_MIN  = 0x80000000L;
33 const GGLfixed FIXED_MAX  = 0x7FFFFFFFL;
34 
35 inline GGLfixed gglIntToFixed(GGLfixed i)       ALWAYS_INLINE ;
36 inline GGLfixed gglFixedToIntRound(GGLfixed f)  ALWAYS_INLINE ;
37 inline GGLfixed gglFixedToIntFloor(GGLfixed f)  ALWAYS_INLINE ;
38 inline GGLfixed gglFixedToIntCeil(GGLfixed f)   ALWAYS_INLINE ;
39 inline GGLfixed gglFracx(GGLfixed v)            ALWAYS_INLINE ;
40 inline GGLfixed gglFloorx(GGLfixed v)           ALWAYS_INLINE ;
41 inline GGLfixed gglCeilx(GGLfixed v)            ALWAYS_INLINE ;
42 inline GGLfixed gglCenterx(GGLfixed v)          ALWAYS_INLINE ;
43 inline GGLfixed gglRoundx(GGLfixed v)           ALWAYS_INLINE ;
44 
gglIntToFixed(GGLfixed i)45 GGLfixed gglIntToFixed(GGLfixed i) {
46     return i<<FIXED_BITS;
47 }
gglFixedToIntRound(GGLfixed f)48 GGLfixed gglFixedToIntRound(GGLfixed f) {
49     return (f + FIXED_HALF)>>FIXED_BITS;
50 }
gglFixedToIntFloor(GGLfixed f)51 GGLfixed gglFixedToIntFloor(GGLfixed f) {
52     return f>>FIXED_BITS;
53 }
gglFixedToIntCeil(GGLfixed f)54 GGLfixed gglFixedToIntCeil(GGLfixed f) {
55     return (f + ((1<<FIXED_BITS) - 1))>>FIXED_BITS;
56 }
57 
gglFracx(GGLfixed v)58 GGLfixed gglFracx(GGLfixed v) {
59     return v & ((1<<FIXED_BITS)-1);
60 }
gglFloorx(GGLfixed v)61 GGLfixed gglFloorx(GGLfixed v) {
62     return gglFixedToIntFloor(v)<<FIXED_BITS;
63 }
gglCeilx(GGLfixed v)64 GGLfixed gglCeilx(GGLfixed v) {
65     return gglFixedToIntCeil(v)<<FIXED_BITS;
66 }
gglCenterx(GGLfixed v)67 GGLfixed gglCenterx(GGLfixed v) {
68     return gglFloorx(v + FIXED_HALF) | FIXED_HALF;
69 }
gglRoundx(GGLfixed v)70 GGLfixed gglRoundx(GGLfixed v) {
71     return gglFixedToIntRound(v)<<FIXED_BITS;
72 }
73 
// conversion from (unsigned) int, short, byte to fixed...
// These macros use C++ functional-cast syntax, so they only expand correctly
// in C++ translation units. GGL_UB_TO_X and GGL_US_TO_X evaluate their
// argument more than once; do not pass expressions with side effects.
// GGL_US_TO_X and GGL_UI_TO_X apply no cast to _x before shifting, so the
// argument's own type decides how the shift behaves.
#define GGL_B_TO_X(_x)      GGLfixed( ((int32_t(_x)+1)>>1)<<10 )
#define GGL_S_TO_X(_x)      GGLfixed( ((int32_t(_x)+1)>>1)<<2 )
#define GGL_I_TO_X(_x)      GGLfixed( ((int32_t(_x)>>1)+1)>>14 )
#define GGL_UB_TO_X(_x)     GGLfixed(   uint32_t(_x) +      \
                                        (uint32_t(_x)<<8) + \
                                        (uint32_t(_x)>>7) )
#define GGL_US_TO_X(_x)     GGLfixed( (_x) + ((_x)>>15) )
#define GGL_UI_TO_X(_x)     GGLfixed( (((_x)>>1)+1)>>15 )

84 // ----------------------------------------------------------------------------
85 
86 GGLfixed gglPowx(GGLfixed x, GGLfixed y) CONST;
87 GGLfixed gglSqrtx(GGLfixed a) CONST;
88 GGLfixed gglSqrtRecipx(GGLfixed x) CONST;
89 GGLfixed gglFastDivx(GGLfixed n, GGLfixed d) CONST;
90 int32_t gglMulDivi(int32_t a, int32_t b, int32_t c);
91 
92 int32_t gglRecipQNormalized(int32_t x, int* exponent);
93 int32_t gglRecipQ(GGLfixed x, int q) CONST;
94 
95 inline GGLfixed gglRecip(GGLfixed x) CONST;
gglRecip(GGLfixed x)96 inline GGLfixed gglRecip(GGLfixed x) {
97     return gglRecipQ(x, 16);
98 }
99 
100 inline GGLfixed gglRecip28(GGLfixed x) CONST;
gglRecip28(GGLfixed x)101 int32_t gglRecip28(GGLfixed x) {
102     return gglRecipQ(x, 28);
103 }
104 
105 // ----------------------------------------------------------------------------
106 
107 #if defined(__arm__) && !defined(__thumb__)
108 
109 // inline ARM implementations
110 inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST;
gglMulx(GGLfixed x,GGLfixed y,int shift)111 inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) {
112     GGLfixed result, t;
113     if (__builtin_constant_p(shift)) {
114     asm("smull  %[lo], %[hi], %[x], %[y]            \n"
115         "movs   %[lo], %[lo], lsr %[rshift]         \n"
116         "adc    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
117         : [lo]"=r"(result), [hi]"=r"(t), [x]"=r"(x)
118         : "%[x]"(x), [y]"r"(y), [lshift] "I"(32-shift), [rshift] "I"(shift)
119         : "cc"
120         );
121     } else {
122     asm("smull  %[lo], %[hi], %[x], %[y]            \n"
123         "movs   %[lo], %[lo], lsr %[rshift]         \n"
124         "adc    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
125         : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
126         : "%[x]"(x), [y]"r"(y), [lshift] "r"(32-shift), [rshift] "r"(shift)
127         : "cc"
128         );
129     }
130     return result;
131 }
132 
133 inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
gglMulAddx(GGLfixed x,GGLfixed y,GGLfixed a,int shift)134 inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) {
135     GGLfixed result, t;
136     if (__builtin_constant_p(shift)) {
137     asm("smull  %[lo], %[hi], %[x], %[y]            \n"
138         "add    %[lo], %[a],  %[lo], lsr %[rshift]  \n"
139         "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
140         : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
141         : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift)
142         );
143     } else {
144     asm("smull  %[lo], %[hi], %[x], %[y]            \n"
145         "add    %[lo], %[a],  %[lo], lsr %[rshift]  \n"
146         "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
147         : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
148         : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift)
149         );
150     }
151     return result;
152 }
153 
154 inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
gglMulSubx(GGLfixed x,GGLfixed y,GGLfixed a,int shift)155 inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) {
156     GGLfixed result, t;
157     if (__builtin_constant_p(shift)) {
158     asm("smull  %[lo], %[hi], %[x], %[y]            \n"
159         "rsb    %[lo], %[a],  %[lo], lsr %[rshift]  \n"
160         "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
161         : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
162         : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift)
163         );
164     } else {
165     asm("smull  %[lo], %[hi], %[x], %[y]            \n"
166         "rsb    %[lo], %[a],  %[lo], lsr %[rshift]  \n"
167         "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
168         : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
169         : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift)
170         );
171     }
172     return result;
173 }
174 
175 inline int64_t gglMulii(int32_t x, int32_t y) CONST;
gglMulii(int32_t x,int32_t y)176 inline int64_t gglMulii(int32_t x, int32_t y)
177 {
178     // 64-bits result: r0=low, r1=high
179     union {
180         struct {
181             int32_t lo;
182             int32_t hi;
183         } s;
184         int64_t res;
185     };
186     asm("smull %0, %1, %2, %3   \n"
187         : "=r"(s.lo), "=&r"(s.hi)
188         : "%r"(x), "r"(y)
189         :
190         );
191     return res;
192 }
193 #elif defined(__mips__) && __mips_isa_rev < 6
194 
195 /*inline MIPS implementations*/
196 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
gglMulx(GGLfixed a,GGLfixed b,int shift)197 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
198     GGLfixed result,tmp,tmp1,tmp2;
199 
200     if (__builtin_constant_p(shift)) {
201         if (shift == 0) {
202             asm ("mult %[a], %[b] \t\n"
203               "mflo  %[res]   \t\n"
204             : [res]"=&r"(result),[tmp]"=&r"(tmp)
205             : [a]"r"(a),[b]"r"(b)
206             : "%hi","%lo"
207             );
208         } else if (shift == 32)
209         {
210             asm ("mult %[a], %[b] \t\n"
211             "li  %[tmp],1\t\n"
212             "sll  %[tmp],%[tmp],0x1f\t\n"
213             "mflo %[res]   \t\n"
214             "addu %[tmp1],%[tmp],%[res] \t\n"
215             "sltu %[tmp1],%[tmp1],%[tmp]\t\n"   /*obit*/
216             "sra %[tmp],%[tmp],0x1f \t\n"
217             "mfhi  %[res]   \t\n"
218             "addu %[res],%[res],%[tmp]\t\n"
219             "addu %[res],%[res],%[tmp1]\t\n"
220             : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1)
221             : [a]"r"(a),[b]"r"(b),[shift]"I"(shift)
222             : "%hi","%lo"
223             );
224         } else if ((shift >0) && (shift < 32))
225         {
226             asm ("mult %[a], %[b] \t\n"
227             "li  %[tmp],1 \t\n"
228             "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
229             "mflo  %[res]   \t\n"
230             "addu %[tmp1],%[tmp],%[res] \t\n"
231             "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
232             "addu  %[res],%[res],%[tmp] \t\n"
233             "mfhi  %[tmp]   \t\n"
234             "addu  %[tmp],%[tmp],%[tmp1] \t\n"
235             "sll   %[tmp],%[tmp],%[lshift] \t\n"
236             "srl   %[res],%[res],%[rshift]    \t\n"
237             "or    %[res],%[res],%[tmp] \t\n"
238             : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
239             : [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1)
240             : "%hi","%lo"
241             );
242         } else {
243             asm ("mult %[a], %[b] \t\n"
244             "li  %[tmp],1 \t\n"
245             "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
246             "mflo  %[res]   \t\n"
247             "addu %[tmp1],%[tmp],%[res] \t\n"
248             "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
249             "sra  %[tmp2],%[tmp],0x1f \t\n"
250             "addu  %[res],%[res],%[tmp] \t\n"
251             "mfhi  %[tmp]   \t\n"
252             "addu  %[tmp],%[tmp],%[tmp2] \t\n"
253             "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
254             "srl   %[tmp2],%[res],%[rshift]    \t\n"
255             "srav  %[res], %[tmp],%[rshift]\t\n"
256             "sll   %[tmp],%[tmp],1 \t\n"
257             "sll   %[tmp],%[tmp],%[norbits] \t\n"
258             "or    %[tmp],%[tmp],%[tmp2] \t\n"
259             "movz  %[res],%[tmp],%[bit5] \t\n"
260             : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
261             : [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20)
262             : "%hi","%lo"
263             );
264         }
265     } else {
266         asm ("mult %[a], %[b] \t\n"
267         "li  %[tmp],1 \t\n"
268         "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
269         "mflo  %[res]   \t\n"
270         "addu %[tmp1],%[tmp],%[res] \t\n"
271         "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
272         "sra  %[tmp2],%[tmp],0x1f \t\n"
273         "addu  %[res],%[res],%[tmp] \t\n"
274         "mfhi  %[tmp]   \t\n"
275         "addu  %[tmp],%[tmp],%[tmp2] \t\n"
276         "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
277         "srl   %[tmp2],%[res],%[rshift]    \t\n"
278         "srav  %[res], %[tmp],%[rshift]\t\n"
279         "sll   %[tmp],%[tmp],1 \t\n"
280         "sll   %[tmp],%[tmp],%[norbits] \t\n"
281         "or    %[tmp],%[tmp],%[tmp2] \t\n"
282         "movz  %[res],%[tmp],%[bit5] \t\n"
283          : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
284          : [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20)
285          : "%hi","%lo"
286          );
287         }
288 
289         return result;
290 }
291 
292 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
gglMulAddx(GGLfixed a,GGLfixed b,GGLfixed c,int shift)293 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
294     GGLfixed result,t,tmp1,tmp2;
295 
296     if (__builtin_constant_p(shift)) {
297         if (shift == 0) {
298                  asm ("mult %[a], %[b] \t\n"
299                  "mflo  %[lo]   \t\n"
300                  "addu  %[lo],%[lo],%[c]    \t\n"
301                  : [lo]"=&r"(result)
302                  : [a]"r"(a),[b]"r"(b),[c]"r"(c)
303                  : "%hi","%lo"
304                  );
305                 } else if (shift == 32) {
306                     asm ("mult %[a], %[b] \t\n"
307                     "mfhi  %[lo]   \t\n"
308                     "addu  %[lo],%[lo],%[c]    \t\n"
309                     : [lo]"=&r"(result)
310                     : [a]"r"(a),[b]"r"(b),[c]"r"(c)
311                     : "%hi","%lo"
312                     );
313                 } else if ((shift>0) && (shift<32)) {
314                     asm ("mult %[a], %[b] \t\n"
315                     "mflo  %[res]   \t\n"
316                     "mfhi  %[t]   \t\n"
317                     "srl   %[res],%[res],%[rshift]    \t\n"
318                     "sll   %[t],%[t],%[lshift]     \t\n"
319                     "or  %[res],%[res],%[t]    \t\n"
320                     "addu  %[res],%[res],%[c]    \t\n"
321                     : [res]"=&r"(result),[t]"=&r"(t)
322                     : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
323                     : "%hi","%lo"
324                     );
325                 } else {
326                     asm ("mult %[a], %[b] \t\n"
327                     "nor %[tmp1],$zero,%[shift]\t\n"
328                     "mflo  %[res]   \t\n"
329                     "mfhi  %[t]   \t\n"
330                     "srl   %[res],%[res],%[shift]    \t\n"
331                     "sll   %[tmp2],%[t],1     \t\n"
332                     "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
333                     "or  %[tmp1],%[tmp2],%[res]    \t\n"
334                     "srav  %[res],%[t],%[shift]     \t\n"
335                     "andi %[tmp2],%[shift],0x20\t\n"
336                     "movz %[res],%[tmp1],%[tmp2]\t\n"
337                     "addu  %[res],%[res],%[c]    \t\n"
338                     : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
339                     : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
340                     : "%hi","%lo"
341                     );
342                 }
343             } else {
344                 asm ("mult %[a], %[b] \t\n"
345                 "nor %[tmp1],$zero,%[shift]\t\n"
346                 "mflo  %[res]   \t\n"
347                 "mfhi  %[t]   \t\n"
348                 "srl   %[res],%[res],%[shift]    \t\n"
349                 "sll   %[tmp2],%[t],1     \t\n"
350                 "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
351                 "or  %[tmp1],%[tmp2],%[res]    \t\n"
352                 "srav  %[res],%[t],%[shift]     \t\n"
353                 "andi %[tmp2],%[shift],0x20\t\n"
354                 "movz %[res],%[tmp1],%[tmp2]\t\n"
355                 "addu  %[res],%[res],%[c]    \t\n"
356                 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
357                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
358                 : "%hi","%lo"
359                 );
360             }
361             return result;
362 }
363 
364 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
gglMulSubx(GGLfixed a,GGLfixed b,GGLfixed c,int shift)365 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
366     GGLfixed result,t,tmp1,tmp2;
367 
368     if (__builtin_constant_p(shift)) {
369         if (shift == 0) {
370                  asm ("mult %[a], %[b] \t\n"
371                  "mflo  %[lo]   \t\n"
372                  "subu  %[lo],%[lo],%[c]    \t\n"
373                  : [lo]"=&r"(result)
374                  : [a]"r"(a),[b]"r"(b),[c]"r"(c)
375                  : "%hi","%lo"
376                  );
377                 } else if (shift == 32) {
378                     asm ("mult %[a], %[b] \t\n"
379                     "mfhi  %[lo]   \t\n"
380                     "subu  %[lo],%[lo],%[c]    \t\n"
381                     : [lo]"=&r"(result)
382                     : [a]"r"(a),[b]"r"(b),[c]"r"(c)
383                     : "%hi","%lo"
384                     );
385                 } else if ((shift>0) && (shift<32)) {
386                     asm ("mult %[a], %[b] \t\n"
387                     "mflo  %[res]   \t\n"
388                     "mfhi  %[t]   \t\n"
389                     "srl   %[res],%[res],%[rshift]    \t\n"
390                     "sll   %[t],%[t],%[lshift]     \t\n"
391                     "or  %[res],%[res],%[t]    \t\n"
392                     "subu  %[res],%[res],%[c]    \t\n"
393                     : [res]"=&r"(result),[t]"=&r"(t)
394                     : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
395                     : "%hi","%lo"
396                     );
397                 } else {
398                     asm ("mult %[a], %[b] \t\n"
399                     "nor %[tmp1],$zero,%[shift]\t\n"
400                      "mflo  %[res]   \t\n"
401                      "mfhi  %[t]   \t\n"
402                      "srl   %[res],%[res],%[shift]    \t\n"
403                      "sll   %[tmp2],%[t],1     \t\n"
404                      "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
405                      "or  %[tmp1],%[tmp2],%[res]    \t\n"
406                      "srav  %[res],%[t],%[shift]     \t\n"
407                      "andi %[tmp2],%[shift],0x20\t\n"
408                      "movz %[res],%[tmp1],%[tmp2]\t\n"
409                      "subu  %[res],%[res],%[c]    \t\n"
410                      : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
411                      : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
412                      : "%hi","%lo"
413                      );
414                     }
415                 } else {
416                 asm ("mult %[a], %[b] \t\n"
417                 "nor %[tmp1],$zero,%[shift]\t\n"
418                 "mflo  %[res]   \t\n"
419                 "mfhi  %[t]   \t\n"
420                 "srl   %[res],%[res],%[shift]    \t\n"
421                 "sll   %[tmp2],%[t],1     \t\n"
422                 "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
423                 "or  %[tmp1],%[tmp2],%[res]    \t\n"
424                 "srav  %[res],%[t],%[shift]     \t\n"
425                 "andi %[tmp2],%[shift],0x20\t\n"
426                 "movz %[res],%[tmp1],%[tmp2]\t\n"
427                 "subu  %[res],%[res],%[c]    \t\n"
428                 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
429                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
430                 : "%hi","%lo"
431                 );
432             }
433     return result;
434 }
435 
436 inline int64_t gglMulii(int32_t x, int32_t y) CONST;
gglMulii(int32_t x,int32_t y)437 inline int64_t gglMulii(int32_t x, int32_t y) {
438     union {
439         struct {
440 #if defined(__MIPSEL__)
441             int32_t lo;
442             int32_t hi;
443 #elif defined(__MIPSEB__)
444             int32_t hi;
445             int32_t lo;
446 #endif
447         } s;
448         int64_t res;
449     }u;
450     asm("mult %2, %3 \t\n"
451         "mfhi %1   \t\n"
452         "mflo %0   \t\n"
453         : "=r"(u.s.lo), "=&r"(u.s.hi)
454         : "%r"(x), "r"(y)
455 	: "%hi","%lo"
456         );
457     return u.res;
458 }
459 
460 #elif defined(__aarch64__)
461 
462 // inline AArch64 implementations
463 
464 inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST;
gglMulx(GGLfixed x,GGLfixed y,int shift)465 inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift)
466 {
467     GGLfixed result;
468     GGLfixed round;
469 
470     asm("mov    %x[round], #1                        \n"
471         "lsl    %x[round], %x[round], %x[shift]      \n"
472         "lsr    %x[round], %x[round], #1             \n"
473         "smaddl %x[result], %w[x], %w[y],%x[round]   \n"
474         "lsr    %x[result], %x[result], %x[shift]    \n"
475         : [round]"=&r"(round), [result]"=&r"(result) \
476         : [x]"r"(x), [y]"r"(y), [shift] "r"(shift)   \
477         :
478        );
479     return result;
480 }
481 inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
gglMulAddx(GGLfixed x,GGLfixed y,GGLfixed a,int shift)482 inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
483 {
484     GGLfixed result;
485     asm("smull  %x[result], %w[x], %w[y]                     \n"
486         "lsr    %x[result], %x[result], %x[shift]            \n"
487         "add    %w[result], %w[result], %w[a]                \n"
488         : [result]"=&r"(result)                               \
489         : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \
490         :
491         );
492     return result;
493 }
494 
495 inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
gglMulSubx(GGLfixed x,GGLfixed y,GGLfixed a,int shift)496 inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
497 {
498 
499     GGLfixed result;
500     int rshift;
501 
502     asm("smull  %x[result], %w[x], %w[y]                     \n"
503         "lsr    %x[result], %x[result], %x[shift]            \n"
504         "sub    %w[result], %w[result], %w[a]                \n"
505         : [result]"=&r"(result)                               \
506         : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \
507         :
508         );
509     return result;
510 }
511 inline int64_t gglMulii(int32_t x, int32_t y) CONST;
gglMulii(int32_t x,int32_t y)512 inline int64_t gglMulii(int32_t x, int32_t y)
513 {
514     int64_t res;
515     asm("smull  %x0, %w1, %w2 \n"
516         : "=r"(res)
517         : "%r"(x), "r"(y)
518         :
519         );
520     return res;
521 }
522 
523 #elif defined(__mips__) && __mips_isa_rev == 6
524 
525 /*inline MIPS implementations*/
526 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
gglMulx(GGLfixed a,GGLfixed b,int shift)527 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
528     GGLfixed result,tmp,tmp1,tmp2;
529 
530     if (__builtin_constant_p(shift)) {
531         if (shift == 0) {
532             asm ("mul %[res], %[a], %[b] \t\n"
533             : [res]"=&r"(result)
534             : [a]"r"(a),[b]"r"(b)
535             );
536         } else if (shift == 32)
537         {
538             asm ("mul %[res], %[a], %[b] \t\n"
539             "li  %[tmp],1\t\n"
540             "sll  %[tmp],%[tmp],0x1f\t\n"
541             "addu %[tmp1],%[tmp],%[res] \t\n"
542             "muh %[res], %[a], %[b] \t\n"
543             "sltu %[tmp1],%[tmp1],%[tmp]\t\n"   /*obit*/
544             "sra %[tmp],%[tmp],0x1f \t\n"
545             "addu %[res],%[res],%[tmp]\t\n"
546             "addu %[res],%[res],%[tmp1]\t\n"
547             : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1)
548             : [a]"r"(a),[b]"r"(b),[shift]"I"(shift)
549             );
550         } else if ((shift >0) && (shift < 32))
551         {
552             asm ("mul %[res], %[a], %[b] \t\n"
553             "li  %[tmp],1 \t\n"
554             "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
555             "addu %[tmp1],%[tmp],%[res] \t\n"
556             "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
557             "addu  %[res],%[res],%[tmp] \t\n"
558             "muh %[tmp], %[a], %[b] \t\n"
559             "addu  %[tmp],%[tmp],%[tmp1] \t\n"
560             "sll   %[tmp],%[tmp],%[lshift] \t\n"
561             "srl   %[res],%[res],%[rshift]    \t\n"
562             "or    %[res],%[res],%[tmp] \t\n"
563             : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
564             : [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1)
565             );
566         } else {
567             asm ("mul %[res], %[a], %[b] \t\n"
568             "li  %[tmp],1 \t\n"
569             "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
570             "addu %[tmp1],%[tmp],%[res] \t\n"
571             "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
572             "sra  %[tmp2],%[tmp],0x1f \t\n"
573             "addu  %[res],%[res],%[tmp] \t\n"
574             "muh  %[tmp], %[a], %[b]   \t\n"
575             "addu  %[tmp],%[tmp],%[tmp2] \t\n"
576             "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
577             "srl   %[tmp2],%[res],%[rshift]    \t\n"
578             "srav  %[res], %[tmp],%[rshift]\t\n"
579             "sll   %[tmp],%[tmp],1 \t\n"
580             "sll   %[tmp],%[tmp],%[norbits] \t\n"
581             "or    %[tmp],%[tmp],%[tmp2] \t\n"
582             "seleqz  %[tmp],%[tmp],%[bit5] \t\n"
583             "selnez  %[res],%[res],%[bit5] \t\n"
584             "or    %[res],%[res],%[tmp] \t\n"
585             : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
586             : [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20)
587             );
588         }
589     } else {
590         asm ("mul %[res], %[a], %[b] \t\n"
591         "li  %[tmp],1 \t\n"
592         "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
593         "addu %[tmp1],%[tmp],%[res] \t\n"
594         "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
595         "sra  %[tmp2],%[tmp],0x1f \t\n"
596         "addu  %[res],%[res],%[tmp] \t\n"
597         "muh  %[tmp], %[a], %[b] \t\n"
598         "addu  %[tmp],%[tmp],%[tmp2] \t\n"
599         "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
600         "srl   %[tmp2],%[res],%[rshift]    \t\n"
601         "srav  %[res], %[tmp],%[rshift]\t\n"
602         "sll   %[tmp],%[tmp],1 \t\n"
603         "sll   %[tmp],%[tmp],%[norbits] \t\n"
604         "or    %[tmp],%[tmp],%[tmp2] \t\n"
605         "seleqz  %[tmp],%[tmp],%[bit5] \t\n"
606         "selnez  %[res],%[res],%[bit5] \t\n"
607         "or    %[res],%[res],%[tmp] \t\n"
608          : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
609          : [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20)
610          );
611         }
612         return result;
613 }
614 
615 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
gglMulAddx(GGLfixed a,GGLfixed b,GGLfixed c,int shift)616 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
617     GGLfixed result,t,tmp1,tmp2;
618 
619     if (__builtin_constant_p(shift)) {
620         if (shift == 0) {
621                  asm ("mul %[lo], %[a], %[b] \t\n"
622                  "addu  %[lo],%[lo],%[c]    \t\n"
623                  : [lo]"=&r"(result)
624                  : [a]"r"(a),[b]"r"(b),[c]"r"(c)
625                  );
626                 } else if (shift == 32) {
627                     asm ("muh %[lo], %[a], %[b] \t\n"
628                     "addu  %[lo],%[lo],%[c]    \t\n"
629                     : [lo]"=&r"(result)
630                     : [a]"r"(a),[b]"r"(b),[c]"r"(c)
631                     );
632                 } else if ((shift>0) && (shift<32)) {
633                     asm ("mul %[res], %[a], %[b] \t\n"
634                     "muh  %[t], %[a], %[b] \t\n"
635                     "srl   %[res],%[res],%[rshift]    \t\n"
636                     "sll   %[t],%[t],%[lshift]     \t\n"
637                     "or  %[res],%[res],%[t]    \t\n"
638                     "addu  %[res],%[res],%[c]    \t\n"
639                     : [res]"=&r"(result),[t]"=&r"(t)
640                     : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
641                     );
642                 } else {
643                     asm ("mul %[res], %[a], %[b] \t\n"
644                     "muh %[t], %[a], %[b] \t\n"
645                     "nor %[tmp1],$zero,%[shift]\t\n"
646                     "srl   %[res],%[res],%[shift]    \t\n"
647                     "sll   %[tmp2],%[t],1     \t\n"
648                     "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
649                     "or  %[tmp1],%[tmp2],%[res]    \t\n"
650                     "srav  %[res],%[t],%[shift]     \t\n"
651                     "andi %[tmp2],%[shift],0x20\t\n"
652                     "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
653                     "selnez %[res],%[res],%[tmp2]\t\n"
654                     "or %[res],%[res],%[tmp1]\t\n"
655                     "addu  %[res],%[res],%[c]    \t\n"
656                     : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
657                     : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
658                     );
659                 }
660             } else {
661                 asm ("mul %[res], %[a], %[b] \t\n"
662                 "muh %[t], %[a], %[b] \t\n"
663                 "nor %[tmp1],$zero,%[shift]\t\n"
664                 "srl   %[res],%[res],%[shift]    \t\n"
665                 "sll   %[tmp2],%[t],1     \t\n"
666                 "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
667                 "or  %[tmp1],%[tmp2],%[res]    \t\n"
668                 "srav  %[res],%[t],%[shift]     \t\n"
669                 "andi %[tmp2],%[shift],0x20\t\n"
670                 "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
671                 "selnez %[res],%[res],%[tmp2]\t\n"
672                 "or %[res],%[res],%[tmp1]\t\n"
673                 "addu  %[res],%[res],%[c]    \t\n"
674                 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
675                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
676                 );
677             }
678             return result;
679 }
680 
681 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
gglMulSubx(GGLfixed a,GGLfixed b,GGLfixed c,int shift)682 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
683     GGLfixed result,t,tmp1,tmp2;
684 
685     if (__builtin_constant_p(shift)) {
686         if (shift == 0) {
687                  asm ("mul %[lo], %[a], %[b] \t\n"
688                  "subu  %[lo],%[lo],%[c]    \t\n"
689                  : [lo]"=&r"(result)
690                  : [a]"r"(a),[b]"r"(b),[c]"r"(c)
691                  );
692                 } else if (shift == 32) {
693                     asm ("muh %[lo], %[a], %[b] \t\n"
694                     "subu  %[lo],%[lo],%[c]    \t\n"
695                     : [lo]"=&r"(result)
696                     : [a]"r"(a),[b]"r"(b),[c]"r"(c)
697                     );
698                 } else if ((shift>0) && (shift<32)) {
699                     asm ("mul %[res], %[a], %[b] \t\n"
700                     "muh %[t], %[a], %[b] \t\n"
701                     "srl   %[res],%[res],%[rshift]    \t\n"
702                     "sll   %[t],%[t],%[lshift]     \t\n"
703                     "or  %[res],%[res],%[t]    \t\n"
704                     "subu  %[res],%[res],%[c]    \t\n"
705                     : [res]"=&r"(result),[t]"=&r"(t)
706                     : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
707                     );
708                 } else {
709                     asm ("mul %[res], %[a], %[b] \t\n"
710                     "muh %[t], %[a], %[b] \t\n"
711                     "nor %[tmp1],$zero,%[shift]\t\n"
712                     "srl   %[res],%[res],%[shift]    \t\n"
713                     "sll   %[tmp2],%[t],1     \t\n"
714                     "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
715                     "or  %[tmp1],%[tmp2],%[res]    \t\n"
716                     "srav  %[res],%[t],%[shift]     \t\n"
717                     "andi %[tmp2],%[shift],0x20\t\n"
718                     "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
719                     "selnez %[res],%[res],%[tmp2]\t\n"
720                     "or %[res],%[res],%[tmp1]\t\n"
721                     "subu  %[res],%[res],%[c]    \t\n"
722                     : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
723                     : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
724                      );
725                     }
726                 } else {
727                 asm ("mul %[res], %[a], %[b] \t\n"
728                 "muh %[t], %[a], %[b] \t\n"
729                 "nor %[tmp1],$zero,%[shift]\t\n"
730                 "srl   %[res],%[res],%[shift]    \t\n"
731                 "sll   %[tmp2],%[t],1     \t\n"
732                 "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
733                 "or  %[tmp1],%[tmp2],%[res]    \t\n"
734                 "srav  %[res],%[t],%[shift]     \t\n"
735                 "andi %[tmp2],%[shift],0x20\t\n"
736                 "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
737                 "selnez %[res],%[res],%[tmp2]\t\n"
738                 "or %[res],%[res],%[tmp1]\t\n"
739                 "subu  %[res],%[res],%[c]    \t\n"
740                 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
741                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
742                 );
743             }
744     return result;
745 }
746 
747 inline int64_t gglMulii(int32_t x, int32_t y) CONST;
gglMulii(int32_t x,int32_t y)748 inline int64_t gglMulii(int32_t x, int32_t y) {
749     union {
750         struct {
751 #if defined(__MIPSEL__)
752             int32_t lo;
753             int32_t hi;
754 #elif defined(__MIPSEB__)
755             int32_t hi;
756             int32_t lo;
757 #endif
758         } s;
759         int64_t res;
760     }u;
761     asm("mul %0, %2, %3 \t\n"
762         "muh %1, %2, %3 \t\n"
763         : "=r"(u.s.lo), "=&r"(u.s.hi)
764         : "%r"(x), "r"(y)
765         );
766     return u.res;
767 }
768 
769 #else // ----------------------------------------------------------------------
770 
771 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
gglMulx(GGLfixed a,GGLfixed b,int shift)772 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
773     return GGLfixed((int64_t(a)*b + (1<<(shift-1)))>>shift);
774 }
775 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
gglMulAddx(GGLfixed a,GGLfixed b,GGLfixed c,int shift)776 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
777     return GGLfixed((int64_t(a)*b)>>shift) + c;
778 }
779 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
gglMulSubx(GGLfixed a,GGLfixed b,GGLfixed c,int shift)780 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
781     return GGLfixed((int64_t(a)*b)>>shift) - c;
782 }
783 inline int64_t gglMulii(int32_t a, int32_t b) CONST;
gglMulii(int32_t a,int32_t b)784 inline int64_t gglMulii(int32_t a, int32_t b) {
785     return int64_t(a)*b;
786 }
787 
788 #endif
789 
790 // ------------------------------------------------------------------------
791 
792 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) CONST;
gglMulx(GGLfixed a,GGLfixed b)793 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) {
794     return gglMulx(a, b, 16);
795 }
796 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) CONST;
gglMulAddx(GGLfixed a,GGLfixed b,GGLfixed c)797 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) {
798     return gglMulAddx(a, b, c, 16);
799 }
800 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) CONST;
gglMulSubx(GGLfixed a,GGLfixed b,GGLfixed c)801 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) {
802     return gglMulSubx(a, b, c, 16);
803 }
804 
805 // ------------------------------------------------------------------------
806 
807 inline int32_t gglClz(int32_t x) CONST;
gglClz(int32_t x)808 inline int32_t gglClz(int32_t x)
809 {
810 #if (defined(__arm__) && !defined(__thumb__)) || defined(__mips__) || defined(__aarch64__)
811     return __builtin_clz(x);
812 #else
813     if (!x) return 32;
814     int32_t exp = 31;
815     if (x & 0xFFFF0000) { exp -=16; x >>= 16; }
816     if (x & 0x0000ff00) { exp -= 8; x >>= 8; }
817     if (x & 0x000000f0) { exp -= 4; x >>= 4; }
818     if (x & 0x0000000c) { exp -= 2; x >>= 2; }
819     if (x & 0x00000002) { exp -= 1; }
820     return exp;
821 #endif
822 }
823 
824 // ------------------------------------------------------------------------
825 
826 int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i) CONST;
827 
828 inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) CONST;
gglDivQ16(GGLfixed n,GGLfixed d)829 inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) {
830     return gglDivQ(n, d, 16);
831 }
832 
833 inline int32_t gglDivx(GGLfixed n, GGLfixed d) CONST;
gglDivx(GGLfixed n,GGLfixed d)834 inline int32_t gglDivx(GGLfixed n, GGLfixed d) {
835     return gglDivQ(n, d, 16);
836 }
837 
838 // ------------------------------------------------------------------------
839 
840 inline GGLfixed gglRecipFast(GGLfixed x) CONST;
gglRecipFast(GGLfixed x)841 inline GGLfixed gglRecipFast(GGLfixed x)
842 {
843     // This is a really bad approximation of 1/x, but it's also
844     // very fast. x must be strictly positive.
845     // if x between [0.5, 1[ , then 1/x = 3-2*x
846     // (we use 2.30 fixed-point)
847     const int32_t lz = gglClz(x);
848     return (0xC0000000 - (x << (lz - 1))) >> (30-lz);
849 }
850 
851 // ------------------------------------------------------------------------
852 
853 inline GGLfixed gglClampx(GGLfixed c) CONST;
gglClampx(GGLfixed c)854 inline GGLfixed gglClampx(GGLfixed c)
855 {
856 #if defined(__thumb__)
857     // clamp without branches
858     c &= ~(c>>31);  c = FIXED_ONE - c;
859     c &= ~(c>>31);  c = FIXED_ONE - c;
860 #else
861 #if defined(__arm__)
862     // I don't know why gcc thinks its smarter than me! The code below
863     // clamps to zero in one instruction, but gcc won't generate it and
864     // replace it by a cmp + movlt (it's quite amazing actually).
865     asm("bic %0, %1, %1, asr #31\n" : "=r"(c) : "r"(c));
866 #elif defined(__aarch64__)
867     asm("bic %w0, %w1, %w1, asr #31\n" : "=r"(c) : "r"(c));
868 #else
869     c &= ~(c>>31);
870 #endif
871     if (c>FIXED_ONE)
872         c = FIXED_ONE;
873 #endif
874     return c;
875 }
876 
877 // ------------------------------------------------------------------------
878 
879 #endif // ANDROID_GGL_FIXED_H
880