1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 /*********************************************************************************/
19 /*  Filename: fastquant_inline.h                                                        */
20 /*  Description: Implementation for in-line functions used in dct.cpp           */
21 /*  Modified:                                                                   */
22 /*********************************************************************************/
23 #ifndef _FASTQUANT_INLINE_H_
24 #define _FASTQUANT_INLINE_H_
25 
26 #include "mp4def.h"
27 
28 #if !defined(PV_ARM_GCC_V5) && !defined(PV_ARM_GCC_V4) /* ARM GNU COMPILER  */
29 
aan_scale(int32 q_value,int32 coeff,int32 round,int32 QPdiv2)30 __inline int32 aan_scale(int32 q_value, int32 coeff, int32 round, int32 QPdiv2)
31 {
32     q_value = coeff * q_value + round;
33     coeff = q_value >> 16;
34     if (coeff < 0)  coeff += QPdiv2;
35     else            coeff -= QPdiv2;
36 
37     return coeff;
38 }
39 
40 
coeff_quant(int32 coeff,int32 q_scale,int32 shift)41 __inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift)
42 {
43     int32 q_value;
44 
45     q_value = coeff * q_scale;      //q_value = -((-(coeff + QPdiv2)*q_scale)>>LSL);
46     q_value >>= shift;                  //q_value = (((coeff - QPdiv2)*q_scale)>>LSL );
47     q_value += ((UInt)q_value >> 31); /* add one if negative */
48 
49     return q_value;
50 }
51 
coeff_clip(int32 q_value,int32 ac_clip)52 __inline int32  coeff_clip(int32 q_value, int32 ac_clip)
53 {
54     int32 coeff = q_value + ac_clip;
55 
56     if ((UInt)coeff > (UInt)(ac_clip << 1))
57         q_value = ac_clip ^(q_value >> 31);
58 
59     return q_value;
60 }
61 
coeff_dequant(int32 q_value,int32 QPx2,int32 Addition,int32 tmp)62 __inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp)
63 {
64     int32 coeff;
65 
66     OSCL_UNUSED_ARG(tmp);
67 
68     if (q_value < 0)
69     {
70         coeff = q_value * QPx2 - Addition;
71         if (coeff < -2048)
72             coeff = -2048;
73     }
74     else
75     {
76         coeff = q_value * QPx2 + Addition;
77         if (coeff > 2047)
78             coeff = 2047;
79     }
80     return coeff;
81 }
82 
smlabb(int32 q_value,int32 coeff,int32 round)83 __inline int32 smlabb(int32 q_value, int32 coeff, int32 round)
84 {
85     q_value = coeff * q_value + round;
86 
87     return q_value;
88 }
89 
smulbb(int32 q_scale,int32 coeff)90 __inline int32 smulbb(int32 q_scale, int32 coeff)
91 {
92     int32 q_value;
93 
94     q_value = coeff * q_scale;
95 
96     return q_value;
97 }
98 
aan_dc_scale(int32 coeff,int32 QP)99 __inline int32 aan_dc_scale(int32 coeff, int32 QP)
100 {
101 
102     if (coeff < 0)  coeff += (QP >> 1);
103     else            coeff -= (QP >> 1);
104 
105     return coeff;
106 }
107 
clip_2047(int32 q_value,int32 tmp)108 __inline int32 clip_2047(int32 q_value, int32 tmp)
109 {
110     OSCL_UNUSED_ARG(tmp);
111 
112     if (q_value < -2048)
113     {
114         q_value = -2048;
115     }
116     else if (q_value > 2047)
117     {
118         q_value = 2047;
119     }
120 
121     return q_value;
122 }
123 
coeff_dequant_mpeg(int32 q_value,int32 stepsize,int32 QP,int32 tmp)124 __inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp)
125 {
126     int32 coeff;
127 
128     OSCL_UNUSED_ARG(tmp);
129 
130     coeff = q_value << 1;
131     stepsize *= QP;
132     if (coeff > 0)
133     {
134         q_value = (coeff + 1) * stepsize;
135         q_value >>= 4;
136         if (q_value > 2047) q_value = 2047;
137     }
138     else
139     {
140         q_value = (coeff - 1) * stepsize;
141         q_value += 15;
142         q_value >>= 4;
143         if (q_value < -2048)    q_value = -2048;
144     }
145 
146     return q_value;
147 }
148 
coeff_dequant_mpeg_intra(int32 q_value,int32 tmp)149 __inline int32 coeff_dequant_mpeg_intra(int32 q_value, int32 tmp)
150 {
151     OSCL_UNUSED_ARG(tmp);
152 
153     q_value <<= 1;
154     if (q_value > 0)
155     {
156         q_value >>= 4;
157         if (q_value > 2047) q_value = 2047;
158     }
159     else
160     {
161         q_value += 15;
162         q_value >>= 4;
163         if (q_value < -2048) q_value = -2048;
164     }
165 
166     return q_value;
167 }
168 
169 #elif defined(__CC_ARM)  /* only work with arm v5 */
170 
171 #if defined(__TARGET_ARCH_5TE)
172 
/*
 * aan_scale -- RealView inline assembly, __TARGET_ARCH_5TE variant.
 *
 * Forms (coeff * q_value + round) >> 16 and biases the result by QPdiv2:
 * added when the condition LE holds after the shift, subtracted when GT
 * holds.  SMLABB multiplies only the signed bottom 16 bits of its two
 * multiplicand registers.
 *
 * Returns the biased value (left in coeff).
 *
 * NOTE(review): MOVS updates N and Z but not V, while LE/GT also read V,
 * so the add/subtract choice depends on the V flag left by earlier code
 * -- confirm this is acceptable.
 */
aan_scale(int32 q_value,int32 coeff,int32 round,int32 QPdiv2)173 __inline int32 aan_scale(int32 q_value, int32 coeff,
174                          int32 round, int32 QPdiv2)
175 {
176     __asm
177     {
        /* q_value = coeff[15:0] * q_value[15:0] + round */
178         smlabb q_value, coeff, q_value, round
        /* coeff = q_value >> 16 (arithmetic), setting the flags */
179         movs       coeff, q_value, asr #16
        /* exactly one of the next two instructions executes */
180         addle   coeff, coeff, QPdiv2
181         subgt   coeff, coeff, QPdiv2
182     }
183 
184     return coeff;
185 }
186 
/*
 * coeff_quant -- RealView inline assembly, __TARGET_ARCH_5TE variant.
 *
 * Multiplies the signed bottom 16 bits of q_scale and coeff (SMULBB),
 * shifts the product right arithmetically by `shift`, then adds the sign
 * bit of the shifted value, i.e. adds one when it is negative.
 *
 * Returns the quantised level.
 */
coeff_quant(int32 coeff,int32 q_scale,int32 shift)187 __inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift)
188 {
189     int32 q_value;
190 
191     __asm
192     {
        /* the trailing comments on the next two lines are disabled
           alternative instructions kept from an earlier version */
193         smulbb  q_value, q_scale, coeff    /*mov    coeff, coeff, lsl #14*/
194         mov     coeff, q_value, asr shift   /*smull tmp, coeff, q_scale, coeff*/
        /* q_value = coeff + (coeff >> 31 logical): +1 for negative values */
195         add q_value, coeff, coeff, lsr #31
196     }
197 
198 
199     return q_value;
200 }
201 
/*
 * coeff_dequant -- RealView inline assembly, __TARGET_ARCH_5TE variant.
 *
 * Forms q_value * QPx2 (SMULBB: signed bottom 16 bits of each operand),
 * subtracts Addition when q_value < 0 and adds it otherwise, then limits
 * the result: when (unsigned)(coeff + tmp) exceeds 4094 (3840 + 254) the
 * result becomes tmp XOR (coeff >> 31).
 *
 * The limiting sequence is the same one clip_2047 uses, so tmp is
 * presumably required to be 2047 here as well (giving the range
 * [-2048, 2047]) -- confirm against the callers.
 *
 * Returns the limited value (left in coeff).
 */
coeff_dequant(int32 q_value,int32 QPx2,int32 Addition,int32 tmp)202 __inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp)
203 {
204     int32 coeff;
205 
206     __asm
207     {
        /* flags from this compare select SUBLT / ADDGE below */
208         cmp     q_value, #0
209         smulbb  coeff, q_value, QPx2
210         sublt   coeff, coeff, Addition
211         addge   coeff, coeff, Addition
        /* q_value is reused as scratch for the range test */
212         add     q_value, coeff, tmp
213         subs    q_value, q_value, #3840
214         subcss  q_value, q_value, #254
        /* HI: unsigned (coeff + tmp) > 4094, so saturate by sign */
215         eorhi   coeff, tmp, coeff, asr #31
216     }
217 
218     return coeff;
219 }
220 
/*
 * smlabb -- RealView inline assembly, __TARGET_ARCH_5TE variant.
 *
 * Wraps a single SMLABB instruction: returns
 * coeff[15:0] * q_value[15:0] + round, treating both 16-bit halves as
 * signed.
 */
smlabb(int32 q_value,int32 coeff,int32 round)221 __inline int32 smlabb(int32 q_value, int32 coeff, int32 round)
222 {
223     __asm
224     {
225         smlabb q_value, coeff, q_value, round
226     }
227 
228     return q_value;
229 }
230 
/*
 * smulbb -- RealView inline assembly, __TARGET_ARCH_5TE variant.
 *
 * Wraps a single SMULBB instruction: returns
 * q_scale[15:0] * coeff[15:0], treating both 16-bit halves as signed.
 */
smulbb(int32 q_scale,int32 coeff)231 __inline int32 smulbb(int32 q_scale, int32 coeff)
232 {
233     int32 q_value;
234 
235     __asm
236     {
237         smulbb  q_value, q_scale, coeff
238     }
239 
240     return q_value;
241 }
242 
/*
 * coeff_dequant_mpeg -- RealView inline assembly, __TARGET_ARCH_5TE variant.
 *
 * Doubles q_value, moves the doubled value one further from zero (+1 on
 * GT, -1 on LT, unchanged otherwise), multiplies it by stepsize * QP
 * (both products use SMULBB, i.e. signed bottom 16 bits), adds 15 on LT,
 * shifts right arithmetically by 4, and limits the result with the same
 * test clip_2047 uses: when (unsigned)(q_value + tmp) exceeds 4094 the
 * result becomes tmp XOR (q_value >> 31).
 *
 * Returns the limited value.
 *
 * NOTE(review): MOVS updates N and Z but not V, while GT/LT also read V,
 * so the conditional instructions depend on the V flag left by earlier
 * code -- confirm this is acceptable.
 */
coeff_dequant_mpeg(int32 q_value,int32 stepsize,int32 QP,int32 tmp)243 __inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp)
244 {
245     /* tmp must have value of 2047 */
246     int32 coeff;
247     __asm
248     {
        /* coeff = 2 * q_value; these flags drive every GT/LT below */
249         movs    coeff, q_value, lsl #1
250         smulbb  stepsize, stepsize, QP
251         addgt   coeff, coeff, #1
252         sublt   coeff, coeff, #1
253         smulbb  q_value, coeff, stepsize
254         addlt   q_value, q_value, #15
255         mov     q_value, q_value, asr #4
        /* coeff is reused as scratch for the range test */
256         add     coeff, q_value, tmp
257         subs    coeff, coeff, #0xf00
258         subcss  coeff, coeff, #0xfe
259         eorhi   q_value, tmp, q_value, asr #31
260     }
261 
262     return q_value;
263 }
264 
265 
266 #else // not ARMV5TE
267 
/*
 * aan_scale -- RealView inline assembly, variant used when
 * __TARGET_ARCH_5TE is not defined.
 *
 * Forms (coeff * q_value + round) >> 16 with a full-register MLA and
 * biases the result by QPdiv2: added when the condition LE holds after
 * the shift, subtracted when GT holds.
 *
 * Returns the biased value (left in coeff).
 *
 * NOTE(review): MOVS updates N and Z but not V, while LE/GT also read V,
 * so the add/subtract choice depends on the V flag left by earlier code
 * -- confirm this is acceptable.
 */
aan_scale(int32 q_value,int32 coeff,int32 round,int32 QPdiv2)268 __inline int32 aan_scale(int32 q_value, int32 coeff,
269                          int32 round, int32 QPdiv2)
270 {
271     __asm
272     {
        /* q_value = coeff * q_value + round */
273         mla q_value, coeff, q_value, round
        /* coeff = q_value >> 16 (arithmetic), setting the flags */
274         movs       coeff, q_value, asr #16
        /* exactly one of the next two instructions executes */
275         addle   coeff, coeff, QPdiv2
276         subgt   coeff, coeff, QPdiv2
277     }
278 
279     return coeff;
280 }
281 
/*
 * coeff_quant -- RealView inline assembly, variant used when
 * __TARGET_ARCH_5TE is not defined.
 *
 * Multiplies q_scale by coeff with a full-register MUL, shifts the
 * product right arithmetically by `shift`, then adds the sign bit of the
 * shifted value, i.e. adds one when it is negative.
 *
 * Returns the quantised level.
 */
coeff_quant(int32 coeff,int32 q_scale,int32 shift)282 __inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift)
283 {
284     int32 q_value;
285 
286     __asm
287     {
        /* the trailing comments on the next two lines are disabled
           alternative instructions kept from an earlier version */
288         mul q_value, q_scale, coeff    /*mov    coeff, coeff, lsl #14*/
289         mov     coeff, q_value, asr shift   /*smull tmp, coeff, q_scale, coeff*/
        /* q_value = coeff + (coeff >> 31 logical): +1 for negative values */
290         add q_value, coeff, coeff, lsr #31
291     }
292 
293 
294     return q_value;
295 }
296 
297 
/*
 * coeff_dequant -- RealView inline assembly, variant used when
 * __TARGET_ARCH_5TE is not defined.
 *
 * Forms q_value * QPx2 with a full-register MUL, subtracts Addition when
 * q_value < 0 and adds it otherwise, then limits the result: when
 * (unsigned)(coeff + tmp) exceeds 4094 (3840 + 254) the result becomes
 * tmp XOR (coeff >> 31).
 *
 * The limiting sequence is the same one clip_2047 uses, so tmp is
 * presumably required to be 2047 here as well (giving the range
 * [-2048, 2047]) -- confirm against the callers.
 *
 * Returns the limited value (left in coeff).
 */
coeff_dequant(int32 q_value,int32 QPx2,int32 Addition,int32 tmp)298 __inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp)
299 {
300     int32 coeff;
301 
302     __asm
303     {
        /* flags from this compare select SUBLT / ADDGE below */
304         cmp     q_value, #0
305         mul coeff, q_value, QPx2
306         sublt   coeff, coeff, Addition
307         addge   coeff, coeff, Addition
        /* q_value is reused as scratch for the range test */
308         add     q_value, coeff, tmp
309         subs    q_value, q_value, #3840
310         subcss  q_value, q_value, #254
        /* HI: unsigned (coeff + tmp) > 4094, so saturate by sign */
311         eorhi   coeff, tmp, coeff, asr #31
312     }
313 
314     return coeff;
315 }
316 
/*
 * smlabb -- RealView inline assembly, variant used when
 * __TARGET_ARCH_5TE is not defined.
 *
 * Stands in for the ARMv5TE SMLABB helper using MLA: returns
 * coeff * q_value + round computed on the full registers rather than on
 * their bottom 16 bits.
 */
smlabb(int32 q_value,int32 coeff,int32 round)317 __inline int32 smlabb(int32 q_value, int32 coeff, int32 round)
318 {
319     __asm
320     {
321         mla q_value, coeff, q_value, round
322     }
323 
324     return q_value;
325 }
326 
/*
 * smulbb -- RealView inline assembly, variant used when
 * __TARGET_ARCH_5TE is not defined.
 *
 * Stands in for the ARMv5TE SMULBB helper using MUL: returns
 * q_scale * coeff computed on the full registers rather than on their
 * bottom 16 bits.
 */
smulbb(int32 q_scale,int32 coeff)327 __inline int32 smulbb(int32 q_scale, int32 coeff)
328 {
329     int32 q_value;
330 
331     __asm
332     {
333         mul q_value, q_scale, coeff
334     }
335 
336     return q_value;
337 }
338 
339 
/*
 * coeff_dequant_mpeg -- RealView inline assembly, variant used when
 * __TARGET_ARCH_5TE is not defined.
 *
 * Doubles q_value, moves the doubled value one further from zero (+1 on
 * GT, -1 on LT, unchanged otherwise), multiplies it by stepsize * QP
 * (full-register MUL for both products), adds 15 on LT, shifts right
 * arithmetically by 4, and limits the result with the same test
 * clip_2047 uses: when (unsigned)(q_value + tmp) exceeds 4094 the result
 * becomes tmp XOR (q_value >> 31).
 *
 * Returns the limited value.
 *
 * NOTE(review): MOVS updates N and Z but not V, while GT/LT also read V,
 * so the conditional instructions depend on the V flag left by earlier
 * code -- confirm this is acceptable.
 */
coeff_dequant_mpeg(int32 q_value,int32 stepsize,int32 QP,int32 tmp)340 __inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp)
341 {
342     /* tmp must have value of 2047 */
343     int32 coeff;
344     __asm
345     {
        /* coeff = 2 * q_value; these flags drive every GT/LT below */
346         movs    coeff, q_value, lsl #1
347         mul  stepsize, stepsize, QP
348         addgt   coeff, coeff, #1
349         sublt   coeff, coeff, #1
350         mul q_value, coeff, stepsize
351         addlt   q_value, q_value, #15
352         mov     q_value, q_value, asr #4
        /* coeff is reused as scratch for the range test */
353         add     coeff, q_value, tmp
354         subs    coeff, coeff, #0xf00
355         subcss  coeff, coeff, #0xfe
356         eorhi   q_value, tmp, q_value, asr #31
357     }
358 
359     return q_value;
360 }
361 
362 
363 #endif
364 
/*
 * coeff_clip -- RealView inline assembly, shared by both architecture
 * variants of this branch.
 *
 * Leaves q_value unchanged unless (unsigned)(q_value + ac_clip) is
 * greater than 2 * ac_clip; in that case q_value becomes
 * ac_clip XOR (q_value >> 31), i.e. ac_clip for a positive input and
 * ~ac_clip for a negative one.
 *
 * Returns the possibly clipped q_value.
 */
coeff_clip(int32 q_value,int32 ac_clip)365 __inline int32  coeff_clip(int32 q_value, int32 ac_clip)
366 {
    /* scratch for the range test only; never returned */
367     int32 coeff;
368 
369     __asm
370     {
371         add     coeff, q_value, ac_clip
        /* HI afterwards means unsigned coeff > (ac_clip << 1) */
372         subs    coeff, coeff, ac_clip, lsl #1
373         eorhi   q_value, ac_clip, q_value, asr #31
374     }
375 
376     return q_value;
377 }
378 
/*
 * aan_dc_scale -- RealView inline assembly, shared by both architecture
 * variants of this branch.
 *
 * Biases coeff by QP >> 1 (arithmetic shift): the half step is added when
 * coeff <= 0 and subtracted when coeff > 0.
 *
 * Returns the biased coefficient.
 */
aan_dc_scale(int32 coeff,int32 QP)379 __inline int32 aan_dc_scale(int32 coeff, int32 QP)
380 {
381 
382     __asm
383     {
384         cmp   coeff, #0
        /* exactly one of the next two instructions executes */
385         addle   coeff, coeff, QP, asr #1
386         subgt   coeff, coeff, QP, asr #1
387     }
388 
389     return coeff;
390 }
391 
/*
 * clip_2047 -- RealView inline assembly, shared by both architecture
 * variants of this branch.
 *
 * Leaves q_value unchanged unless (unsigned)(q_value + tmp) is greater
 * than 4094 (0xf00 + 0xfe); in that case q_value becomes
 * tmp XOR (q_value >> 31).  With tmp == 2047 this limits q_value to
 * [-2048, 2047].
 *
 * Returns the limited q_value.
 */
clip_2047(int32 q_value,int32 tmp)392 __inline int32 clip_2047(int32 q_value, int32 tmp)
393 {
394     /* tmp must have value of 2047 */
    /* scratch for the range test only; never returned */
395     int32 coeff;
396 
397     __asm
398     {
399         add     coeff, q_value, tmp
        /* 4094 is subtracted in two steps; the second runs only when the
           first did not borrow */
400         subs    coeff, coeff, #0xf00
401         subcss  coeff, coeff, #0xfe
        /* HI: unsigned (q_value + tmp) > 4094, so saturate by sign */
402         eorhi   q_value, tmp, q_value, asr #31
403     }
404 
405     return q_value;
406 }
407 
/*
 * coeff_dequant_mpeg_intra -- RealView inline assembly, shared by both
 * architecture variants of this branch.
 *
 * Doubles q_value, adds 15 on LT, shifts right arithmetically by 4, and
 * limits the result with the same test clip_2047 uses: when
 * (unsigned)(q_value + tmp) exceeds 4094 the result becomes
 * tmp XOR (q_value >> 31).  tmp is presumably required to be 2047, as
 * clip_2047 documents -- confirm against the callers.
 *
 * Returns the limited value.
 *
 * NOTE(review): MOVS updates N and Z but not V, while LT also reads V,
 * so the ADDLT depends on the V flag left by earlier code -- confirm
 * this is acceptable.
 */
coeff_dequant_mpeg_intra(int32 q_value,int32 tmp)408 __inline int32 coeff_dequant_mpeg_intra(int32 q_value, int32 tmp)
409 {
    /* scratch for the range test only; never returned */
410     int32 coeff;
411 
412     __asm
413     {
414         movs    q_value, q_value, lsl #1
415         addlt   q_value, q_value, #15
416         mov     q_value, q_value, asr #4
417         add     coeff, q_value, tmp
418         subs    coeff, coeff, #0xf00
419         subcss  coeff, coeff, #0xfe
420         eorhi   q_value, tmp, q_value, asr #31
421     }
422 
423     return q_value;
424 }
425 
426 #elif ( defined(PV_ARM_GCC_V4) || defined(PV_ARM_GCC_V5) ) /* ARM GNU COMPILER  */
427 
aan_scale(int32 q_value,int32 coeff,int32 round,int32 QPdiv2)428 __inline int32 aan_scale(int32 q_value, int32 coeff,
429                          int32 round, int32 QPdiv2)
430 {
431     register int32 out;
432     register int32 qv = q_value;
433     register int32 cf = coeff;
434     register int32 rr = round;
435     register int32 qp = QPdiv2;
436 
437     asm volatile("smlabb %0, %2, %1, %3\n\t"
438                  "movs %0, %0, asr #16\n\t"
439                  "addle %0, %0, %4\n\t"
440                  "subgt %0, %0, %4"
441              : "=&r"(out)
442                          : "r"(qv),
443                          "r"(cf),
444                          "r"(rr),
445                          "r"(qp));
446     return out;
447 }
448 
coeff_quant(int32 coeff,int32 q_scale,int32 shift)449 __inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift)
450 {
451     register int32 out;
452     register int32 temp1;
453     register int32 cc = coeff;
454     register int32 qs = q_scale;
455     register int32 ss = shift;
456 
457     asm volatile("smulbb %0, %3, %2\n\t"
458                  "mov %1, %0, asr %4\n\t"
459                  "add %0, %1, %1, lsr #31"
460              : "=&r"(out),
461                  "=&r"(temp1)
462                          : "r"(cc),
463                          "r"(qs),
464                          "r"(ss));
465 
466     return out;
467 }
468 
coeff_clip(int32 q_value,int32 ac_clip)469 __inline int32 coeff_clip(int32 q_value, int32 ac_clip)
470 {
471     register int32 coeff;
472 
473     asm volatile("add   %1, %0, %2\n\t"
474                  "subs  %1, %1, %2, lsl #1\n\t"
475                  "eorhi %0, %2, %0, asr #31"
476              : "+r"(q_value),
477                  "=&r"(coeff)
478                          : "r"(ac_clip));
479 
480     return q_value;
481 }
482 
coeff_dequant(int32 q_value,int32 QPx2,int32 Addition,int32 tmp)483 __inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp)
484 {
485     register int32 out;
486     register int32 temp1;
487     register int32 qv = q_value;
488     register int32 qp = QPx2;
489     register int32 aa = Addition;
490     register int32 tt = tmp;
491 
492     asm volatile("cmp    %2, #0\n\t"
493                  "mul    %0, %2, %3\n\t"
494                  "sublt  %0, %0, %4\n\t"
495                  "addge  %0, %0, %4\n\t"
496                  "add    %1, %0, %5\n\t"
497                  "subs   %1, %1, #3840\n\t"
498                  "subcss %1, %1, #254\n\t"
499                  "eorhi  %0, %5, %0, asr #31"
500              : "=&r"(out),
501                  "=&r"(temp1)
502                          : "r"(qv),
503                          "r"(qp),
504                          "r"(aa),
505                          "r"(tt));
506 
507     return out;
508 }
509 
smlabb(int32 q_value,int32 coeff,int32 round)510 __inline int32 smlabb(int32 q_value, int32 coeff, int32 round)
511 {
512     register int32 out;
513     register int32 aa = (int32)q_value;
514     register int32 bb = (int32)coeff;
515     register int32 cc = (int32)round;
516 
517     asm volatile("smlabb %0, %1, %2, %3"
518              : "=&r"(out)
519                          : "r"(aa),
520                          "r"(bb),
521                          "r"(cc));
522     return out;
523 }
524 
smulbb(int32 q_scale,int32 coeff)525 __inline int32 smulbb(int32 q_scale, int32 coeff)
526 {
527     register int32 out;
528     register int32 aa = (int32)q_scale;
529     register int32 bb = (int32)coeff;
530 
531     asm volatile("smulbb %0, %1, %2"
532              : "=&r"(out)
533                          : "r"(aa),
534                          "r"(bb));
535     return out;
536 }
537 
aan_dc_scale(int32 coeff,int32 QP)538 __inline int32 aan_dc_scale(int32 coeff, int32 QP)
539 {
540     register int32 out;
541     register int32 cc = coeff;
542     register int32 qp = QP;
543 
544     asm volatile("cmp %1, #0\n\t"
545                  "addle %0, %1, %2, asr #1\n\t"
546                  "subgt %0, %1, %2, asr #1"
547              : "=&r"(out)
548                          : "r"(cc),
549                          "r"(qp));
550     return out;
551 }
552 
clip_2047(int32 q_value,int32 tmp)553 __inline int32 clip_2047(int32 q_value, int32 tmp)
554 {
555     register int32 coeff;
556     asm volatile("add    %1, %0, %2\n\t"
557                  "subs   %1, %1, #0xF00\n\t"
558                  "subcss %1, %1, #0xFE\n\t"
559                  "eorhi  %0, %2, %0, asr #31"
560              : "+r"(q_value),
561                  "=&r"(coeff)
562                          : "r"(tmp));
563 
564     return q_value;
565 }
566 
coeff_dequant_mpeg(int32 q_value,int32 stepsize,int32 QP,int32 tmp)567 __inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp)
568 {
569     register int32 out;
570     register int32 temp1;
571     register int32 qv = q_value;
572     register int32 ss = stepsize;
573     register int32 qp = QP;
574     register int32 tt = tmp;
575 
576     asm volatile("movs    %1, %2, lsl #1\n\t"
577                  "mul     %0, %3, %4\n\t"
578                  "addgt   %1, %1, #1\n\t"
579                  "sublt   %1, %1, #1\n\t"
580                  "mul     %0, %1, %0\n\t"
581                  "addlt   %0, %0, #15\n\t"
582                  "mov     %0, %0, asr #4\n\t"
583                  "add     %1, %0, %5\n\t"
584                  "subs    %1, %1, #0xF00\n\t"
585                  "subcss  %1, %1, #0xFE\n\t"
586                  "eorhi   %0, %5, %0, asr #31"
587              : "=&r"(out),
588                  "=&r"(temp1)
589                          : "r"(qv),
590                          "r"(ss),
591                          "r"(qp),
592                          "r"(tt));
593 
594     return out;
595 
596 }
597 
coeff_dequant_mpeg_intra(int32 q_value,int32 tmp)598 __inline int32 coeff_dequant_mpeg_intra(int32 q_value, int32 tmp)
599 {
600     register int32 out;
601     register int32 temp1;
602     register int32 qv = q_value;
603     register int32 tt = tmp;
604 
605     asm volatile("movs    %1, %2, lsl #1\n\t"
606                  "addlt   %1, %1, #15\n\t"
607                  "mov     %0, %1, asr #4\n\t"
608                  "add     %1, %0, %3\n\t"
609                  "subs    %1, %1, #0xF00\n\t"
610                  "subcss  %1, %1, #0xFE\n\t"
611                  "eorhi   %0, %3, %0, asr #31"
612              : "=&r"(out),
613                  "=&r"(temp1)
614                          : "r"(qv),
615                          "r"(tt));
616     return out;
617 }
618 
619 
620 #endif // Platform
621 
622 
623 #endif //_FASTQUANT_INLINE_H_
624 
625 
626