1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18 /*********************************************************************************/
19 /* Filename: fastquant_inline.h */
20 /* Description: Implementation for in-line functions used in dct.cpp */
21 /* Modified: */
22 /*********************************************************************************/
23 #ifndef _FASTQUANT_INLINE_H_
24 #define _FASTQUANT_INLINE_H_
25
26 #include "mp4def.h"
27
28 #if !defined(PV_ARM_GCC_V5) && !defined(PV_ARM_GCC_V4) /* ARM GNU COMPILER */
29
aan_scale(int32 q_value,int32 coeff,int32 round,int32 QPdiv2)30 __inline int32 aan_scale(int32 q_value, int32 coeff, int32 round, int32 QPdiv2)
31 {
32 q_value = coeff * q_value + round;
33 coeff = q_value >> 16;
34 if (coeff < 0) coeff += QPdiv2;
35 else coeff -= QPdiv2;
36
37 return coeff;
38 }
39
40
coeff_quant(int32 coeff,int32 q_scale,int32 shift)41 __inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift)
42 {
43 int32 q_value;
44
45 q_value = coeff * q_scale; //q_value = -((-(coeff + QPdiv2)*q_scale)>>LSL);
46 q_value >>= shift; //q_value = (((coeff - QPdiv2)*q_scale)>>LSL );
47 q_value += ((UInt)q_value >> 31); /* add one if negative */
48
49 return q_value;
50 }
51
coeff_clip(int32 q_value,int32 ac_clip)52 __inline int32 coeff_clip(int32 q_value, int32 ac_clip)
53 {
54 int32 coeff = q_value + ac_clip;
55
56 if ((UInt)coeff > (UInt)(ac_clip << 1))
57 q_value = ac_clip ^(q_value >> 31);
58
59 return q_value;
60 }
61
coeff_dequant(int32 q_value,int32 QPx2,int32 Addition,int32 tmp)62 __inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp)
63 {
64 int32 coeff;
65
66 OSCL_UNUSED_ARG(tmp);
67
68 if (q_value < 0)
69 {
70 coeff = q_value * QPx2 - Addition;
71 if (coeff < -2048)
72 coeff = -2048;
73 }
74 else
75 {
76 coeff = q_value * QPx2 + Addition;
77 if (coeff > 2047)
78 coeff = 2047;
79 }
80 return coeff;
81 }
82
smlabb(int32 q_value,int32 coeff,int32 round)83 __inline int32 smlabb(int32 q_value, int32 coeff, int32 round)
84 {
85 q_value = coeff * q_value + round;
86
87 return q_value;
88 }
89
smulbb(int32 q_scale,int32 coeff)90 __inline int32 smulbb(int32 q_scale, int32 coeff)
91 {
92 int32 q_value;
93
94 q_value = coeff * q_scale;
95
96 return q_value;
97 }
98
aan_dc_scale(int32 coeff,int32 QP)99 __inline int32 aan_dc_scale(int32 coeff, int32 QP)
100 {
101
102 if (coeff < 0) coeff += (QP >> 1);
103 else coeff -= (QP >> 1);
104
105 return coeff;
106 }
107
clip_2047(int32 q_value,int32 tmp)108 __inline int32 clip_2047(int32 q_value, int32 tmp)
109 {
110 OSCL_UNUSED_ARG(tmp);
111
112 if (q_value < -2048)
113 {
114 q_value = -2048;
115 }
116 else if (q_value > 2047)
117 {
118 q_value = 2047;
119 }
120
121 return q_value;
122 }
123
coeff_dequant_mpeg(int32 q_value,int32 stepsize,int32 QP,int32 tmp)124 __inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp)
125 {
126 int32 coeff;
127
128 OSCL_UNUSED_ARG(tmp);
129
130 coeff = q_value << 1;
131 stepsize *= QP;
132 if (coeff > 0)
133 {
134 q_value = (coeff + 1) * stepsize;
135 q_value >>= 4;
136 if (q_value > 2047) q_value = 2047;
137 }
138 else
139 {
140 q_value = (coeff - 1) * stepsize;
141 q_value += 15;
142 q_value >>= 4;
143 if (q_value < -2048) q_value = -2048;
144 }
145
146 return q_value;
147 }
148
coeff_dequant_mpeg_intra(int32 q_value,int32 tmp)149 __inline int32 coeff_dequant_mpeg_intra(int32 q_value, int32 tmp)
150 {
151 OSCL_UNUSED_ARG(tmp);
152
153 q_value <<= 1;
154 if (q_value > 0)
155 {
156 q_value >>= 4;
157 if (q_value > 2047) q_value = 2047;
158 }
159 else
160 {
161 q_value += 15;
162 q_value >>= 4;
163 if (q_value < -2048) q_value = -2048;
164 }
165
166 return q_value;
167 }
168
169 #elif defined(__CC_ARM) /* only work with arm v5 */
170
171 #if defined(__TARGET_ARCH_5TE)
172
/* armcc / ARMv5TE version of aan_scale.
 * Multiply-accumulates coeff and q_value with round (SMLABB), shifts the sum
 * right by 16 (arithmetic), then adds QPdiv2 when the shifted value is <= 0
 * and subtracts it when > 0. Returns the adjusted value.
 * NOTE(review): SMLABB multiplies only the signed low halfwords of its two
 * factors -- presumably callers keep coeff and q_value within 16 bits; confirm.
 * NOTE(review): MOVS does not update the V flag, yet LE/GT test it, so the
 * two conditional instructions also depend on V as left by earlier code.
 */
aan_scale(int32 q_value,int32 coeff,int32 round,int32 QPdiv2)173 __inline int32 aan_scale(int32 q_value, int32 coeff,
174 int32 round, int32 QPdiv2)
175 {
176 __asm
177 {
178 smlabb q_value, coeff, q_value, round /* q_value = coeff * q_value + round */
179 movs coeff, q_value, asr #16 /* coeff = q_value >> 16, sets N and Z */
180 addle coeff, coeff, QPdiv2 /* <= 0: add QPdiv2 */
181 subgt coeff, coeff, QPdiv2 /* > 0: subtract QPdiv2 */
182 }
183
184 return coeff;
185 }
186
/* armcc / ARMv5TE version of coeff_quant.
 * Multiplies q_scale by coeff (SMULBB), shifts the product right by `shift`
 * (arithmetic) and adds the sign bit of the shifted value, so a negative
 * result is incremented by one.
 * NOTE(review): SMULBB multiplies only the signed low halfwords of its
 * operands -- presumably both inputs fit in 16 bits; confirm with callers.
 */
coeff_quant(int32 coeff,int32 q_scale,int32 shift)187 __inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift)
188 {
189 int32 q_value;
190
191 __asm
192 {
193 smulbb q_value, q_scale, coeff /* q_value = q_scale * coeff */
194 mov coeff, q_value, asr shift /* coeff = q_value >> shift */
195 add q_value, coeff, coeff, lsr #31 /* add one if negative */
196 }
197
198
199 return q_value;
200 }
201
/* armcc / ARMv5TE version of coeff_dequant.
 * coeff = q_value * QPx2 (SMULBB), minus Addition when q_value < 0 and plus
 * Addition otherwise. If (coeff + tmp), viewed as unsigned, is above 4094,
 * coeff is replaced by tmp ^ (coeff >> 31).
 * NOTE(review): tmp is presumably 2047, as the other clipping helpers in
 * this header require, which makes the last step a clamp to [-2048, 2047]
 * -- confirm with callers.
 */
coeff_dequant(int32 q_value,int32 QPx2,int32 Addition,int32 tmp)202 __inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp)
203 {
204 int32 coeff;
205
206 __asm
207 {
208 cmp q_value, #0 /* flags from the sign of q_value */
209 smulbb coeff, q_value, QPx2 /* no S suffix: flags from CMP are kept */
210 sublt coeff, coeff, Addition
211 addge coeff, coeff, Addition
212 add q_value, coeff, tmp /* q_value is reused as scratch from here */
213 subs q_value, q_value, #3840
214 subcss q_value, q_value, #254 /* only if no borrow; 3840 + 254 = 4094 */
215 eorhi coeff, tmp, coeff, asr #31 /* out of range: tmp or ~tmp by sign */
216 }
217
218 return coeff;
219 }
220
/* armcc / ARMv5TE wrapper around the SMLABB instruction:
 * returns coeff * q_value + round.
 * NOTE(review): SMLABB multiplies only the signed low halfwords of coeff and
 * q_value -- presumably both fit in 16 bits; confirm with callers.
 */
smlabb(int32 q_value,int32 coeff,int32 round)221 __inline int32 smlabb(int32 q_value, int32 coeff, int32 round)
222 {
223 __asm
224 {
225 smlabb q_value, coeff, q_value, round /* q_value = coeff * q_value + round */
226 }
227
228 return q_value;
229 }
230
/* armcc / ARMv5TE wrapper around the SMULBB instruction:
 * returns q_scale * coeff.
 * NOTE(review): SMULBB multiplies only the signed low halfwords of its
 * operands -- presumably both fit in 16 bits; confirm with callers.
 */
smulbb(int32 q_scale,int32 coeff)231 __inline int32 smulbb(int32 q_scale, int32 coeff)
232 {
233 int32 q_value;
234
235 __asm
236 {
237 smulbb q_value, q_scale, coeff /* q_value = q_scale * coeff */
238 }
239
240 return q_value;
241 }
242
/* armcc / ARMv5TE version of coeff_dequant_mpeg.
 * coeff = 2 * q_value, moved one further from zero (+1 if > 0, -1 if < 0);
 * multiplied by stepsize * QP; negative inputs get +15 before the arithmetic
 * shift right by 4. The result is then clipped with tmp: if (result + tmp),
 * viewed as unsigned, is above 4094, it becomes tmp ^ (result >> 31).
 * NOTE(review): MOVS does not update the V flag, yet GT/LT test it, so the
 * conditional instructions also depend on V as left by earlier code.
 */
coeff_dequant_mpeg(int32 q_value,int32 stepsize,int32 QP,int32 tmp)243 __inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp)
244 {
245 /* tmp must have value of 2047 */
246 int32 coeff;
247 __asm
248 {
249 movs coeff, q_value, lsl #1 /* coeff = 2 * q_value, sets N and Z */
250 smulbb stepsize, stepsize, QP /* stepsize *= QP (low halfwords) */
251 addgt coeff, coeff, #1
252 sublt coeff, coeff, #1
253 smulbb q_value, coeff, stepsize
254 addlt q_value, q_value, #15 /* still the flags from MOVS */
255 mov q_value, q_value, asr #4
256 add coeff, q_value, tmp /* coeff is reused as scratch from here */
257 subs coeff, coeff, #0xf00
258 subcss coeff, coeff, #0xfe /* only if no borrow; 0xf00 + 0xfe = 4094 */
259 eorhi q_value, tmp, q_value, asr #31 /* out of range: tmp or ~tmp by sign */
260 }
261
262 return q_value;
263 }
264
265
266 #else // not ARMV5TE
267
/* armcc version of aan_scale for targets without ARMv5TE.
 * Computes coeff * q_value + round with MLA, shifts the sum right by 16
 * (arithmetic), then adds QPdiv2 when the shifted value is <= 0 and subtracts
 * it when > 0. Returns the adjusted value.
 * NOTE(review): MOVS does not update the V flag, yet LE/GT test it, so the
 * two conditional instructions also depend on V as left by earlier code.
 */
aan_scale(int32 q_value,int32 coeff,int32 round,int32 QPdiv2)268 __inline int32 aan_scale(int32 q_value, int32 coeff,
269 int32 round, int32 QPdiv2)
270 {
271 __asm
272 {
273 mla q_value, coeff, q_value, round /* q_value = coeff * q_value + round */
274 movs coeff, q_value, asr #16 /* coeff = q_value >> 16, sets N and Z */
275 addle coeff, coeff, QPdiv2 /* <= 0: add QPdiv2 */
276 subgt coeff, coeff, QPdiv2 /* > 0: subtract QPdiv2 */
277 }
278
279 return coeff;
280 }
281
/* armcc version of coeff_quant for targets without ARMv5TE.
 * Multiplies q_scale by coeff (MUL), shifts the product right by `shift`
 * (arithmetic) and adds the sign bit of the shifted value, so a negative
 * result is incremented by one.
 */
coeff_quant(int32 coeff,int32 q_scale,int32 shift)282 __inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift)
283 {
284 int32 q_value;
285
286 __asm
287 {
288 mul q_value, q_scale, coeff /* q_value = q_scale * coeff */
289 mov coeff, q_value, asr shift /* coeff = q_value >> shift */
290 add q_value, coeff, coeff, lsr #31 /* add one if negative */
291 }
292
293
294 return q_value;
295 }
296
297
/* armcc version of coeff_dequant for targets without ARMv5TE.
 * coeff = q_value * QPx2 (MUL), minus Addition when q_value < 0 and plus
 * Addition otherwise. If (coeff + tmp), viewed as unsigned, is above 4094,
 * coeff is replaced by tmp ^ (coeff >> 31).
 * NOTE(review): tmp is presumably 2047, as the other clipping helpers in
 * this header require, which makes the last step a clamp to [-2048, 2047]
 * -- confirm with callers.
 */
coeff_dequant(int32 q_value,int32 QPx2,int32 Addition,int32 tmp)298 __inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp)
299 {
300 int32 coeff;
301
302 __asm
303 {
304 cmp q_value, #0 /* flags from the sign of q_value */
305 mul coeff, q_value, QPx2 /* no S suffix: flags from CMP are kept */
306 sublt coeff, coeff, Addition
307 addge coeff, coeff, Addition
308 add q_value, coeff, tmp /* q_value is reused as scratch from here */
309 subs q_value, q_value, #3840
310 subcss q_value, q_value, #254 /* only if no borrow; 3840 + 254 = 4094 */
311 eorhi coeff, tmp, coeff, asr #31 /* out of range: tmp or ~tmp by sign */
312 }
313
314 return coeff;
315 }
316
/* armcc stand-in for SMLABB on targets without ARMv5TE:
 * returns coeff * q_value + round, computed with MLA.
 */
smlabb(int32 q_value,int32 coeff,int32 round)317 __inline int32 smlabb(int32 q_value, int32 coeff, int32 round)
318 {
319 __asm
320 {
321 mla q_value, coeff, q_value, round /* q_value = coeff * q_value + round */
322 }
323
324 return q_value;
325 }
326
/* armcc stand-in for SMULBB on targets without ARMv5TE:
 * returns q_scale * coeff, computed with MUL.
 */
smulbb(int32 q_scale,int32 coeff)327 __inline int32 smulbb(int32 q_scale, int32 coeff)
328 {
329 int32 q_value;
330
331 __asm
332 {
333 mul q_value, q_scale, coeff /* q_value = q_scale * coeff */
334 }
335
336 return q_value;
337 }
338
339
/* armcc version of coeff_dequant_mpeg for targets without ARMv5TE.
 * coeff = 2 * q_value, moved one further from zero (+1 if > 0, -1 if < 0);
 * multiplied by stepsize * QP; negative inputs get +15 before the arithmetic
 * shift right by 4. The result is then clipped with tmp: if (result + tmp),
 * viewed as unsigned, is above 4094, it becomes tmp ^ (result >> 31).
 * NOTE(review): MOVS does not update the V flag, yet GT/LT test it, so the
 * conditional instructions also depend on V as left by earlier code.
 */
coeff_dequant_mpeg(int32 q_value,int32 stepsize,int32 QP,int32 tmp)340 __inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp)
341 {
342 /* tmp must have value of 2047 */
343 int32 coeff;
344 __asm
345 {
346 movs coeff, q_value, lsl #1 /* coeff = 2 * q_value, sets N and Z */
347 mul stepsize, stepsize, QP /* stepsize *= QP */
348 addgt coeff, coeff, #1
349 sublt coeff, coeff, #1
350 mul q_value, coeff, stepsize
351 addlt q_value, q_value, #15 /* still the flags from MOVS */
352 mov q_value, q_value, asr #4
353 add coeff, q_value, tmp /* coeff is reused as scratch from here */
354 subs coeff, coeff, #0xf00
355 subcss coeff, coeff, #0xfe /* only if no borrow; 0xf00 + 0xfe = 4094 */
356 eorhi q_value, tmp, q_value, asr #31 /* out of range: tmp or ~tmp by sign */
357 }
358
359 return q_value;
360 }
361
362
363 #endif
364
/* armcc version of coeff_clip (shared by both architecture branches).
 * Returns q_value unchanged unless (q_value + ac_clip), viewed as unsigned,
 * exceeds 2 * ac_clip; in that case returns ac_clip ^ (q_value >> 31), i.e.
 * ac_clip for non-negative q_value and ~ac_clip for negative q_value.
 */
coeff_clip(int32 q_value,int32 ac_clip)365 __inline int32 coeff_clip(int32 q_value, int32 ac_clip)
366 {
367 int32 coeff;
368
369 __asm
370 {
371 add coeff, q_value, ac_clip /* coeff is scratch only */
372 subs coeff, coeff, ac_clip, lsl #1 /* compare against 2 * ac_clip */
373 eorhi q_value, ac_clip, q_value, asr #31 /* unsigned higher: clip */
374 }
375
376 return q_value;
377 }
378
/* armcc version of aan_dc_scale (shared by both architecture branches).
 * Adds QP >> 1 to coeff when coeff <= 0 and subtracts QP >> 1 when coeff > 0.
 */
aan_dc_scale(int32 coeff,int32 QP)379 __inline int32 aan_dc_scale(int32 coeff, int32 QP)
380 {
381
382 __asm
383 {
384 cmp coeff, #0
385 addle coeff, coeff, QP, asr #1 /* <= 0: coeff += QP >> 1 */
386 subgt coeff, coeff, QP, asr #1 /* > 0: coeff -= QP >> 1 */
387 }
388
389 return coeff;
390 }
391
/* armcc version of clip_2047 (shared by both architecture branches).
 * If (q_value + tmp), viewed as unsigned, is above 4094, q_value is replaced
 * by tmp ^ (q_value >> 31); otherwise it is returned unchanged. With
 * tmp == 2047 this limits q_value to [-2048, 2047].
 */
clip_2047(int32 q_value,int32 tmp)392 __inline int32 clip_2047(int32 q_value, int32 tmp)
393 {
394 /* tmp must have value of 2047 */
395 int32 coeff;
396
397 __asm
398 {
399 add coeff, q_value, tmp /* coeff is scratch only */
400 subs coeff, coeff, #0xf00
401 subcss coeff, coeff, #0xfe /* only if no borrow; 0xf00 + 0xfe = 4094 */
402 eorhi q_value, tmp, q_value, asr #31 /* out of range: tmp or ~tmp by sign */
403 }
404
405 return q_value;
406 }
407
/* armcc version of coeff_dequant_mpeg_intra (shared by both architecture
 * branches).
 * Doubles q_value, adds 15 when the doubled value is negative, shifts right
 * by 4 (arithmetic), then clips with tmp: if (result + tmp), viewed as
 * unsigned, is above 4094, the result becomes tmp ^ (result >> 31).
 * NOTE(review): tmp is presumably 2047 as in clip_2047 -- confirm.
 * NOTE(review): MOVS does not update the V flag, yet LT tests it, so ADDLT
 * also depends on V as left by earlier code.
 */
coeff_dequant_mpeg_intra(int32 q_value,int32 tmp)408 __inline int32 coeff_dequant_mpeg_intra(int32 q_value, int32 tmp)
409 {
410 int32 coeff;
411
412 __asm
413 {
414 movs q_value, q_value, lsl #1 /* q_value *= 2, sets N and Z */
415 addlt q_value, q_value, #15
416 mov q_value, q_value, asr #4
417 add coeff, q_value, tmp /* coeff is scratch only */
418 subs coeff, coeff, #0xf00
419 subcss coeff, coeff, #0xfe /* only if no borrow; 0xf00 + 0xfe = 4094 */
420 eorhi q_value, tmp, q_value, asr #31 /* out of range: tmp or ~tmp by sign */
421 }
422
423 return q_value;
424 }
425
426 #elif ( defined(PV_ARM_GCC_V4) || defined(PV_ARM_GCC_V5) ) /* ARM GNU COMPILER */
427
aan_scale(int32 q_value,int32 coeff,int32 round,int32 QPdiv2)428 __inline int32 aan_scale(int32 q_value, int32 coeff,
429 int32 round, int32 QPdiv2)
430 {
431 register int32 out;
432 register int32 qv = q_value;
433 register int32 cf = coeff;
434 register int32 rr = round;
435 register int32 qp = QPdiv2;
436
437 asm volatile("smlabb %0, %2, %1, %3\n\t"
438 "movs %0, %0, asr #16\n\t"
439 "addle %0, %0, %4\n\t"
440 "subgt %0, %0, %4"
441 : "=&r"(out)
442 : "r"(qv),
443 "r"(cf),
444 "r"(rr),
445 "r"(qp));
446 return out;
447 }
448
coeff_quant(int32 coeff,int32 q_scale,int32 shift)449 __inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift)
450 {
451 register int32 out;
452 register int32 temp1;
453 register int32 cc = coeff;
454 register int32 qs = q_scale;
455 register int32 ss = shift;
456
457 asm volatile("smulbb %0, %3, %2\n\t"
458 "mov %1, %0, asr %4\n\t"
459 "add %0, %1, %1, lsr #31"
460 : "=&r"(out),
461 "=&r"(temp1)
462 : "r"(cc),
463 "r"(qs),
464 "r"(ss));
465
466 return out;
467 }
468
coeff_clip(int32 q_value,int32 ac_clip)469 __inline int32 coeff_clip(int32 q_value, int32 ac_clip)
470 {
471 register int32 coeff;
472
473 asm volatile("add %1, %0, %2\n\t"
474 "subs %1, %1, %2, lsl #1\n\t"
475 "eorhi %0, %2, %0, asr #31"
476 : "+r"(q_value),
477 "=&r"(coeff)
478 : "r"(ac_clip));
479
480 return q_value;
481 }
482
coeff_dequant(int32 q_value,int32 QPx2,int32 Addition,int32 tmp)483 __inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp)
484 {
485 register int32 out;
486 register int32 temp1;
487 register int32 qv = q_value;
488 register int32 qp = QPx2;
489 register int32 aa = Addition;
490 register int32 tt = tmp;
491
492 asm volatile("cmp %2, #0\n\t"
493 "mul %0, %2, %3\n\t"
494 "sublt %0, %0, %4\n\t"
495 "addge %0, %0, %4\n\t"
496 "add %1, %0, %5\n\t"
497 "subs %1, %1, #3840\n\t"
498 "subcss %1, %1, #254\n\t"
499 "eorhi %0, %5, %0, asr #31"
500 : "=&r"(out),
501 "=&r"(temp1)
502 : "r"(qv),
503 "r"(qp),
504 "r"(aa),
505 "r"(tt));
506
507 return out;
508 }
509
smlabb(int32 q_value,int32 coeff,int32 round)510 __inline int32 smlabb(int32 q_value, int32 coeff, int32 round)
511 {
512 register int32 out;
513 register int32 aa = (int32)q_value;
514 register int32 bb = (int32)coeff;
515 register int32 cc = (int32)round;
516
517 asm volatile("smlabb %0, %1, %2, %3"
518 : "=&r"(out)
519 : "r"(aa),
520 "r"(bb),
521 "r"(cc));
522 return out;
523 }
524
smulbb(int32 q_scale,int32 coeff)525 __inline int32 smulbb(int32 q_scale, int32 coeff)
526 {
527 register int32 out;
528 register int32 aa = (int32)q_scale;
529 register int32 bb = (int32)coeff;
530
531 asm volatile("smulbb %0, %1, %2"
532 : "=&r"(out)
533 : "r"(aa),
534 "r"(bb));
535 return out;
536 }
537
aan_dc_scale(int32 coeff,int32 QP)538 __inline int32 aan_dc_scale(int32 coeff, int32 QP)
539 {
540 register int32 out;
541 register int32 cc = coeff;
542 register int32 qp = QP;
543
544 asm volatile("cmp %1, #0\n\t"
545 "addle %0, %1, %2, asr #1\n\t"
546 "subgt %0, %1, %2, asr #1"
547 : "=&r"(out)
548 : "r"(cc),
549 "r"(qp));
550 return out;
551 }
552
clip_2047(int32 q_value,int32 tmp)553 __inline int32 clip_2047(int32 q_value, int32 tmp)
554 {
555 register int32 coeff;
556 asm volatile("add %1, %0, %2\n\t"
557 "subs %1, %1, #0xF00\n\t"
558 "subcss %1, %1, #0xFE\n\t"
559 "eorhi %0, %2, %0, asr #31"
560 : "+r"(q_value),
561 "=&r"(coeff)
562 : "r"(tmp));
563
564 return q_value;
565 }
566
coeff_dequant_mpeg(int32 q_value,int32 stepsize,int32 QP,int32 tmp)567 __inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp)
568 {
569 register int32 out;
570 register int32 temp1;
571 register int32 qv = q_value;
572 register int32 ss = stepsize;
573 register int32 qp = QP;
574 register int32 tt = tmp;
575
576 asm volatile("movs %1, %2, lsl #1\n\t"
577 "mul %0, %3, %4\n\t"
578 "addgt %1, %1, #1\n\t"
579 "sublt %1, %1, #1\n\t"
580 "mul %0, %1, %0\n\t"
581 "addlt %0, %0, #15\n\t"
582 "mov %0, %0, asr #4\n\t"
583 "add %1, %0, %5\n\t"
584 "subs %1, %1, #0xF00\n\t"
585 "subcss %1, %1, #0xFE\n\t"
586 "eorhi %0, %5, %0, asr #31"
587 : "=&r"(out),
588 "=&r"(temp1)
589 : "r"(qv),
590 "r"(ss),
591 "r"(qp),
592 "r"(tt));
593
594 return out;
595
596 }
597
coeff_dequant_mpeg_intra(int32 q_value,int32 tmp)598 __inline int32 coeff_dequant_mpeg_intra(int32 q_value, int32 tmp)
599 {
600 register int32 out;
601 register int32 temp1;
602 register int32 qv = q_value;
603 register int32 tt = tmp;
604
605 asm volatile("movs %1, %2, lsl #1\n\t"
606 "addlt %1, %1, #15\n\t"
607 "mov %0, %1, asr #4\n\t"
608 "add %1, %0, %3\n\t"
609 "subs %1, %1, #0xF00\n\t"
610 "subcss %1, %1, #0xFE\n\t"
611 "eorhi %0, %3, %0, asr #31"
612 : "=&r"(out),
613 "=&r"(temp1)
614 : "r"(qv),
615 "r"(tt));
616 return out;
617 }
618
619
620 #endif // Platform
621
622
623 #endif //_FASTQUANT_INLINE_H_
624
625
626