1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 /*
19 ------------------------------------------------------------------------------
20  MODULE DESCRIPTION
21 
 This file contains the functions that transform an 8x8 image block from
 dequantized DCT coefficients to spatial domain pixel values by calculating
 the inverse discrete cosine transform (IDCT).
25 
26 ------------------------------------------------------------------------------
27 */
28 /*----------------------------------------------------------------------------
29 ; INCLUDES
30 ----------------------------------------------------------------------------*/
31 #include "mp4dec_lib.h"
32 #include "idct.h"
33 #include "motion_comp.h"
34 #ifndef FAST_IDCT
35 
36 /*
37 ------------------------------------------------------------------------------
38  FUNCTION NAME: idct
39 ------------------------------------------------------------------------------
40  INPUT AND OUTPUT DEFINITIONS FOR idct
41 
42  Inputs:
    blk = pointer to the buffer containing the dequantized DCT
          coefficients of type int for an 8x8 image block;
          values lie in the range [-2048, 2047], as defined by the standard.
46 
47  Local Stores/Buffers/Pointers Needed:
48     None
49 
50  Global Stores/Buffers/Pointers Needed:
51     None
52 
53  Outputs:
54     None
55 
56  Pointers and Buffers Modified:
    blk points to the computed IDCT values for an 8x8 image block.
58 
59  Local Stores Modified:
60     None
61 
62  Global Stores Modified:
63     None
64 
65 ------------------------------------------------------------------------------
66  FUNCTION DESCRIPTION FOR idct
67 
 This function transforms an 8x8 image block from dequantized DCT coefficients
 (F(u,v)) to spatial domain pixel values (f(x,y)) by performing the two
 dimensional inverse discrete cosine transform (IDCT).

    f(x,y) = \sum_{u=0}^{7} \sum_{v=0}^{7} \frac{C(u)}{2} \frac{C(v)}{2} F(u,v)
             \cos[(2x+1) u \pi / 16] \cos[(2y+1) v \pi / 16]

    where   C(i) = 1/\sqrt{2}   if i = 0
            C(i) = 1            otherwise
79 
80  2-D IDCT can be separated as horizontal(row-wise) and vertical(column-wise)
81  1-D IDCTs. Therefore, 2-D IDCT values are found by the following two steps:
 1. Find horizontal 1-D IDCT values for each row from 8x8 dequantized DCT
    coefficients by row IDCT operation.

    g(x,v) = \sum_{u=0}^{7} \frac{C(u)}{2} F(u,v) \cos[(2x+1) u \pi / 16]

 2. Find vertical 1-D IDCT values for each column from the results of 1
    by column IDCT operation.

    f(x,y) = \sum_{v=0}^{7} \frac{C(v)}{2} g(x,v) \cos[(2y+1) v \pi / 16]

 The two 1-D passes are independent of each other, so they may be applied in
 either order; the code in this file runs the column pass first.
97 
98 ------------------------------------------------------------------------------
99  REQUIREMENTS FOR idct
100 
101  None
102 
103 ------------------------------------------------------------------------------
104 */
105 /*  REFERENCES FOR idct */
106 /* idct.c, inverse fast discrete cosine transform
107  inverse two dimensional DCT, Chen-Wang algorithm
108  (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984)
109  32-bit integer arithmetic (8 bit coefficients)
110  11 mults, 29 adds per DCT
111  sE, 18.8.91
112 
 coefficients extended to 12 bit for IEEE1180-1990
114  compliance                           sE,  2.1.94
115 */
116 
117 
118 /*----------------------------------------------------------------------------
119 ; Function Code FOR idct
120 ----------------------------------------------------------------------------*/
/*
 * idct_intra
 *
 * Runs the separable 8x8 inverse DCT on blk and writes the result straight
 * to comp as bytes (no prediction is added).
 *
 *   blk    64 dequantized coefficients, row-major; only read by this function
 *   comp   first output byte; 8 bytes are written per row, for 8 rows
 *   width  byte distance between the starts of consecutive output rows
 *
 * Pass 1 transforms every column into tmpBLK at 8 times the final scale.
 * Pass 2 transforms every row of tmpBLK, removes the extra scale, runs each
 * sample through CLIP_RESULT and stores it.  W1..W7 and CLIP_RESULT come from
 * the project headers; the Wi are presumably cosine constants scaled by 2^11
 * (the 2048 / 256 factors below only line up under that assumption).
 *
 * Scaling is done with multiplications rather than "<<" because the operands
 * may be negative and left-shifting a negative signed value is undefined
 * behaviour in C.  For in-range values the results are identical.
 */
void idct_intra(
    int *blk, uint8 *comp, int width
)
{
    int i;
    int k;
    int32   tmpBLK[64];                 /* column-pass output, row-major     */
    int32   *tmpBLK32 = &tmpBLK[0];
    int32   r0, r1, r2, r3, r4, r5, r6, r7, r8; /* butterfly nodes */
    int32   sum[8];                     /* final-stage sums of one row       */
    int32   a;
    int offset = width - 8;             /* skip from end of row to next row  */

    /* ---------------- pass 1: column (vertical) IDCT ---------------- */
    for (i = B_SIZE - 1; i >= 0; i--)
    {
        /* Even term 4 is never multiplied by a Wi, so it is brought to  */
        /* the same scale (x2048) by hand.                               */
        r1 = blk[B_SIZE * 4 + i] * 2048;

        r2 = blk[B_SIZE * 6 + i];
        r3 = blk[B_SIZE * 2 + i];
        r4 = blk[B_SIZE * 1 + i];
        r5 = blk[B_SIZE * 7 + i];
        r6 = blk[B_SIZE * 5 + i];
        r7 = blk[B_SIZE * 3 + i];

        if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
        {
            /* Only the first coefficient of this column can be non-zero: */
            /* every output is that value times 8, which is exactly what  */
            /* the full path yields ((dc * 2048 + 128) >> 8).             */
            a = blk[B_SIZE * 0 + i] * 8;
            for (k = 0; k < B_SIZE; k++)
            {
                tmpBLK32[B_SIZE * k + i] = a;
            }
        }
        else
        {
            /* +128 is the rounding term for the ">> 8" in stage four */
            r0 = blk[8 * 0 + i] * 2048 + 128;

            /* first stage: odd-part rotations */
            r8 = W7 * (r4 + r5);
            r4 = (r8 + (W1 - W7) * r4);
            r5 = (r8 - (W1 + W7) * r5);
            r8 = W3 * (r6 + r7);
            r6 = (r8 - (W3 - W5) * r6);
            r7 = (r8 - (W3 + W5) * r7);

            /* second stage */
            r8 = r0 + r1;
            r0 -= r1;

            r1 = W6 * (r3 + r2);
            r2 = (r1 - (W2 + W6) * r2);
            r3 = (r1 + (W2 - W6) * r3);

            r1 = r4 + r6;
            r4 -= r6;
            r6 = r5 + r7;
            r5 -= r7;

            /* third stage; 181/256 approximates 1/sqrt(2), +128 rounds */
            r7 = r8 + r3;
            r8 -= r3;
            r3 = r0 + r2;
            r0 -= r2;
            r2 = (181 * (r4 + r5) + 128) >> 8;
            r4 = (181 * (r4 - r5) + 128) >> 8;

            /* fourth stage: x2048 on the way in, >> 8 here, so the      */
            /* column results are kept at 8 times scale (3 extra bits of */
            /* precision for the row pass).                              */
            tmpBLK32[0 + i] = (r7 + r1) >> 8;
            tmpBLK32[(1<<3) + i] = (r3 + r2) >> 8;
            tmpBLK32[(2<<3) + i] = (r0 + r4) >> 8;
            tmpBLK32[(3<<3) + i] = (r8 + r6) >> 8;
            tmpBLK32[(4<<3) + i] = (r8 - r6) >> 8;
            tmpBLK32[(5<<3) + i] = (r0 - r4) >> 8;
            tmpBLK32[(6<<3) + i] = (r3 - r2) >> 8;
            tmpBLK32[(7<<3) + i] = (r7 - r1) >> 8;
        }
    }

    /* ----------------- pass 2: row (horizontal) IDCT ----------------- */
    for (i = 0 ; i < B_SIZE; i++)
    {
        /* Term 4 is scaled by 256 here; the odd/even products below are */
        /* shifted down by 3 to land on the same scale.                  */
        r1 = tmpBLK32[4+(i<<3)] * 256;

        r2 = tmpBLK32[6+(i<<3)];
        r3 = tmpBLK32[2+(i<<3)];
        r4 = tmpBLK32[1+(i<<3)];
        r5 = tmpBLK32[7+(i<<3)];
        r6 = tmpBLK32[5+(i<<3)];
        r7 = tmpBLK32[3+(i<<3)];

        if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
        {
            /* Only the first term of this row can be non-zero: all 8    */
            /* pixels are equal.  (t + 32) >> 6 is the same value as the */
            /* full path's (t * 256 + 8192) >> 14.                       */
            a = (tmpBLK32[0+(i<<3)] + 32) >> 6;
            CLIP_RESULT(a)
            for (k = 0; k < 8; k++)
            {
                *comp++ = a;
            }
        }
        else
        {
            /* 8192 = 2^13 is the rounding term for the final ">> 14" */
            r0 = tmpBLK32[0+(i<<3)] * 256 + 8192;

            /* first stage; +4 rounds the ">> 3" */
            r8 = W7 * (r4 + r5) + 4;
            r4 = (r8 + (W1 - W7) * r4) >> 3;
            r5 = (r8 - (W1 + W7) * r5) >> 3;

            r8 = W3 * (r6 + r7) + 4;
            r6 = (r8 - (W3 - W5) * r6) >> 3;
            r7 = (r8 - (W3 + W5) * r7) >> 3;

            /* second stage */
            r8 = r0 + r1;
            r0 -= r1;

            r1 = W6 * (r3 + r2) + 4;
            r2 = (r1 - (W2 + W6) * r2) >> 3;
            r3 = (r1 + (W2 - W6) * r3) >> 3;

            r1 = r4 + r6;
            r4 -= r6;
            r6 = r5 + r7;
            r5 -= r7;

            /* third stage; 181/256 approximates 1/sqrt(2), +128 rounds */
            r7 = r8 + r3;
            r8 -= r3;
            r3 = r0 + r2;
            r0 -= r2;
            r2 = (181 * (r4 + r5) + 128) >> 8;
            r4 = (181 * (r4 - r5) + 128) >> 8;

            /* fourth stage: the ">> 14" drops the x256 applied above    */
            /* together with the scale carried over from the column pass */
            sum[0] = r7 + r1;
            sum[1] = r3 + r2;
            sum[2] = r0 + r4;
            sum[3] = r8 + r6;
            sum[4] = r8 - r6;
            sum[5] = r0 - r4;
            sum[6] = r3 - r2;
            sum[7] = r7 - r1;

            for (k = 0; k < 8; k++)
            {
                a = sum[k] >> 14;
                CLIP_RESULT(a)
                *comp++ = a;
            }
        }

        /* comp already moved 8 bytes; step to the start of the next row */
        comp += offset;
    }

    return;
}
343 
/*
 * idct
 *
 * Runs the separable 8x8 inverse DCT on blk, adds the result to a prediction
 * block and writes the sum to dst as bytes.
 *
 *   blk    64 dequantized coefficients, row-major; overwritten with the
 *          spatial-domain residual (before the prediction is added)
 *   pred   prediction samples; 8 bytes are read per row and rows are taken
 *          16 bytes apart (8 consumed plus the "pred += 8" skip)
 *   dst    first output byte; 8 bytes are written per row, for 8 rows
 *   width  byte distance between the starts of consecutive rows in dst
 *
 * Pass 1 transforms every column into tmpBLK at 8 times the final scale.
 * Pass 2 transforms every row, removes the extra scale and stores the
 * residual back into blk; each finished row is then added to pred, run
 * through CLIP_RESULT and stored to dst.  W1..W7 and CLIP_RESULT come from
 * the project headers; the Wi are presumably cosine constants scaled by 2^11
 * (the 2048 / 256 factors below only line up under that assumption).
 *
 * Scaling is done with multiplications rather than "<<" because the operands
 * may be negative and left-shifting a negative signed value is undefined
 * behaviour in C.  For in-range values the results are identical.
 */
void idct(
    int *blk, uint8 *pred, uint8 *dst, int width)
{
    int i;
    int k;
    int32   tmpBLK[64];                 /* column-pass output, row-major */
    int32   *tmpBLK32 = &tmpBLK[0];
    int32   r0, r1, r2, r3, r4, r5, r6, r7, r8; /* butterfly nodes */
    int32   a;
    int res;

    /* ---------------- pass 1: column (vertical) IDCT ---------------- */
    for (i = B_SIZE - 1; i >= 0; i--)
    {
        /* Even term 4 is never multiplied by a Wi, so it is brought to  */
        /* the same scale (x2048) by hand.                               */
        r1 = blk[B_SIZE * 4 + i] * 2048;

        r2 = blk[B_SIZE * 6 + i];
        r3 = blk[B_SIZE * 2 + i];
        r4 = blk[B_SIZE * 1 + i];
        r5 = blk[B_SIZE * 7 + i];
        r6 = blk[B_SIZE * 5 + i];
        r7 = blk[B_SIZE * 3 + i];

        if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
        {
            /* Only the first coefficient of this column can be non-zero: */
            /* every output is that value times 8, which is exactly what  */
            /* the full path yields ((dc * 2048 + 128) >> 8).             */
            a = blk[B_SIZE * 0 + i] * 8;
            for (k = 0; k < B_SIZE; k++)
            {
                tmpBLK32[B_SIZE * k + i] = a;
            }
        }
        else
        {
            /* +128 is the rounding term for the ">> 8" in stage four */
            r0 = blk[8 * 0 + i] * 2048 + 128;

            /* first stage: odd-part rotations */
            r8 = W7 * (r4 + r5);
            r4 = (r8 + (W1 - W7) * r4);
            r5 = (r8 - (W1 + W7) * r5);
            r8 = W3 * (r6 + r7);
            r6 = (r8 - (W3 - W5) * r6);
            r7 = (r8 - (W3 + W5) * r7);

            /* second stage */
            r8 = r0 + r1;
            r0 -= r1;

            r1 = W6 * (r3 + r2);
            r2 = (r1 - (W2 + W6) * r2);
            r3 = (r1 + (W2 - W6) * r3);

            r1 = r4 + r6;
            r4 -= r6;
            r6 = r5 + r7;
            r5 -= r7;

            /* third stage; 181/256 approximates 1/sqrt(2), +128 rounds */
            r7 = r8 + r3;
            r8 -= r3;
            r3 = r0 + r2;
            r0 -= r2;
            r2 = (181 * (r4 + r5) + 128) >> 8;
            r4 = (181 * (r4 - r5) + 128) >> 8;

            /* fourth stage: x2048 on the way in, >> 8 here, so the      */
            /* column results are kept at 8 times scale (3 extra bits of */
            /* precision for the row pass).                              */
            tmpBLK32[0 + i] = (r7 + r1) >> 8;
            tmpBLK32[(1<<3) + i] = (r3 + r2) >> 8;
            tmpBLK32[(2<<3) + i] = (r0 + r4) >> 8;
            tmpBLK32[(3<<3) + i] = (r8 + r6) >> 8;
            tmpBLK32[(4<<3) + i] = (r8 - r6) >> 8;
            tmpBLK32[(5<<3) + i] = (r0 - r4) >> 8;
            tmpBLK32[(6<<3) + i] = (r3 - r2) >> 8;
            tmpBLK32[(7<<3) + i] = (r7 - r1) >> 8;
        }
    }

    /* ----------------- pass 2: row (horizontal) IDCT ----------------- */
    /* Rows are handled top to bottom because pred and dst are walked    */
    /* forward below: residual row i must meet prediction/output row i.  */
    for (i = 0; i < B_SIZE; i++)
    {
        /* Term 4 is scaled by 256 here; the odd/even products below are */
        /* shifted down by 3 to land on the same scale.                  */
        r1 = tmpBLK32[4+(i<<3)] * 256;

        r2 = tmpBLK32[6+(i<<3)];
        r3 = tmpBLK32[2+(i<<3)];
        r4 = tmpBLK32[1+(i<<3)];
        r5 = tmpBLK32[7+(i<<3)];
        r6 = tmpBLK32[5+(i<<3)];
        r7 = tmpBLK32[3+(i<<3)];

        if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
        {
            /* Only the first term of this row can be non-zero: all 8    */
            /* residuals are equal.  (t + 32) >> 6 is the same value as  */
            /* the full path's (t * 256 + 8192) >> 14.                   */
            a = (tmpBLK32[0+(i<<3)] + 32) >> 6;
            for (k = 0; k < 8; k++)
            {
                blk[k+(i<<3)] = a;
            }
        }
        else
        {
            /* 8192 = 2^13 is the rounding term for the final ">> 14" */
            r0 = tmpBLK32[0+(i<<3)] * 256 + 8192;

            /* first stage; +4 rounds the ">> 3" */
            r8 = W7 * (r4 + r5) + 4;
            r4 = (r8 + (W1 - W7) * r4) >> 3;
            r5 = (r8 - (W1 + W7) * r5) >> 3;

            r8 = W3 * (r6 + r7) + 4;
            r6 = (r8 - (W3 - W5) * r6) >> 3;
            r7 = (r8 - (W3 + W5) * r7) >> 3;

            /* second stage */
            r8 = r0 + r1;
            r0 -= r1;

            r1 = W6 * (r3 + r2) + 4;
            r2 = (r1 - (W2 + W6) * r2) >> 3;
            r3 = (r1 + (W2 - W6) * r3) >> 3;

            r1 = r4 + r6;
            r4 -= r6;
            r6 = r5 + r7;
            r5 -= r7;

            /* third stage; 181/256 approximates 1/sqrt(2), +128 rounds */
            r7 = r8 + r3;
            r8 -= r3;
            r3 = r0 + r2;
            r0 -= r2;
            r2 = (181 * (r4 + r5) + 128) >> 8;
            r4 = (181 * (r4 - r5) + 128) >> 8;

            /* fourth stage: the ">> 14" drops the x256 applied above    */
            /* together with the scale carried over from the column pass */
            blk[0+(i<<3)] = (r7 + r1) >> 14;
            blk[1+(i<<3)] = (r3 + r2) >> 14;
            blk[2+(i<<3)] = (r0 + r4) >> 14;
            blk[3+(i<<3)] = (r8 + r6) >> 14;
            blk[4+(i<<3)] = (r8 - r6) >> 14;
            blk[5+(i<<3)] = (r0 - r4) >> 14;
            blk[6+(i<<3)] = (r3 - r2) >> 14;
            blk[7+(i<<3)] = (r7 - r1) >> 14;
        }

        /* add the residual row to the prediction and store the result */
        for (k = 0; k < 8; k++)
        {
            res = (*pred++ + blk[k+(i<<3)]);
            CLIP_RESULT(res);
            *dst++ = res;
        }

        pred += 8;              /* skip the unused half of the pred row  */
        dst += (width - 8);     /* step to the start of the next dst row */
    }

    return;
}
574 
575 #endif
576 /*----------------------------------------------------------------------------
577 ; End Function: idct
578 ----------------------------------------------------------------------------*/
579 
580