1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 /*
19 ------------------------------------------------------------------------------
20  INPUT AND OUTPUT DEFINITIONS
21 
22  Inputs:
23     [input_variable_name] = [description of the input to module, its type
24                  definition, and length (when applicable)]
25 
26  Local Stores/Buffers/Pointers Needed:
27     [local_store_name] = [description of the local store, its type
28                   definition, and length (when applicable)]
29     [local_buffer_name] = [description of the local buffer, its type
30                    definition, and length (when applicable)]
31     [local_ptr_name] = [description of the local pointer, its type
32                 definition, and length (when applicable)]
33 
34  Global Stores/Buffers/Pointers Needed:
35     [global_store_name] = [description of the global store, its type
36                    definition, and length (when applicable)]
37     [global_buffer_name] = [description of the global buffer, its type
38                 definition, and length (when applicable)]
39     [global_ptr_name] = [description of the global pointer, its type
40                  definition, and length (when applicable)]
41 
42  Outputs:
43     [return_variable_name] = [description of data/pointer returned
44                   by module, its type definition, and length
45                   (when applicable)]
46 
47  Pointers and Buffers Modified:
48     [variable_bfr_ptr] points to the [describe where the
49       variable_bfr_ptr points to, its type definition, and length
50       (when applicable)]
51     [variable_bfr] contents are [describe the new contents of
52       variable_bfr]
53 
54  Local Stores Modified:
55     [local_store_name] = [describe new contents, its type
56                   definition, and length (when applicable)]
57 
58  Global Stores Modified:
59     [global_store_name] = [describe new contents, its type
60                    definition, and length (when applicable)]
61 
62 ------------------------------------------------------------------------------
63  FUNCTION DESCRIPTION
64 
65 ------------------------------------------------------------------------------
66  REQUIREMENTS
67 
68 ------------------------------------------------------------------------------
69  REFERENCES
70 
71 ------------------------------------------------------------------------------
72  PSEUDO-CODE
73 
74 ------------------------------------------------------------------------------
75  RESOURCES USED
   When the code is written for a specific target processor,
     the resources used should be documented below.
78 
79  STACK USAGE: [stack count for this module] + [variable to represent
80           stack usage for each subroutine called]
81 
82      where: [stack usage variable] = stack usage for [subroutine
83          name] (see [filename].ext)
84 
85  DATA MEMORY USED: x words
86 
87  PROGRAM MEMORY USED: x words
88 
89  CLOCK CYCLES: [cycle count equation for this module] + [variable
90            used to represent cycle count for each subroutine
91            called]
92 
93      where: [cycle count variable] = cycle count for [subroutine
94         name] (see [filename].ext)
95 
96 ------------------------------------------------------------------------------
97 */
98 
99 /*----------------------------------------------------------------------------
100 ; INCLUDES
101 ----------------------------------------------------------------------------*/
#include <string.h>

#include "mp4dec_lib.h"
#include "idct.h"
#include "motion_comp.h"
105 
106 #define OSCL_DISABLE_WARNING_CONV_POSSIBLE_LOSS_OF_DATA
107 /*----------------------------------------------------------------------------
108 ; MACROS
109 ; Define module specific macros here
110 ----------------------------------------------------------------------------*/
111 
112 /*----------------------------------------------------------------------------
113 ; DEFINES
114 ; Include all pre-processor statements here. Include conditional
115 ; compile variables also.
116 ----------------------------------------------------------------------------*/
117 
118 /*----------------------------------------------------------------------------
119 ; LOCAL FUNCTION DEFINITIONS
120 ; Function Prototype declaration
121 ----------------------------------------------------------------------------*/
122 /* private prototypes */
123 static void idctrow(int16 *blk, uint8 *pred, uint8 *dst, int width);
124 static void idctrow_intra(int16 *blk, PIXEL *, int width);
125 static void idctcol(int16 *blk);
126 
#ifdef FAST_IDCT
/*
 * Dispatch tables that map the coefficient statistics of a block (nz_coefs
 * and the column/row bitmaps) to the specialised 1-D IDCT routines
 * idctcol0..4, idctrow1..4 and idctrow1..4_intra, which are declared outside
 * this file.
 *
 * Historical note from the original authors: ARM4 did not allow global data
 * unless it was constant, and an array of function pointers was not regarded
 * as an array of constants there because the actual addresses are only known
 * once the DLL is loaded.  The tables below are nevertheless declared as
 * arrays of const function pointers.
 */

/* Column routine for columns 0..3, indexed by [nz_coefs - 1][column];
 * consulted when nz_coefs is at most 10. */
static void (*const idctcolVCA[10][4])(int16*) =
{
    {idctcol1, idctcol0, idctcol0, idctcol0},
    {idctcol1, idctcol1, idctcol0, idctcol0},
    {idctcol2, idctcol1, idctcol0, idctcol0},
    {idctcol3, idctcol1, idctcol0, idctcol0},
    {idctcol3, idctcol2, idctcol0, idctcol0},
    {idctcol3, idctcol2, idctcol1, idctcol0},
    {idctcol3, idctcol2, idctcol1, idctcol1},
    {idctcol3, idctcol2, idctcol2, idctcol1},
    {idctcol3, idctcol3, idctcol2, idctcol1},
    {idctcol4, idctcol3, idctcol2, idctcol1}
};

/* Row routine (prediction added), indexed by nz_coefs - 1. */
static void (*const idctrowVCA[10])(int16*, uint8*, uint8*, int) =
{
    idctrow1,
    idctrow2,
    idctrow2,
    idctrow2,
    idctrow2,
    idctrow3,
    idctrow4,
    idctrow4,
    idctrow4,
    idctrow4
};

/* Column routine indexed by the upper four bits of a column's bitmapcol
 * entry; consulted only when the lower four bits of that entry are zero. */
static void (*const idctcolVCA2[16])(int16*) =
{
    idctcol0, idctcol4, idctcol3, idctcol4,
    idctcol2, idctcol4, idctcol3, idctcol4,
    idctcol1, idctcol4, idctcol3, idctcol4,
    idctcol2, idctcol4, idctcol3, idctcol4
};

/* Row routine (prediction added) indexed by the upper bits of bitmaprow
 * (bitmaprow >> 4); consulted only when bitmapcol[4..7] are all zero. */
static void (*const idctrowVCA2[8])(int16*, uint8*, uint8*, int) =
{
    idctrow1, idctrow4, idctrow3, idctrow4,
    idctrow2, idctrow4, idctrow3, idctrow4
};

/* Intra counterpart of idctrowVCA: same indexing, output written directly. */
static void (*const idctrowVCA_intra[10])(int16*, PIXEL *, int) =
{
    idctrow1_intra,
    idctrow2_intra,
    idctrow2_intra,
    idctrow2_intra,
    idctrow2_intra,
    idctrow3_intra,
    idctrow4_intra,
    idctrow4_intra,
    idctrow4_intra,
    idctrow4_intra
};

/* Intra counterpart of idctrowVCA2: same indexing. */
static void (*const idctrowVCA2_intra[8])(int16*, PIXEL *, int) =
{
    idctrow1_intra, idctrow4_intra, idctrow3_intra, idctrow4_intra,
    idctrow2_intra, idctrow4_intra, idctrow3_intra, idctrow4_intra
};
#endif
203 
204 /*----------------------------------------------------------------------------
205 ; LOCAL STORE/BUFFER/POINTER DEFINITIONS
206 ; Variable declaration - defined here and used outside this module
207 ----------------------------------------------------------------------------*/
208 
209 /*----------------------------------------------------------------------------
210 ; EXTERNAL FUNCTION REFERENCES
211 ; Declare functions defined elsewhere and referenced in this module
212 ----------------------------------------------------------------------------*/
213 
214 /*----------------------------------------------------------------------------
215 ; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES
216 ; Declare variables used in this module but defined elsewhere
217 ----------------------------------------------------------------------------*/
218 
219 /*----------------------------------------------------------------------------
220 ; FUNCTION CODE
221 ----------------------------------------------------------------------------*/
/*
 * Runs the intra IDCT on all six blocks of the current macroblock and writes
 * the result straight into the current VOP: blocks 0..3 go to the four 8x8
 * quadrants of the 16x16 luma area, block 4 to the U plane and block 5 to the
 * V plane.
 *
 *   video - decoder state; supplies currVop, mblock, the macroblock position
 *           (mbnum_col / mbnum_row) and the luma line pitch (width).
 */
void MBlockIDCT(VideoDecData *video)
{
    Vop *vop = video->currVop;
    MacroBlock *mb = video->mblock;
    const int mb_col = video->mbnum_col;
    const int mb_row = video->mbnum_row;
    const int pitch_y = video->width;
    const int pitch_c = pitch_y >> 1;   /* chroma planes use half the luma pitch */

    /* Offset of the macroblock's top-left luma sample inside the Y plane. */
    const int32 luma_off = (int32)(mb_row << 4) * pitch_y + (mb_col << 4);
    /* Matching offset inside each chroma plane. */
    const int32 chroma_off = (luma_off >> 2) + (mb_col << 2);

    PIXEL *y_dst = vop->yChan + luma_off;
    PIXEL *u_dst = vop->uChan + chroma_off;
    PIXEL *v_dst = vop->vChan + chroma_off;

    /* Luma: top-left, top-right, bottom-left, bottom-right. */
    BlockIDCT_intra(mb, y_dst, 0, pitch_y);
    BlockIDCT_intra(mb, y_dst + 8, 1, pitch_y);
    BlockIDCT_intra(mb, y_dst + (pitch_y << 3), 2, pitch_y);
    BlockIDCT_intra(mb, y_dst + (pitch_y << 3) + 8, 3, pitch_y);

    /* Chroma. */
    BlockIDCT_intra(mb, u_dst, 4, pitch_c);
    BlockIDCT_intra(mb, v_dst, 5, pitch_c);
}
248 
249 
/*
 * Inverse-transforms one intra 8x8 block of a macroblock and writes the
 * resulting pixels to c_comp.
 *
 *   mblock - macroblock holding the coefficients (block[comp]) and, in the
 *            FAST_IDCT build, the per-block statistics no_coeff, bitmapcol
 *            and bitmaprow.
 *   c_comp - destination of the block's top-left pixel.
 *   comp   - block index inside the macroblock.
 *   width  - line pitch of the destination.
 *
 * FAST_IDCT build: blocks with 1..10 coefficients are dispatched through the
 * nz_coefs-indexed tables; everything else is dispatched per column from
 * bitmapcol and per block from bitmaprow.  A coefficient count below 1 is
 * routed to the bitmap-driven path so that it can never produce a negative
 * table index, and the bitmaprow-derived index is masked to the size of the
 * eight-entry row table.
 */
void BlockIDCT_intra(
    MacroBlock *mblock, PIXEL *c_comp, int comp, int width)
{
    int16 *coeff_in = mblock->block[comp];
#ifdef INTEGER_IDCT
#ifdef FAST_IDCT  /* VCA IDCT using nzcoefs and bitmaps*/
    int i, bmapr;
    int nz_coefs = mblock->no_coeff[comp];
    uint8 *bitmapcol = mblock->bitmapcol[comp];
    uint8 bitmaprow = mblock->bitmaprow[comp];

    if (nz_coefs >= 1 && nz_coefs <= 10)
    {
        /* Few coefficients: only columns 0..3 are transformed, with routines
         * chosen by the coefficient count alone. */
        bmapr = nz_coefs - 1;
        for (i = 0; i < 4; i++)
        {
            idctcolVCA[bmapr][i](coeff_in + i);
        }

        idctrowVCA_intra[bmapr](coeff_in, c_comp, width);
    }
    else
    {
        /* Column pass, last column first; columns with an empty bitmap are
         * skipped entirely. */
        for (i = 7; i >= 0; i--)
        {
            bmapr = (int)bitmapcol[i];
            if (bmapr == 0)
            {
                continue;
            }

            if ((bmapr & 0xf) == 0)         /*  07/18/01 */
            {
                /* Lower nibble clear: pick the reduced routine from the
                 * upper nibble. */
                idctcolVCA2[bmapr >> 4](coeff_in + i);
            }
            else
            {
                idctcol(coeff_in + i);
            }
        }

        /* Row pass. */
        if ((bitmapcol[4] | bitmapcol[5] | bitmapcol[6] | bitmapcol[7]) == 0)
        {
            /* The table has eight entries; the mask keeps the index in range
             * even if bit 7 of bitmaprow is set. */
            idctrowVCA2_intra[(bitmaprow >> 4) & 0x7](coeff_in, c_comp, width);
        }
        else
        {
            idctrow_intra(coeff_in, c_comp, width);
        }
    }
#else
    void idct_intra(int *block, uint8 *comp, int width);
    idct_intra(coeff_in, c_comp, width);
#endif
#else
    void idctref_intra(int *block, uint8 *comp, int width);
    idctref_intra(coeff_in, c_comp, width);
#endif

    return;
}
321 
322 /*  08/04/05, no residue, just copy from pred to output */
Copy_Blk_to_Vop(uint8 * dst,uint8 * pred,int width)323 void Copy_Blk_to_Vop(uint8 *dst, uint8 *pred, int width)
324 {
325     /* copy 4 bytes at a time */
326     width -= 4;
327     *((uint32*)dst) = *((uint32*)pred);
328     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
329     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
330     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
331     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
332     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
333     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
334     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
335     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
336     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
337     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
338     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
339     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
340     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
341     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
342     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
343 
344     return ;
345 }
346 
347 /*  08/04/05 compute IDCT and add prediction at the end  */
BlockIDCT(uint8 * dst,uint8 * pred,int16 * coeff_in,int width,int nz_coefs,uint8 * bitmapcol,uint8 bitmaprow)348 void BlockIDCT(
349     uint8 *dst,  /* destination */
350     uint8 *pred, /* prediction block, pitch 16 */
351     int16   *coeff_in,  /* DCT data, size 64 */
352     int width, /* width of dst */
353     int nz_coefs,
354     uint8 *bitmapcol,
355     uint8 bitmaprow
356 )
357 {
358 #ifdef INTEGER_IDCT
359 #ifdef FAST_IDCT  /* VCA IDCT using nzcoefs and bitmaps*/
360     int i, bmapr;
361     /*----------------------------------------------------------------------------
362     ; Function body here
363     ----------------------------------------------------------------------------*/
364     if (nz_coefs <= 10)
365     {
366         bmapr = (nz_coefs - 1);
367         (*(idctcolVCA[bmapr]))(coeff_in);
368         (*(idctcolVCA[bmapr][1]))(coeff_in + 1);
369         (*(idctcolVCA[bmapr][2]))(coeff_in + 2);
370         (*(idctcolVCA[bmapr][3]))(coeff_in + 3);
371 
372         (*idctrowVCA[nz_coefs-1])(coeff_in, pred, dst, width);
373         return ;
374     }
375     else
376     {
377         i = 8;
378 
379         while (i--)
380         {
381             bmapr = (int)bitmapcol[i];
382             if (bmapr)
383             {
384                 if ((bmapr&0xf) == 0)         /*  07/18/01 */
385                 {
386                     (*(idctcolVCA2[bmapr>>4]))(coeff_in + i);
387                 }
388                 else
389                 {
390                     idctcol(coeff_in + i);
391                 }
392             }
393         }
394         if ((bitmapcol[4] | bitmapcol[5] | bitmapcol[6] | bitmapcol[7]) == 0)
395         {
396             (*(idctrowVCA2[bitmaprow>>4]))(coeff_in, pred, dst, width);
397         }
398         else
399         {
400             idctrow(coeff_in, pred, dst, width);
401         }
402         return ;
403     }
404 #else // FAST_IDCT
405     void idct(int *block, uint8 *pred, uint8 *dst, int width);
406     idct(coeff_in, pred, dst, width);
407     return;
408 #endif // FAST_IDCT
409 #else // INTEGER_IDCT
410     void idctref(int *block, uint8 *pred, uint8 *dst, int width);
411     idctref(coeff_in, pred, dst, width);
412     return;
413 #endif // INTEGER_IDCT
414 
415 }
416 /*----------------------------------------------------------------------------
;  End Function: BlockIDCT
418 ----------------------------------------------------------------------------*/
419 
420 
421 /****************************************************************************/
422 
423 /*
424 ------------------------------------------------------------------------------
425  FUNCTION NAME: idctrow
426 ------------------------------------------------------------------------------
427  INPUT AND OUTPUT DEFINITIONS FOR idctrow
428 
429  Inputs:
430     [input_variable_name] = [description of the input to module, its type
431                  definition, and length (when applicable)]
432 
433  Local Stores/Buffers/Pointers Needed:
434     [local_store_name] = [description of the local store, its type
435                   definition, and length (when applicable)]
436     [local_buffer_name] = [description of the local buffer, its type
437                    definition, and length (when applicable)]
438     [local_ptr_name] = [description of the local pointer, its type
439                 definition, and length (when applicable)]
440 
441  Global Stores/Buffers/Pointers Needed:
442     [global_store_name] = [description of the global store, its type
443                    definition, and length (when applicable)]
444     [global_buffer_name] = [description of the global buffer, its type
445                 definition, and length (when applicable)]
446     [global_ptr_name] = [description of the global pointer, its type
447                  definition, and length (when applicable)]
448 
449  Outputs:
450     [return_variable_name] = [description of data/pointer returned
451                   by module, its type definition, and length
452                   (when applicable)]
453 
454  Pointers and Buffers Modified:
455     [variable_bfr_ptr] points to the [describe where the
456       variable_bfr_ptr points to, its type definition, and length
457       (when applicable)]
458     [variable_bfr] contents are [describe the new contents of
459       variable_bfr]
460 
461  Local Stores Modified:
462     [local_store_name] = [describe new contents, its type
463                   definition, and length (when applicable)]
464 
465  Global Stores Modified:
466     [global_store_name] = [describe new contents, its type
467                    definition, and length (when applicable)]
468 
469 ------------------------------------------------------------------------------
470  FUNCTION DESCRIPTION FOR idctrow
471 
472 ------------------------------------------------------------------------------
473  REQUIREMENTS FOR idctrow
474 
475 ------------------------------------------------------------------------------
476  REFERENCES FOR idctrow
477 
478 ------------------------------------------------------------------------------
479  PSEUDO-CODE FOR idctrow
480 
481 ------------------------------------------------------------------------------
482  RESOURCES USED FOR idctrow
   When the code is written for a specific target processor,
     the resources used should be documented below.
485 
486  STACK USAGE: [stack count for this module] + [variable to represent
487           stack usage for each subroutine called]
488 
489      where: [stack usage variable] = stack usage for [subroutine
490          name] (see [filename].ext)
491 
492  DATA MEMORY USED: x words
493 
494  PROGRAM MEMORY USED: x words
495 
496  CLOCK CYCLES: [cycle count equation for this module] + [variable
497            used to represent cycle count for each subroutine
498            called]
499 
500      where: [cycle count variable] = cycle count for [subroutine
501         name] (see [filename].ext)
502 
503 ------------------------------------------------------------------------------
504 */
505 
506 /*----------------------------------------------------------------------------
507 ; Function Code FOR idctrow
508 ----------------------------------------------------------------------------*/
509 __attribute__((no_sanitize("signed-integer-overflow")))
idctrow(int16 * blk,uint8 * pred,uint8 * dst,int width)510 void idctrow(
511     int16 *blk, uint8 *pred, uint8 *dst, int width
512 )
513 {
514     /*----------------------------------------------------------------------------
515     ; Define all local variables
516     ----------------------------------------------------------------------------*/
517     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
518     int i = 8;
519     uint32 pred_word, dst_word;
520     int res, res2;
521 
522     /*----------------------------------------------------------------------------
523     ; Function body here
524     ----------------------------------------------------------------------------*/
525     /* row (horizontal) IDCT
526     *
527     * 7                       pi         1 dst[k] = sum c[l] * src[l] * cos( -- *
528     * ( k + - ) * l ) l=0                      8          2
529     *
530     * where: c[0]    = 128 c[1..7] = 128*sqrt(2) */
531 
532     /* preset the offset, such that we can take advantage pre-offset addressing mode   */
533     width -= 4;
534     dst -= width;
535     pred -= 12;
536     blk -= 8;
537 
538     while (i--)
539     {
540         x1 = (int32)blk[12] << 8;
541         blk[12] = 0;
542         x2 = blk[14];
543         blk[14] = 0;
544         x3 = blk[10];
545         blk[10] = 0;
546         x4 = blk[9];
547         blk[9] = 0;
548         x5 = blk[15];
549         blk[15] = 0;
550         x6 = blk[13];
551         blk[13] = 0;
552         x7 = blk[11];
553         blk[11] = 0;
554         x0 = ((*(blk += 8)) << 8) + 8192;
555         blk[0] = 0;   /* for proper rounding in the fourth stage */
556 
557         /* first stage */
558         x8 = W7 * (x4 + x5) + 4;
559         x4 = (x8 + (W1 - W7) * x4) >> 3;
560         x5 = (x8 - (W1 + W7) * x5) >> 3;
561         x8 = W3 * (x6 + x7) + 4;
562         x6 = (x8 - (W3 - W5) * x6) >> 3;
563         x7 = (x8 - (W3 + W5) * x7) >> 3;
564 
565         /* second stage */
566         x8 = x0 + x1;
567         x0 -= x1;
568         x1 = W6 * (x3 + x2) + 4;
569         x2 = (x1 - (W2 + W6) * x2) >> 3;
570         x3 = (x1 + (W2 - W6) * x3) >> 3;
571         x1 = x4 + x6;
572         x4 -= x6;
573         x6 = x5 + x7;
574         x5 -= x7;
575 
576         /* third stage */
577         x7 = x8 + x3;
578         x8 -= x3;
579         x3 = x0 + x2;
580         x0 -= x2;
581         x2 = (181 * (x4 + x5) + 128) >> 8;
582         x4 = (181 * (x4 - x5) + 128) >> 8;
583 
584         /* fourth stage */
585         pred_word = *((uint32*)(pred += 12)); /* read 4 bytes from pred */
586 
587         res = (x7 + x1) >> 14;
588         ADD_AND_CLIP1(res);
589         res2 = (x3 + x2) >> 14;
590         ADD_AND_CLIP2(res2);
591         dst_word = (res2 << 8) | res;
592         res = (x0 + x4) >> 14;
593         ADD_AND_CLIP3(res);
594         dst_word |= (res << 16);
595         res = (x8 + x6) >> 14;
596         ADD_AND_CLIP4(res);
597         dst_word |= (res << 24);
598         *((uint32*)(dst += width)) = dst_word; /* save 4 bytes to dst */
599 
600         pred_word = *((uint32*)(pred += 4)); /* read 4 bytes from pred */
601 
602         res = (x8 - x6) >> 14;
603         ADD_AND_CLIP1(res);
604         res2 = (x0 - x4) >> 14;
605         ADD_AND_CLIP2(res2);
606         dst_word = (res2 << 8) | res;
607         res = (x3 - x2) >> 14;
608         ADD_AND_CLIP3(res);
609         dst_word |= (res << 16);
610         res = (x7 - x1) >> 14;
611         ADD_AND_CLIP4(res);
612         dst_word |= (res << 24);
613         *((uint32*)(dst += 4)) = dst_word; /* save 4 bytes to dst */
614     }
615     /*----------------------------------------------------------------------------
616     ; Return nothing or data or data pointer
617     ----------------------------------------------------------------------------*/
618     return;
619 }
620 
621 __attribute__((no_sanitize("signed-integer-overflow")))
idctrow_intra(int16 * blk,PIXEL * comp,int width)622 void idctrow_intra(
623     int16 *blk, PIXEL *comp, int width
624 )
625 {
626     /*----------------------------------------------------------------------------
627     ; Define all local variables
628     ----------------------------------------------------------------------------*/
629     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8, temp;
630     int i = 8;
631     int offset = width;
632     int32 word;
633 
634     /*----------------------------------------------------------------------------
635     ; Function body here
636     ----------------------------------------------------------------------------*/
637     /* row (horizontal) IDCT
638     *
639     * 7                       pi         1 dst[k] = sum c[l] * src[l] * cos( -- *
640     * ( k + - ) * l ) l=0                      8          2
641     *
642     * where: c[0]    = 128 c[1..7] = 128*sqrt(2) */
643     while (i--)
644     {
645         x1 = (int32)blk[4] << 8;
646         blk[4] = 0;
647         x2 = blk[6];
648         blk[6] = 0;
649         x3 = blk[2];
650         blk[2] = 0;
651         x4 = blk[1];
652         blk[1] = 0;
653         x5 = blk[7];
654         blk[7] = 0;
655         x6 = blk[5];
656         blk[5] = 0;
657         x7 = blk[3];
658         blk[3] = 0;
659 #ifndef FAST_IDCT
660         /* shortcut */  /* covered by idctrow1  01/9/2001 */
661         if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
662         {
663             blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = (blk[0] + 32) >> 6;
664             return;
665         }
666 #endif
667         x0 = ((int32)blk[0] << 8) + 8192;
668         blk[0] = 0;  /* for proper rounding in the fourth stage */
669 
670         /* first stage */
671         x8 = W7 * (x4 + x5) + 4;
672         x4 = (x8 + (W1 - W7) * x4) >> 3;
673         x5 = (x8 - (W1 + W7) * x5) >> 3;
674         x8 = W3 * (x6 + x7) + 4;
675         x6 = (x8 - (W3 - W5) * x6) >> 3;
676         x7 = (x8 - (W3 + W5) * x7) >> 3;
677 
678         /* second stage */
679         x8 = x0 + x1;
680         x0 -= x1;
681         x1 = W6 * (x3 + x2) + 4;
682         x2 = (x1 - (W2 + W6) * x2) >> 3;
683         x3 = (x1 + (W2 - W6) * x3) >> 3;
684         x1 = x4 + x6;
685         x4 -= x6;
686         x6 = x5 + x7;
687         x5 -= x7;
688 
689         /* third stage */
690         x7 = x8 + x3;
691         x8 -= x3;
692         x3 = x0 + x2;
693         x0 -= x2;
694         x2 = (181 * (x4 + x5) + 128) >> 8;
695         x4 = (181 * (x4 - x5) + 128) >> 8;
696 
697         /* fourth stage */
698         word = ((x7 + x1) >> 14);
699         CLIP_RESULT(word)
700 
701         temp = ((x3 + x2) >> 14);
702         CLIP_RESULT(temp)
703         word = word | (temp << 8);
704 
705         temp = ((x0 + x4) >> 14);
706         CLIP_RESULT(temp)
707         word = word | (temp << 16);
708 
709         temp = ((x8 + x6) >> 14);
710         CLIP_RESULT(temp)
711         word = word | (temp << 24);
712         *((int32*)(comp)) = word;
713 
714         word = ((x8 - x6) >> 14);
715         CLIP_RESULT(word)
716 
717         temp = ((x0 - x4) >> 14);
718         CLIP_RESULT(temp)
719         word = word | (temp << 8);
720 
721         temp = ((x3 - x2) >> 14);
722         CLIP_RESULT(temp)
723         word = word | (temp << 16);
724 
725         temp = ((x7 - x1) >> 14);
726         CLIP_RESULT(temp)
727         word = word | (temp << 24);
728         *((int32*)(comp + 4)) = word;
729         comp += offset;
730 
731         blk += B_SIZE;
732     }
733     /*----------------------------------------------------------------------------
734     ; Return nothing or data or data pointer
735     ----------------------------------------------------------------------------*/
736     return;
737 }
738 
739 /*----------------------------------------------------------------------------
740 ; End Function: idctrow
741 ----------------------------------------------------------------------------*/
742 
743 
744 /****************************************************************************/
745 
746 /*
747 ------------------------------------------------------------------------------
748  FUNCTION NAME: idctcol
749 ------------------------------------------------------------------------------
750  INPUT AND OUTPUT DEFINITIONS FOR idctcol
751 
752  Inputs:
753     [input_variable_name] = [description of the input to module, its type
754                  definition, and length (when applicable)]
755 
756  Local Stores/Buffers/Pointers Needed:
757     [local_store_name] = [description of the local store, its type
758                   definition, and length (when applicable)]
759     [local_buffer_name] = [description of the local buffer, its type
760                    definition, and length (when applicable)]
761     [local_ptr_name] = [description of the local pointer, its type
762                 definition, and length (when applicable)]
763 
764  Global Stores/Buffers/Pointers Needed:
765     [global_store_name] = [description of the global store, its type
766                    definition, and length (when applicable)]
767     [global_buffer_name] = [description of the global buffer, its type
768                 definition, and length (when applicable)]
769     [global_ptr_name] = [description of the global pointer, its type
770                  definition, and length (when applicable)]
771 
772  Outputs:
773     [return_variable_name] = [description of data/pointer returned
774                   by module, its type definition, and length
775                   (when applicable)]
776 
777  Pointers and Buffers Modified:
778     [variable_bfr_ptr] points to the [describe where the
779       variable_bfr_ptr points to, its type definition, and length
780       (when applicable)]
781     [variable_bfr] contents are [describe the new contents of
782       variable_bfr]
783 
784  Local Stores Modified:
785     [local_store_name] = [describe new contents, its type
786                   definition, and length (when applicable)]
787 
788  Global Stores Modified:
789     [global_store_name] = [describe new contents, its type
790                    definition, and length (when applicable)]
791 
792 ------------------------------------------------------------------------------
793  FUNCTION DESCRIPTION FOR idctcol
794 
795 ------------------------------------------------------------------------------
796  REQUIREMENTS FOR idctcol
797 
798 ------------------------------------------------------------------------------
799  REFERENCES FOR idctcol
800 
801 ------------------------------------------------------------------------------
802  PSEUDO-CODE FOR idctcol
803 
804 ------------------------------------------------------------------------------
805  RESOURCES USED FOR idctcol
   When the code is written for a specific target processor,
     the resources used should be documented below.
808 
809  STACK USAGE: [stack count for this module] + [variable to represent
810           stack usage for each subroutine called]
811 
812      where: [stack usage variable] = stack usage for [subroutine
813          name] (see [filename].ext)
814 
815  DATA MEMORY USED: x words
816 
817  PROGRAM MEMORY USED: x words
818 
819  CLOCK CYCLES: [cycle count equation for this module] + [variable
820            used to represent cycle count for each subroutine
821            called]
822 
823      where: [cycle count variable] = cycle count for [subroutine
824         name] (see [filename].ext)
825 
826 ------------------------------------------------------------------------------
827 */
828 
829 /*----------------------------------------------------------------------------
830 ; Function Code FOR idctcol
831 ----------------------------------------------------------------------------*/
832 __attribute__((no_sanitize("signed-integer-overflow")))
idctcol(int16 * blk)833 void idctcol(
834     int16 *blk
835 )
836 {
837     /*----------------------------------------------------------------------------
838     ; Define all local variables
839     ----------------------------------------------------------------------------*/
840     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
841 
842     /*----------------------------------------------------------------------------
843     ; Function body here
844     ----------------------------------------------------------------------------*/
845     /* column (vertical) IDCT
846     *
847     * 7                         pi         1 dst[8*k] = sum c[l] * src[8*l] *
848     * cos( -- * ( k + - ) * l ) l=0                        8          2
849     *
850     * where: c[0]    = 1/1024 c[1..7] = (1/1024)*sqrt(2) */
851     x1 = (int32)blk[32] << 11;
852     x2 = blk[48];
853     x3 = blk[16];
854     x4 = blk[8];
855     x5 = blk[56];
856     x6 = blk[40];
857     x7 = blk[24];
858 #ifndef FAST_IDCT
859     /* shortcut */        /* covered by idctcolumn1  01/9/2001 */
860     if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
861     {
862         blk[0] = blk[8] = blk[16] = blk[24] = blk[32] = blk[40] = blk[48] = blk[56]
863                                               = blk[0] << 3;
864         return;
865     }
866 #endif
867 
868     x0 = ((int32)blk[0] << 11) + 128;
869 
870     /* first stage */
871     x8 = W7 * (x4 + x5);
872     x4 = x8 + (W1 - W7) * x4;
873     x5 = x8 - (W1 + W7) * x5;
874     x8 = W3 * (x6 + x7);
875     x6 = x8 - (W3 - W5) * x6;
876     x7 = x8 - (W3 + W5) * x7;
877 
878     /* second stage */
879     x8 = x0 + x1;
880     x0 -= x1;
881     x1 = W6 * (x3 + x2);
882     x2 = x1 - (W2 + W6) * x2;
883     x3 = x1 + (W2 - W6) * x3;
884     x1 = x4 + x6;
885     x4 -= x6;
886     x6 = x5 + x7;
887     x5 -= x7;
888 
889     /* third stage */
890     x7 = x8 + x3;
891     x8 -= x3;
892     x3 = x0 + x2;
893     x0 -= x2;
894     x2 = (181 * (x4 + x5) + 128) >> 8;
895     x4 = (181 * (x4 - x5) + 128) >> 8;
896 
897     /* fourth stage */
898     blk[0]    = (x7 + x1) >> 8;
899     blk[8] = (x3 + x2) >> 8;
900     blk[16] = (x0 + x4) >> 8;
901     blk[24] = (x8 + x6) >> 8;
902     blk[32] = (x8 - x6) >> 8;
903     blk[40] = (x0 - x4) >> 8;
904     blk[48] = (x3 - x2) >> 8;
905     blk[56] = (x7 - x1) >> 8;
906     /*----------------------------------------------------------------------------
907     ; Return nothing or data or data pointer
908     ----------------------------------------------------------------------------*/
909     return;
910 }
911 /*----------------------------------------------------------------------------
912 ;  End Function: idctcol
913 ----------------------------------------------------------------------------*/
914 
915