1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 #include "mp4enc_lib.h"
19 #include "mp4lib_int.h"
20 #include "dct_inline.h"
21 
22 #define FDCT_SHIFT 10
23 
24 #ifdef __cplusplus
25 extern "C"
26 {
27 #endif
28 
29     /**************************************************************************/
30     /*  Function:   BlockDCT_AANwSub
31         Date:       7/31/01
32         Input:
33         Output:     out[64] ==> next block
34         Purpose:    Do subtraction for zero MV first
35         Modified:
36     **************************************************************************/
37 
BlockDCT_AANwSub(Short * out,UChar * cur,UChar * pred,Int width)38     Void BlockDCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width)
39     {
40         Short *dst;
41         Int k0, k1, k2, k3, k4, k5, k6, k7;
42         Int round;
43         Int k12 = 0x022A02D4;
44         Int k14 = 0x0188053A;
45         Int abs_sum;
46         Int mask;
47         Int tmp, tmp2;
48         Int ColTh;
49 
50         dst = out + 64 ;
51         ColTh = *dst;
52         out += 128;
53         round = 1 << (FDCT_SHIFT - 1);
54 
55         do  /* fdct_nextrow */
56         {
57             /* assuming the block is word-aligned */
58             mask = 0x1FE;
59             tmp = *((Int*) cur);    /* contains 4 pixels */
60             tmp2 = *((Int*) pred); /* prediction 4 pixels */
61             k0 = tmp2 & 0xFF;
62             k1 = mask & (tmp << 1);
63             k0 = k1 - (k0 << 1);
64             k1 = (tmp2 >> 8) & 0xFF;
65             k2 = mask & (tmp >> 7);
66             k1 = k2 - (k1 << 1);
67             k2 = (tmp2 >> 16) & 0xFF;
68             k3 = mask & (tmp >> 15);
69             k2 = k3 - (k2 << 1);
70             k3 = (tmp2 >> 24) & 0xFF;
71             k4 = mask & (tmp >> 23);
72             k3 = k4 - (k3 << 1);
73             tmp = *((Int*)(cur + 4));   /* another 4 pixels */
74             tmp2 = *((Int*)(pred + 4));
75             k4 = tmp2 & 0xFF;
76             k5 = mask & (tmp << 1);
77             k4 = k5 - (k4 << 1);
78             k5 = (tmp2 >> 8) & 0xFF;
79             k6 = mask & (tmp >> 7);
80             k5 = k6 - (k5 << 1);
81             k6 = (tmp2 >> 16) & 0xFF;
82             k7 = mask & (tmp >> 15);
83             k6 = k7 - (k6 << 1);
84             k7 = (tmp2 >> 24) & 0xFF;
85             tmp = mask & (tmp >> 23);
86             k7 = tmp - (k7 << 1);
87             cur += width;
88             pred += 16;
89 
90             /* fdct_1 */
91             k0 = k0 + k7;
92             k7 = k0 - (k7 << 1);
93             k1 = k1 + k6;
94             k6 = k1 - (k6 << 1);
95             k2 = k2 + k5;
96             k5 = k2 - (k5 << 1);
97             k3 = k3 + k4;
98             k4 = k3 - (k4 << 1);
99 
100             k0 = k0 + k3;
101             k3 = k0 - (k3 << 1);
102             k1 = k1 + k2;
103             k2 = k1 - (k2 << 1);
104 
105             k0 = k0 + k1;
106             k1 = k0 - (k1 << 1);
107             /**********/
108             dst[0] = k0;
109             dst[4] = k1; /* col. 4 */
110             /* fdct_2 */
111             k4 = k4 + k5;
112             k5 = k5 + k6;
113             k6 = k6 + k7;
114             k2 = k2 + k3;
115             /* MUL2C k2,k5,724,FDCT_SHIFT */
116             /* k0, k1 become scratch */
117             /* assume FAST MULTIPLY */
118             k1 = mla724(k12, k5, round);
119             k0 = mla724(k12, k2, round);
120 
121             k5 = k1 >> FDCT_SHIFT;
122             k2 = k0 >> FDCT_SHIFT;
123             /*****************/
124             k2 = k2 + k3;
125             k3 = (k3 << 1) - k2;
126             /********/
127             dst[2] = k2;        /* col. 2 */
128             k3 <<= 1;       /* scale up col. 6 */
129             dst[6] = k3; /* col. 6 */
130             /* fdct_3 */
131             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
132             /* assume FAST MULTIPLY */
133             /* k0, k1 are output */
134             k0 = k4 - k6;
135 
136             k1 = mla392(k0, k14, round);
137             k0 = mla554(k4, k12, k1);
138             k1 = mla1338(k6, k14, k1);
139 
140             k4 = k0 >> FDCT_SHIFT;
141             k6 = k1 >> FDCT_SHIFT;
142             /***********************/
143             k5 = k5 + k7;
144             k7 = (k7 << 1) - k5;
145             k4 = k4 + k7;
146             k7 = (k7 << 1) - k4;
147             k5 = k5 + k6;
148             k4 <<= 1;       /* scale up col.5 */
149             k6 = k5 - (k6 << 1);
150             /********/
151             dst[5] = k4;    /* col. 5 */
152             k6 <<= 2;       /* scale up col. 7 */
153             dst[1] = k5;    /* col. 1 */
154             dst[7] = k6;    /* col. 7 */
155             dst[3] = k7;    /* col. 3 */
156             dst += 8;
157         }
158         while (dst < out);
159 
160         out -= 64;
161         dst = out + 8;
162 
163         /*  Vertical Block Loop  */
164         do  /* Vertical 8xDCT loop */
165         {
166             k0 = out[0];
167             k1 = out[8];
168             k2 = out[16];
169             k3 = out[24];
170             k4 = out[32];
171             k5 = out[40];
172             k6 = out[48];
173             k7 = out[56];
174             /* deadzone thresholding for column */
175 
176             abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
177 
178             if (abs_sum < ColTh)
179             {
180                 out[0] = 0x7fff;
181                 out++;
182                 continue;
183             }
184 
185             /* fdct_1 */
186             k0 = k0 + k7;
187             k7 = k0 - (k7 << 1);
188             k1 = k1 + k6;
189             k6 = k1 - (k6 << 1);
190             k2 = k2 + k5;
191             k5 = k2 - (k5 << 1);
192             k3 = k3 + k4;
193             k4 = k3 - (k4 << 1);
194 
195             k0 = k0 + k3;
196             k3 = k0 - (k3 << 1);
197             k1 = k1 + k2;
198             k2 = k1 - (k2 << 1);
199 
200             k0 = k0 + k1;
201             k1 = k0 - (k1 << 1);
202             /**********/
203             out[32] = k1; /* row 4 */
204             out[0] = k0; /* row 0 */
205             /* fdct_2 */
206             k4 = k4 + k5;
207             k5 = k5 + k6;
208             k6 = k6 + k7;
209             k2 = k2 + k3;
210             /* MUL2C k2,k5,724,FDCT_SHIFT */
211             /* k0, k1 become scratch */
212             /* assume FAST MULTIPLY */
213             k1 = mla724(k12, k5, round);
214             k0 = mla724(k12, k2, round);
215 
216             k5 = k1 >> FDCT_SHIFT;
217             k2 = k0 >> FDCT_SHIFT;
218             /*****************/
219             k2 = k2 + k3;
220             k3 = (k3 << 1) - k2;
221             k3 <<= 1;       /* scale up col. 6 */
222             /********/
223             out[48] = k3;   /* row 6 */
224             out[16] = k2;   /* row 2 */
225             /* fdct_3 */
226             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
227             /* assume FAST MULTIPLY */
228             /* k0, k1 are output */
229             k0 = k4 - k6;
230 
231             k1 = mla392(k0, k14, round);
232             k0 = mla554(k4, k12, k1);
233             k1 = mla1338(k6, k14, k1);
234 
235             k4 = k0 >> FDCT_SHIFT;
236             k6 = k1 >> FDCT_SHIFT;
237             /***********************/
238             k5 = k5 + k7;
239             k7 = (k7 << 1) - k5;
240             k4 = k4 + k7;
241             k7 = (k7 << 1) - k4;
242             k5 = k5 + k6;
243             k4 <<= 1;       /* scale up col. 5 */
244             k6 = k5 - (k6 << 1);
245             /********/
246             out[24] = k7 ;    /* row 3 */
247             k6 <<= 2;       /* scale up col. 7 */
248             out[56] = k6 ;   /* row 7 */
249             out[8] = k5 ;    /* row 1 */
250             out[40] = k4 ;   /* row 5 */
251             out++;
252         }
253         while ((uintptr_t)out < (uintptr_t)dst) ;
254 
255         return ;
256     }
257 
258     /**************************************************************************/
259     /*  Function:   Block4x4DCT_AANwSub
260         Date:       7/31/01
261         Input:
262         Output:     out[64] ==> next block
263         Purpose:    Do subtraction for zero MV first before 4x4 DCT
264         Modified:
265     **************************************************************************/
266 
Block4x4DCT_AANwSub(Short * out,UChar * cur,UChar * pred,Int width)267     Void Block4x4DCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width)
268     {
269         Short *dst;
270         Int k0, k1, k2, k3, k4, k5, k6, k7;
271         Int round;
272         Int k12 = 0x022A02D4;
273         Int k14 = 0x0188053A;
274         Int mask;
275         Int tmp, tmp2;
276         Int abs_sum;
277         Int ColTh;
278 
279         dst = out + 64 ;
280         ColTh = *dst;
281         out += 128;
282         round = 1 << (FDCT_SHIFT - 1);
283 
284         do  /* fdct_nextrow */
285         {
286             /* assuming the block is word-aligned */
287             mask = 0x1FE;
288             tmp = *((Int*) cur);    /* contains 4 pixels */
289             tmp2 = *((Int*) pred); /* prediction 4 pixels */
290             k0 = tmp2 & 0xFF;
291             k1 = mask & (tmp << 1);
292             k0 = k1 - (k0 << 1);
293             k1 = (tmp2 >> 8) & 0xFF;
294             k2 = mask & (tmp >> 7);
295             k1 = k2 - (k1 << 1);
296             k2 = (tmp2 >> 16) & 0xFF;
297             k3 = mask & (tmp >> 15);
298             k2 = k3 - (k2 << 1);
299             k3 = (tmp2 >> 24) & 0xFF;
300             k4 = mask & (tmp >> 23);
301             k3 = k4 - (k3 << 1);
302             tmp = *((Int*)(cur + 4));   /* another 4 pixels */
303             tmp2 = *((Int*)(pred + 4));
304             k4 = tmp2 & 0xFF;
305             k5 = mask & (tmp << 1);
306             k4 = k5 - (k4 << 1);
307             k5 = (tmp2 >> 8) & 0xFF;
308             k6 = mask & (tmp >> 7);
309             k5 = k6 - (k5 << 1);
310             k6 = (tmp2 >> 16) & 0xFF;
311             k7 = mask & (tmp >> 15);
312             k6 = k7 - (k6 << 1);
313             k7 = (tmp2 >> 24) & 0xFF;
314             tmp = mask & (tmp >> 23);
315             k7 = tmp - (k7 << 1);
316             cur += width;
317             pred += 16;
318 
319             /* fdct_1 */
320             k0 = k0 + k7;
321             k7 = k0 - (k7 << 1);
322             k1 = k1 + k6;
323             k6 = k1 - (k6 << 1);
324             k2 = k2 + k5;
325             k5 = k2 - (k5 << 1);
326             k3 = k3 + k4;
327             k4 = k3 - (k4 << 1);
328 
329             k0 = k0 + k3;
330             k3 = k0 - (k3 << 1);
331             k1 = k1 + k2;
332             k2 = k1 - (k2 << 1);
333 
334             k0 = k0 + k1;
335             /**********/
336             dst[0] = k0;
337             /* fdct_2 */
338             k4 = k4 + k5;
339             k5 = k5 + k6;
340             k6 = k6 + k7;
341             k2 = k2 + k3;
342             /* MUL2C k2,k5,724,FDCT_SHIFT */
343             /* k0, k1 become scratch */
344             /* assume FAST MULTIPLY */
345             k1 = mla724(k12, k5, round);
346             k0 = mla724(k12, k2, round);
347 
348             k5 = k1 >> FDCT_SHIFT;
349             k2 = k0 >> FDCT_SHIFT;
350             /*****************/
351             k2 = k2 + k3;
352             /********/
353             dst[2] = k2;        /* col. 2 */
354             /* fdct_3 */
355             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
356             /* assume FAST MULTIPLY */
357             /* k0, k1 are output */
358             k0 = k4 - k6;
359 
360             k1 = mla392(k0, k14, round);
361             k0 = mla554(k4, k12, k1);
362             k1 = mla1338(k6, k14, k1);
363 
364             k4 = k0 >> FDCT_SHIFT;
365             k6 = k1 >> FDCT_SHIFT;
366             /***********************/
367             k5 = k5 + k7;
368             k7 = (k7 << 1) - k5;
369             k7 = k7 - k4;
370             k5 = k5 + k6;
371             /********/
372             dst[1] = k5;        /* col. 1 */
373             dst[3] = k7;        /* col. 3 */
374             dst += 8;
375         }
376         while (dst < out);
377 
378         out -= 64;
379         dst = out + 4;
380 
381         /*  Vertical Block Loop  */
382         do  /* Vertical 8xDCT loop */
383         {
384             k0 = out[0];
385             k1 = out[8];
386             k2 = out[16];
387             k3 = out[24];
388             k4 = out[32];
389             k5 = out[40];
390             k6 = out[48];
391             k7 = out[56];
392 
393             abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
394 
395             if (abs_sum < ColTh)
396             {
397                 out[0] = 0x7fff;
398                 out++;
399                 continue;
400             }
401             /* fdct_1 */
402             k0 = k0 + k7;
403             k7 = k0 - (k7 << 1);
404             k1 = k1 + k6;
405             k6 = k1 - (k6 << 1);
406             k2 = k2 + k5;
407             k5 = k2 - (k5 << 1);
408             k3 = k3 + k4;
409             k4 = k3 - (k4 << 1);
410 
411             k0 = k0 + k3;
412             k3 = k0 - (k3 << 1);
413             k1 = k1 + k2;
414             k2 = k1 - (k2 << 1);
415 
416             k0 = k0 + k1;
417             /**********/
418             out[0] = k0;   /* row 0 */
419             /* fdct_2 */
420             k4 = k4 + k5;
421             k5 = k5 + k6;
422             k6 = k6 + k7;
423             k2 = k2 + k3;
424             /* MUL2C k2,k5,724,FDCT_SHIFT */
425             /* k0, k1 become scratch */
426             /* assume FAST MULTIPLY */
427             k1 = mla724(k12, k5, round);
428             k0 = mla724(k12, k2, round);
429 
430             k5 = k1 >> FDCT_SHIFT;
431             k2 = k0 >> FDCT_SHIFT;
432             /*****************/
433             k2 = k2 + k3;
434             /********/
435             out[16] = k2;           /* row 2 */
436             /* fdct_3 */
437             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
438             /* assume FAST MULTIPLY */
439             /* k0, k1 are output */
440             k0 = k4 - k6;
441 
442             k1 = mla392(k0, k14, round);
443             k0 = mla554(k4, k12, k1);
444             k1 = mla1338(k6, k14, k1);
445 
446             k4 = k0 >> FDCT_SHIFT;
447             k6 = k1 >> FDCT_SHIFT;
448             /***********************/
449             k5 = k5 + k7;
450             k7 = (k7 << 1) - k5;
451             k7 = k7 - k4 ;
452             k5 = k5 + k6;
453             /********/
454             out[24] = k7 ;      /* row 3 */
455             out[8] = k5 ;       /* row 1 */
456             out++;
457         }
458         while ((uintptr_t)out < (uintptr_t)dst) ;
459 
460         return ;
461     }
462 
463     /**************************************************************************/
464     /*  Function:   Block2x2DCT_AANwSub
465         Date:       7/31/01
466         Input:
467         Output:     out[64] ==> next block
468         Purpose:    Do subtraction for zero MV first before 2x2 DCT
469         Modified:
470     **************************************************************************/
471 
472 
Block2x2DCT_AANwSub(Short * out,UChar * cur,UChar * pred,Int width)473     Void Block2x2DCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width)
474     {
475         Short *dst;
476         Int k0, k1, k2, k3, k4, k5, k6, k7;
477         Int round;
478         Int k12 = 0x022A02D4;
479         Int k14 = 0x018803B2;
480         Int mask;
481         Int tmp, tmp2;
482         Int abs_sum;
483         Int ColTh;
484 
485         dst = out + 64 ;
486         ColTh = *dst;
487         out += 128;
488         round = 1 << (FDCT_SHIFT - 1);
489 
490         do  /* fdct_nextrow */
491         {
492             /* assuming the block is word-aligned */
493             mask = 0x1FE;
494             tmp = *((Int*) cur);    /* contains 4 pixels */
495             tmp2 = *((Int*) pred); /* prediction 4 pixels */
496             k0 = tmp2 & 0xFF;
497             k1 = mask & (tmp << 1);
498             k0 = k1 - (k0 << 1);
499             k1 = (tmp2 >> 8) & 0xFF;
500             k2 = mask & (tmp >> 7);
501             k1 = k2 - (k1 << 1);
502             k2 = (tmp2 >> 16) & 0xFF;
503             k3 = mask & (tmp >> 15);
504             k2 = k3 - (k2 << 1);
505             k3 = (tmp2 >> 24) & 0xFF;
506             k4 = mask & (tmp >> 23);
507             k3 = k4 - (k3 << 1);
508             tmp = *((Int*)(cur + 4));   /* another 4 pixels */
509             tmp2 = *((Int*)(pred + 4));
510             k4 = tmp2 & 0xFF;
511             k5 = mask & (tmp << 1);
512             k4 = k5 - (k4 << 1);
513             k5 = (tmp2 >> 8) & 0xFF;
514             k6 = mask & (tmp >> 7);
515             k5 = k6 - (k5 << 1);
516             k6 = (tmp2 >> 16) & 0xFF;
517             k7 = mask & (tmp >> 15);
518             k6 = k7 - (k6 << 1);
519             k7 = (tmp2 >> 24) & 0xFF;
520             tmp = mask & (tmp >> 23);
521             k7 = tmp - (k7 << 1);
522             cur += width;
523             pred += 16;
524 
525             /* fdct_1 */
526             k0 = k0 + k7;
527             k7 = k0 - (k7 << 1);
528             k1 = k1 + k6;
529             k6 = k1 - (k6 << 1);
530             k2 = k2 + k5;
531             k5 = k2 - (k5 << 1);
532             k3 = k3 + k4;
533             k4 = k3 - (k4 << 1);
534 
535             k0 = k0 + k3;
536             k3 = k0 - (k3 << 1);
537             k1 = k1 + k2;
538             k2 = k1 - (k2 << 1);
539 
540             k0 = k0 + k1;
541             /**********/
542             dst[0] = k0;
543             /* fdct_2 */
544             k4 = k4 + k5;
545             k5 = k5 + k6;
546             k6 = k6 + k7;
547             /* MUL2C k2,k5,724,FDCT_SHIFT */
548             /* k0, k1 become scratch */
549             /* assume FAST MULTIPLY */
550             k1 = mla724(k12, k5, round);
551 
552             k5 = k1 >> FDCT_SHIFT;
553             /*****************/
554             /********/
555             /* fdct_3 */
556             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
557             /* assume FAST MULTIPLY */
558             /* k0, k1 are output */
559             k1 = mla392(k4, k14, round);
560             k1 = mla946(k6, k14, k1);
561 
562             k6 = k1 >> FDCT_SHIFT;
563             /***********************/
564             k5 = k5 + k7;
565             k5 = k5 + k6;
566             /********/
567             dst[1] = k5;
568             dst += 8;
569         }
570         while (dst < out);
571         out -= 64;
572         dst = out + 2;
573         /*  Vertical Block Loop  */
574         do  /* Vertical 8xDCT loop */
575         {
576             k0 = out[0];
577             k1 = out[8];
578             k2 = out[16];
579             k3 = out[24];
580             k4 = out[32];
581             k5 = out[40];
582             k6 = out[48];
583             k7 = out[56];
584 
585             abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
586 
587             if (abs_sum < ColTh)
588             {
589                 out[0] = 0x7fff;
590                 out++;
591                 continue;
592             }
593             /* fdct_1 */
594             k0 = k0 + k7;
595             k7 = k0 - (k7 << 1);
596             k1 = k1 + k6;
597             k6 = k1 - (k6 << 1);
598             k2 = k2 + k5;
599             k5 = k2 - (k5 << 1);
600             k3 = k3 + k4;
601             k4 = k3 - (k4 << 1);
602 
603             k0 = k0 + k3;
604             k3 = k0 - (k3 << 1);
605             k1 = k1 + k2;
606             k2 = k1 - (k2 << 1);
607 
608             k0 = k0 + k1;
609             /**********/
610             out[0] = k0;        /* row 0 */
611             /* fdct_2 */
612             k4 = k4 + k5;
613             k5 = k5 + k6;
614             k6 = k6 + k7;
615             /* MUL2C k2,k5,724,FDCT_SHIFT */
616             /* k0, k1 become scratch */
617             /* assume FAST MULTIPLY */
618             k1 = mla724(k12, k5, round);
619 
620             k5 = k1 >> FDCT_SHIFT;
621             /*****************/
622             /********/
623             /* fdct_3 */
624             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
625             /* assume FAST MULTIPLY */
626             /* k0, k1 are output */
627             k1 = mla392(k4, k14, round);
628             k1 = mla946(k6, k14, k1);
629 
630             k6 = k1 >> FDCT_SHIFT;
631             /***********************/
632             k5 = k5 + k7;
633             k5 = k5 + k6;
634             /********/
635             out[8] = k5 ;       /* row 1 */
636             out++;
637         }
638         while ((uintptr_t)out < (uintptr_t)dst) ;
639 
640         return ;
641     }
642 
643     /**************************************************************************/
644     /*  Function:   BlockDCT_AANIntra
645         Date:       8/9/01
646         Input:      rec
647         Output:     out[64] ==> next block
648         Purpose:    Input directly from rec frame.
649         Modified:
650     **************************************************************************/
651 
BlockDCT_AANIntra(Short * out,UChar * cur,UChar * dummy2,Int width)652     Void BlockDCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width)
653     {
654         Short *dst;
655         Int k0, k1, k2, k3, k4, k5, k6, k7;
656         Int round;
657         Int k12 = 0x022A02D4;
658         Int k14 = 0x0188053A;
659         Int abs_sum;
660         Int mask;
661         Int *curInt, tmp;
662         Int ColTh;
663 
664         OSCL_UNUSED_ARG(dummy2);
665 
666         dst = out + 64 ;
667         ColTh = *dst;
668         out += 128;
669         round = 1 << (FDCT_SHIFT - 1);
670 
671         do  /* fdct_nextrow */
672         {
673             mask = 0x1FE;
674             curInt = (Int*) cur;
675             tmp = curInt[0];    /* contains 4 pixels */
676             k0 = mask & (tmp << 1);
677             k1 = mask & (tmp >> 7);
678             k2 = mask & (tmp >> 15);
679             k3 = mask & (tmp >> 23);
680             tmp = curInt[1];    /* another 4 pixels */
681             k4 =  mask & (tmp << 1);
682             k5 =  mask & (tmp >> 7);
683             k6 =  mask & (tmp >> 15);
684             k7 =  mask & (tmp >> 23);
685             cur += width;
686             /* fdct_1 */
687             k0 = k0 + k7;
688             k7 = k0 - (k7 << 1);
689             k1 = k1 + k6;
690             k6 = k1 - (k6 << 1);
691             k2 = k2 + k5;
692             k5 = k2 - (k5 << 1);
693             k3 = k3 + k4;
694             k4 = k3 - (k4 << 1);
695 
696             k0 = k0 + k3;
697             k3 = k0 - (k3 << 1);
698             k1 = k1 + k2;
699             k2 = k1 - (k2 << 1);
700 
701             k0 = k0 + k1;
702             k1 = k0 - (k1 << 1);
703             /**********/
704             dst[0] = k0;
705             dst[4] = k1; /* col. 4 */
706             /* fdct_2 */
707             k4 = k4 + k5;
708             k5 = k5 + k6;
709             k6 = k6 + k7;
710             k2 = k2 + k3;
711             /* MUL2C k2,k5,724,FDCT_SHIFT */
712             /* k0, k1 become scratch */
713             /* assume FAST MULTIPLY */
714             k1 = mla724(k12, k5, round);
715             k0 = mla724(k12, k2, round);
716 
717             k5 = k1 >> FDCT_SHIFT;
718             k2 = k0 >> FDCT_SHIFT;
719             /*****************/
720             k2 = k2 + k3;
721             k3 = (k3 << 1) - k2;
722             /********/
723             dst[2] = k2;        /* col. 2 */
724             k3 <<= 1;       /* scale up col. 6 */
725             dst[6] = k3; /* col. 6 */
726             /* fdct_3 */
727             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
728             /* assume FAST MULTIPLY */
729             /* k0, k1 are output */
730             k0 = k4 - k6;
731 
732             k1 = mla392(k0, k14, round);
733             k0 = mla554(k4, k12, k1);
734             k1 = mla1338(k6, k14, k1);
735 
736             k4 = k0 >> FDCT_SHIFT;
737             k6 = k1 >> FDCT_SHIFT;
738             /***********************/
739             k5 = k5 + k7;
740             k7 = (k7 << 1) - k5;
741             k4 = k4 + k7;
742             k7 = (k7 << 1) - k4;
743             k5 = k5 + k6;
744             k4 <<= 1;       /* scale up col.5 */
745             k6 = k5 - (k6 << 1);
746             /********/
747             dst[5] = k4;    /* col. 5 */
748             k6 <<= 2;       /* scale up col. 7 */
749             dst[1] = k5;    /* col. 1 */
750             dst[7] = k6;    /* col. 7 */
751             dst[3] = k7;    /* col. 3 */
752             dst += 8;
753         }
754         while (dst < out);
755 
756         out -= 64;
757         dst = out + 8;
758 
759         /*  Vertical Block Loop  */
760         do  /* Vertical 8xDCT loop */
761         {
762             k0 = out[0];
763             k1 = out[8];
764             k2 = out[16];
765             k3 = out[24];
766             k4 = out[32];
767             k5 = out[40];
768             k6 = out[48];
769             k7 = out[56];
770             /* deadzone thresholding for column */
771 
772             abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
773 
774             if (abs_sum < ColTh)
775             {
776                 out[0] = 0x7fff;
777                 out++;
778                 continue;
779             }
780 
781             /* fdct_1 */
782             k0 = k0 + k7;
783             k7 = k0 - (k7 << 1);
784             k1 = k1 + k6;
785             k6 = k1 - (k6 << 1);
786             k2 = k2 + k5;
787             k5 = k2 - (k5 << 1);
788             k3 = k3 + k4;
789             k4 = k3 - (k4 << 1);
790 
791             k0 = k0 + k3;
792             k3 = k0 - (k3 << 1);
793             k1 = k1 + k2;
794             k2 = k1 - (k2 << 1);
795 
796             k0 = k0 + k1;
797             k1 = k0 - (k1 << 1);
798             /**********/
799             out[32] = k1; /* row 4 */
800             out[0] = k0; /* row 0 */
801             /* fdct_2 */
802             k4 = k4 + k5;
803             k5 = k5 + k6;
804             k6 = k6 + k7;
805             k2 = k2 + k3;
806             /* MUL2C k2,k5,724,FDCT_SHIFT */
807             /* k0, k1 become scratch */
808             /* assume FAST MULTIPLY */
809             k1 = mla724(k12, k5, round);
810             k0 = mla724(k12, k2, round);
811 
812             k5 = k1 >> FDCT_SHIFT;
813             k2 = k0 >> FDCT_SHIFT;
814             /*****************/
815             k2 = k2 + k3;
816             k3 = (k3 << 1) - k2;
817             k3 <<= 1;       /* scale up col. 6 */
818             /********/
819             out[48] = k3;   /* row 6 */
820             out[16] = k2;   /* row 2 */
821             /* fdct_3 */
822             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
823             /* assume FAST MULTIPLY */
824             /* k0, k1 are output */
825             k0 = k4 - k6;
826 
827             k1 = mla392(k0, k14, round);
828             k0 = mla554(k4, k12, k1);
829             k1 = mla1338(k6, k14, k1);
830 
831             k4 = k0 >> FDCT_SHIFT;
832             k6 = k1 >> FDCT_SHIFT;
833             /***********************/
834             k5 = k5 + k7;
835             k7 = (k7 << 1) - k5;
836             k4 = k4 + k7;
837             k7 = (k7 << 1) - k4;
838             k5 = k5 + k6;
839             k4 <<= 1;       /* scale up col. 5 */
840             k6 = k5 - (k6 << 1);
841             /********/
842             out[24] = k7 ;    /* row 3 */
843             k6 <<= 2;       /* scale up col. 7 */
844             out[56] = k6 ;   /* row 7 */
845             out[8] = k5 ;    /* row 1 */
846             out[40] = k4 ;   /* row 5 */
847             out++;
848         }
849         while ((uintptr_t)out < (uintptr_t)dst) ;
850 
851         return ;
852     }
853 
854     /**************************************************************************/
855     /*  Function:   Block4x4DCT_AANIntra
856         Date:       8/9/01
857         Input:      prev
858         Output:     out[64] ==> next block
859         Purpose:    Input directly from prev frame. output 4x4 DCT
860         Modified:
861     **************************************************************************/
862 
Block4x4DCT_AANIntra(Short * out,UChar * cur,UChar * dummy2,Int width)863     Void Block4x4DCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width)
864     {
865         Short *dst;
866         Int k0, k1, k2, k3, k4, k5, k6, k7;
867         Int round;
868         Int k12 = 0x022A02D4;
869         Int k14 = 0x0188053A;
870         Int mask;
871         Int *curInt, tmp;
872         Int abs_sum;
873         Int ColTh;
874 
875         OSCL_UNUSED_ARG(dummy2);
876 
877         dst = out + 64 ;
878         ColTh = *dst;
879         out += 128;
880         round = 1 << (FDCT_SHIFT - 1);
881 
882         do  /* fdct_nextrow */
883         {
884             mask = 0x1FE;
885             curInt = (Int*) cur;
886             tmp = curInt[0];    /* contains 4 pixels */
887             k0 = mask & (tmp << 1);
888             k1 = mask & (tmp >> 7);
889             k2 = mask & (tmp >> 15);
890             k3 = mask & (tmp >> 23);
891             tmp = curInt[1];    /* another 4 pixels */
892             k4 =  mask & (tmp << 1);
893             k5 =  mask & (tmp >> 7);
894             k6 =  mask & (tmp >> 15);
895             k7 =  mask & (tmp >> 23);
896             cur += width;
897             /* fdct_1 */
898             k0 = k0 + k7;
899             k7 = k0 - (k7 << 1);
900             k1 = k1 + k6;
901             k6 = k1 - (k6 << 1);
902             k2 = k2 + k5;
903             k5 = k2 - (k5 << 1);
904             k3 = k3 + k4;
905             k4 = k3 - (k4 << 1);
906 
907             k0 = k0 + k3;
908             k3 = k0 - (k3 << 1);
909             k1 = k1 + k2;
910             k2 = k1 - (k2 << 1);
911 
912             k0 = k0 + k1;
913             /**********/
914             dst[0] = k0;
915             /* fdct_2 */
916             k4 = k4 + k5;
917             k5 = k5 + k6;
918             k6 = k6 + k7;
919             k2 = k2 + k3;
920             /* MUL2C k2,k5,724,FDCT_SHIFT */
921             /* k0, k1 become scratch */
922             /* assume FAST MULTIPLY */
923             k1 = mla724(k12, k5, round);
924             k0 = mla724(k12, k2, round);
925 
926             k5 = k1 >> FDCT_SHIFT;
927             k2 = k0 >> FDCT_SHIFT;
928             /*****************/
929             k2 = k2 + k3;
930             /********/
931             dst[2] = k2;        /* col. 2 */
932             /* fdct_3 */
933             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
934             /* assume FAST MULTIPLY */
935             /* k0, k1 are output */
936             k0 = k4 - k6;
937 
938             k1 = mla392(k0, k14, round);
939             k0 = mla554(k4, k12, k1);
940             k1 = mla1338(k6, k14, k1);
941 
942             k4 = k0 >> FDCT_SHIFT;
943             k6 = k1 >> FDCT_SHIFT;
944             /***********************/
945             k5 = k5 + k7;
946             k7 = (k7 << 1) - k5;
947             k7 = k7 - k4;
948             k5 = k5 + k6;
949             /********/
950             dst[1] = k5;        /* col. 1 */
951             dst[3] = k7;        /* col. 3 */
952             dst += 8;
953         }
954         while (dst < out);
955 
956         out -= 64;
957         dst = out + 4;
958 
959         /*  Vertical Block Loop  */
960         do  /* Vertical 8xDCT loop */
961         {
962             k0 = out[0];
963             k1 = out[8];
964             k2 = out[16];
965             k3 = out[24];
966             k4 = out[32];
967             k5 = out[40];
968             k6 = out[48];
969             k7 = out[56];
970 
971             abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
972 
973             if (abs_sum < ColTh)
974             {
975                 out[0] = 0x7fff;
976                 out++;
977                 continue;
978             }
979             /* fdct_1 */
980             k0 = k0 + k7;
981             k7 = k0 - (k7 << 1);
982             k1 = k1 + k6;
983             k6 = k1 - (k6 << 1);
984             k2 = k2 + k5;
985             k5 = k2 - (k5 << 1);
986             k3 = k3 + k4;
987             k4 = k3 - (k4 << 1);
988 
989             k0 = k0 + k3;
990             k3 = k0 - (k3 << 1);
991             k1 = k1 + k2;
992             k2 = k1 - (k2 << 1);
993 
994             k0 = k0 + k1;
995             /**********/
996             out[0] = k0;   /* row 0 */
997             /* fdct_2 */
998             k4 = k4 + k5;
999             k5 = k5 + k6;
1000             k6 = k6 + k7;
1001             k2 = k2 + k3;
1002             /* MUL2C k2,k5,724,FDCT_SHIFT */
1003             /* k0, k1 become scratch */
1004             /* assume FAST MULTIPLY */
1005             k1 = mla724(k12, k5, round);
1006             k0 = mla724(k12, k2, round);
1007 
1008             k5 = k1 >> FDCT_SHIFT;
1009             k2 = k0 >> FDCT_SHIFT;
1010             /*****************/
1011             k2 = k2 + k3;
1012             /********/
1013             out[16] = k2;           /* row 2 */
1014             /* fdct_3 */
1015             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
1016             /* assume FAST MULTIPLY */
1017             /* k0, k1 are output */
1018             k0 = k4 - k6;
1019 
1020             k1 = mla392(k0, k14, round);
1021             k0 = mla554(k4, k12, k1);
1022             k1 = mla1338(k6, k14, k1);
1023 
1024             k4 = k0 >> FDCT_SHIFT;
1025             k6 = k1 >> FDCT_SHIFT;
1026             /***********************/
1027             k5 = k5 + k7;
1028             k7 = (k7 << 1) - k5;
1029             k7 = k7 - k4 ;
1030             k5 = k5 + k6;
1031             /********/
1032             out[24] = k7 ;      /* row 3 */
1033             out[8] = k5 ;       /* row 1 */
1034             out++;
1035         }
1036         while ((uintptr_t)out < (uintptr_t)dst) ;
1037 
1038         return ;
1039     }
1040 
1041     /**************************************************************************/
    /*  Function:   Block2x2DCT_AANIntra
        Date:       8/9/01
        Input:      cur (source pixels, consecutive rows are width bytes apart)
        Output:     out[64] ==> next block
        Purpose:    Read pixels directly from cur (no prediction is subtracted)
                    and output only the 2x2 lowest-frequency DCT coefficients
        Modified:
    **************************************************************************/
1049 
Block2x2DCT_AANIntra(Short * out,UChar * cur,UChar * dummy2,Int width)1050     Void Block2x2DCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width)
1051     {
1052         Short *dst;
1053         Int k0, k1, k2, k3, k4, k5, k6, k7;
1054         Int round;
1055         Int k12 = 0x022A02D4;
1056         Int k14 = 0x018803B2;
1057         Int mask;
1058         Int *curInt, tmp;
1059         Int abs_sum;
1060         Int ColTh;
1061 
1062         OSCL_UNUSED_ARG(dummy2);
1063 
1064         dst = out + 64 ;
1065         ColTh = *dst;
1066         out += 128;
1067         round = 1 << (FDCT_SHIFT - 1);
1068 
1069         do  /* fdct_nextrow */
1070         {
1071             mask = 0x1FE;
1072             curInt = (Int*) cur;
1073             tmp = curInt[0];    /* contains 4 pixels */
1074             k0 = mask & (tmp << 1);
1075             k1 = mask & (tmp >> 7);
1076             k2 = mask & (tmp >> 15);
1077             k3 = mask & (tmp >> 23);
1078             tmp = curInt[1];    /* another 4 pixels */
1079             k4 =  mask & (tmp << 1);
1080             k5 =  mask & (tmp >> 7);
1081             k6 =  mask & (tmp >> 15);
1082             k7 =  mask & (tmp >> 23);
1083             cur += width;
1084 
1085             /* fdct_1 */
1086             k0 = k0 + k7;
1087             k7 = k0 - (k7 << 1);
1088             k1 = k1 + k6;
1089             k6 = k1 - (k6 << 1);
1090             k2 = k2 + k5;
1091             k5 = k2 - (k5 << 1);
1092             k3 = k3 + k4;
1093             k4 = k3 - (k4 << 1);
1094 
1095             k0 = k0 + k3;
1096             k3 = k0 - (k3 << 1);
1097             k1 = k1 + k2;
1098             k2 = k1 - (k2 << 1);
1099 
1100             k0 = k0 + k1;
1101             /**********/
1102             dst[0] = k0;
1103             /* fdct_2 */
1104             k4 = k4 + k5;
1105             k5 = k5 + k6;
1106             k6 = k6 + k7;
1107             /* MUL2C k2,k5,724,FDCT_SHIFT */
1108             /* k0, k1 become scratch */
1109             /* assume FAST MULTIPLY */
1110             k1 = mla724(k12, k5, round);
1111 
1112             k5 = k1 >> FDCT_SHIFT;
1113             /*****************/
1114             /********/
1115             /* fdct_3 */
1116             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
1117             /* assume FAST MULTIPLY */
1118             /* k0, k1 are output */
1119             k1 = mla392(k4, k14, round);
1120             k1 = mla946(k6, k14, k1);
1121 
1122             k6 = k1 >> FDCT_SHIFT;
1123             /***********************/
1124             k5 = k5 + k7;
1125             k5 = k5 + k6;
1126             /********/
1127             dst[1] = k5;
1128             dst += 8;
1129         }
1130         while (dst < out);
1131         out -= 64;
1132         dst = out + 2;
1133         /*  Vertical Block Loop  */
1134         do  /* Vertical 8xDCT loop */
1135         {
1136             k0 = out[0];
1137             k1 = out[8];
1138             k2 = out[16];
1139             k3 = out[24];
1140             k4 = out[32];
1141             k5 = out[40];
1142             k6 = out[48];
1143             k7 = out[56];
1144 
1145             abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
1146 
1147             if (abs_sum < ColTh)
1148             {
1149                 out[0] = 0x7fff;
1150                 out++;
1151                 continue;
1152             }
1153             /* fdct_1 */
1154             k0 = k0 + k7;
1155             k7 = k0 - (k7 << 1);
1156             k1 = k1 + k6;
1157             k6 = k1 - (k6 << 1);
1158             k2 = k2 + k5;
1159             k5 = k2 - (k5 << 1);
1160             k3 = k3 + k4;
1161             k4 = k3 - (k4 << 1);
1162 
1163             k0 = k0 + k3;
1164             k3 = k0 - (k3 << 1);
1165             k1 = k1 + k2;
1166             k2 = k1 - (k2 << 1);
1167 
1168             k0 = k0 + k1;
1169             /**********/
1170             out[0] = k0;        /* row 0 */
1171             /* fdct_2 */
1172             k4 = k4 + k5;
1173             k5 = k5 + k6;
1174             k6 = k6 + k7;
1175             /* MUL2C k2,k5,724,FDCT_SHIFT */
1176             /* k0, k1 become scratch */
1177             /* assume FAST MULTIPLY */
1178             k1 = mla724(k12, k5, round);
1179 
1180             k5 = k1 >> FDCT_SHIFT;
1181             /*****************/
1182             /********/
1183             /* fdct_3 */
1184             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
1185             /* assume FAST MULTIPLY */
1186             /* k0, k1 are output */
1187             k1 = mla392(k4, k14, round);
1188             k1 = mla946(k6, k14, k1);
1189 
1190             k6 = k1 >> FDCT_SHIFT;
1191             /***********************/
1192             k5 = k5 + k7;
1193             k5 = k5 + k6;
1194             /********/
1195             out[8] = k5 ;       /* row 1 */
1196             out++;
1197         }
1198         while ((uintptr_t)out < (uintptr_t)dst) ;
1199 
1200         return ;
1201     }
1202     /**************************************************************************/
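    /*  Function:   Block1x1DCTwSub
        Date:       8/9/01
        Input:      cur (current pixels, consecutive rows are width bytes apart),
                    pred (prediction pixels, consecutive rows are 16 bytes apart)
        Output:     out[0] = DC value, out[1..7] = 0
        Purpose:    Compute DC value only, from the difference cur - pred
        Modified:
    **************************************************************************/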
Block1x1DCTwSub(Short * out,UChar * cur,UChar * pred,Int width)1210     void Block1x1DCTwSub(Short *out, UChar *cur, UChar *pred, Int width)
1211     {
1212         UChar *end;
1213         Int temp = 0;
1214         Int offset2;
1215 
1216         offset2 = width - 8;
1217         end = pred + (16 << 3);
1218         do
1219         {
1220             temp += (*cur++ - *pred++);
1221             temp += (*cur++ - *pred++);
1222             temp += (*cur++ - *pred++);
1223             temp += (*cur++ - *pred++);
1224             temp += (*cur++ - *pred++);
1225             temp += (*cur++ - *pred++);
1226             temp += (*cur++ - *pred++);
1227             temp += (*cur++ - *pred++);
1228             cur += offset2;
1229             pred += 8;
1230         }
1231         while (pred < end) ;
1232 
1233         out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = 0;
1234         out[0] = temp >> 3;
1235 
1236         return ;
1237     }
1238 
1239     /**************************************************************************/
    /*  Function:   Block1x1DCTIntra
        Date:       8/9/01
        Input:      cur (source pixels, consecutive rows are width bytes apart)
        Output:     out[0] = DC value, out[1..7] = 0
        Purpose:    Compute DC value only
        Modified:
    **************************************************************************/
Block1x1DCTIntra(Short * out,UChar * cur,UChar * dummy2,Int width)1247     void Block1x1DCTIntra(Short *out, UChar *cur, UChar *dummy2, Int width)
1248     {
1249         UChar *end;
1250         Int temp = 0;
1251         ULong word;
1252 
1253         OSCL_UNUSED_ARG(dummy2);
1254 
1255         end = cur + (width << 3);
1256         do
1257         {
1258             word = *((ULong*)cur);
1259             temp += (word >> 24);
1260             temp += ((word >> 16) & 0xFF);
1261             temp += ((word >> 8) & 0xFF);
1262             temp += (word & 0xFF);
1263 
1264             word = *((ULong*)(cur + 4));
1265             temp += (word >> 24);
1266             temp += ((word >> 16) & 0xFF);
1267             temp += ((word >> 8) & 0xFF);
1268             temp += (word & 0xFF);
1269 
1270             cur += width;
1271         }
1272         while (cur < end) ;
1273 
1274         out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = 0;
1275         out[0] = temp >> 3;
1276 
1277         return ;
1278     }
1279 
1280 #ifdef __cplusplus
1281 }
1282 #endif
1283 
1284