1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 #include "avcenc_lib.h"
19 
20 /* subtract with the prediction and do transformation */
trans(uint8 * cur,int pitch,uint8 * predBlock,int16 * dataBlock)21 void trans(uint8 *cur, int pitch, uint8 *predBlock, int16 *dataBlock)
22 {
23     int16 *ptr = dataBlock;
24     int r0, r1, r2, r3, j;
25     int curpitch = (uint)pitch >> 16;
26     int predpitch = (pitch & 0xFFFF);
27 
28     /* horizontal */
29     j = 4;
30     while (j > 0)
31     {
32         /* calculate the residue first */
33         r0 = cur[0] - predBlock[0];
34         r1 = cur[1] - predBlock[1];
35         r2 = cur[2] - predBlock[2];
36         r3 = cur[3] - predBlock[3];
37 
38         r0 += r3;           //ptr[0] + ptr[3];
39         r3 = r0 - (r3 << 1);    //ptr[0] - ptr[3];
40         r1 += r2;           //ptr[1] + ptr[2];
41         r2 = r1 - (r2 << 1);    //ptr[1] - ptr[2];
42 
43         ptr[0] = r0 + r1;
44         ptr[2] = r0 - r1;
45         ptr[1] = (r3 << 1) + r2;
46         ptr[3] = r3 - (r2 << 1);
47 
48         ptr += 16;
49         predBlock += predpitch;
50         cur += curpitch;
51         j--;
52     }
53     /* vertical */
54     ptr = dataBlock;
55     j = 4;
56     while (j > 0)
57     {
58         r0 = ptr[0] + ptr[48];
59         r3 = ptr[0] - ptr[48];
60         r1 = ptr[16] + ptr[32];
61         r2 = ptr[16] - ptr[32];
62 
63         ptr[0] = r0 + r1;
64         ptr[32] = r0 - r1;
65         ptr[16] = (r3 << 1) + r2;
66         ptr[48] = r3 - (r2 << 1);
67 
68         ptr++;
69         j--;
70     }
71 
72     return ;
73 }
74 
75 
76 /* do residue transform quant invquant, invtrans and write output out */
dct_luma(AVCEncObject * encvid,int blkidx,uint8 * cur,uint8 * org,int * coef_cost)77 int dct_luma(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org, int *coef_cost)
78 {
79     AVCCommonObj *video = encvid->common;
80     int org_pitch = encvid->currInput->pitch;
81     int pitch = video->currPic->pitch;
82     int16 *coef = video->block;
83     uint8 *pred = video->pred_block; // size 16 for a 4x4 block
84     int pred_pitch = video->pred_pitch;
85     int r0, r1, r2, r3, j, k, idx;
86     int *level, *run;
87     int Qq, Rq, q_bits, qp_const, quant;
88     int data, lev, zero_run;
89     int numcoeff;
90 
91     coef += ((blkidx & 0x3) << 2) + ((blkidx >> 2) << 6); /* point to the 4x4 block */
92 
93     /* first take a 4x4 transform */
94     /* horizontal */
95     j = 4;
96     while (j > 0)
97     {
98         /* calculate the residue first */
99         r0 = org[0] - pred[0];   /* OPTIMIZEABLE */
100         r1 = org[1] - pred[1];
101         r2 = org[2] - pred[2];
102         r3 = org[3] - pred[3];
103 
104         r0 += r3;           //ptr[0] + ptr[3];
105         r3 = r0 - (r3 << 1);    //ptr[0] - ptr[3];
106         r1 += r2;           //ptr[1] + ptr[2];
107         r2 = r1 - (r2 << 1);    //ptr[1] - ptr[2];
108 
109         coef[0] = r0 + r1;
110         coef[2] = r0 - r1;
111         coef[1] = (r3 << 1) + r2;
112         coef[3] = r3 - (r2 << 1);
113 
114         coef += 16;
115         org += org_pitch;
116         pred += pred_pitch;
117         j--;
118     }
119     /* vertical */
120     coef -= 64;
121     pred -= (pred_pitch << 2);
122     j = 4;
123     while (j > 0)   /* OPTIMIZABLE */
124     {
125         r0 = coef[0] + coef[48];
126         r3 = coef[0] - coef[48];
127         r1 = coef[16] + coef[32];
128         r2 = coef[16] - coef[32];
129 
130         coef[0] = r0 + r1;
131         coef[32] = r0 - r1;
132         coef[16] = (r3 << 1) + r2;
133         coef[48] = r3 - (r2 << 1);
134 
135         coef++;
136         j--;
137     }
138 
139     coef -= 4;
140 
141     /* quant */
142     level = encvid->level[ras2dec[blkidx]];
143     run = encvid->run[ras2dec[blkidx]];
144 
145     Rq = video->QPy_mod_6;
146     Qq = video->QPy_div_6;
147     qp_const = encvid->qp_const;
148     q_bits = 15 + Qq;
149 
150     zero_run = 0;
151     numcoeff = 0;
152     for (k = 0; k < 16; k++)
153     {
154         idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */
155         data = coef[idx];
156         quant = quant_coef[Rq][k];
157         if (data > 0)
158         {
159             lev = data * quant + qp_const;
160         }
161         else
162         {
163             lev = -data * quant + qp_const;
164         }
165         lev >>= q_bits;
166         if (lev)
167         {
168             *coef_cost += ((lev > 1) ? MAX_VALUE : COEFF_COST[DISABLE_THRESHOLDING][zero_run]);
169 
170             /* dequant */
171             quant = dequant_coefres[Rq][k];
172             if (data > 0)
173             {
174                 level[numcoeff] = lev;
175                 coef[idx] = (lev * quant) << Qq;
176             }
177             else
178             {
179                 level[numcoeff] = -lev;
180                 coef[idx] = (-lev * quant) << Qq;
181             }
182             run[numcoeff++] = zero_run;
183             zero_run = 0;
184         }
185         else
186         {
187             zero_run++;
188             coef[idx] = 0;
189         }
190     }
191 
192     if (video->currMB->mb_intra) // only do inverse transform with intra block
193     {
194         if (numcoeff) /* then do inverse transform */
195         {
196             for (j = 4; j > 0; j--) /* horizontal */
197             {
198                 r0 = coef[0] + coef[2];
199                 r1 = coef[0] - coef[2];
200                 r2 = (coef[1] >> 1) - coef[3];
201                 r3 = coef[1] + (coef[3] >> 1);
202 
203                 coef[0] = r0 + r3;
204                 coef[1] = r1 + r2;
205                 coef[2] = r1 - r2;
206                 coef[3] = r0 - r3;
207 
208                 coef += 16;
209             }
210 
211             coef -= 64;
212             for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */
213             {
214                 r0 = coef[0] + coef[32];
215                 r1 = coef[0] - coef[32];
216                 r2 = (coef[16] >> 1) - coef[48];
217                 r3 = coef[16] + (coef[48] >> 1);
218                 r0 += r3;
219                 r3 = (r0 - (r3 << 1)); /* r0-r3 */
220                 r1 += r2;
221                 r2 = (r1 - (r2 << 1)); /* r1-r2 */
222                 r0 += 32;
223                 r1 += 32;
224                 r2 += 32;
225                 r3 += 32;
226 
227                 r0 = pred[0] + (r0 >> 6);
228                 if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
229                 r1 = *(pred += pred_pitch) + (r1 >> 6);
230                 if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
231                 r2 = *(pred += pred_pitch) + (r2 >> 6);
232                 if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
233                 r3 = pred[pred_pitch] + (r3 >> 6);
234                 if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
235 
236                 *cur = r0;
237                 *(cur += pitch) = r1;
238                 *(cur += pitch) = r2;
239                 cur[pitch] = r3;
240                 cur -= (pitch << 1);
241                 cur++;
242                 pred -= (pred_pitch << 1);
243                 pred++;
244                 coef++;
245             }
246         }
247         else  // copy from pred to cur
248         {
249             *((uint32*)cur) = *((uint32*)pred);
250             *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch));
251             *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch));
252             *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch));
253         }
254     }
255 
256     return numcoeff;
257 }
258 
259 
MBInterIdct(AVCCommonObj * video,uint8 * curL,AVCMacroblock * currMB,int picPitch)260 void MBInterIdct(AVCCommonObj *video, uint8 *curL, AVCMacroblock *currMB, int picPitch)
261 {
262     int16 *coef, *coef8 = video->block;
263     uint8 *cur;  // the same as curL
264     int b8, b4;
265     int r0, r1, r2, r3, j, blkidx;
266 
267     for (b8 = 0; b8 < 4; b8++)
268     {
269         cur = curL;
270         coef = coef8;
271 
272         if (currMB->CBP&(1 << b8))
273         {
274             for (b4 = 0; b4 < 4; b4++)
275             {
276                 blkidx = blkIdx2blkXY[b8][b4];
277                 /* do IDCT */
278                 if (currMB->nz_coeff[blkidx])
279                 {
280                     for (j = 4; j > 0; j--) /* horizontal */
281                     {
282                         r0 = coef[0] + coef[2];
283                         r1 = coef[0] - coef[2];
284                         r2 = (coef[1] >> 1) - coef[3];
285                         r3 = coef[1] + (coef[3] >> 1);
286 
287                         coef[0] = r0 + r3;
288                         coef[1] = r1 + r2;
289                         coef[2] = r1 - r2;
290                         coef[3] = r0 - r3;
291 
292                         coef += 16;
293                     }
294 
295                     coef -= 64;
296                     for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */
297                     {
298                         r0 = coef[0] + coef[32];
299                         r1 = coef[0] - coef[32];
300                         r2 = (coef[16] >> 1) - coef[48];
301                         r3 = coef[16] + (coef[48] >> 1);
302                         r0 += r3;
303                         r3 = (r0 - (r3 << 1)); /* r0-r3 */
304                         r1 += r2;
305                         r2 = (r1 - (r2 << 1)); /* r1-r2 */
306                         r0 += 32;
307                         r1 += 32;
308                         r2 += 32;
309                         r3 += 32;
310 
311                         r0 = cur[0] + (r0 >> 6);
312                         if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
313                         *cur = r0;
314                         r1 = *(cur += picPitch) + (r1 >> 6);
315                         if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
316                         *cur = r1;
317                         r2 = *(cur += picPitch) + (r2 >> 6);
318                         if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
319                         *cur = r2;
320                         r3 = cur[picPitch] + (r3 >> 6);
321                         if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
322                         cur[picPitch] = r3;
323 
324                         cur -= (picPitch << 1);
325                         cur++;
326                         coef++;
327                     }
328                     cur -= 4;
329                     coef -= 4;
330                 }
331                 if (b4&1)
332                 {
333                     cur += ((picPitch << 2) - 4);
334                     coef += 60;
335                 }
336                 else
337                 {
338                     cur += 4;
339                     coef += 4;
340                 }
341             }
342         }
343 
344         if (b8&1)
345         {
346             curL += ((picPitch << 3) - 8);
347             coef8 += 120;
348         }
349         else
350         {
351             curL += 8;
352             coef8 += 8;
353         }
354     }
355 
356     return ;
357 }
358 
359 /* performa dct, quant, iquant, idct for the entire MB */
dct_luma_16x16(AVCEncObject * encvid,uint8 * curL,uint8 * orgL)360 void dct_luma_16x16(AVCEncObject *encvid, uint8 *curL, uint8 *orgL)
361 {
362     AVCCommonObj *video = encvid->common;
363     int pitch = video->currPic->pitch;
364     int org_pitch = encvid->currInput->pitch;
365     AVCMacroblock *currMB = video->currMB;
366     int16 *coef = video->block;
367     uint8 *pred = encvid->pred_i16[currMB->i16Mode];
368     int blk_x, blk_y, j, k, idx, b8, b4;
369     int r0, r1, r2, r3, m0, m1, m2 , m3;
370     int data, lev;
371     int *level, *run, zero_run, ncoeff;
372     int Rq, Qq, quant, q_bits, qp_const;
373     int offset_cur[4], offset_pred[4], offset;
374 
375     /* horizontal */
376     for (j = 16; j > 0; j--)
377     {
378         for (blk_x = 4; blk_x > 0; blk_x--)
379         {
380             /* calculate the residue first */
381             r0 = *orgL++ - *pred++;
382             r1 = *orgL++ - *pred++;
383             r2 = *orgL++ - *pred++;
384             r3 = *orgL++ - *pred++;
385 
386             r0 += r3;           //ptr[0] + ptr[3];
387             r3 = r0 - (r3 << 1);    //ptr[0] - ptr[3];
388             r1 += r2;           //ptr[1] + ptr[2];
389             r2 = r1 - (r2 << 1);    //ptr[1] - ptr[2];
390 
391             *coef++ = r0 + r1;
392             *coef++ = (r3 << 1) + r2;
393             *coef++ = r0 - r1;
394             *coef++ = r3 - (r2 << 1);
395         }
396         orgL += (org_pitch - 16);
397     }
398     pred -= 256;
399     coef -= 256;
400     /* vertical */
401     for (blk_y = 4; blk_y > 0; blk_y--)
402     {
403         for (j = 16; j > 0; j--)
404         {
405             r0 = coef[0] + coef[48];
406             r3 = coef[0] - coef[48];
407             r1 = coef[16] + coef[32];
408             r2 = coef[16] - coef[32];
409 
410             coef[0] = r0 + r1;
411             coef[32] = r0 - r1;
412             coef[16] = (r3 << 1) + r2;
413             coef[48] = r3 - (r2 << 1);
414 
415             coef++;
416         }
417         coef += 48;
418     }
419 
420     /* then perform DC transform */
421     coef -= 256;
422     for (j = 4; j > 0; j--)
423     {
424         r0 = coef[0] + coef[12];
425         r3 = coef[0] - coef[12];
426         r1 = coef[4] + coef[8];
427         r2 = coef[4] - coef[8];
428 
429         coef[0] = r0 + r1;
430         coef[8] = r0 - r1;
431         coef[4] = r3 + r2;
432         coef[12] = r3 - r2;
433         coef += 64;
434     }
435     coef -= 256;
436     for (j = 4; j > 0; j--)
437     {
438         r0 = coef[0] + coef[192];
439         r3 = coef[0] - coef[192];
440         r1 = coef[64] + coef[128];
441         r2 = coef[64] - coef[128];
442 
443         coef[0] = (r0 + r1) >> 1;
444         coef[128] = (r0 - r1) >> 1;
445         coef[64] = (r3 + r2) >> 1;
446         coef[192] = (r3 - r2) >> 1;
447         coef += 4;
448     }
449 
450     coef -= 16;
451     // then quantize DC
452     level = encvid->leveldc;
453     run = encvid->rundc;
454 
455     Rq = video->QPy_mod_6;
456     Qq = video->QPy_div_6;
457     quant = quant_coef[Rq][0];
458     q_bits = 15 + Qq;
459     qp_const = encvid->qp_const;
460 
461     zero_run = 0;
462     ncoeff = 0;
463     for (k = 0; k < 16; k++) /* in zigzag scan order */
464     {
465         idx = ZIGZAG2RASTERDC[k];
466         data = coef[idx];
467         if (data > 0)   // quant
468         {
469             lev = data * quant + (qp_const << 1);
470         }
471         else
472         {
473             lev = -data * quant + (qp_const << 1);
474         }
475         lev >>= (q_bits + 1);
476         if (lev) // dequant
477         {
478             if (data > 0)
479             {
480                 level[ncoeff] = lev;
481                 coef[idx] = lev;
482             }
483             else
484             {
485                 level[ncoeff] = -lev;
486                 coef[idx] = -lev;
487             }
488             run[ncoeff++] = zero_run;
489             zero_run = 0;
490         }
491         else
492         {
493             zero_run++;
494             coef[idx] = 0;
495         }
496     }
497 
498     /* inverse transform DC */
499     encvid->numcoefdc = ncoeff;
500     if (ncoeff)
501     {
502         quant = dequant_coefres[Rq][0];
503 
504         for (j = 0; j < 4; j++)
505         {
506             m0 = coef[0] + coef[4];
507             m1 = coef[0] - coef[4];
508             m2 = coef[8] + coef[12];
509             m3 = coef[8] - coef[12];
510 
511 
512             coef[0] = m0 + m2;
513             coef[4] = m0 - m2;
514             coef[8] = m1 - m3;
515             coef[12] = m1 + m3;
516             coef += 64;
517         }
518 
519         coef -= 256;
520 
521         if (Qq >= 2)  /* this way should be faster than JM */
522         {           /* they use (((m4*scale)<<(QPy/6))+2)>>2 for both cases. */
523             Qq -= 2;
524             for (j = 0; j < 4; j++)
525             {
526                 m0 = coef[0] + coef[64];
527                 m1 = coef[0] - coef[64];
528                 m2 = coef[128] + coef[192];
529                 m3 = coef[128] - coef[192];
530 
531                 coef[0] = ((m0 + m2) * quant) << Qq;
532                 coef[64] = ((m0 - m2) * quant) << Qq;
533                 coef[128] = ((m1 - m3) * quant) << Qq;
534                 coef[192] = ((m1 + m3) * quant) << Qq;
535                 coef += 4;
536             }
537             Qq += 2; /* restore the value */
538         }
539         else
540         {
541             Qq = 2 - Qq;
542             offset = 1 << (Qq - 1);
543 
544             for (j = 0; j < 4; j++)
545             {
546                 m0 = coef[0] + coef[64];
547                 m1 = coef[0] - coef[64];
548                 m2 = coef[128] + coef[192];
549                 m3 = coef[128] - coef[192];
550 
551                 coef[0] = (((m0 + m2) * quant + offset) >> Qq);
552                 coef[64] = (((m0 - m2) * quant + offset) >> Qq);
553                 coef[128] = (((m1 - m3) * quant + offset) >> Qq);
554                 coef[192] = (((m1 + m3) * quant + offset) >> Qq);
555                 coef += 4;
556             }
557             Qq = 2 - Qq; /* restore the value */
558         }
559         coef -= 16; /* back to the origin */
560     }
561 
562     /* now zigzag scan ac coefs, quant, iquant and itrans */
563     run = encvid->run[0];
564     level = encvid->level[0];
565 
566     /* offset btw 4x4 block */
567     offset_cur[0] = 0;
568     offset_cur[1] = (pitch << 2) - 8;
569 
570     /* offset btw 8x8 block */
571     offset_cur[2] = 8 - (pitch << 3);
572     offset_cur[3] = -8;
573 
574     /* similarly for pred */
575     offset_pred[0] = 0;
576     offset_pred[1] = 56;
577     offset_pred[2] = -120;
578     offset_pred[3] = -8;
579 
580     currMB->CBP = 0;
581 
582     for (b8 = 0; b8 < 4; b8++)
583     {
584         for (b4 = 0; b4 < 4; b4++)
585         {
586 
587             zero_run = 0;
588             ncoeff = 0;
589 
590             for (k = 1; k < 16; k++)
591             {
592                 idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */
593                 data = coef[idx];
594                 quant = quant_coef[Rq][k];
595                 if (data > 0)
596                 {
597                     lev = data * quant + qp_const;
598                 }
599                 else
600                 {
601                     lev = -data * quant + qp_const;
602                 }
603                 lev >>= q_bits;
604                 if (lev)
605                 {   /* dequant */
606                     quant = dequant_coefres[Rq][k];
607                     if (data > 0)
608                     {
609                         level[ncoeff] = lev;
610                         coef[idx] = (lev * quant) << Qq;
611                     }
612                     else
613                     {
614                         level[ncoeff] = -lev;
615                         coef[idx] = (-lev * quant) << Qq;
616                     }
617                     run[ncoeff++] = zero_run;
618                     zero_run = 0;
619                 }
620                 else
621                 {
622                     zero_run++;
623                     coef[idx] = 0;
624                 }
625             }
626 
627             currMB->nz_coeff[blkIdx2blkXY[b8][b4]] = ncoeff; /* in raster scan !!! */
628             if (ncoeff)
629             {
630                 currMB->CBP |= (1 << b8);
631 
632                 // do inverse transform here
633                 for (j = 4; j > 0; j--)
634                 {
635                     r0 = coef[0] + coef[2];
636                     r1 = coef[0] - coef[2];
637                     r2 = (coef[1] >> 1) - coef[3];
638                     r3 = coef[1] + (coef[3] >> 1);
639 
640                     coef[0] = r0 + r3;
641                     coef[1] = r1 + r2;
642                     coef[2] = r1 - r2;
643                     coef[3] = r0 - r3;
644 
645                     coef += 16;
646                 }
647                 coef -= 64;
648                 for (j = 4; j > 0; j--)
649                 {
650                     r0 = coef[0] + coef[32];
651                     r1 = coef[0] - coef[32];
652                     r2 = (coef[16] >> 1) - coef[48];
653                     r3 = coef[16] + (coef[48] >> 1);
654 
655                     r0 += r3;
656                     r3 = (r0 - (r3 << 1)); /* r0-r3 */
657                     r1 += r2;
658                     r2 = (r1 - (r2 << 1)); /* r1-r2 */
659                     r0 += 32;
660                     r1 += 32;
661                     r2 += 32;
662                     r3 += 32;
663                     r0 = pred[0] + (r0 >> 6);
664                     if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
665                     r1 = pred[16] + (r1 >> 6);
666                     if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
667                     r2 = pred[32] + (r2 >> 6);
668                     if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
669                     r3 = pred[48] + (r3 >> 6);
670                     if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
671                     *curL = r0;
672                     *(curL += pitch) = r1;
673                     *(curL += pitch) = r2;
674                     curL[pitch] = r3;
675                     curL -= (pitch << 1);
676                     curL++;
677                     pred++;
678                     coef++;
679                 }
680             }
681             else  // do DC-only inverse
682             {
683                 m0 = coef[0] + 32;
684 
685                 for (j = 4; j > 0; j--)
686                 {
687                     r0 = pred[0] + (m0 >> 6);
688                     if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
689                     r1 = pred[16] + (m0 >> 6);
690                     if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
691                     r2 = pred[32] + (m0 >> 6);
692                     if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
693                     r3 = pred[48] + (m0 >> 6);
694                     if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
695                     *curL = r0;
696                     *(curL += pitch) = r1;
697                     *(curL += pitch) = r2;
698                     curL[pitch] = r3;
699                     curL -= (pitch << 1);
700                     curL++;
701                     pred++;
702                 }
703                 coef += 4;
704             }
705 
706             run += 16;  // follow coding order
707             level += 16;
708             curL += offset_cur[b4&1];
709             pred += offset_pred[b4&1];
710             coef += offset_pred[b4&1];
711         }
712 
713         curL += offset_cur[2 + (b8&1)];
714         pred += offset_pred[2 + (b8&1)];
715         coef += offset_pred[2 + (b8&1)];
716     }
717 
718     return ;
719 }
720 
721 
dct_chroma(AVCEncObject * encvid,uint8 * curC,uint8 * orgC,int cr)722 void dct_chroma(AVCEncObject *encvid, uint8 *curC, uint8 *orgC, int cr)
723 {
724     AVCCommonObj *video = encvid->common;
725     AVCMacroblock *currMB = video->currMB;
726     int org_pitch = (encvid->currInput->pitch) >> 1;
727     int pitch = (video->currPic->pitch) >> 1;
728     int pred_pitch = 16;
729     int16 *coef = video->block + 256;
730     uint8 *pred = video->pred_block;
731     int j, blk_x, blk_y, k, idx, b4;
732     int r0, r1, r2, r3, m0;
733     int Qq, Rq, qp_const, q_bits, quant;
734     int *level, *run, zero_run, ncoeff;
735     int data, lev;
736     int offset_cur[2], offset_pred[2], offset_coef[2];
737     uint8 nz_temp[4];
738     int  coeff_cost;
739 
740     if (cr)
741     {
742         coef += 8;
743         pred += 8;
744     }
745 
746     if (currMB->mb_intra == 0) // inter mode
747     {
748         pred = curC;
749         pred_pitch = pitch;
750     }
751 
752     /* do 4x4 transform */
753     /* horizontal */
754     for (j = 8; j > 0; j--)
755     {
756         for (blk_x = 2; blk_x > 0; blk_x--)
757         {
758             /* calculate the residue first */
759             r0 = *orgC++ - *pred++;
760             r1 = *orgC++ - *pred++;
761             r2 = *orgC++ - *pred++;
762             r3 = *orgC++ - *pred++;
763 
764             r0 += r3;           //ptr[0] + ptr[3];
765             r3 = r0 - (r3 << 1);    //ptr[0] - ptr[3];
766             r1 += r2;           //ptr[1] + ptr[2];
767             r2 = r1 - (r2 << 1);    //ptr[1] - ptr[2];
768 
769             *coef++ = r0 + r1;
770             *coef++ = (r3 << 1) + r2;
771             *coef++ = r0 - r1;
772             *coef++ = r3 - (r2 << 1);
773 
774         }
775         coef += 8; // coef pitch is 16
776         pred += (pred_pitch - 8); // pred_pitch is 16
777         orgC += (org_pitch - 8);
778     }
779     pred -= (pred_pitch << 3);
780     coef -= 128;
781     /* vertical */
782     for (blk_y = 2; blk_y > 0; blk_y--)
783     {
784         for (j = 8; j > 0; j--)
785         {
786             r0 = coef[0] + coef[48];
787             r3 = coef[0] - coef[48];
788             r1 = coef[16] + coef[32];
789             r2 = coef[16] - coef[32];
790 
791             coef[0] = r0 + r1;
792             coef[32] = r0 - r1;
793             coef[16] = (r3 << 1) + r2;
794             coef[48] = r3 - (r2 << 1);
795 
796             coef++;
797         }
798         coef += 56;
799     }
800     /* then perform DC transform */
801     coef -= 128;
802 
803     /* 2x2 transform of DC components*/
804     r0 = coef[0];
805     r1 = coef[4];
806     r2 = coef[64];
807     r3 = coef[68];
808 
809     coef[0] = r0 + r1 + r2 + r3;
810     coef[4] = r0 - r1 + r2 - r3;
811     coef[64] = r0 + r1 - r2 - r3;
812     coef[68] = r0 - r1 - r2 + r3;
813 
814     Qq    = video->QPc_div_6;
815     Rq    = video->QPc_mod_6;
816     quant = quant_coef[Rq][0];
817     q_bits    = 15 + Qq;
818     qp_const = encvid->qp_const_c;
819 
820     zero_run = 0;
821     ncoeff = 0;
822     run = encvid->runcdc + (cr << 2);
823     level = encvid->levelcdc + (cr << 2);
824 
825     /* in zigzag scan order */
826     for (k = 0; k < 4; k++)
827     {
828         idx = ((k >> 1) << 6) + ((k & 1) << 2);
829         data = coef[idx];
830         if (data > 0)
831         {
832             lev = data * quant + (qp_const << 1);
833         }
834         else
835         {
836             lev = -data * quant + (qp_const << 1);
837         }
838         lev >>= (q_bits + 1);
839         if (lev)
840         {
841             if (data > 0)
842             {
843                 level[ncoeff] = lev;
844                 coef[idx] = lev;
845             }
846             else
847             {
848                 level[ncoeff] = -lev;
849                 coef[idx] = -lev;
850             }
851             run[ncoeff++] = zero_run;
852             zero_run = 0;
853         }
854         else
855         {
856             zero_run++;
857             coef[idx] = 0;
858         }
859     }
860 
861     encvid->numcoefcdc[cr] = ncoeff;
862 
863     if (ncoeff)
864     {
865         currMB->CBP |= (1 << 4); // DC present
866         // do inverse transform
867         quant = dequant_coefres[Rq][0];
868 
869         r0 = coef[0] + coef[4];
870         r1 = coef[0] - coef[4];
871         r2 = coef[64] + coef[68];
872         r3 = coef[64] - coef[68];
873 
874         r0 += r2;
875         r2 = r0 - (r2 << 1);
876         r1 += r3;
877         r3 = r1 - (r3 << 1);
878 
879         if (Qq >= 1)
880         {
881             Qq -= 1;
882             coef[0] = (r0 * quant) << Qq;
883             coef[4] = (r1 * quant) << Qq;
884             coef[64] = (r2 * quant) << Qq;
885             coef[68] = (r3 * quant) << Qq;
886             Qq++;
887         }
888         else
889         {
890             coef[0] = (r0 * quant) >> 1;
891             coef[4] = (r1 * quant) >> 1;
892             coef[64] = (r2 * quant) >> 1;
893             coef[68] = (r3 * quant) >> 1;
894         }
895     }
896 
897     /* now do AC zigzag scan, quant, iquant and itrans */
898     if (cr)
899     {
900         run = encvid->run[20];
901         level = encvid->level[20];
902     }
903     else
904     {
905         run = encvid->run[16];
906         level = encvid->level[16];
907     }
908 
909     /* offset btw 4x4 block */
910     offset_cur[0] = 0;
911     offset_cur[1] = (pitch << 2) - 8;
912     offset_pred[0] = 0;
913     offset_pred[1] = (pred_pitch << 2) - 8;
914     offset_coef[0] = 0;
915     offset_coef[1] = 56;
916 
917     coeff_cost = 0;
918 
919     for (b4 = 0; b4 < 4; b4++)
920     {
921         zero_run = 0;
922         ncoeff = 0;
923         for (k = 1; k < 16; k++) /* in zigzag scan order */
924         {
925             idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */
926             data = coef[idx];
927             quant = quant_coef[Rq][k];
928             if (data > 0)
929             {
930                 lev = data * quant + qp_const;
931             }
932             else
933             {
934                 lev = -data * quant + qp_const;
935             }
936             lev >>= q_bits;
937             if (lev)
938             {
939                 /* for RD performance*/
940                 if (lev > 1)
941                     coeff_cost += MAX_VALUE;                // set high cost, shall not be discarded
942                 else
943                     coeff_cost += COEFF_COST[DISABLE_THRESHOLDING][zero_run];
944 
945                 /* dequant */
946                 quant = dequant_coefres[Rq][k];
947                 if (data > 0)
948                 {
949                     level[ncoeff] = lev;
950                     coef[idx] = (lev * quant) << Qq;
951                 }
952                 else
953                 {
954                     level[ncoeff] = -lev;
955                     coef[idx] = (-lev * quant) << Qq;
956                 }
957                 run[ncoeff++] = zero_run;
958                 zero_run = 0;
959             }
960             else
961             {
962                 zero_run++;
963                 coef[idx] = 0;
964             }
965         }
966 
967         nz_temp[b4] = ncoeff; // raster scan
968 
969         // just advance the pointers for now, do IDCT later
970         coef += 4;
971         run += 16;
972         level += 16;
973         coef += offset_coef[b4&1];
974     }
975 
976     /* rewind the pointers */
977     coef -= 128;
978 
979     if (coeff_cost < _CHROMA_COEFF_COST_)
980     {
981         /* if it's not efficient to encode any blocks.
982         Just do DC only */
983         /* We can reset level and run also, but setting nz to zero should be enough. */
984         currMB->nz_coeff[16+(cr<<1)] = 0;
985         currMB->nz_coeff[17+(cr<<1)] = 0;
986         currMB->nz_coeff[20+(cr<<1)] = 0;
987         currMB->nz_coeff[21+(cr<<1)] = 0;
988 
989         for (b4 = 0; b4 < 4; b4++)
990         {
991             // do DC-only inverse
992             m0 = coef[0] + 32;
993 
994             for (j = 4; j > 0; j--)
995             {
996                 r0 = pred[0] + (m0 >> 6);
997                 if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
998                 r1 = *(pred += pred_pitch) + (m0 >> 6);
999                 if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
1000                 r2 = pred[pred_pitch] + (m0 >> 6);
1001                 if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
1002                 r3 = pred[pred_pitch<<1] + (m0 >> 6);
1003                 if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
1004                 *curC = r0;
1005                 *(curC += pitch) = r1;
1006                 *(curC += pitch) = r2;
1007                 curC[pitch] = r3;
1008                 curC -= (pitch << 1);
1009                 curC++;
1010                 pred += (1 - pred_pitch);
1011             }
1012             coef += 4;
1013             curC += offset_cur[b4&1];
1014             pred += offset_pred[b4&1];
1015             coef += offset_coef[b4&1];
1016         }
1017     }
1018     else // not dropping anything, continue with the IDCT
1019     {
1020         for (b4 = 0; b4 < 4; b4++)
1021         {
1022             ncoeff = nz_temp[b4] ; // in raster scan
1023             currMB->nz_coeff[16+(b4&1)+(cr<<1)+((b4>>1)<<2)] = ncoeff; // in raster scan
1024 
1025             if (ncoeff) // do a check on the nonzero-coeff
1026             {
1027                 currMB->CBP |= (2 << 4);
1028 
1029                 // do inverse transform here
1030                 for (j = 4; j > 0; j--)
1031                 {
1032                     r0 = coef[0] + coef[2];
1033                     r1 = coef[0] - coef[2];
1034                     r2 = (coef[1] >> 1) - coef[3];
1035                     r3 = coef[1] + (coef[3] >> 1);
1036 
1037                     coef[0] = r0 + r3;
1038                     coef[1] = r1 + r2;
1039                     coef[2] = r1 - r2;
1040                     coef[3] = r0 - r3;
1041 
1042                     coef += 16;
1043                 }
1044                 coef -= 64;
1045                 for (j = 4; j > 0; j--)
1046                 {
1047                     r0 = coef[0] + coef[32];
1048                     r1 = coef[0] - coef[32];
1049                     r2 = (coef[16] >> 1) - coef[48];
1050                     r3 = coef[16] + (coef[48] >> 1);
1051 
1052                     r0 += r3;
1053                     r3 = (r0 - (r3 << 1)); /* r0-r3 */
1054                     r1 += r2;
1055                     r2 = (r1 - (r2 << 1)); /* r1-r2 */
1056                     r0 += 32;
1057                     r1 += 32;
1058                     r2 += 32;
1059                     r3 += 32;
1060                     r0 = pred[0] + (r0 >> 6);
1061                     if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
1062                     r1 = *(pred += pred_pitch) + (r1 >> 6);
1063                     if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
1064                     r2 = pred[pred_pitch] + (r2 >> 6);
1065                     if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
1066                     r3 = pred[pred_pitch<<1] + (r3 >> 6);
1067                     if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
1068                     *curC = r0;
1069                     *(curC += pitch) = r1;
1070                     *(curC += pitch) = r2;
1071                     curC[pitch] = r3;
1072                     curC -= (pitch << 1);
1073                     curC++;
1074                     pred += (1 - pred_pitch);
1075                     coef++;
1076                 }
1077             }
1078             else
1079             {
1080                 // do DC-only inverse
1081                 m0 = coef[0] + 32;
1082 
1083                 for (j = 4; j > 0; j--)
1084                 {
1085                     r0 = pred[0] + (m0 >> 6);
1086                     if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
1087                     r1 = *(pred += pred_pitch) + (m0 >> 6);
1088                     if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
1089                     r2 = pred[pred_pitch] + (m0 >> 6);
1090                     if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
1091                     r3 = pred[pred_pitch<<1] + (m0 >> 6);
1092                     if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
1093                     *curC = r0;
1094                     *(curC += pitch) = r1;
1095                     *(curC += pitch) = r2;
1096                     curC[pitch] = r3;
1097                     curC -= (pitch << 1);
1098                     curC++;
1099                     pred += (1 - pred_pitch);
1100                 }
1101                 coef += 4;
1102             }
1103             curC += offset_cur[b4&1];
1104             pred += offset_pred[b4&1];
1105             coef += offset_coef[b4&1];
1106         }
1107     }
1108 
1109     return ;
1110 }
1111 
1112 
1113 /* only DC transform */
/**
 * Transform and quantize the 16 DC terms of an Intra 16x16 macroblock.
 *
 * The DC terms are read from, and the results written back to, video->block
 * at offsets 64*i + 4*j (i, j = 0..3).  A two-pass add/subtract butterfly is
 * applied first (the second pass halves every result), then each term is
 * quantized with quant_coef[QPy_mod_6][0] and the intra rounding offset.
 * Non-zero levels and the count of zero terms preceding each of them are
 * stored in encvid->leveldc[] and encvid->rundc[], visiting the terms in the
 * order given by ZIGZAG2RASTERDC.
 *
 * @param encvid  encoder object; supplies the coefficient block, the luma QP
 *                factors and the level/run output arrays.
 * @return number of non-zero quantized DC levels written to level/run.
 */
int TransQuantIntra16DC(AVCEncObject *encvid)
{
    AVCCommonObj *video = encvid->common;
    int16 *block = video->block;
    int *level = encvid->leveldc;
    int *run = encvid->rundc;
    int16 *dc;
    int qp_div6 = video->QPy_div_6;
    int qp_mod6 = video->QPy_mod_6;
    int scale, q_bits, rounding, shift;
    int outer_sum, outer_diff, inner_sum, inner_diff;
    int value, magnitude, quantized;
    int zeros_pending = 0;
    int count = 0;
    int i, scan, pos;

    /* horizontal pass: terms of one group are 4 apart, groups are 64 apart */
    for (i = 0; i < 4; i++)
    {
        dc = block + (i << 6);

        outer_sum  = dc[0] + dc[12];
        outer_diff = dc[0] - dc[12];
        inner_sum  = dc[4] + dc[8];
        inner_diff = dc[4] - dc[8];

        dc[0]  = outer_sum + inner_sum;
        dc[4]  = outer_diff + inner_diff;
        dc[8]  = outer_sum - inner_sum;
        dc[12] = outer_diff - inner_diff;
    }

    /* vertical pass: terms of one group are 64 apart, groups are 4 apart;
       every output is halved */
    for (i = 0; i < 4; i++)
    {
        dc = block + (i << 2);

        outer_sum  = dc[0] + dc[192];
        outer_diff = dc[0] - dc[192];
        inner_sum  = dc[64] + dc[128];
        inner_diff = dc[64] - dc[128];

        dc[0]   = (outer_sum + inner_sum) >> 1;
        dc[64]  = (outer_diff + inner_diff) >> 1;
        dc[128] = (outer_sum - inner_sum) >> 1;
        dc[192] = (outer_diff - inner_diff) >> 1;
    }

    /* quantization parameters; the DC path shifts by one bit more than
       q_bits, so the intra rounding offset is doubled to match */
    scale = quant_coef[qp_mod6][0];
    q_bits = 15 + qp_div6;
    rounding = ((1 << q_bits) / 3) << 1;
    shift = q_bits + 1;

    for (scan = 0; scan < 16; scan++) /* in zigzag scan order */
    {
        pos = ZIGZAG2RASTERDC[scan];
        value = block[pos];

        /* quantize the magnitude, the sign is restored afterwards */
        magnitude = (value > 0) ? value : -value;
        quantized = (magnitude * scale + rounding) >> shift;

        if (quantized == 0)
        {
            /* quantized away: extend the current zero run */
            zeros_pending++;
            block[pos] = 0;
            continue;
        }

        if (value <= 0)
        {
            quantized = -quantized;
        }

        level[count] = quantized;
        block[pos] = quantized;
        run[count] = zeros_pending;
        count++;
        zeros_pending = 0;
    }

    return count;
}
1206 
/**
 * Transform and quantize the four DC terms of one chroma component.
 *
 * The DC terms live at block[0], block[4], block[64] and block[68].  They are
 * replaced by their 2x2 add/subtract transform and then quantized in place
 * with quant_coef[QPc_mod_6][0].  Non-zero levels and the count of zero terms
 * preceding each of them are stored in encvid->levelcdc[] and
 * encvid->runcdc[], starting at offset cr*4.
 *
 * @param encvid      encoder object; supplies the chroma QP factors and the
 *                    level/run output arrays.
 * @param block       coefficient buffer holding the four DC terms; updated in
 *                    place with the quantized levels.
 * @param slice_type  AVC_I_SLICE selects a rounding offset of 1/3 of the
 *                    quantization step scale, anything else selects 1/6.
 * @param cr          selects the output slot inside levelcdc/runcdc
 *                    (cb or cr).
 * @return number of non-zero quantized DC levels written to level/run.
 */
int TransQuantChromaDC(AVCEncObject *encvid, int16 *block, int slice_type, int cr)
{
    /* offsets of the four DC terms, in the order they are scanned */
    static const int dc_pos[4] = { 0, 4, 64, 68 };

    AVCCommonObj *video = encvid->common;
    int *level = encvid->levelcdc + (cr << 2); /* cb or cr */
    int *run = encvid->runcdc + (cr << 2);
    int top_sum, top_diff, bottom_sum, bottom_diff;
    int scale, q_bits, rounding, shift;
    int value, magnitude, quantized;
    int zeros_pending = 0;
    int count = 0;
    int scan, pos;

    /* 2x2 transform of DC components */
    top_sum     = block[0] + block[4];
    top_diff    = block[0] - block[4];
    bottom_sum  = block[64] + block[68];
    bottom_diff = block[64] - block[68];

    block[0]  = top_sum + bottom_sum;
    block[4]  = top_diff + bottom_diff;
    block[64] = top_sum - bottom_sum;
    block[68] = top_diff - bottom_diff;

    /* quantization parameters; the DC path shifts by one bit more than
       q_bits, so the rounding offset is doubled to match */
    scale = quant_coef[video->QPc_mod_6][0];
    q_bits = 15 + video->QPc_div_6;
    rounding = ((1 << q_bits) / ((slice_type == AVC_I_SLICE) ? 3 : 6)) << 1;
    shift = q_bits + 1;

    for (scan = 0; scan < 4; scan++) /* in zigzag scan order */
    {
        pos = dc_pos[scan];
        value = block[pos];

        /* quantize the magnitude, the sign is restored afterwards */
        magnitude = (value > 0) ? value : -value;
        quantized = (magnitude * scale + rounding) >> shift;

        if (quantized == 0)
        {
            /* quantized away: extend the current zero run */
            zeros_pending++;
            block[pos] = 0;
            continue;
        }

        if (value <= 0)
        {
            quantized = -quantized;
        }

        level[count] = quantized;
        block[pos] = quantized;
        run[count] = zeros_pending;
        count++;
        zeros_pending = 0;
    }

    return count;
}
1282 
1283 
1284