1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18 #include "avcenc_lib.h"
19
/*
 * Forward 4x4 transform of a prediction residual.
 *
 * For each of the four rows the residual cur[c] - predBlock[c] (c = 0..3) is
 * formed and run through the horizontal butterfly; a second pass then applies
 * the same butterfly down each column of the intermediate result.
 *
 * cur        top-left sample of the current 4x4 block
 * pitch      two row strides packed into one int: the bits above bit 15 are
 *            the stride of cur, the low 16 bits the stride of predBlock
 * predBlock  top-left sample of the prediction for the block
 * dataBlock  output coefficients; consecutive rows are 16 entries apart, so
 *            the entries [16 * r + c] with r, c in 0..3 are written
 */
void trans(uint8 *cur, int pitch, uint8 *predBlock, int16 *dataBlock)
{
    int16 *ptr = dataBlock;
    int r0, r1, r2, r3, j;
    int curpitch = (uint)pitch >> 16;
    int predpitch = (pitch & 0xFFFF);

    /* horizontal */
    for (j = 4; j > 0; j--)
    {
        /* calculate the residue first */
        r0 = cur[0] - predBlock[0];
        r1 = cur[1] - predBlock[1];
        r2 = cur[2] - predBlock[2];
        r3 = cur[3] - predBlock[3];

        /* Doubling is written as "* 2": these terms can be negative and
           left-shifting a negative int is undefined behaviour in C. */
        r0 += r3;           /* d0 + d3 */
        r3 = r0 - r3 * 2;   /* d0 - d3 */
        r1 += r2;           /* d1 + d2 */
        r2 = r1 - r2 * 2;   /* d1 - d2 */

        ptr[0] = r0 + r1;
        ptr[2] = r0 - r1;
        ptr[1] = r3 * 2 + r2;
        ptr[3] = r3 - r2 * 2;

        ptr += 16;
        predBlock += predpitch;
        cur += curpitch;
    }

    /* vertical */
    ptr = dataBlock;
    for (j = 4; j > 0; j--)
    {
        r0 = ptr[0] + ptr[48];
        r3 = ptr[0] - ptr[48];
        r1 = ptr[16] + ptr[32];
        r2 = ptr[16] - ptr[32];

        ptr[0] = r0 + r1;
        ptr[32] = r0 - r1;
        ptr[16] = r3 * 2 + r2;
        ptr[48] = r3 - r2 * 2;

        ptr++;
    }
}
74
75
76 /* do residue transform quant invquant, invtrans and write output out */
dct_luma(AVCEncObject * encvid,int blkidx,uint8 * cur,uint8 * org,int * coef_cost)77 int dct_luma(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org, int *coef_cost)
78 {
79 AVCCommonObj *video = encvid->common;
80 int org_pitch = encvid->currInput->pitch;
81 int pitch = video->currPic->pitch;
82 int16 *coef = video->block;
83 uint8 *pred = video->pred_block; // size 16 for a 4x4 block
84 int pred_pitch = video->pred_pitch;
85 int r0, r1, r2, r3, j, k, idx;
86 int *level, *run;
87 int Qq, Rq, q_bits, qp_const, quant;
88 int data, lev, zero_run;
89 int numcoeff;
90
91 coef += ((blkidx & 0x3) << 2) + ((blkidx >> 2) << 6); /* point to the 4x4 block */
92
93 /* first take a 4x4 transform */
94 /* horizontal */
95 j = 4;
96 while (j > 0)
97 {
98 /* calculate the residue first */
99 r0 = org[0] - pred[0]; /* OPTIMIZEABLE */
100 r1 = org[1] - pred[1];
101 r2 = org[2] - pred[2];
102 r3 = org[3] - pred[3];
103
104 r0 += r3; //ptr[0] + ptr[3];
105 r3 = r0 - (r3 << 1); //ptr[0] - ptr[3];
106 r1 += r2; //ptr[1] + ptr[2];
107 r2 = r1 - (r2 << 1); //ptr[1] - ptr[2];
108
109 coef[0] = r0 + r1;
110 coef[2] = r0 - r1;
111 coef[1] = (r3 << 1) + r2;
112 coef[3] = r3 - (r2 << 1);
113
114 coef += 16;
115 org += org_pitch;
116 pred += pred_pitch;
117 j--;
118 }
119 /* vertical */
120 coef -= 64;
121 pred -= (pred_pitch << 2);
122 j = 4;
123 while (j > 0) /* OPTIMIZABLE */
124 {
125 r0 = coef[0] + coef[48];
126 r3 = coef[0] - coef[48];
127 r1 = coef[16] + coef[32];
128 r2 = coef[16] - coef[32];
129
130 coef[0] = r0 + r1;
131 coef[32] = r0 - r1;
132 coef[16] = (r3 << 1) + r2;
133 coef[48] = r3 - (r2 << 1);
134
135 coef++;
136 j--;
137 }
138
139 coef -= 4;
140
141 /* quant */
142 level = encvid->level[ras2dec[blkidx]];
143 run = encvid->run[ras2dec[blkidx]];
144
145 Rq = video->QPy_mod_6;
146 Qq = video->QPy_div_6;
147 qp_const = encvid->qp_const;
148 q_bits = 15 + Qq;
149
150 zero_run = 0;
151 numcoeff = 0;
152 for (k = 0; k < 16; k++)
153 {
154 idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */
155 data = coef[idx];
156 quant = quant_coef[Rq][k];
157 if (data > 0)
158 {
159 lev = data * quant + qp_const;
160 }
161 else
162 {
163 lev = -data * quant + qp_const;
164 }
165 lev >>= q_bits;
166 if (lev)
167 {
168 *coef_cost += ((lev > 1) ? MAX_VALUE : COEFF_COST[DISABLE_THRESHOLDING][zero_run]);
169
170 /* dequant */
171 quant = dequant_coefres[Rq][k];
172 if (data > 0)
173 {
174 level[numcoeff] = lev;
175 coef[idx] = (lev * quant) << Qq;
176 }
177 else
178 {
179 level[numcoeff] = -lev;
180 coef[idx] = (-lev * quant) << Qq;
181 }
182 run[numcoeff++] = zero_run;
183 zero_run = 0;
184 }
185 else
186 {
187 zero_run++;
188 coef[idx] = 0;
189 }
190 }
191
192 if (video->currMB->mb_intra) // only do inverse transform with intra block
193 {
194 if (numcoeff) /* then do inverse transform */
195 {
196 for (j = 4; j > 0; j--) /* horizontal */
197 {
198 r0 = coef[0] + coef[2];
199 r1 = coef[0] - coef[2];
200 r2 = (coef[1] >> 1) - coef[3];
201 r3 = coef[1] + (coef[3] >> 1);
202
203 coef[0] = r0 + r3;
204 coef[1] = r1 + r2;
205 coef[2] = r1 - r2;
206 coef[3] = r0 - r3;
207
208 coef += 16;
209 }
210
211 coef -= 64;
212 for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */
213 {
214 r0 = coef[0] + coef[32];
215 r1 = coef[0] - coef[32];
216 r2 = (coef[16] >> 1) - coef[48];
217 r3 = coef[16] + (coef[48] >> 1);
218 r0 += r3;
219 r3 = (r0 - (r3 << 1)); /* r0-r3 */
220 r1 += r2;
221 r2 = (r1 - (r2 << 1)); /* r1-r2 */
222 r0 += 32;
223 r1 += 32;
224 r2 += 32;
225 r3 += 32;
226
227 r0 = pred[0] + (r0 >> 6);
228 if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */
229 r1 = *(pred += pred_pitch) + (r1 >> 6);
230 if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */
231 r2 = *(pred += pred_pitch) + (r2 >> 6);
232 if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */
233 r3 = pred[pred_pitch] + (r3 >> 6);
234 if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */
235
236 *cur = r0;
237 *(cur += pitch) = r1;
238 *(cur += pitch) = r2;
239 cur[pitch] = r3;
240 cur -= (pitch << 1);
241 cur++;
242 pred -= (pred_pitch << 1);
243 pred++;
244 coef++;
245 }
246 }
247 else // copy from pred to cur
248 {
249 *((uint32*)cur) = *((uint32*)pred);
250 *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch));
251 *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch));
252 *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch));
253 }
254 }
255
256 return numcoeff;
257 }
258
259
/*
 * Inverse-transform the luma residual held in video->block and add it, in
 * place, to the 16x16 area starting at curL.
 *
 * The four 8x8 quadrants are visited in order; a quadrant is processed only
 * when its bit is set in currMB->CBP, and inside it a 4x4 block is processed
 * only when its entry in currMB->nz_coeff[] is nonzero.  Skipped blocks leave
 * the picture samples untouched.
 *
 * video     supplies the coefficient buffer (video->block, row stride 16)
 * curL      top-left luma sample of the macroblock in the picture
 * currMB    macroblock whose CBP and nz_coeff[] select the blocks to process
 * picPitch  row stride of the picture buffer behind curL
 */
void MBInterIdct(AVCCommonObj *video, uint8 *curL, AVCMacroblock *currMB, int picPitch)
{
    int16 *coef, *coef8 = video->block;
    uint8 *cur; /* walks the 4x4 blocks of the current 8x8 quadrant */
    int b8, b4;
    int r0, r1, r2, r3, j, blkidx;

    for (b8 = 0; b8 < 4; b8++)
    {
        cur = curL;
        coef = coef8;

        if (currMB->CBP & (1 << b8))
        {
            for (b4 = 0; b4 < 4; b4++)
            {
                blkidx = blkIdx2blkXY[b8][b4];
                /* do IDCT */
                if (currMB->nz_coeff[blkidx])
                {
                    for (j = 4; j > 0; j--) /* horizontal */
                    {
                        r0 = coef[0] + coef[2];
                        r1 = coef[0] - coef[2];
                        r2 = (coef[1] >> 1) - coef[3];
                        r3 = coef[1] + (coef[3] >> 1);

                        coef[0] = r0 + r3;
                        coef[1] = r1 + r2;
                        coef[2] = r1 - r2;
                        coef[3] = r0 - r3;

                        coef += 16;
                    }

                    coef -= 64;
                    for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */
                    {
                        r0 = coef[0] + coef[32];
                        r1 = coef[0] - coef[32];
                        r2 = (coef[16] >> 1) - coef[48];
                        r3 = coef[16] + (coef[48] >> 1);
                        /* "* 2" rather than "<< 1": r2/r3 may be negative
                           and left-shifting a negative int is undefined */
                        r0 += r3;
                        r3 = r0 - r3 * 2; /* r0-r3 */
                        r1 += r2;
                        r2 = r1 - r2 * 2; /* r1-r2 */
                        r0 += 32;
                        r1 += 32;
                        r2 += 32;
                        r3 += 32;

                        /* add to the existing samples and clip to 0..255;
                           the clip idiom assumes a 32-bit int with
                           arithmetic right shift */
                        r0 = cur[0] + (r0 >> 6);
                        if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */
                        *cur = r0;
                        r1 = *(cur += picPitch) + (r1 >> 6);
                        if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */
                        *cur = r1;
                        r2 = *(cur += picPitch) + (r2 >> 6);
                        if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */
                        *cur = r2;
                        r3 = cur[picPitch] + (r3 >> 6);
                        if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */
                        cur[picPitch] = r3;

                        cur -= (picPitch << 1);
                        cur++;
                        coef++;
                    }
                    /* back to the first column of this 4x4 block */
                    cur -= 4;
                    coef -= 4;
                }

                if (b4 & 1)
                {
                    /* right-hand block done: go to the left block one
                       block row down (4 picture rows, 4 coefficient rows) */
                    cur += ((picPitch << 2) - 4);
                    coef += 60;
                }
                else
                {
                    cur += 4;
                    coef += 4;
                }
            }
        }

        if (b8 & 1)
        {
            /* right-hand quadrant done: go to the left quadrant 8 rows down */
            curL += ((picPitch << 3) - 8);
            coef8 += 120;
        }
        else
        {
            curL += 8;
            coef8 += 8;
        }
    }
}
358
359 /* performa dct, quant, iquant, idct for the entire MB */
dct_luma_16x16(AVCEncObject * encvid,uint8 * curL,uint8 * orgL)360 void dct_luma_16x16(AVCEncObject *encvid, uint8 *curL, uint8 *orgL)
361 {
362 AVCCommonObj *video = encvid->common;
363 int pitch = video->currPic->pitch;
364 int org_pitch = encvid->currInput->pitch;
365 AVCMacroblock *currMB = video->currMB;
366 int16 *coef = video->block;
367 uint8 *pred = encvid->pred_i16[currMB->i16Mode];
368 int blk_x, blk_y, j, k, idx, b8, b4;
369 int r0, r1, r2, r3, m0, m1, m2 , m3;
370 int data, lev;
371 int *level, *run, zero_run, ncoeff;
372 int Rq, Qq, quant, q_bits, qp_const;
373 int offset_cur[4], offset_pred[4], offset;
374
375 /* horizontal */
376 for (j = 16; j > 0; j--)
377 {
378 for (blk_x = 4; blk_x > 0; blk_x--)
379 {
380 /* calculate the residue first */
381 r0 = *orgL++ - *pred++;
382 r1 = *orgL++ - *pred++;
383 r2 = *orgL++ - *pred++;
384 r3 = *orgL++ - *pred++;
385
386 r0 += r3; //ptr[0] + ptr[3];
387 r3 = r0 - (r3 << 1); //ptr[0] - ptr[3];
388 r1 += r2; //ptr[1] + ptr[2];
389 r2 = r1 - (r2 << 1); //ptr[1] - ptr[2];
390
391 *coef++ = r0 + r1;
392 *coef++ = (r3 << 1) + r2;
393 *coef++ = r0 - r1;
394 *coef++ = r3 - (r2 << 1);
395 }
396 orgL += (org_pitch - 16);
397 }
398 pred -= 256;
399 coef -= 256;
400 /* vertical */
401 for (blk_y = 4; blk_y > 0; blk_y--)
402 {
403 for (j = 16; j > 0; j--)
404 {
405 r0 = coef[0] + coef[48];
406 r3 = coef[0] - coef[48];
407 r1 = coef[16] + coef[32];
408 r2 = coef[16] - coef[32];
409
410 coef[0] = r0 + r1;
411 coef[32] = r0 - r1;
412 coef[16] = (r3 << 1) + r2;
413 coef[48] = r3 - (r2 << 1);
414
415 coef++;
416 }
417 coef += 48;
418 }
419
420 /* then perform DC transform */
421 coef -= 256;
422 for (j = 4; j > 0; j--)
423 {
424 r0 = coef[0] + coef[12];
425 r3 = coef[0] - coef[12];
426 r1 = coef[4] + coef[8];
427 r2 = coef[4] - coef[8];
428
429 coef[0] = r0 + r1;
430 coef[8] = r0 - r1;
431 coef[4] = r3 + r2;
432 coef[12] = r3 - r2;
433 coef += 64;
434 }
435 coef -= 256;
436 for (j = 4; j > 0; j--)
437 {
438 r0 = coef[0] + coef[192];
439 r3 = coef[0] - coef[192];
440 r1 = coef[64] + coef[128];
441 r2 = coef[64] - coef[128];
442
443 coef[0] = (r0 + r1) >> 1;
444 coef[128] = (r0 - r1) >> 1;
445 coef[64] = (r3 + r2) >> 1;
446 coef[192] = (r3 - r2) >> 1;
447 coef += 4;
448 }
449
450 coef -= 16;
451 // then quantize DC
452 level = encvid->leveldc;
453 run = encvid->rundc;
454
455 Rq = video->QPy_mod_6;
456 Qq = video->QPy_div_6;
457 quant = quant_coef[Rq][0];
458 q_bits = 15 + Qq;
459 qp_const = encvid->qp_const;
460
461 zero_run = 0;
462 ncoeff = 0;
463 for (k = 0; k < 16; k++) /* in zigzag scan order */
464 {
465 idx = ZIGZAG2RASTERDC[k];
466 data = coef[idx];
467 if (data > 0) // quant
468 {
469 lev = data * quant + (qp_const << 1);
470 }
471 else
472 {
473 lev = -data * quant + (qp_const << 1);
474 }
475 lev >>= (q_bits + 1);
476 if (lev) // dequant
477 {
478 if (data > 0)
479 {
480 level[ncoeff] = lev;
481 coef[idx] = lev;
482 }
483 else
484 {
485 level[ncoeff] = -lev;
486 coef[idx] = -lev;
487 }
488 run[ncoeff++] = zero_run;
489 zero_run = 0;
490 }
491 else
492 {
493 zero_run++;
494 coef[idx] = 0;
495 }
496 }
497
498 /* inverse transform DC */
499 encvid->numcoefdc = ncoeff;
500 if (ncoeff)
501 {
502 quant = dequant_coefres[Rq][0];
503
504 for (j = 0; j < 4; j++)
505 {
506 m0 = coef[0] + coef[4];
507 m1 = coef[0] - coef[4];
508 m2 = coef[8] + coef[12];
509 m3 = coef[8] - coef[12];
510
511
512 coef[0] = m0 + m2;
513 coef[4] = m0 - m2;
514 coef[8] = m1 - m3;
515 coef[12] = m1 + m3;
516 coef += 64;
517 }
518
519 coef -= 256;
520
521 if (Qq >= 2) /* this way should be faster than JM */
522 { /* they use (((m4*scale)<<(QPy/6))+2)>>2 for both cases. */
523 Qq -= 2;
524 for (j = 0; j < 4; j++)
525 {
526 m0 = coef[0] + coef[64];
527 m1 = coef[0] - coef[64];
528 m2 = coef[128] + coef[192];
529 m3 = coef[128] - coef[192];
530
531 coef[0] = ((m0 + m2) * quant) << Qq;
532 coef[64] = ((m0 - m2) * quant) << Qq;
533 coef[128] = ((m1 - m3) * quant) << Qq;
534 coef[192] = ((m1 + m3) * quant) << Qq;
535 coef += 4;
536 }
537 Qq += 2; /* restore the value */
538 }
539 else
540 {
541 Qq = 2 - Qq;
542 offset = 1 << (Qq - 1);
543
544 for (j = 0; j < 4; j++)
545 {
546 m0 = coef[0] + coef[64];
547 m1 = coef[0] - coef[64];
548 m2 = coef[128] + coef[192];
549 m3 = coef[128] - coef[192];
550
551 coef[0] = (((m0 + m2) * quant + offset) >> Qq);
552 coef[64] = (((m0 - m2) * quant + offset) >> Qq);
553 coef[128] = (((m1 - m3) * quant + offset) >> Qq);
554 coef[192] = (((m1 + m3) * quant + offset) >> Qq);
555 coef += 4;
556 }
557 Qq = 2 - Qq; /* restore the value */
558 }
559 coef -= 16; /* back to the origin */
560 }
561
562 /* now zigzag scan ac coefs, quant, iquant and itrans */
563 run = encvid->run[0];
564 level = encvid->level[0];
565
566 /* offset btw 4x4 block */
567 offset_cur[0] = 0;
568 offset_cur[1] = (pitch << 2) - 8;
569
570 /* offset btw 8x8 block */
571 offset_cur[2] = 8 - (pitch << 3);
572 offset_cur[3] = -8;
573
574 /* similarly for pred */
575 offset_pred[0] = 0;
576 offset_pred[1] = 56;
577 offset_pred[2] = -120;
578 offset_pred[3] = -8;
579
580 currMB->CBP = 0;
581
582 for (b8 = 0; b8 < 4; b8++)
583 {
584 for (b4 = 0; b4 < 4; b4++)
585 {
586
587 zero_run = 0;
588 ncoeff = 0;
589
590 for (k = 1; k < 16; k++)
591 {
592 idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */
593 data = coef[idx];
594 quant = quant_coef[Rq][k];
595 if (data > 0)
596 {
597 lev = data * quant + qp_const;
598 }
599 else
600 {
601 lev = -data * quant + qp_const;
602 }
603 lev >>= q_bits;
604 if (lev)
605 { /* dequant */
606 quant = dequant_coefres[Rq][k];
607 if (data > 0)
608 {
609 level[ncoeff] = lev;
610 coef[idx] = (lev * quant) << Qq;
611 }
612 else
613 {
614 level[ncoeff] = -lev;
615 coef[idx] = (-lev * quant) << Qq;
616 }
617 run[ncoeff++] = zero_run;
618 zero_run = 0;
619 }
620 else
621 {
622 zero_run++;
623 coef[idx] = 0;
624 }
625 }
626
627 currMB->nz_coeff[blkIdx2blkXY[b8][b4]] = ncoeff; /* in raster scan !!! */
628 if (ncoeff)
629 {
630 currMB->CBP |= (1 << b8);
631
632 // do inverse transform here
633 for (j = 4; j > 0; j--)
634 {
635 r0 = coef[0] + coef[2];
636 r1 = coef[0] - coef[2];
637 r2 = (coef[1] >> 1) - coef[3];
638 r3 = coef[1] + (coef[3] >> 1);
639
640 coef[0] = r0 + r3;
641 coef[1] = r1 + r2;
642 coef[2] = r1 - r2;
643 coef[3] = r0 - r3;
644
645 coef += 16;
646 }
647 coef -= 64;
648 for (j = 4; j > 0; j--)
649 {
650 r0 = coef[0] + coef[32];
651 r1 = coef[0] - coef[32];
652 r2 = (coef[16] >> 1) - coef[48];
653 r3 = coef[16] + (coef[48] >> 1);
654
655 r0 += r3;
656 r3 = (r0 - (r3 << 1)); /* r0-r3 */
657 r1 += r2;
658 r2 = (r1 - (r2 << 1)); /* r1-r2 */
659 r0 += 32;
660 r1 += 32;
661 r2 += 32;
662 r3 += 32;
663 r0 = pred[0] + (r0 >> 6);
664 if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */
665 r1 = pred[16] + (r1 >> 6);
666 if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */
667 r2 = pred[32] + (r2 >> 6);
668 if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */
669 r3 = pred[48] + (r3 >> 6);
670 if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */
671 *curL = r0;
672 *(curL += pitch) = r1;
673 *(curL += pitch) = r2;
674 curL[pitch] = r3;
675 curL -= (pitch << 1);
676 curL++;
677 pred++;
678 coef++;
679 }
680 }
681 else // do DC-only inverse
682 {
683 m0 = coef[0] + 32;
684
685 for (j = 4; j > 0; j--)
686 {
687 r0 = pred[0] + (m0 >> 6);
688 if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */
689 r1 = pred[16] + (m0 >> 6);
690 if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */
691 r2 = pred[32] + (m0 >> 6);
692 if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */
693 r3 = pred[48] + (m0 >> 6);
694 if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */
695 *curL = r0;
696 *(curL += pitch) = r1;
697 *(curL += pitch) = r2;
698 curL[pitch] = r3;
699 curL -= (pitch << 1);
700 curL++;
701 pred++;
702 }
703 coef += 4;
704 }
705
706 run += 16; // follow coding order
707 level += 16;
708 curL += offset_cur[b4&1];
709 pred += offset_pred[b4&1];
710 coef += offset_pred[b4&1];
711 }
712
713 curL += offset_cur[2 + (b8&1)];
714 pred += offset_pred[2 + (b8&1)];
715 coef += offset_pred[2 + (b8&1)];
716 }
717
718 return ;
719 }
720
721
/*
 * Transform, quantise, dequantise and reconstruct one 8x8 chroma plane of
 * the current macroblock.
 *
 * encvid  encoder state
 * curC    top-left sample of the plane in the reconstructed picture; row
 *         stride is half of video->currPic->pitch
 * orgC    top-left sample of the original plane; row stride is half of
 *         encvid->currInput->pitch
 * cr      plane selector, 0 or 1 (presumably Cb and Cr); it picks the second
 *         half of the coefficient and prediction rows as well as the
 *         run/level slots and nz_coeff entries
 *
 * The prediction is video->pred_block (row stride 16) for intra macroblocks.
 * For inter macroblocks (currMB->mb_intra == 0) it is curC itself, so the
 * residual is added to the picture in place.
 *
 * Outputs: DC run/level in encvid->runcdc / encvid->levelcdc (4 entries per
 * plane) with the count in encvid->numcoefcdc[cr]; AC run/level in
 * encvid->run / encvid->level starting at slot 16 (cr == 0) or 20 (cr != 0);
 * currMB->nz_coeff[] for the four blocks; currMB->CBP gets (1 << 4) OR-ed in
 * when a DC level is nonzero and (2 << 4) when an AC block is kept.  If the
 * accumulated AC cost stays below _CHROMA_COEFF_COST_ all AC levels are
 * dropped and only the DC is used for reconstruction.
 */
void dct_chroma(AVCEncObject *encvid, uint8 *curC, uint8 *orgC, int cr)
{
    AVCCommonObj *video = encvid->common;
    AVCMacroblock *currMB = video->currMB;
    int org_pitch = (encvid->currInput->pitch) >> 1;
    int pitch = (video->currPic->pitch) >> 1;
    int pred_pitch = 16;
    int16 *coef = video->block + 256;
    uint8 *pred = video->pred_block;
    int j, blk_x, blk_y, k, idx, b4;
    int r0, r1, r2, r3, m0;
    int Qq, Rq, qp_const, q_bits, quant;
    int *level, *run, zero_run, ncoeff;
    int data, lev, recon, dc_scale;
    int offset_cur[2], offset_pred[2], offset_coef[2];
    uint8 nz_temp[4];
    int coeff_cost;

    /* Throughout this function doubling of butterfly terms is written as
       "* 2": those terms can be negative and left-shifting a negative int
       is undefined behaviour in C. */

    if (cr)
    {
        coef += 8;
        pred += 8;
    }

    if (currMB->mb_intra == 0) /* inter mode */
    {
        pred = curC;
        pred_pitch = pitch;
    }

    /* do 4x4 transform */
    /* horizontal */
    for (j = 8; j > 0; j--)
    {
        for (blk_x = 2; blk_x > 0; blk_x--)
        {
            /* calculate the residue first */
            r0 = *orgC++ - *pred++;
            r1 = *orgC++ - *pred++;
            r2 = *orgC++ - *pred++;
            r3 = *orgC++ - *pred++;

            r0 += r3;           /* d0 + d3 */
            r3 = r0 - r3 * 2;   /* d0 - d3 */
            r1 += r2;           /* d1 + d2 */
            r2 = r1 - r2 * 2;   /* d1 - d2 */

            *coef++ = r0 + r1;
            *coef++ = r3 * 2 + r2;
            *coef++ = r0 - r1;
            *coef++ = r3 - r2 * 2;
        }
        coef += 8; /* coef pitch is 16 */
        pred += (pred_pitch - 8);
        orgC += (org_pitch - 8);
    }
    pred -= (pred_pitch << 3);
    coef -= 128;

    /* vertical */
    for (blk_y = 2; blk_y > 0; blk_y--)
    {
        for (j = 8; j > 0; j--)
        {
            r0 = coef[0] + coef[48];
            r3 = coef[0] - coef[48];
            r1 = coef[16] + coef[32];
            r2 = coef[16] - coef[32];

            coef[0] = r0 + r1;
            coef[32] = r0 - r1;
            coef[16] = r3 * 2 + r2;
            coef[48] = r3 - r2 * 2;

            coef++;
        }
        coef += 56; /* 8 columns done; move to the next block row */
    }

    /* then perform DC transform */
    coef -= 128;

    /* 2x2 transform of DC components */
    r0 = coef[0];
    r1 = coef[4];
    r2 = coef[64];
    r3 = coef[68];

    coef[0] = r0 + r1 + r2 + r3;
    coef[4] = r0 - r1 + r2 - r3;
    coef[64] = r0 + r1 - r2 - r3;
    coef[68] = r0 - r1 - r2 + r3;

    Qq = video->QPc_div_6;
    Rq = video->QPc_mod_6;
    quant = quant_coef[Rq][0];
    q_bits = 15 + Qq;
    qp_const = encvid->qp_const_c;

    zero_run = 0;
    ncoeff = 0;
    run = encvid->runcdc + (cr << 2);
    level = encvid->levelcdc + (cr << 2);

    /* in zigzag scan order */
    for (k = 0; k < 4; k++)
    {
        idx = ((k >> 1) << 6) + ((k & 1) << 2); /* 0, 4, 64, 68 */
        data = coef[idx];
        if (data > 0)
        {
            lev = data * quant + (qp_const << 1);
        }
        else
        {
            lev = -data * quant + (qp_const << 1);
        }
        lev >>= (q_bits + 1);
        if (lev)
        {
            if (data > 0)
            {
                level[ncoeff] = lev;
                coef[idx] = lev;
            }
            else
            {
                level[ncoeff] = -lev;
                coef[idx] = -lev;
            }
            run[ncoeff++] = zero_run;
            zero_run = 0;
        }
        else
        {
            zero_run++;
            coef[idx] = 0;
        }
    }

    encvid->numcoefcdc[cr] = ncoeff;

    if (ncoeff)
    {
        currMB->CBP |= (1 << 4); /* DC present */
        /* do inverse transform */
        quant = dequant_coefres[Rq][0];

        r0 = coef[0] + coef[4];
        r1 = coef[0] - coef[4];
        r2 = coef[64] + coef[68];
        r3 = coef[64] - coef[68];

        r0 += r2;
        r2 = r0 - r2 * 2;
        r1 += r3;
        r3 = r1 - r3 * 2;

        if (Qq >= 1)
        {
            /* multiply by a power of two instead of left-shifting a value
               that may be negative */
            dc_scale = 1 << (Qq - 1);
            coef[0] = (r0 * quant) * dc_scale;
            coef[4] = (r1 * quant) * dc_scale;
            coef[64] = (r2 * quant) * dc_scale;
            coef[68] = (r3 * quant) * dc_scale;
        }
        else
        {
            coef[0] = (r0 * quant) >> 1;
            coef[4] = (r1 * quant) >> 1;
            coef[64] = (r2 * quant) >> 1;
            coef[68] = (r3 * quant) >> 1;
        }
    }

    /* now do AC zigzag scan, quant, iquant and itrans */
    if (cr)
    {
        run = encvid->run[20];
        level = encvid->level[20];
    }
    else
    {
        run = encvid->run[16];
        level = encvid->level[16];
    }

    /* offset btw 4x4 block (applied after the pointer has already moved 4
       to the right across the block) */
    offset_cur[0] = 0;
    offset_cur[1] = (pitch << 2) - 8;
    offset_pred[0] = 0;
    offset_pred[1] = (pred_pitch << 2) - 8;
    offset_coef[0] = 0;
    offset_coef[1] = 56;

    coeff_cost = 0;

    for (b4 = 0; b4 < 4; b4++)
    {
        zero_run = 0;
        ncoeff = 0;
        for (k = 1; k < 16; k++) /* in zigzag scan order, AC only */
        {
            idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */
            data = coef[idx];
            quant = quant_coef[Rq][k];
            if (data > 0)
            {
                lev = data * quant + qp_const;
            }
            else
            {
                lev = -data * quant + qp_const;
            }
            lev >>= q_bits;
            if (lev)
            {
                /* for RD performance */
                if (lev > 1)
                    coeff_cost += MAX_VALUE; /* set high cost, shall not be discarded */
                else
                    coeff_cost += COEFF_COST[DISABLE_THRESHOLDING][zero_run];

                /* dequant: scale the magnitude, then apply the sign */
                quant = dequant_coefres[Rq][k];
                recon = (lev * quant) << Qq;
                if (data > 0)
                {
                    level[ncoeff] = lev;
                    coef[idx] = recon;
                }
                else
                {
                    level[ncoeff] = -lev;
                    coef[idx] = -recon;
                }
                run[ncoeff++] = zero_run;
                zero_run = 0;
            }
            else
            {
                zero_run++;
                coef[idx] = 0;
            }
        }

        nz_temp[b4] = ncoeff; /* raster scan */

        /* just advance the pointers for now, do IDCT later */
        coef += 4;
        run += 16;
        level += 16;
        coef += offset_coef[b4 & 1];
    }

    /* rewind the pointers */
    coef -= 128;

    if (coeff_cost < _CHROMA_COEFF_COST_)
    {
        /* if it's not efficient to encode any blocks.
           Just do DC only */
        /* We can reset level and run also, but setting nz to zero should be enough. */
        currMB->nz_coeff[16 + (cr << 1)] = 0;
        currMB->nz_coeff[17 + (cr << 1)] = 0;
        currMB->nz_coeff[20 + (cr << 1)] = 0;
        currMB->nz_coeff[21 + (cr << 1)] = 0;

        for (b4 = 0; b4 < 4; b4++)
        {
            /* do DC-only inverse */
            m0 = coef[0] + 32;

            for (j = 4; j > 0; j--)
            {
                /* one column per iteration: all four samples are read from
                   pred before any is written, which matters when pred
                   aliases curC (inter mode); the clip idiom assumes a
                   32-bit int with arithmetic right shift */
                r0 = pred[0] + (m0 >> 6);
                if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */
                r1 = *(pred += pred_pitch) + (m0 >> 6);
                if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */
                r2 = pred[pred_pitch] + (m0 >> 6);
                if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */
                r3 = pred[pred_pitch << 1] + (m0 >> 6);
                if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */
                *curC = r0;
                *(curC += pitch) = r1;
                *(curC += pitch) = r2;
                curC[pitch] = r3;
                curC -= (pitch << 1);
                curC++;
                pred += (1 - pred_pitch);
            }
            coef += 4;
            curC += offset_cur[b4 & 1];
            pred += offset_pred[b4 & 1];
            coef += offset_coef[b4 & 1];
        }
    }
    else /* not dropping anything, continue with the IDCT */
    {
        for (b4 = 0; b4 < 4; b4++)
        {
            ncoeff = nz_temp[b4]; /* in raster scan */
            currMB->nz_coeff[16 + (b4 & 1) + (cr << 1) + ((b4 >> 1) << 2)] = ncoeff; /* in raster scan */

            if (ncoeff) /* do a check on the nonzero-coeff */
            {
                currMB->CBP |= (2 << 4);

                /* do inverse transform here */
                for (j = 4; j > 0; j--)
                {
                    r0 = coef[0] + coef[2];
                    r1 = coef[0] - coef[2];
                    r2 = (coef[1] >> 1) - coef[3];
                    r3 = coef[1] + (coef[3] >> 1);

                    coef[0] = r0 + r3;
                    coef[1] = r1 + r2;
                    coef[2] = r1 - r2;
                    coef[3] = r0 - r3;

                    coef += 16;
                }
                coef -= 64;
                for (j = 4; j > 0; j--)
                {
                    r0 = coef[0] + coef[32];
                    r1 = coef[0] - coef[32];
                    r2 = (coef[16] >> 1) - coef[48];
                    r3 = coef[16] + (coef[48] >> 1);

                    r0 += r3;
                    r3 = r0 - r3 * 2; /* r0-r3 */
                    r1 += r2;
                    r2 = r1 - r2 * 2; /* r1-r2 */
                    r0 += 32;
                    r1 += 32;
                    r2 += 32;
                    r3 += 32;
                    r0 = pred[0] + (r0 >> 6);
                    if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */
                    r1 = *(pred += pred_pitch) + (r1 >> 6);
                    if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */
                    r2 = pred[pred_pitch] + (r2 >> 6);
                    if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */
                    r3 = pred[pred_pitch << 1] + (r3 >> 6);
                    if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */
                    *curC = r0;
                    *(curC += pitch) = r1;
                    *(curC += pitch) = r2;
                    curC[pitch] = r3;
                    curC -= (pitch << 1);
                    curC++;
                    pred += (1 - pred_pitch);
                    coef++;
                }
            }
            else
            {
                /* do DC-only inverse */
                m0 = coef[0] + 32;

                for (j = 4; j > 0; j--)
                {
                    r0 = pred[0] + (m0 >> 6);
                    if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */
                    r1 = *(pred += pred_pitch) + (m0 >> 6);
                    if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */
                    r2 = pred[pred_pitch] + (m0 >> 6);
                    if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */
                    r3 = pred[pred_pitch << 1] + (m0 >> 6);
                    if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */
                    *curC = r0;
                    *(curC += pitch) = r1;
                    *(curC += pitch) = r2;
                    curC[pitch] = r3;
                    curC -= (pitch << 1);
                    curC++;
                    pred += (1 - pred_pitch);
                }
                coef += 4;
            }
            curC += offset_cur[b4 & 1];
            pred += offset_pred[b4 & 1];
            coef += offset_coef[b4 & 1];
        }
    }
}
1111
1112
1113 /* only DC transform */
TransQuantIntra16DC(AVCEncObject * encvid)1114 int TransQuantIntra16DC(AVCEncObject *encvid)
1115 {
1116 AVCCommonObj *video = encvid->common;
1117 int16 *block = video->block;
1118 int *level = encvid->leveldc;
1119 int *run = encvid->rundc;
1120 int16 *ptr = block;
1121 int r0, r1, r2, r3, j;
1122 int Qq = video->QPy_div_6;
1123 int Rq = video->QPy_mod_6;
1124 int q_bits, qp_const, quant;
1125 int data, lev, zero_run;
1126 int k, ncoeff, idx;
1127
1128 /* DC transform */
1129 /* horizontal */
1130 j = 4;
1131 while (j)
1132 {
1133 r0 = ptr[0] + ptr[12];
1134 r3 = ptr[0] - ptr[12];
1135 r1 = ptr[4] + ptr[8];
1136 r2 = ptr[4] - ptr[8];
1137
1138 ptr[0] = r0 + r1;
1139 ptr[8] = r0 - r1;
1140 ptr[4] = r3 + r2;
1141 ptr[12] = r3 - r2;
1142 ptr += 64;
1143 j--;
1144 }
1145 /* vertical */
1146 ptr = block;
1147 j = 4;
1148 while (j)
1149 {
1150 r0 = ptr[0] + ptr[192];
1151 r3 = ptr[0] - ptr[192];
1152 r1 = ptr[64] + ptr[128];
1153 r2 = ptr[64] - ptr[128];
1154
1155 ptr[0] = (r0 + r1) >> 1;
1156 ptr[128] = (r0 - r1) >> 1;
1157 ptr[64] = (r3 + r2) >> 1;
1158 ptr[192] = (r3 - r2) >> 1;
1159 ptr += 4;
1160 j--;
1161 }
1162
1163 quant = quant_coef[Rq][0];
1164 q_bits = 15 + Qq;
1165 qp_const = (1 << q_bits) / 3; // intra
1166
1167 zero_run = 0;
1168 ncoeff = 0;
1169
1170 for (k = 0; k < 16; k++) /* in zigzag scan order */
1171 {
1172 idx = ZIGZAG2RASTERDC[k];
1173 data = block[idx];
1174 if (data > 0)
1175 {
1176 lev = data * quant + (qp_const << 1);
1177 }
1178 else
1179 {
1180 lev = -data * quant + (qp_const << 1);
1181 }
1182 lev >>= (q_bits + 1);
1183 if (lev)
1184 {
1185 if (data > 0)
1186 {
1187 level[ncoeff] = lev;
1188 block[idx] = lev;
1189 }
1190 else
1191 {
1192 level[ncoeff] = -lev;
1193 block[idx] = -lev;
1194 }
1195 run[ncoeff++] = zero_run;
1196 zero_run = 0;
1197 }
1198 else
1199 {
1200 zero_run++;
1201 block[idx] = 0;
1202 }
1203 }
1204 return ncoeff;
1205 }
1206
/*
 * 2x2 transform and quantisation of the four chroma DC terms of one plane.
 *
 * encvid      encoder state; receives the run/level output
 * block       coefficient buffer of the plane; the DC terms are read from and
 *             written back to block[0], block[4], block[64] and block[68]
 * slice_type  AVC_I_SLICE selects the rounding constant (1 << q_bits) / 3,
 *             any other value (1 << q_bits) / 6
 * cr          plane selector (cb or cr); offsets the output by 4 * cr entries
 *             inside encvid->levelcdc / encvid->runcdc
 *
 * Returns the number of nonzero quantised DC levels.
 */
int TransQuantChromaDC(AVCEncObject *encvid, int16 *block, int slice_type, int cr)
{
    AVCCommonObj *video = encvid->common;
    int *level = encvid->levelcdc + (cr << 2); /* cb or cr */
    int *run = encvid->runcdc + (cr << 2);
    int dc00, dc01, dc10, dc11;
    int Qq, Rq, q_bits, rounding, quant;
    int data, magnitude, lev, zero_run;
    int k, ncoeff, idx;

    /* 2x2 transform of DC components */
    dc00 = block[0];
    dc01 = block[4];
    dc10 = block[64];
    dc11 = block[68];

    block[0] = dc00 + dc01 + dc10 + dc11;
    block[4] = dc00 - dc01 + dc10 - dc11;
    block[64] = dc00 + dc01 - dc10 - dc11;
    block[68] = dc00 - dc01 - dc10 + dc11;

    Qq = video->QPc_div_6;
    Rq = video->QPc_mod_6;
    quant = quant_coef[Rq][0];
    q_bits = 15 + Qq;
    /* rounding constant, doubled because one extra bit is shifted out below */
    rounding = ((1 << q_bits) / ((slice_type == AVC_I_SLICE) ? 3 : 6)) << 1;

    zero_run = 0;
    ncoeff = 0;

    for (k = 0; k < 4; k++) /* in zigzag scan order */
    {
        idx = (k >> 1) * 64 + (k & 1) * 4; /* 0, 4, 64, 68 */
        data = block[idx];
        magnitude = (data > 0) ? data : -data;
        lev = (magnitude * quant + rounding) >> (q_bits + 1);

        if (lev == 0)
        {
            zero_run++;
            block[idx] = 0;
            continue;
        }

        if (data <= 0)
        {
            lev = -lev; /* restore the sign of the input */
        }
        level[ncoeff] = lev;
        block[idx] = lev;
        run[ncoeff] = zero_run;
        ncoeff++;
        zero_run = 0;
    }

    return ncoeff;
}
1282
1283
1284