1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
#include "avcenc_lib.h"

#include <string.h>
19
#define TH_I4  0     /* threshold biasing toward I16 mode instead of I4 mode */
#define TH_Intra  0  /* threshold biasing toward INTER mode instead of intra mode */

/* Fixed mode selections; not referenced in the visible part of this file. */
#define FIXED_INTRAPRED_MODE  AVC_I16
#define FIXED_I16_MODE  AVC_I16_DC
#define FIXED_I4_MODE   AVC_I4_Diagonal_Down_Left
#define FIXED_INTRA_CHROMA_MODE AVC_IC_DC

/*
 * Clamp x to the 8-bit pixel range [0, 255] in place.
 *
 * x must be a modifiable lvalue of signed integer type.  The unsigned
 * comparison catches both x < 0 and x > 255 with a single test; the
 * replacement value is then chosen with an explicit sign test instead of
 * right-shifting a negative value (which is implementation-defined in C).
 *
 * The macro expands to a complete if-statement and is invoked without a
 * trailing semicolon in this file, so it must not be wrapped in
 * do { } while (0).
 */
#define CLIP_RESULT(x)      if ((unsigned int)(x) > 0xFFu) { \
                 (x) = ((x) < 0) ? 0 : 0xFF; }
30
31
IntraDecisionABE(AVCEncObject * encvid,int min_cost,uint8 * curL,int picPitch)32 bool IntraDecisionABE(AVCEncObject *encvid, int min_cost, uint8 *curL, int picPitch)
33 {
34 AVCCommonObj *video = encvid->common;
35 AVCFrameIO *currInput = encvid->currInput;
36 int orgPitch = currInput->pitch;
37 int x_pos = (video->mb_x) << 4;
38 int y_pos = (video->mb_y) << 4;
39 uint8 *orgY = currInput->YCbCr[0] + y_pos * orgPitch + x_pos;
40 int j;
41 uint8 *topL, *leftL, *orgY_2, *orgY_3;
42 int temp, SBE, offset;
43 OsclFloat ABE;
44 bool intra = true;
45
46 if (((x_pos >> 4) != (int)video->PicWidthInMbs - 1) &&
47 ((y_pos >> 4) != (int)video->PicHeightInMbs - 1) &&
48 video->intraAvailA &&
49 video->intraAvailB)
50 {
51 SBE = 0;
52 /* top neighbor */
53 topL = curL - picPitch;
54 /* left neighbor */
55 leftL = curL - 1;
56 orgY_2 = orgY - orgPitch;
57
58 for (j = 0; j < 16; j++)
59 {
60 temp = *topL++ - orgY[j];
61 SBE += ((temp >= 0) ? temp : -temp);
62 temp = *(leftL += picPitch) - *(orgY_2 += orgPitch);
63 SBE += ((temp >= 0) ? temp : -temp);
64 }
65
66 /* calculate chroma */
67 offset = (y_pos >> 2) * picPitch + (x_pos >> 1);
68 topL = video->currPic->Scb + offset;
69 orgY_2 = currInput->YCbCr[1] + offset + (y_pos >> 2) * (orgPitch - picPitch);
70
71 leftL = topL - 1;
72 topL -= (picPitch >> 1);
73 orgY_3 = orgY_2 - (orgPitch >> 1);
74 for (j = 0; j < 8; j++)
75 {
76 temp = *topL++ - orgY_2[j];
77 SBE += ((temp >= 0) ? temp : -temp);
78 temp = *(leftL += (picPitch >> 1)) - *(orgY_3 += (orgPitch >> 1));
79 SBE += ((temp >= 0) ? temp : -temp);
80 }
81
82 topL = video->currPic->Scr + offset;
83 orgY_2 = currInput->YCbCr[2] + offset + (y_pos >> 2) * (orgPitch - picPitch);
84
85 leftL = topL - 1;
86 topL -= (picPitch >> 1);
87 orgY_3 = orgY_2 - (orgPitch >> 1);
88 for (j = 0; j < 8; j++)
89 {
90 temp = *topL++ - orgY_2[j];
91 SBE += ((temp >= 0) ? temp : -temp);
92 temp = *(leftL += (picPitch >> 1)) - *(orgY_3 += (orgPitch >> 1));
93 SBE += ((temp >= 0) ? temp : -temp);
94 }
95
96 /* compare mincost/384 and SBE/64 */
97 ABE = SBE / 64.0;
98 if (ABE*0.8 >= min_cost / 384.0)
99 {
100 intra = false;
101 }
102 }
103
104 return intra;
105 }
106
107 /* perform searching for MB mode */
108 /* assuming that this is done inside the encoding loop,
109 no need to call InitNeighborAvailability */
110
/*
 * Intra mode search for one macroblock.
 *
 * encvid    encoder object.
 * mbnum     index into encvid->min_cost[] for this macroblock.
 * curL      top-left luma pixel of the macroblock in the reconstructed frame.
 * picPitch  luma pitch of the reconstructed frame.
 *
 * Starts from the cost already stored in encvid->min_cost[mbnum], runs the
 * Intra16x16 and Intra4x4 searches (in P slices only when IntraDecisionABE
 * asks for it) and writes the possibly lowered cost back.  The 4x4 search
 * reconstructs into the picture buffer, so in P slices the 16x16 luma area
 * at curL is saved to encvid->subpel_pred first and copied back when the
 * macroblock does not end up intra.
 *
 * The save/restore uses memcpy on byte rows rather than uint32 loads and
 * stores through a cast uint8 pointer, which avoids alignment and aliasing
 * assumptions, and it leaves curL and picPitch unmodified.
 */
void MBIntraSearch(AVCEncObject *encvid, int mbnum, uint8 *curL, int picPitch)
{
    AVCCommonObj *video = encvid->common;
    AVCFrameIO *currInput = encvid->currInput;
    AVCMacroblock *currMB = video->currMB;
    int min_cost;
    uint8 *orgY;
    int x_pos = (video->mb_x) << 4;
    int y_pos = (video->mb_y) << 4;
    uint32 *saved_inter;
    int row;
    int orgPitch = currInput->pitch;
    bool intra = true;

    currMB->CBP = 0;

    /* cost left by the motion vector / variable block size search */
    min_cost = encvid->min_cost[mbnum];

    /* now perform intra prediction search */
    /* need to add the check for encvid->intraSearch[video->mbNum] to skip intra
       if it's not worth checking. */
    if (video->slice_type == AVC_P_SLICE)
    {
        /* Decide whether intra search is necessary or not */
        /* This one, we do it in the encoding loop so the neighboring pixel are the
        actual reconstructed pixels. */
        intra = IntraDecisionABE(encvid, min_cost, curL, picPitch);
    }

    if (intra == true || video->slice_type == AVC_I_SLICE)
    {
        orgY = currInput->YCbCr[0] + y_pos * orgPitch + x_pos;

        /* i16 mode search */
        /* generate all the predictions */
        intrapred_luma_16x16(encvid);

        /* evaluate them one by one */
        find_cost_16x16(encvid, orgY, &min_cost);

        if (video->slice_type == AVC_P_SLICE)
        {
            /* save current inter prediction: 16 rows of 16 bytes,
               stored as 4 words per row in the reused buffer */
            saved_inter = encvid->subpel_pred; /* reuse existing buffer */
            for (row = 0; row < 16; row++)
            {
                memcpy(saved_inter + (row << 2), curL + row * picPitch, 16);
            }
        }

        /* i4 mode search */
        mb_intra4x4_search(encvid, &min_cost);

        encvid->min_cost[mbnum] = min_cost; /* update min_cost */
    }


    if (currMB->mb_intra)
    {
        chroma_intra_search(encvid);

        /* need to set this in order for the MBInterPrediction to work!! */
        memset(currMB->mvL0, 0, sizeof(int32)*16);
        currMB->ref_idx_L0[0] = currMB->ref_idx_L0[1] =
                                    currMB->ref_idx_L0[2] = currMB->ref_idx_L0[3] = -1;
    }
    else if (video->slice_type == AVC_P_SLICE && intra == true)
    {
        /* restore current inter prediction (same condition under which it
           was saved above) */
        saved_inter = encvid->subpel_pred; /* reuse existing buffer */
        for (row = 0; row < 16; row++)
        {
            memcpy(curL + row * picPitch, saved_inter + (row << 2), 16);
        }
    }

    return ;
}
204
205 /* generate all the prediction values */
/*
 * Build every available Intra16x16 luma prediction for the current
 * macroblock into encvid->pred_i16[mode] (16 rows of 16 bytes each).
 *
 *  - Vertical   (needs top neighbour B):  every row is the row above the MB.
 *  - Horizontal (needs left neighbour A): every row is filled with the
 *                                         pixel to its left.
 *  - DC         (always):  rounded mean of the available border pixels,
 *                          or 128 when neither A nor B is available.
 *  - Plane      (needs A, B and D):  linear gradient fitted to the border.
 *
 * Predictions are written byte by byte (memcpy/memset), so the result does
 * not depend on host byte order or buffer alignment, and no pixel value is
 * shifted into the sign bit of an int.
 */
void intrapred_luma_16x16(AVCEncObject *encvid)
{
    AVCCommonObj *video = encvid->common;
    AVCPictureData *currPic = video->currPic;

    int x_pos = (video->mb_x) << 4;
    int y_pos = (video->mb_y) << 4;
    int pitch = currPic->pitch;

    int offset = y_pos * pitch + x_pos;

    uint8 *pred, *top;
    uint8 *curL = currPic->Sl + offset; /* point to reconstructed frame */
    uint32 sum = 0;                     /* running border sum for DC */

    int a_16, b, c, factor_c;
    int H = 0, V = 0, value;
    int i, j;

    if (video->intraAvailB)
    {
        /* vertical mode: replicate the row above */
        top = curL - pitch;
        pred = encvid->pred_i16[AVC_I16_Vertical];

        for (i = 0; i < 16; i++)
        {
            memcpy(pred + (i << 4), top, 16);
        }

        for (j = 0; j < 16; j++)
        {
            sum += top[j];
        }

        if (!video->intraAvailA)
        {
            sum = (sum + 8) >> 4;   /* DC from the 16 top pixels only */
        }
    }

    if (video->intraAvailA)
    {
        /* horizontal mode: replicate the left column */
        pred = encvid->pred_i16[AVC_I16_Horizontal];

        for (i = 0; i < 16; i++)
        {
            int leftPix = curL[i * pitch - 1];

            sum += leftPix;
            memset(pred + (i << 4), leftPix, 16);
        }

        if (!video->intraAvailB)
        {
            sum = (sum + 8) >> 4;   /* DC from the 16 left pixels only */
        }
        else
        {
            sum = (sum + 16) >> 5;  /* DC from all 32 border pixels */
        }
    }

    /* DC mode */
    pred = encvid->pred_i16[AVC_I16_DC];
    if (!video->intraAvailA && !video->intraAvailB)
    {
        memset(pred, 0x80, 256);
    }
    else
    {
        memset(pred, (int)sum, 256);    /* sum is already reduced to 0..255 */
    }

    /* plane mode */
    if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
    {
        top = curL - pitch;     /* top[-1] is the top-left corner pixel */

        /* weighted differences of border pixels mirrored around index 7;
           for i == 8 the lower index is -1, i.e. the corner pixel */
        for (i = 1; i <= 8; i++)
        {
            H += i * (top[7 + i] - top[7 - i]);
            V += i * (curL[(7 + i) * pitch - 1] - curL[(7 - i) * pitch - 1]);
        }

        a_16 = ((top[15] + curL[15 * pitch - 1]) << 4) + 16;
        b = (5 * H + 32) >> 6;
        c = (5 * V + 32) >> 6;

        pred = encvid->pred_i16[AVC_I16_Plane];
        for (i = 0; i < 16; i++)
        {
            /* value at column 0 of row i; advances by b per column */
            factor_c = a_16 + c * (i - 7) - 7 * b;

            for (j = 0; j < 16; j++)
            {
                value = factor_c >> 5;
                CLIP_RESULT(value)
                pred[(i << 4) + j] = (uint8)value;
                factor_c += b;
            }
        }
    }

    return ;
}
428
429
430 /* evaluate each prediction mode of I16 */
/*
 * Evaluate the Intra16x16 candidates prepared in encvid->pred_i16[] against
 * the source macroblock at orgY and keep the cheapest one.
 *
 * Candidates are tried in the order Vertical, Horizontal, DC, Plane; each is
 * only tried when the neighbours it depends on are available (DC always).
 * A candidate wins only if its cost is strictly below *min_cost, which is
 * lowered as soon as a candidate wins, so later candidates compete against
 * the new bound.  When at least one candidate wins, the current macroblock
 * is marked AVC_I16 / intra with the winning i16Mode; otherwise neither
 * *min_cost nor the macroblock is modified.
 */
void find_cost_16x16(AVCEncObject *encvid, uint8 *orgY, int *min_cost)
{
    enum { WIN_NONE, WIN_VERTICAL, WIN_HORIZONTAL, WIN_DC, WIN_PLANE };

    AVCCommonObj *video = encvid->common;
    AVCMacroblock *currMB = video->currMB;
    int srcPitch = encvid->currInput->pitch;
    int winner = WIN_NONE;
    int trial;

    /* vertical: needs the top neighbour */
    if (video->intraAvailB)
    {
        trial = cost_i16(orgY, srcPitch, encvid->pred_i16[AVC_I16_Vertical], *min_cost);
        if (trial < *min_cost)
        {
            *min_cost = trial;
            winner = WIN_VERTICAL;
        }
    }

    /* horizontal: needs the left neighbour */
    if (video->intraAvailA)
    {
        trial = cost_i16(orgY, srcPitch, encvid->pred_i16[AVC_I16_Horizontal], *min_cost);
        if (trial < *min_cost)
        {
            *min_cost = trial;
            winner = WIN_HORIZONTAL;
        }
    }

    /* DC: always available */
    trial = cost_i16(orgY, srcPitch, encvid->pred_i16[AVC_I16_DC], *min_cost);
    if (trial < *min_cost)
    {
        *min_cost = trial;
        winner = WIN_DC;
    }

    /* plane: needs left, top and top-left neighbours */
    if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
    {
        trial = cost_i16(orgY, srcPitch, encvid->pred_i16[AVC_I16_Plane], *min_cost);
        if (trial < *min_cost)
        {
            *min_cost = trial;
            winner = WIN_PLANE;
        }
    }

    /* commit the last winner, if any */
    switch (winner)
    {
        case WIN_VERTICAL:
            currMB->i16Mode = AVC_I16_Vertical;
            break;
        case WIN_HORIZONTAL:
            currMB->i16Mode = AVC_I16_Horizontal;
            break;
        case WIN_DC:
            currMB->i16Mode = AVC_I16_DC;
            break;
        case WIN_PLANE:
            currMB->i16Mode = AVC_I16_Plane;
            break;
        default:
            return ;
    }

    currMB->mbMode = AVC_I16;
    currMB->mb_intra = 1;

    return ;
}
490
491
/*
 * Transform-domain cost of a 16x16 prediction.
 *
 * org        top-left source pixel; rows are org_pitch bytes apart.
 * pred       256-byte prediction, rows 16 bytes apart.
 * min_cost   current best cost, used only for early termination.
 *
 * The residual is passed through a 4-point butterfly horizontally and then
 * vertically for each of the sixteen 4x4 blocks.  Absolute values of all
 * non-DC coefficients are accumulated; the sixteen block DC terms are
 * scaled down (>> 2 per input) and run through a second 4x4 butterfly
 * before their absolute values are added.  The return value is the
 * accumulated sum halved.  The function returns early, with the partial
 * sum halved, as soon as that value exceeds min_cost.
 */
int cost_i16(uint8 *org, int org_pitch, uint8 *pred, int min_cost)
{
    int16 coef[256];    /* 16x16 coefficients, row-major */
    int16 *p;
    int row, col, band;
    int in0, in1, in2, in3;
    int sum03, dif03, sum12, dif12;
    int v;
    int total = 0;

    /* pass 1: horizontal butterfly on every group of four residuals */
    p = coef;
    for (row = 0; row < 16; row++)
    {
        const uint8 *src = org + row * org_pitch;

        for (col = 0; col < 16; col += 4)
        {
            in0 = src[col]     - pred[col];
            in1 = src[col + 1] - pred[col + 1];
            in2 = src[col + 2] - pred[col + 2];
            in3 = src[col + 3] - pred[col + 3];

            sum03 = in0 + in3;
            dif03 = in0 - in3;
            sum12 = in1 + in2;
            dif12 = in1 - in2;

            p[0] = sum03 + sum12;
            p[1] = dif12 + dif03;
            p[2] = sum03 - sum12;
            p[3] = dif03 - dif12;
            p += 4;
        }
        pred += 16;
    }

    /* pass 2: vertical butterfly, one band of four rows at a time */
    for (band = 0; band < 4; band++)
    {
        p = coef + (band << 6);

        for (col = 0; col < 16; col++, p++)
        {
            in0 = p[0];
            in1 = p[1<<4];
            in2 = p[2<<4];
            in3 = p[3<<4];

            sum03 = in0 + in3;
            dif03 = in0 - in3;
            sum12 = in1 + in2;
            dif12 = in1 - in2;

            v = sum03 + sum12;
            p[0] = v;           /* kept: block DC when col is a multiple of 4 */

            if (col & 0x3)      /* only sum up non DC values. */
            {
                total += (v > 0) ? v : -v;
            }

            v = sum03 - sum12;
            total += (v > 0) ? v : -v;
            v = dif12 + dif03;
            total += (v > 0) ? v : -v;
            v = dif03 - dif12;
            total += (v > 0) ? v : -v;
        }

        if ((total >> 1) > min_cost) /* early drop out */
        {
            return (total >> 1);
        }
    }

    /* pass 3: butterfly across the four DC terms of each band, with the
       inputs pre-scaled by right shifts exactly as in the reference code */
    for (band = 0; band < 4; band++)
    {
        p = coef + (band << 6);

        in0 = p[0];
        in1 = p[1<<2];
        in2 = p[2<<2];
        in3 = p[3<<2];

        sum03 = (in0 >> 2) + (in3 >> 2);
        dif03 = sum03 - (in3 >> 1);
        sum12 = (in1 >> 2) + (in2 >> 2);
        dif12 = sum12 - (in2 >> 1);

        p[0]    = sum03 + sum12;
        p[2<<2] = sum03 - sum12;
        p[1<<2] = dif12 + dif03;
        p[3<<2] = dif03 - dif12;
    }

    /* pass 4: butterfly down the four bands for each DC column */
    for (col = 0; col < 16; col += 4)
    {
        p = coef + col;

        in0 = p[0];
        in1 = p[1<<6];
        in2 = p[2<<6];
        in3 = p[3<<6];

        sum03 = in0 + in3;
        dif03 = in0 - in3;
        sum12 = in1 + in2;
        dif12 = in1 - in2;

        v = sum03 + sum12;
        total += (v >= 0) ? v : -v;
        v = sum03 - sum12;
        total += (v >= 0) ? v : -v;
        v = dif12 + dif03;
        total += (v >= 0) ? v : -v;
        v = dif03 - dif12;
        total += (v >= 0) ? v : -v;

        if ((total >> 1) > min_cost) /* early drop out */
        {
            return (total >> 1);
        }
    }

    return (total >> 1);
}
623
624
/*
 * Intra4x4 search for the whole macroblock.
 *
 * Visits the sixteen 4x4 luma blocks quadrant by quadrant (8x8 quadrants in
 * raster order, 4x4 blocks in raster order inside each quadrant).  For each
 * block it picks the best 4x4 mode with blk_intra4x4_search and immediately
 * runs dct_luma on it, so that following blocks predict from the result of
 * that call.  The running cost starts at
 * 4 * (int)(6.0 * lambda_mode + 0.4999).
 *
 * As soon as the running cost exceeds *min_cost the search is abandoned and
 * the macroblock's mb_intra flag is put back to its value on entry.
 * If all sixteen blocks are processed and the total is strictly below
 * *min_cost, *min_cost is updated and the macroblock is marked AVC_I4/intra.
 *
 * Side effects besides the macroblock mode: video->pred_pitch is set to 4,
 * video->pred_block, video->cbp4x4, currMB->CBP and currMB->nz_coeff[] are
 * updated for every block that was processed.
 */
void mb_intra4x4_search(AVCEncObject *encvid, int *min_cost)
{
    AVCCommonObj *video = encvid->common;
    AVCMacroblock *currMB = video->currMB;
    AVCPictureData *currPic = video->currPic;
    AVCFrameIO *currInput = encvid->currInput;
    int recPitch = currPic->pitch;
    int srcPitch = currInput->pitch;
    int mbY = video->mb_y << 4;
    int mbX = video->mb_x << 4;
    uint8 *recMB = currPic->Sl + (mbY * recPitch + mbX);
    uint8 *srcMB = currInput->YCbCr[0] + mbY * srcPitch + mbX;
    int savedIntraFlag = currMB->mb_intra; // save the original value
    int unusedCoefCost = 0;                // output of dct_luma that is ignored here
    int total;
    int quad, sub;

    video->pred_pitch = 4;

    total = (int)(6.0 * encvid->lambda_mode + 0.4999);
    total <<= 2;

    currMB->mb_intra = 1;  // temporary set this to one to enable the IDCT
    // operation inside dct_luma

    for (quad = 0; quad < 4; quad++)
    {
        int quadX = (quad & 1) << 3;
        int quadY = (quad >> 1) << 3;

        for (sub = 0; sub < 4; sub++)
        {
            int blkX = quadX + ((sub & 1) << 2);
            int blkY = quadY + ((sub >> 1) << 2);
            uint8 *rec = recMB + blkY * recPitch + blkX;
            uint8 *src = srcMB + blkY * srcPitch + blkX;
            int blkidx = blkIdx2blkXY[quad][sub];
            int numcoef;

            total += blk_intra4x4_search(encvid, blkidx, rec, src);
            if (total > *min_cost)
            {
                currMB->mb_intra = savedIntraFlag; // restore the value
                return ;
            }

            /* do residue, Xfrm, Q, invQ, invXfrm, recon and save the DCT coefs.*/
            video->pred_block = encvid->pred_i4[currMB->i4Mode[blkidx]];
            numcoef = dct_luma(encvid, blkidx, rec, src, &unusedCoefCost);
            currMB->nz_coeff[blkidx] = numcoef;
            if (numcoef)
            {
                video->cbp4x4 |= (1 << blkidx);
                currMB->CBP |= (1 << quad);
            }
        }
    }

    currMB->mb_intra = savedIntraFlag; // restore the value

    if (total < *min_cost)
    {
        *min_cost = total;
        currMB->mbMode = AVC_I4;
        currMB->mb_intra = 1;
    }

    return ;
}
717
718
719 /* search for i4 mode for a 4x4 block */
/* Store one 4-pixel prediction row; the arguments are already in 0..255. */
static void i4_store_row(uint8 *dst, int p0, int p1, int p2, int p3)
{
    dst[0] = (uint8)p0;
    dst[1] = (uint8)p1;
    dst[2] = (uint8)p2;
    dst[3] = (uint8)p3;
}

/*
 * Intra4x4 mode search for one 4x4 luma block.
 *
 * encvid  encoder object; predictions are written to encvid->pred_i4[mode]
 *         (4 rows of 4 bytes each).
 * blkidx  raster index (0..15) of the block inside the macroblock.
 * cur     top-left pixel of the block in the reconstructed frame.
 * org     top-left pixel of the block in the source frame.
 *
 * Neighbour naming:
 *        X | A B C D | E F G H
 *        --+---------+--------
 *        I |
 *        J |  block
 *        K |
 *        L |
 * Unavailable neighbours are replaced by 128.
 *
 * Every mode whose neighbours are available is generated, its cost is
 * measured with cost_i4 (plus 4 * lambda_mode unless it is the most
 * probable mode), and the cheapest mode is stored in currMB->i4Mode[blkidx].
 * encvid->mostProbableI4Mode[blkidx] and encvid->i4_sad are updated as well.
 *
 * Returns the cost of the selected mode.
 *
 * Neighbours are read and predictions written one byte at a time, so the
 * result does not depend on host byte order or on the alignment of the
 * picture and prediction buffers, and no pixel value is shifted into the
 * sign bit of an int.
 */
int blk_intra4x4_search(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org)
{
    AVCCommonObj *video = encvid->common;
    AVCNeighborAvailability availability;
    AVCMacroblock *currMB = video->currMB;
    bool top_left = FALSE;
    int pitch = video->currPic->pitch;
    int org_pitch = encvid->currInput->pitch;
    uint8 mode_avail[AVCNumI4PredMode];
    uint8 *pred;
    uint8 *above;
    uint16 min_cost, cost;

    int P_A, P_B, P_C, P_D, P_E, P_F, P_G, P_H;    /* row above (and above-right) */
    int P_I, P_J, P_K, P_L;                        /* column to the left */
    int P_X;                                       /* top-left corner */
    int P0, Q0, R0, S0, P1, Q1, D0, D1;
    int z[7];
    int x0, x1, x2, x3, x4, x5;
    int dc, row;

    int ipmode, mostProbableMode;
    int fixedcost = 4 * encvid->lambda_mode;
    int min_sad = 0x7FFF;

    /* ----- neighbour availability ----- */
    availability.left = TRUE;
    availability.top = TRUE;
    if (blkidx <= 3) /* top row block (!block_y) */
    { /* check availability up */
        availability.top = video->intraAvailB ;
    }
    if (!(blkidx&0x3)) /* left column block (!block_x)*/
    { /* check availability left */
        availability.left = video->intraAvailA ;
    }
    availability.top_right = BlkTopRight[blkidx];

    /* table values 2 and 3 defer to the neighbouring macroblocks B and C */
    if (availability.top_right == 2)
    {
        availability.top_right = video->intraAvailB;
    }
    else if (availability.top_right == 3)
    {
        availability.top_right = video->intraAvailC;
    }

    /* ----- load neighbours ----- */
    if (availability.top == TRUE)
    {
        above = cur - pitch;
        P_A = above[0];
        P_B = above[1];
        P_C = above[2];
        P_D = above[3];
    }
    else
    {
        P_A = P_B = P_C = P_D = 128;
    }

    if (availability.top_right == TRUE)
    {
        above = cur - pitch + 4;
        P_E = above[0];
        P_F = above[1];
        P_G = above[2];
        P_H = above[3];
    }
    else
    {
        P_E = P_F = P_G = P_H = 128;
    }

    if (availability.left == TRUE)
    {
        P_I = cur[-1];
        P_J = cur[pitch - 1];
        P_K = cur[(pitch << 1) - 1];
        P_L = cur[3 * pitch - 1];
    }
    else
    {
        P_I = P_J = P_K = P_L = 128;
    }

    /* check if top-left pixel is available */
    if (((blkidx > 3) && (blkidx&0x3)) || ((blkidx > 3) && video->intraAvailA)
            || ((blkidx&0x3) && video->intraAvailB)
            || (video->intraAvailA && video->intraAvailD && video->intraAvailB))
    {
        top_left = TRUE;
        P_X = *(cur - pitch - 1);
    }
    else
    {
        P_X = 128;
    }

    //===== INTRA PREDICTION FOR 4x4 BLOCK =====
    /* vertical: every row is [A B C D] */
    mode_avail[AVC_I4_Vertical] = 0;
    if (availability.top)
    {
        mode_avail[AVC_I4_Vertical] = 1;
        pred = encvid->pred_i4[AVC_I4_Vertical];

        for (row = 0; row < 4; row++)
        {
            i4_store_row(pred + (row << 2), P_A, P_B, P_C, P_D);
        }
    }

    /* horizontal and horizontal-up: both depend on the left column only */
    mode_avail[AVC_I4_Horizontal] = 0;
    mode_avail[AVC_I4_Horizontal_Up] = 0;
    if (availability.left)
    {
        mode_avail[AVC_I4_Horizontal] = 1;
        pred = encvid->pred_i4[AVC_I4_Horizontal];

        i4_store_row(pred,      P_I, P_I, P_I, P_I);
        i4_store_row(pred + 4,  P_J, P_J, P_J, P_J);
        i4_store_row(pred + 8,  P_K, P_K, P_K, P_K);
        i4_store_row(pred + 12, P_L, P_L, P_L, P_L);

        mode_avail[AVC_I4_Horizontal_Up] = 1;
        pred = encvid->pred_i4[AVC_I4_Horizontal_Up];

        P0 = (P_I + P_J + 1) >> 1;
        P1 = (P_I + (P_J << 1) + P_K + 2) >> 2;
        Q0 = (P_J + P_K + 1) >> 1;
        Q1 = (P_J + (P_K << 1) + P_L + 2) >> 2;
        R0 = (P_K + P_L + 1) >> 1;
        D0 = (P_K + 3 * P_L + 2) >> 2;
        D1 = P_L;

        i4_store_row(pred,      P0, P1, Q0, Q1);
        i4_store_row(pred + 4,  Q0, Q1, R0, D0);
        i4_store_row(pred + 8,  R0, D0, D1, D1);
        i4_store_row(pred + 12, D1, D1, D1, D1);
    }

    /* DC: rounded mean of whichever of the top row / left column exist */
    mode_avail[AVC_I4_DC] = 1;
    pred = encvid->pred_i4[AVC_I4_DC];
    if (availability.left)
    {
        dc = P_I + P_J + P_K + P_L;

        if (availability.top)
        {
            dc = (P_A + P_B + P_C + P_D + dc + 4) >> 3;
        }
        else
        {
            dc = (dc + 2) >> 2;
        }
    }
    else if (availability.top)
    {
        dc = (P_A + P_B + P_C + P_D + 2) >> 2;
    }
    else
    {
        dc = 128;
    }

    for (row = 0; row < 4; row++)
    {
        i4_store_row(pred + (row << 2), dc, dc, dc, dc);
    }

    /* Down-left: row r is z[r .. r+3] of the filtered top / top-right row */
    mode_avail[AVC_I4_Diagonal_Down_Left] = 0;

    if (availability.top)
    {
        mode_avail[AVC_I4_Diagonal_Down_Left] = 1;

        pred = encvid->pred_i4[AVC_I4_Diagonal_Down_Left];

        z[0] = (P_A + (P_B << 1) + P_C + 2) >> 2;
        z[1] = (P_B + (P_C << 1) + P_D + 2) >> 2;

        if (availability.top_right)
        {
            z[2] = (P_C + (P_D << 1) + P_E + 2) >> 2;
            z[3] = (P_D + (P_E << 1) + P_F + 2) >> 2;
            z[4] = (P_E + (P_F << 1) + P_G + 2) >> 2;
            z[5] = (P_F + (P_G << 1) + P_H + 2) >> 2;
            z[6] = (P_G + 3 * P_H + 2) >> 2;
        }
        else
        {
            /* no top-right pixels: the filter runs off D only */
            z[2] = (P_C + 3 * P_D + 2) >> 2;
            z[3] = ((P_D << 2) + 2) >> 2;
            z[4] = z[5] = z[6] = z[3];
        }

        for (row = 0; row < 4; row++)
        {
            i4_store_row(pred + (row << 2), z[row], z[row + 1], z[row + 2], z[row + 3]);
        }
    }

    /* Down Right */
    mode_avail[AVC_I4_Diagonal_Down_Right] = 0;
    /* Diagonal Vertical Right */
    mode_avail[AVC_I4_Vertical_Right] = 0;
    /* Horizontal Down */
    mode_avail[AVC_I4_Horizontal_Down] = 0;

    if (top_left == TRUE)
    {
        /* three-tap filtered border samples shared by the three modes */
        int fX = (P_I + 2 * P_X + P_A + 2) >> 2;    /* centred on X */
        int fA = (P_X + 2 * P_A + P_B + 2) >> 2;    /* centred on A */
        int fB = (P_A + 2 * P_B + P_C + 2) >> 2;    /* centred on B */
        int fC = (P_B + 2 * P_C + P_D + 2) >> 2;    /* centred on C */
        int fI = (P_X + 2 * P_I + P_J + 2) >> 2;    /* centred on I */
        int fJ = (P_I + 2 * P_J + P_K + 2) >> 2;    /* centred on J */
        int fK = (P_J + 2 * P_K + P_L + 2) >> 2;    /* centred on K */

        /* Down Right */
        mode_avail[AVC_I4_Diagonal_Down_Right] = 1;
        pred = encvid->pred_i4[AVC_I4_Diagonal_Down_Right];

        i4_store_row(pred,      fX, fA, fB, fC);
        i4_store_row(pred + 4,  fI, fX, fA, fB);
        i4_store_row(pred + 8,  fJ, fI, fX, fA);
        i4_store_row(pred + 12, fK, fJ, fI, fX);

        /* Diagonal Vertical Right: two-tap averages along the top row */
        mode_avail[AVC_I4_Vertical_Right] = 1;
        pred = encvid->pred_i4[AVC_I4_Vertical_Right];

        P0 = (P_X + P_A + 1) >> 1;
        Q0 = (P_A + P_B + 1) >> 1;
        R0 = (P_B + P_C + 1) >> 1;
        S0 = (P_C + P_D + 1) >> 1;

        i4_store_row(pred,      P0, Q0, R0, S0);
        i4_store_row(pred + 4,  fX, fA, fB, fC);
        i4_store_row(pred + 8,  fI, P0, Q0, R0);
        i4_store_row(pred + 12, fJ, fX, fA, fB);

        /* Horizontal Down: two-tap averages along the left column */
        mode_avail[AVC_I4_Horizontal_Down] = 1;
        pred = encvid->pred_i4[AVC_I4_Horizontal_Down];

        P0 = (P_X + P_I + 1) >> 1;
        Q0 = (P_I + P_J + 1) >> 1;
        R0 = (P_J + P_K + 1) >> 1;
        S0 = (P_K + P_L + 1) >> 1;

        i4_store_row(pred,      P0, fX, fA, fB);
        i4_store_row(pred + 4,  Q0, fI, P0, fX);
        i4_store_row(pred + 8,  R0, fJ, Q0, fI);
        i4_store_row(pred + 12, S0, fK, R0, fJ);
    }

    /* vertical left */
    mode_avail[AVC_I4_Vertical_Left] = 0;
    if (availability.top)
    {
        mode_avail[AVC_I4_Vertical_Left] = 1;
        pred = encvid->pred_i4[AVC_I4_Vertical_Left];

        /* pair sums with the rounding term already added */
        x0 = P_A + P_B + 1;
        x1 = P_B + P_C + 1;
        x2 = P_C + P_D + 1;
        if (availability.top_right)
        {
            x3 = P_D + P_E + 1;
            x4 = P_E + P_F + 1;
            x5 = P_F + P_G + 1;
        }
        else
        {
            x3 = x4 = x5 = (P_D << 1) + 1;
        }

        /* even rows: two-tap averages; odd rows: three-tap filter;
           rows 2 and 3 are rows 0 and 1 shifted left by one pixel */
        i4_store_row(pred,      x0 >> 1, x1 >> 1, x2 >> 1, x3 >> 1);
        i4_store_row(pred + 4,  (x0 + x1) >> 2, (x1 + x2) >> 2, (x2 + x3) >> 2, (x3 + x4) >> 2);
        i4_store_row(pred + 8,  x1 >> 1, x2 >> 1, x3 >> 1, x4 >> 1);
        i4_store_row(pred + 12, (x1 + x2) >> 2, (x2 + x3) >> 2, (x3 + x4) >> 2, (x4 + x5) >> 2);
    }

    //===== LOOP OVER ALL 4x4 INTRA PREDICTION MODES =====
    // can re-order the search here instead of going in order

    // find most probable mode
    encvid->mostProbableI4Mode[blkidx] = mostProbableMode = FindMostProbableI4Mode(video, blkidx);

    min_cost = 0xFFFF;

    for (ipmode = 0; ipmode < AVCNumI4PredMode; ipmode++)
    {
        if (mode_avail[ipmode] == TRUE)
        {
            /* the most probable mode carries no extra mode cost */
            cost  = (ipmode == mostProbableMode) ? 0 : fixedcost;
            pred = encvid->pred_i4[ipmode];

            cost_i4(org, org_pitch, pred, &cost);

            if (cost < min_cost)
            {
                currMB->i4Mode[blkidx] = (AVCIntra4x4PredMode)ipmode;
                min_cost   = cost;
                min_sad = cost - ((ipmode == mostProbableMode) ? 0 : fixedcost);
            }
        }
    }

    /* i4_sad accumulates over the macroblock; block 0 restarts it */
    if (blkidx == 0)
    {
        encvid->i4_sad = min_sad;
    }
    else
    {
        encvid->i4_sad += min_sad;
    }

    return min_cost;
}
1222
/*
 * Predict the Intra4x4 mode of block blkidx from the blocks to its left
 * and above.
 *
 * A neighbour inside the current macroblock contributes its i4Mode
 * directly.  A neighbour in macroblock A (left) or B (above) contributes
 * its i4Mode only if that macroblock was coded as AVC_I4, otherwise
 * AVC_I4_DC.  If a required neighbouring macroblock is unavailable the
 * prediction is AVC_I4_DC.  In all other cases the result is the smaller
 * of the two neighbour modes.
 */
int FindMostProbableI4Mode(AVCCommonObj *video, int blkidx)
{
    AVCMacroblock *currMB = video->currMB;
    int modeLeft, modeAbove;

    /* ----- left neighbour ----- */
    if (blkidx&0x3)
    {
        modeLeft = currMB->i4Mode[blkidx-1];    // block to the left
    }
    else /* for blk 0, 4, 8, 12 */
    {
        if (!video->intraAvailA)
        {
            return AVC_I4_DC;   /* DC-only prediction */
        }

        if (video->mblock[video->mbAddrA].mbMode == AVC_I4)
        {
            /* right-most block of the same row in macroblock A */
            modeLeft = video->mblock[video->mbAddrA].i4Mode[blkidx + 3];
        }
        else
        {
            modeLeft = AVC_I4_DC;
        }
    }

    /* ----- upper neighbour ----- */
    if (blkidx >> 2)
    {
        modeAbove = currMB->i4Mode[blkidx-4];   // block above
    }
    else /* block 0, 1, 2, 3 */
    {
        if (!video->intraAvailB)
        {
            return AVC_I4_DC;   /* DC-only prediction */
        }

        if (video->mblock[video->mbAddrB].mbMode == AVC_I4)
        {
            /* bottom block of the same column in macroblock B */
            modeAbove = video->mblock[video->mbAddrB].i4Mode[blkidx+12];
        }
        else
        {
            modeAbove = AVC_I4_DC;
        }
    }

    return AVC_MIN(modeLeft, modeAbove);
}
1288
/*
 * Add the transform-domain cost of one 4x4 prediction to *cost.
 *
 * org        top-left source pixel; rows are org_pitch bytes apart.
 * pred       16-byte prediction, rows 4 bytes apart.
 * cost       in/out: (sum of absolute transformed residuals + 1) >> 1 is
 *            added to the value already stored there.
 *
 * The residual org - pred is passed through a 4-point butterfly along the
 * rows and then along the columns; the absolute values of all sixteen
 * results are summed.
 */
void cost_i4(uint8 *org, int org_pitch, uint8 *pred, uint16 *cost)
{
    int16 coef[16];     /* row-transformed residual, row-major */
    int r, c, v;
    int in0, in1, in2, in3;
    int sum03, dif03, sum12, dif12;
    int total = 0;

    /* horizontal pass */
    for (r = 0; r < 4; r++)
    {
        const uint8 *src = org + r * org_pitch;
        const uint8 *prd = pred + (r << 2);
        int16 *out = coef + (r << 2);

        in0 = src[0] - prd[0];
        in1 = src[1] - prd[1];
        in2 = src[2] - prd[2];
        in3 = src[3] - prd[3];

        sum03 = in0 + in3;
        dif03 = in0 - in3;
        sum12 = in1 + in2;
        dif12 = in1 - in2;

        out[0] = sum03 + sum12;
        out[1] = dif12 + dif03;
        out[2] = sum03 - sum12;
        out[3] = dif03 - dif12;
    }

    /* vertical pass; the four outputs of each column are accumulated
       directly (the order of summation does not affect the total) */
    for (c = 0; c < 4; c++)
    {
        in0 = coef[c];
        in1 = coef[c + 4];
        in2 = coef[c + 8];
        in3 = coef[c + 12];

        sum03 = in0 + in3;
        dif03 = in0 - in3;
        sum12 = in1 + in2;
        dif12 = in1 - in2;

        v = sum03 + sum12;
        total += (v >= 0) ? v : -v;
        v = dif12 + dif03;
        total += (v >= 0) ? v : -v;
        v = sum03 - sum12;
        total += (v >= 0) ? v : -v;
        v = dif03 - dif12;
        total += (v >= 0) ? v : -v;
    }

    total = (total + 1) >> 1;
    *cost += total;

    return ;
}
1362
/*
 * chroma_intra_search
 *
 * Builds the candidate intra chroma predictions for the current macroblock
 * from the reconstructed neighbours in video->currPic, scores each one
 * against the input frame with SATDChroma() and stores the cheapest mode in
 * currMB->intra_chroma_pred_mode.
 *
 * Each prediction buffer encvid->pred_ic[mode] is written as 8 rows of
 * 16 bytes: bytes 0..7 of a row hold Cb, bytes 8..15 hold Cr.
 *
 * Candidates:
 *   AVC_IC_DC          always built and evaluated
 *   AVC_IC_Horizontal  only when intraAvailA (left) is set
 *   AVC_IC_Vertical    only when intraAvailB (top) is set
 *   AVC_IC_Plane       only when intraAvailA, intraAvailB and intraAvailD
 *                      are all set
 *
 * Samples are read and written byte by byte (or with memset), so the
 * result does not depend on host byte order, pointer alignment or on
 * reinterpreting uint8 storage as uint32.
 */
void chroma_intra_search(AVCEncObject *encvid)
{
    AVCCommonObj *video = encvid->common;
    AVCPictureData *currPic = video->currPic;
    AVCFrameIO *currInput = encvid->currInput;
    AVCMacroblock *currMB = video->currMB;

    int x_pos = video->mb_x << 3;
    int y_pos = video->mb_y << 3;
    int pitch = currPic->pitch >> 1;
    int offset = y_pos * pitch + x_pos;

    uint8 *curCb = currPic->Scb + offset;
    uint8 *curCr = currPic->Scr + offset;
    uint8 *recon[2];            /* recon[0] = Cb, recon[1] = Cr */
    uint8 *pred;
    uint8 *orgCb, *orgCr;
    const uint8 *top, *left;

    /* dc[comp][q]: q = 0 top-left, 1 top-right, 2 bottom-left, 3 bottom-right 4x4 */
    int dc[2][4];
    int comp, row, k, quad;
    int sumTop0, sumTop1, sumLeft0, sumLeft1;
    int H, V, a_16, b, c, acc, value, topleft;
    int org_pitch, cost, mincost;

    /* logical tests: any non-zero flag means "available" */
    int hasLeft = (video->intraAvailA != 0);
    int hasTop = (video->intraAvailB != 0);
    int hasPlane = (hasLeft && hasTop && video->intraAvailD);

    recon[0] = curCb;
    recon[1] = curCr;

    /* ---------------- DC mode ---------------- */
    for (comp = 0; comp < 2; comp++)
    {
        sumTop0 = sumTop1 = sumLeft0 = sumLeft1 = 0;

        if (hasTop)
        {
            /* row above: samples 0..3 and 4..7 */
            top = recon[comp] - pitch;
            for (k = 0; k < 4; k++)
            {
                sumTop0 += top[k];
                sumTop1 += top[k + 4];
            }
        }

        if (hasLeft)
        {
            /* column to the left: rows 0..3 and 4..7 */
            left = recon[comp] - 1;
            for (k = 0; k < 4; k++)
            {
                sumLeft0 += left[k * pitch];
                sumLeft1 += left[(k + 4) * pitch];
            }
        }

        if (hasTop && hasLeft)
        {
            dc[comp][0] = (sumLeft0 + sumTop0 + 4) >> 3;
            dc[comp][1] = (sumTop1 + 2) >> 2;
            dc[comp][2] = (sumLeft1 + 2) >> 2;
            dc[comp][3] = (sumLeft1 + sumTop1 + 4) >> 3;
        }
        else if (hasLeft)
        {
            dc[comp][0] = dc[comp][1] = (sumLeft0 + 2) >> 2;
            dc[comp][2] = dc[comp][3] = (sumLeft1 + 2) >> 2;
        }
        else if (hasTop)
        {
            dc[comp][0] = dc[comp][2] = (sumTop0 + 2) >> 2;
            dc[comp][1] = dc[comp][3] = (sumTop1 + 2) >> 2;
        }
        else
        {
            /* no neighbour at all: mid-grey */
            dc[comp][0] = dc[comp][1] = dc[comp][2] = dc[comp][3] = 128;
        }
    }

    pred = encvid->pred_ic[AVC_IC_DC];
    for (row = 0; row < 8; row++)
    {
        quad = (row < 4) ? 0 : 2;   /* upper or lower pair of 4x4 blocks */
        memset(pred, dc[0][quad], 4);
        memset(pred + 4, dc[0][quad + 1], 4);
        memset(pred + 8, dc[1][quad], 4);
        memset(pred + 12, dc[1][quad + 1], 4);
        pred += 16;                 /* move to the next line */
    }

    /* ---------------- horizontal mode ---------------- */
    if (video->intraAvailA)
    {
        const uint8 *leftCb = curCb - 1;
        const uint8 *leftCr = curCr - 1;

        pred = encvid->pred_ic[AVC_IC_Horizontal];
        for (row = 0; row < 8; row++)
        {
            /* every sample of a row repeats the pixel to its left */
            memset(pred, *leftCb, 8);
            memset(pred + 8, *leftCr, 8);
            leftCb += pitch;
            leftCr += pitch;
            pred += 16;
        }
    }

    /* ---------------- vertical mode ---------------- */
    if (video->intraAvailB)
    {
        const uint8 *topCb = curCb - pitch;
        const uint8 *topCr = curCr - pitch;

        pred = encvid->pred_ic[AVC_IC_Vertical];
        for (row = 0; row < 8; row++)
        {
            /* every row repeats the row above the macroblock */
            for (k = 0; k < 8; k++)
            {
                pred[k] = topCb[k];
                pred[8 + k] = topCr[k];
            }
            pred += 16;
        }
    }

    /* ---------------- plane mode ---------------- */
    if (hasPlane)
    {
        for (comp = 0; comp < 2; comp++)
        {
            top = recon[comp] - pitch;
            left = recon[comp] - 1;
            topleft = top[-1];

            /* weighted gradients along the top row (H) and left column (V);
               the outermost term (weight 4) pairs sample 7 with the corner */
            H = 4 * (top[7] - topleft);
            V = 4 * (left[7 * pitch] - topleft);
            for (k = 1; k < 4; k++)
            {
                H += k * (top[3 + k] - top[3 - k]);
                V += k * (left[(3 + k) * pitch] - left[(3 - k) * pitch]);
            }

            a_16 = ((top[7] + left[7 * pitch]) * 16) + 16;
            b = (17 * H + 16) >> 5;
            c = (17 * V + 16) >> 5;

            /* Cb occupies bytes 0..7 of each row, Cr bytes 8..15 */
            pred = encvid->pred_ic[AVC_IC_Plane] + (comp << 3);
            for (row = 0; row < 8; row++)
            {
                /* value at column 0; a signed row index keeps this in int math */
                acc = a_16 + c * (row - 3) - 3 * b;

                for (k = 0; k < 8; k++)
                {
                    value = acc >> 5;
                    acc += b;

                    /* clip to 0..255 */
                    if (value < 0)
                    {
                        value = 0;
                    }
                    else if (value > 0xFF)
                    {
                        value = 0xFF;
                    }
                    pred[k] = (uint8)value;
                }
                pred += 16;
            }
        }
    }

    /* ---------------- evaluate the candidates ---------------- */
    org_pitch = (currInput->pitch) >> 1;
    offset = x_pos + y_pos * org_pitch;

    orgCb = currInput->YCbCr[1] + offset;
    orgCr = currInput->YCbCr[2] + offset;

    mincost = 0x7fffffff;
    cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_DC], mincost);
    if (cost < mincost)
    {
        mincost = cost;
        currMB->intra_chroma_pred_mode = AVC_IC_DC;
    }

    if (video->intraAvailA)
    {
        cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Horizontal], mincost);
        if (cost < mincost)
        {
            mincost = cost;
            currMB->intra_chroma_pred_mode = AVC_IC_Horizontal;
        }
    }

    if (video->intraAvailB)
    {
        cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Vertical], mincost);
        if (cost < mincost)
        {
            mincost = cost;
            currMB->intra_chroma_pred_mode = AVC_IC_Vertical;
        }
    }

    if (hasPlane)
    {
        cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Plane], mincost);
        if (cost < mincost)
        {
            mincost = cost;
            currMB->intra_chroma_pred_mode = AVC_IC_Plane;
        }
    }

    return ;
}
1723
1724
/*
 * SATDChroma
 *
 * Transformed-difference cost of one 8x8 Cb block plus one 8x8 Cr block
 * against a prediction buffer.
 *
 *   orgCb, orgCr  top-left samples of the original chroma blocks
 *   org_pitch     distance in bytes between consecutive rows of the originals
 *   pred          prediction, 8 rows of 16 bytes (8 Cb samples, then 8 Cr)
 *   min_cost      best cost so far, used for the early drop-out
 *
 * The residual is transformed in 4x4 tiles with an add/subtract butterfly
 * (rows first, then columns) and the absolute coefficient values are
 * summed.  After every 8 coefficients the running sum is compared with
 * min_cost; as soon as it exceeds min_cost the partial sum is returned.
 * Otherwise the full sum over all 128 coefficients is returned.
 */
int SATDChroma(uint8 *orgCb, uint8 *orgCr, int org_pitch, uint8 *pred, int min_cost)
{
    int16 res[128];             /* residue / coefficients, 8 rows of 16 */
    int16 *coef;
    uint8 *src[2];              /* src[0] walks Cb, src[1] walks Cr */
    int row, comp, grp, band, col, idx, k;
    int d0, d1, d2, d3;
    int s03, s12, t03, t12;
    int v;
    int cost;

    src[0] = orgCb;
    src[1] = orgCr;

    /* horizontal pass: per row, two 4-sample groups of Cb then two of Cr */
    coef = res;
    for (row = 0; row < 8; row++)
    {
        for (comp = 0; comp < 2; comp++)
        {
            for (grp = 0; grp < 8; grp += 4)
            {
                const uint8 *o = src[comp] + grp;

                d0 = o[0] - pred[0];
                d1 = o[1] - pred[1];
                d2 = o[2] - pred[2];
                d3 = o[3] - pred[3];

                /*
                 * Differences are formed directly instead of as
                 * "sum - (x << 1)": x may be negative and left-shifting
                 * a negative int is undefined behaviour in C.
                 */
                s03 = d0 + d3;
                t03 = d0 - d3;
                s12 = d1 + d2;
                t12 = d1 - d2;

                coef[0] = (int16)(s03 + s12);
                coef[1] = (int16)(t12 + t03);
                coef[2] = (int16)(s03 - s12);
                coef[3] = (int16)(t03 - t12);

                coef += 4;
                pred += 4;
            }
            src[comp] += org_pitch;
        }
    }

    /* vertical pass: rows 0..3 form one band, rows 4..7 the other;
       rows of res are 16 entries apart */
    for (band = 0; band < 128; band += 64)
    {
        for (col = 0; col < 16; col++)
        {
            coef = res + band + col;

            d0 = coef[0];
            d1 = coef[1 << 4];
            d2 = coef[2 << 4];
            d3 = coef[3 << 4];

            s03 = d0 + d3;
            t03 = d0 - d3;
            s12 = d1 + d2;
            t12 = d1 - d2;

            coef[0] = (int16)(s03 + s12);
            coef[1 << 4] = (int16)(t12 + t03);
            coef[2 << 4] = (int16)(s03 - s12);
            coef[3 << 4] = (int16)(t03 - t12);
        }
    }

    /* sum of absolute values, checked against min_cost every 8 entries */
    cost = 0;
    for (idx = 0; idx < 128; idx += 8)
    {
        for (k = 0; k < 8; k++)
        {
            v = res[idx + k];
            cost += (v >= 0) ? v : -v;
        }

        if (cost > min_cost) /* early drop out */
        {
            return cost;
        }
    }

    return cost;
}
1840
1841
1842
1843 ///////////////////////////////// old code, unused
1844 /* find the best intra mode based on original (unencoded) frame */
1845 /* output is
1846 currMB->mb_intra, currMB->mbMode,
1847 currMB->i16Mode (if currMB->mbMode == AVC_I16)
1848 currMB->i4Mode[..] (if currMB->mbMode == AVC_I4) */
1849
1850 #ifdef FIXED_INTRAPRED_MODE
/*
 * MBIntraSearch (FIXED_INTRAPRED_MODE build)
 *
 * Marks the macroblock as intra and assigns the compile-time selected
 * modes FIXED_INTRAPRED_MODE / FIXED_I16_MODE / FIXED_I4_MODE /
 * FIXED_INTRA_CHROMA_MODE.  Whenever the neighbours a fixed mode depends
 * on are not available for intra prediction, the corresponding DC mode is
 * stored instead.
 *
 *   encvid  encoder object; encvid->common supplies neighbour information
 *   currMB  macroblock to fill in (mb_intra, mbMode, i16Mode or i4Mode[],
 *           intra_chroma_pred_mode, mvL0, ref_idx_L0)
 *   mbNum   unused
 *
 * Also refreshes video->intraAvailA..D, zeroes the 16 motion vectors and
 * sets the four ref_idx_L0 entries to -1.
 */
void MBIntraSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum)
{
    AVCCommonObj *video = encvid->common;
    int blk, col, row, slot, k;
    int useDC;
    int cornerOK;

    (void)(mbNum);

    /* neighbour availability for intra prediction */
    if (video->currPicParams->constrained_intra_pred_flag)
    {
        /* a neighbour counts only if it exists and is itself intra coded */
        video->intraAvailA = video->intraAvailB = video->intraAvailC = video->intraAvailD = 0;

        if (video->mbAvailA)
        {
            video->intraAvailA = video->mblock[video->mbAddrA].mb_intra;
        }
        if (video->mbAvailB)
        {
            video->intraAvailB = video->mblock[video->mbAddrB].mb_intra ;
        }
        if (video->mbAvailC)
        {
            video->intraAvailC = video->mblock[video->mbAddrC].mb_intra;
        }
        if (video->mbAvailD)
        {
            video->intraAvailD = video->mblock[video->mbAddrD].mb_intra;
        }
    }
    else
    {
        video->intraAvailA = video->mbAvailA;
        video->intraAvailB = video->mbAvailB;
        video->intraAvailC = video->mbAvailC;
        video->intraAvailD = video->mbAvailD;
    }

    currMB->mb_intra = TRUE;
    currMB->mbMode = FIXED_INTRAPRED_MODE;

    if (currMB->mbMode == AVC_I16)
    {
        /* does the fixed 16x16 mode have the neighbours it needs? */
        useDC = 0;
        if (FIXED_I16_MODE == AVC_I16_Vertical)
        {
            useDC = !video->intraAvailB;
        }
        else if (FIXED_I16_MODE == AVC_I16_Horizontal)
        {
            useDC = !video->intraAvailA;
        }
        else if (FIXED_I16_MODE == AVC_I16_Plane)
        {
            useDC = !(video->intraAvailA && video->intraAvailB && video->intraAvailD);
        }

        currMB->i16Mode = FIXED_I16_MODE;
        if (useDC)
        {
            currMB->i16Mode = AVC_I16_DC;
        }
    }
    else /* AVC_I4 */
    {
        for (blk = 0; blk < 16; blk++)
        {
            col = blkIdx2blkX[blk];
            row = blkIdx2blkY[blk];
            slot = (row << 2) + col;

            /* shared test of the three modes that use left, top and corner samples */
            cornerOK = ((row && col)
                        || (row && video->intraAvailA)
                        || (col && video->intraAvailB)
                        || (video->intraAvailA && video->intraAvailD && video->intraAvailB));

            useDC = 0;
            if (FIXED_I4_MODE == AVC_I4_Vertical)
            {
                useDC = !(row > 0 || video->intraAvailB);
            }
            else if (FIXED_I4_MODE == AVC_I4_Horizontal)
            {
                useDC = !(col || video->intraAvailA);
            }
            else if (FIXED_I4_MODE == AVC_I4_Diagonal_Down_Left)
            {
                useDC = (row == 0 && !video->intraAvailB);
            }
            else if (FIXED_I4_MODE == AVC_I4_Diagonal_Down_Right)
            {
                useDC = !cornerOK;
            }
            else if (FIXED_I4_MODE == AVC_I4_Vertical_Right)
            {
                useDC = !cornerOK;
            }
            else if (FIXED_I4_MODE == AVC_I4_Horizontal_Down)
            {
                useDC = !cornerOK;
            }
            else if (FIXED_I4_MODE == AVC_I4_Vertical_Left)
            {
                useDC = (row == 0 && !video->intraAvailB);
            }
            else if (FIXED_I4_MODE == AVC_I4_Horizontal_Up)
            {
                useDC = !(col || video->intraAvailA);
            }

            currMB->i4Mode[slot] = FIXED_I4_MODE;
            if (useDC)
            {
                currMB->i4Mode[slot] = AVC_I4_DC;
            }
        }
    }

    /* chroma: same fallback rule */
    useDC = 0;
    if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Horizontal)
    {
        useDC = !(video->intraAvailA);
    }
    else if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Vertical)
    {
        useDC = !(video->intraAvailB);
    }
    else if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Plane)
    {
        useDC = !(video->intraAvailA && video->intraAvailB && video->intraAvailD);
    }

    currMB->intra_chroma_pred_mode = FIXED_INTRA_CHROMA_MODE;
    if (useDC)
    {
        currMB->intra_chroma_pred_mode = AVC_IC_DC;
    }

    /* clear the motion data: zero vectors, reference indices of -1 */
    memset(currMB->mvL0, 0, sizeof(int32)*16);
    for (k = 0; k < 4; k++)
    {
        currMB->ref_idx_L0[k] = -1;
    }

    /* on return currMB->mbMode, mb_intra and intra_chroma_pred_mode are set */
    return ;
}
2003 #else // faster combined prediction+SAD calculation
/*
 * MBIntraSearch (search build, compiled when FIXED_INTRAPRED_MODE is not
 * defined)
 *
 * Evaluates the 16x16 luma intra modes with the SAD_I16_* helpers and
 * switches the macroblock to AVC_I16 when the halved best SAD is below
 * encvid->min_cost.  If the macroblock is intra afterwards, the chroma
 * intra mode is searched as well and the motion data is reset.
 *
 *   encvid  encoder object (common state, input frame, min_cost)
 *   currMB  macroblock to update (i16Mode, mb_intra, mbMode,
 *           intra_chroma_pred_mode, mvL0, ref_idx_L0)
 *   mbNum   unused
 *
 * Assumes InitNeighborAvailability has been called before this function.
 */
void MBIntraSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum)
{
    AVCCommonObj *video = encvid->common;
    AVCFrameIO *currInput = encvid->currInput;
    uint8 *curL, *curCb, *curCr;
    uint8 *pred_block;
    int offset;
    uint sad, sad4, sadI16;
    int pitch = video->currPic->pitch;

    (void)(mbNum);

    /* calculate the cost of each intra prediction mode and compare to the
    inter mode */
    /* full search for all intra prediction */
    offset = (video->mb_y << 4) * pitch + (video->mb_x << 4);
    curL = currInput->YCbCr[0] + offset;
    pred_block = video->pred_block + 84;

    /* neighbour availability for intra prediction */
    video->intraAvailA = video->intraAvailB = video->intraAvailC = video->intraAvailD = 0;

    if (!video->currPicParams->constrained_intra_pred_flag)
    {
        video->intraAvailA = video->mbAvailA;
        video->intraAvailB = video->mbAvailB;
        video->intraAvailC = video->mbAvailC;
        video->intraAvailD = video->mbAvailD;
    }
    else
    {
        /* a neighbour counts only if it exists and is itself intra coded */
        if (video->mbAvailA)
        {
            video->intraAvailA = video->mblock[video->mbAddrA].mb_intra;
        }
        if (video->mbAvailB)
        {
            video->intraAvailB = video->mblock[video->mbAddrB].mb_intra ;
        }
        if (video->mbAvailC)
        {
            video->intraAvailC = video->mblock[video->mbAddrC].mb_intra;
        }
        if (video->mbAvailD)
        {
            video->intraAvailD = video->mblock[video->mbAddrD].mb_intra;
        }
    }

    /* currently we're doing exhaustive search. Smart search will be used later */

    /* I16 modes: publish the neighbouring sample pointers */
    video->pintra_pred_top = curL - pitch;
    video->pintra_pred_left = curL - 1;
    if (video->mb_y)
    {
        video->intra_pred_topleft = *(curL - pitch - 1);
    }

    /* Intra_16x16_Vertical */
    sadI16 = 65536;
    /* check availability of top */
    if (video->intraAvailB)
    {
        sad = SAD_I16_Vert(video, curL, sadI16);

        if (sad < sadI16)
        {
            sadI16 = sad;
            currMB->i16Mode = AVC_I16_Vertical;
        }
    }

    /* Intra_16x16_Horizontal */
    /* check availability of left */
    if (video->intraAvailA)
    {
        sad = SAD_I16_HorzDC(video, curL, AVC_I16_Horizontal, sadI16);

        if (sad < sadI16)
        {
            sadI16 = sad;
            currMB->i16Mode = AVC_I16_Horizontal;
        }
    }

    /* Intra_16x16_DC, default mode */
    sad = SAD_I16_HorzDC(video, curL, AVC_I16_DC, sadI16);
    if (sad < sadI16)
    {
        sadI16 = sad;
        currMB->i16Mode = AVC_I16_DC;
    }

    /* Intra_16x16_Plane */
    if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
    {
        sad = SAD_I16_Plane(video, curL, sadI16);

        if (sad < sadI16)
        {
            sadI16 = sad;
            currMB->i16Mode = AVC_I16_Plane;
        }
    }

    sadI16 >>= 1;  /* before comparison */

    /* selection between intra16 and the best mode found so far */
    if (sadI16 < encvid->min_cost)
    {
        currMB->mb_intra = TRUE;
        currMB->mbMode = AVC_I16;
        encvid->min_cost = sadI16;
    }

    if (currMB->mb_intra) /* only do the chrominance search when intra is decided */
    {
        /* Note that we might be able to guess the type of prediction from
        the luma prediction type */

        /* chroma offset derived from the luma offset:
           (offset >> 2) quarters both terms, the second term restores
           the horizontal part to mb_x * 8 */
        offset = (offset >> 2) + (video->mb_x << 2);
        curCb = currInput->YCbCr[1] + offset;
        curCr = currInput->YCbCr[2] + offset;

        pitch >>= 1;
        video->pintra_pred_top_cb = curCb - pitch;
        video->pintra_pred_left_cb = curCb - 1;
        video->pintra_pred_top_cr = curCr - pitch;
        video->pintra_pred_left_cr = curCr - 1;

        if (video->mb_y)
        {
            video->intra_pred_topleft_cb = *(curCb - pitch - 1);
            video->intra_pred_topleft_cr = *(curCr - pitch - 1);
        }

        /* Intra_Chroma_DC */
        sad4 = SAD_Chroma_DC(video, curCb, curCr, 65536);
        currMB->intra_chroma_pred_mode = AVC_IC_DC;

        /* Intra_Chroma_Horizontal */
        if (video->intraAvailA)
        {
            /* check availability of left */
            sad = SAD_Chroma_Horz(video, curCb, curCr, sad4);
            if (sad < sad4)
            {
                sad4 = sad;
                currMB->intra_chroma_pred_mode = AVC_IC_Horizontal;
            }
        }

        /* Intra_Chroma_Vertical */
        if (video->intraAvailB)
        {
            /* check availability of top */
            sad = SAD_Chroma_Vert(video, curCb, curCr, sad4);

            if (sad < sad4)
            {
                sad4 = sad;
                currMB->intra_chroma_pred_mode = AVC_IC_Vertical;
            }
        }

        /* Intra_Chroma_Plane */
        if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
        {
            /* check availability of top and left */
            Intra_Chroma_Plane(video, pitch);

            sad = SADChroma(pred_block + 452, curCb, curCr, pitch);

            if (sad < sad4)
            {
                sad4 = sad;
                currMB->intra_chroma_pred_mode = AVC_IC_Plane;
            }
        }

        /* also reset the motion vectors */
        /* set MV and Ref_Idx codes of Intra blocks in P-slices */
        memset(currMB->mvL0, 0, sizeof(int32)*16);
        memset(currMB->ref_idx_L0, -1, sizeof(int16)*4);
    }

    /* on return currMB->i16Mode holds the best 16x16 mode found; mbMode,
       mb_intra and intra_chroma_pred_mode are updated only on the intra path */
    return ;
}
2197 #endif
2198
2199
2200