1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 #include "avcenc_lib.h"
19 
#define TH_I4  0  /* threshold biasing toward I16 mode instead of I4 mode */
#define TH_Intra  0 /* threshold biasing toward INTER mode instead of intra mode */

#define FIXED_INTRAPRED_MODE  AVC_I16
#define FIXED_I16_MODE  AVC_I16_DC
#define FIXED_I4_MODE   AVC_I4_Diagonal_Down_Left
#define FIXED_INTRA_CHROMA_MODE AVC_IC_DC

/* Clamp the int lvalue x to [0, 255] in place: negative values become 0,
 * values above 255 become 255 (relies on an arithmetic right shift of a
 * 32-bit int to build the mask).
 * The argument is parenthesized so the cast and the shift apply to the whole
 * argument.  The macro intentionally stays a bare `if` statement rather than
 * do { } while (0): existing call sites invoke it without a trailing
 * semicolon. */
#define CLIP_RESULT(x)      if((uint)(x) > 0xFF){ \
                 (x) = 0xFF & (~((x)>>31));}
30 
31 
IntraDecisionABE(AVCEncObject * encvid,int min_cost,uint8 * curL,int picPitch)32 bool IntraDecisionABE(AVCEncObject *encvid, int min_cost, uint8 *curL, int picPitch)
33 {
34     AVCCommonObj *video = encvid->common;
35     AVCFrameIO *currInput = encvid->currInput;
36     int orgPitch = currInput->pitch;
37     int x_pos = (video->mb_x) << 4;
38     int y_pos = (video->mb_y) << 4;
39     uint8 *orgY = currInput->YCbCr[0] + y_pos * orgPitch + x_pos;
40     int j;
41     uint8 *topL, *leftL, *orgY_2, *orgY_3;
42     int temp, SBE, offset;
43     OsclFloat ABE;
44     bool intra = true;
45 
46     if (((x_pos >> 4) != (int)video->PicWidthInMbs - 1) &&
47             ((y_pos >> 4) != (int)video->PicHeightInMbs - 1) &&
48             video->intraAvailA &&
49             video->intraAvailB)
50     {
51         SBE = 0;
52         /* top neighbor */
53         topL = curL - picPitch;
54         /* left neighbor */
55         leftL = curL - 1;
56         orgY_2 = orgY - orgPitch;
57 
58         for (j = 0; j < 16; j++)
59         {
60             temp = *topL++ - orgY[j];
61             SBE += ((temp >= 0) ? temp : -temp);
62             temp = *(leftL += picPitch) - *(orgY_2 += orgPitch);
63             SBE += ((temp >= 0) ? temp : -temp);
64         }
65 
66         /* calculate chroma */
67         offset = (y_pos >> 2) * picPitch + (x_pos >> 1);
68         topL = video->currPic->Scb + offset;
69         orgY_2 = currInput->YCbCr[1] + offset + (y_pos >> 2) * (orgPitch - picPitch);
70 
71         leftL = topL - 1;
72         topL -= (picPitch >> 1);
73         orgY_3 = orgY_2 - (orgPitch >> 1);
74         for (j = 0; j < 8; j++)
75         {
76             temp = *topL++ - orgY_2[j];
77             SBE += ((temp >= 0) ? temp : -temp);
78             temp = *(leftL += (picPitch >> 1)) - *(orgY_3 += (orgPitch >> 1));
79             SBE += ((temp >= 0) ? temp : -temp);
80         }
81 
82         topL = video->currPic->Scr + offset;
83         orgY_2 = currInput->YCbCr[2] + offset + (y_pos >> 2) * (orgPitch - picPitch);
84 
85         leftL = topL - 1;
86         topL -= (picPitch >> 1);
87         orgY_3 = orgY_2 - (orgPitch >> 1);
88         for (j = 0; j < 8; j++)
89         {
90             temp = *topL++ - orgY_2[j];
91             SBE += ((temp >= 0) ? temp : -temp);
92             temp = *(leftL += (picPitch >> 1)) - *(orgY_3 += (orgPitch >> 1));
93             SBE += ((temp >= 0) ? temp : -temp);
94         }
95 
96         /* compare mincost/384 and SBE/64 */
97         ABE = SBE / 64.0;
98         if (ABE*0.8 >= min_cost / 384.0)
99         {
100             intra = false;
101         }
102     }
103 
104     return intra;
105 }
106 
107 /* perform searching for MB mode */
108 /* assuming that this is done inside the encoding loop,
109 no need to call InitNeighborAvailability */
110 
/* Intra mode search for one macroblock.
 *
 * encvid   : encoder object.
 * mbnum    : macroblock number, used to index encvid->min_cost[].
 * curL     : top-left luma sample of the current MB in the frame buffer; on
 *            P slices this area holds the current inter prediction.
 * picPitch : line stride of that frame buffer.
 *
 * Starting from the cost stored in encvid->min_cost[mbnum], the I16 and I4
 * searches are run (on P slices only if IntraDecisionABE() asks for it) and
 * the stored cost is updated.  On P slices the 16x16 inter prediction at curL
 * is saved into encvid->subpel_pred before the I4 search and copied back
 * afterwards if the macroblock did not end up intra.
 *
 * The save/restore copies 16-byte rows with memcpy instead of dereferencing
 * the uint8 frame pointer through uint32*, which avoids the aliasing and
 * alignment assumptions of the word-wise copy; the saved buffer layout
 * (16 rows of 16 bytes, back to back) is unchanged. */
void MBIntraSearch(AVCEncObject *encvid, int mbnum, uint8 *curL, int picPitch)
{
    AVCCommonObj *video = encvid->common;
    AVCFrameIO *currInput = encvid->currInput;
    AVCMacroblock *currMB = video->currMB;
    int min_cost;
    uint8 *orgY;
    int x_pos = (video->mb_x) << 4;
    int y_pos = (video->mb_y) << 4;
    uint8 *saved_inter;
    uint8 *row;
    int j;
    int orgPitch = currInput->pitch;
    bool intra = true;

    currMB->CBP = 0;

    /* cost of the best mode found so far for this macroblock */
    min_cost = encvid->min_cost[mbnum];

    /* now perform intra prediction search */
    if (video->slice_type == AVC_P_SLICE)
    {
        /* Decide whether intra search is necessary or not.  This is done in
           the encoding loop, so the neighboring pixels are the actual
           reconstructed pixels. */
        intra = IntraDecisionABE(encvid, min_cost, curL, picPitch);
    }

    if (intra == true || video->slice_type == AVC_I_SLICE)
    {
        orgY = currInput->YCbCr[0] + y_pos * orgPitch + x_pos;

        /* i16 mode search: generate all the predictions, then evaluate them */
        intrapred_luma_16x16(encvid);
        find_cost_16x16(encvid, orgY, &min_cost);

        if (video->slice_type == AVC_P_SLICE)
        {
            /* save current inter prediction */
            saved_inter = (uint8 *)encvid->subpel_pred; /* reuse existing buffer */
            row = curL;
            for (j = 0; j < 16; j++)
            {
                memcpy(saved_inter + (j << 4), row, 16);
                row += picPitch;
            }
        }

        /* i4 mode search */
        mb_intra4x4_search(encvid, &min_cost);

        encvid->min_cost[mbnum] = min_cost; /* update min_cost */
    }

    if (currMB->mb_intra)
    {
        chroma_intra_search(encvid);

        /* need to set this in order for the MBInterPrediction to work!! */
        memset(currMB->mvL0, 0, sizeof(int32)*16);
        currMB->ref_idx_L0[0] = currMB->ref_idx_L0[1] =
                                    currMB->ref_idx_L0[2] = currMB->ref_idx_L0[3] = -1;
    }
    else if (video->slice_type == AVC_P_SLICE && intra == true)
    {
        /* restore current inter prediction */
        saved_inter = (uint8 *)encvid->subpel_pred; /* reuse existing buffer */
        row = curL;
        for (j = 0; j < 16; j++)
        {
            memcpy(row, saved_inter + (j << 4), 16);
            row += picPitch;
        }
    }

    return ;
}
204 
205 /* generate all the prediction values */
/* Generate the Intra 16x16 luma predictions for the current macroblock.
 *
 * Reads the reconstructed neighbors of the MB in video->currPic->Sl and fills
 * encvid->pred_i16[mode], each treated as 16 rows of 16 bytes:
 *   - AVC_I16_Vertical   : only when the top neighbor (intraAvailB) exists,
 *   - AVC_I16_Horizontal : only when the left neighbor (intraAvailA) exists,
 *   - AVC_I16_DC         : always (128 when neither neighbor exists),
 *   - AVC_I16_Plane      : only when A, B and D (top-left) all exist.
 * Buffers of unavailable modes are left untouched.
 *
 * All predictions are written byte by byte.  The earlier implementation
 * packed four pixels into a 32-bit word ("value << 24" on a signed int, then
 * a word store), which overflows a signed shift for values >= 128 and yields
 * the intended pixel order on little-endian targets only.  The bytes written
 * here match that little-endian result. */
void intrapred_luma_16x16(AVCEncObject *encvid)
{
    AVCCommonObj *video = encvid->common;
    AVCPictureData *currPic = video->currPic;

    int x_pos = (video->mb_x) << 4;
    int y_pos = (video->mb_y) << 4;
    int pitch = currPic->pitch;

    int offset = y_pos * pitch + x_pos;

    uint8 *pred;
    const uint8 *top, *left;
    uint8 *curL = currPic->Sl + offset; /* point to reconstructed frame */
    uint32 sum = 0;                     /* running sum of neighbor pixels for DC */

    int a_16, b, c, factor_c;
    int H = 0, V = 0, value;
    int i, j;

    if (video->intraAvailB)
    {
        /* vertical mode: every row is a copy of the row above the MB */
        top = curL - pitch;
        pred = encvid->pred_i16[AVC_I16_Vertical];

        for (i = 0; i < 16; i++)
        {
            memcpy(pred, top, 16);
            pred += 16;
            sum += top[i];
        }

        if (!video->intraAvailA)
        {
            sum = (sum + 8) >> 4; /* DC from the 16 top pixels only */
        }
    }

    if (video->intraAvailA)
    {
        /* horizontal mode: every row is filled with its left neighbor */
        left = curL - 1;
        pred = encvid->pred_i16[AVC_I16_Horizontal];

        for (i = 0; i < 16; i++)
        {
            value = left[i * pitch];
            sum += value;
            memset(pred, value, 16);
            pred += 16;
        }

        if (!video->intraAvailB)
        {
            sum = (sum + 8) >> 4;  /* DC from the 16 left pixels only */
        }
        else
        {
            sum = (sum + 16) >> 5; /* DC from all 32 neighbor pixels */
        }
    }

    /* DC mode */
    if (!video->intraAvailA && !video->intraAvailB)
    {
        sum = 128;
    }
    memset(encvid->pred_i16[AVC_I16_DC], (int)sum, 256);

    /* plane mode */
    if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
    {
        pred = encvid->pred_i16[AVC_I16_Plane];
        top = curL - pitch;
        left = curL - 1;

        /* weighted gradients along the top row (H) and left column (V);
           top[-1] and left[-pitch] are both the top-left corner pixel */
        for (i = 1; i < 8; i++)
        {
            H += i * (top[7 + i] - top[7 - i]);
            V += i * (left[(7 + i) * pitch] - left[(7 - i) * pitch]);
        }
        H += 8 * (top[15] - top[-1]);
        V += 8 * (left[15 * pitch] - left[-pitch]);

        a_16 = ((top[15] + left[15 * pitch]) << 4) + 16;
        b = (5 * H + 32) >> 6;
        c = (5 * V + 32) >> 6;

        for (i = 0; i < 16; i++)
        {
            /* value at column 0 of row i, before the >> 5 */
            factor_c = a_16 + c * (i - 7) - 7 * b;

            for (j = 0; j < 16; j++)
            {
                value = factor_c >> 5;
                factor_c += b;

                /* clip to the 8-bit pixel range */
                if (value < 0)
                {
                    value = 0;
                }
                else if (value > 0xFF)
                {
                    value = 0xFF;
                }
                pred[j] = (uint8)value;
            }
            pred += 16;
        }
    }

    return ;
}
428 
429 
430 /* evaluate each prediction mode of I16 */
/* Pick the cheapest available Intra 16x16 mode for the current macroblock.
 *
 * encvid   : encoder object; the candidate predictions are read from
 *            encvid->pred_i16[].
 * orgY     : top-left luma sample of the MB in the original input frame.
 * min_cost : in/out; best cost so far.  Whenever a mode costs strictly less,
 *            the cost is lowered and currMB is marked as AVC_I16 / intra with
 *            that i16Mode.
 *
 * Modes are tried in the order vertical, horizontal, DC, plane; a mode whose
 * neighbors are unavailable is skipped (DC is always tried). */
void find_cost_16x16(AVCEncObject *encvid, uint8 *orgY, int *min_cost)
{
    AVCCommonObj *common = encvid->common;
    AVCMacroblock *mb = common->currMB;
    int stride = encvid->currInput->pitch;
    int best = *min_cost;
    int satd;

    /* vertical needs the top neighbor */
    if (common->intraAvailB)
    {
        satd = cost_i16(orgY, stride, encvid->pred_i16[AVC_I16_Vertical], best);
        if (satd < best)
        {
            best = satd;
            mb->mbMode = AVC_I16;
            mb->mb_intra = 1;
            mb->i16Mode = AVC_I16_Vertical;
        }
    }

    /* horizontal needs the left neighbor */
    if (common->intraAvailA)
    {
        satd = cost_i16(orgY, stride, encvid->pred_i16[AVC_I16_Horizontal], best);
        if (satd < best)
        {
            best = satd;
            mb->mbMode = AVC_I16;
            mb->mb_intra = 1;
            mb->i16Mode = AVC_I16_Horizontal;
        }
    }

    /* DC is always a candidate */
    satd = cost_i16(orgY, stride, encvid->pred_i16[AVC_I16_DC], best);
    if (satd < best)
    {
        best = satd;
        mb->mbMode = AVC_I16;
        mb->mb_intra = 1;
        mb->i16Mode = AVC_I16_DC;
    }

    /* plane needs left, top and top-left neighbors */
    if (common->intraAvailA && common->intraAvailB && common->intraAvailD)
    {
        satd = cost_i16(orgY, stride, encvid->pred_i16[AVC_I16_Plane], best);
        if (satd < best)
        {
            best = satd;
            mb->mbMode = AVC_I16;
            mb->mb_intra = 1;
            mb->i16Mode = AVC_I16_Plane;
        }
    }

    *min_cost = best;
    return ;
}
490 
491 
/* SATD-style cost of one Intra 16x16 prediction.
 *
 * org       : top-left sample of the original 16x16 block.
 * org_pitch : line stride of the original frame.
 * pred      : prediction, 16 rows of 16 contiguous bytes.
 * min_cost  : best cost so far; as soon as the running cost (halved) exceeds
 *             it, the function returns that partial value early.
 *
 * The residue of each 4x4 sub-block goes through a 4-point butterfly
 * horizontally and vertically; the absolute values of all non-DC coefficients
 * are summed.  The sixteen DC coefficients are then scaled and transformed
 * again as a 4x4 block and their absolute values are added.  The result is
 * the accumulated sum shifted right by one.
 *
 * The butterflies are written as plain sums and differences.  The former
 * "m - (x << 1)" form left-shifted possibly negative residues, which is
 * undefined behavior in C; the values computed here are identical. */
int cost_i16(uint8 *org, int org_pitch, uint8 *pred, int min_cost)
{
    int16 res[256], *pres; /* residue / transform coefficients */
    int cost = 0;
    int j, k;
    int d0, d1, d2, d3;
    int m0, m1, m2, m3;

    /* horizontal transform of the residue, four samples at a time */
    pres = res;
    for (j = 0; j < 16; j++)
    {
        for (k = 0; k < 4; k++)
        {
            d0 = org[0] - pred[0];
            d1 = org[1] - pred[1];
            d2 = org[2] - pred[2];
            d3 = org[3] - pred[3];

            m0 = d0 + d3;
            m3 = d0 - d3;
            m1 = d1 + d2;
            m2 = d1 - d2;

            pres[0] = (int16)(m0 + m1);
            pres[2] = (int16)(m0 - m1);
            pres[1] = (int16)(m2 + m3);
            pres[3] = (int16)(m3 - m2);

            org += 4;
            pres += 4;
            pred += 4;
        }
        org += org_pitch - 16; /* start of the next original row */
    }

    /* vertical transform, one band of four rows at a time */
    for (j = 0; j < 4; j++)
    {
        pres = res + (j << 6);
        for (k = 0; k < 16; k++)
        {
            d0 = pres[0];
            d1 = pres[1 << 4];
            d2 = pres[2 << 4];
            d3 = pres[3 << 4];

            m0 = d0 + d3;
            m3 = d0 - d3;
            m1 = d1 + d2;
            m2 = d1 - d2;

            /* keep the first output in place: for columns 0, 4, 8, 12 it is
               the DC term of a 4x4 block, needed by the DC transform below */
            d0 = m0 + m1;
            pres[0] = (int16)d0;

            if (k & 0x3)  /* only sum up non DC values */
            {
                cost += ((d0 > 0) ? d0 : -d0);
            }

            d1 = m0 - m1;
            cost += ((d1 > 0) ? d1 : -d1);
            d2 = m2 + m3;
            cost += ((d2 > 0) ? d2 : -d2);
            d3 = m3 - m2;
            cost += ((d3 > 0) ? d3 : -d3);

            pres++;
        }
        if ((cost >> 1) > min_cost) /* early drop out */
        {
            return (cost >> 1);
        }
    }

    /* Hadamard of the DC coefficients: first along each row of four DCs.
       The right shifts are kept exactly as they were (inputs are pre-scaled
       by 1/4, and the difference uses >> 1 of the raw value). */
    pres = res;
    for (k = 0; k < 4; k++)
    {
        m0 = pres[0];
        m3 = pres[3 << 2];
        m0 >>= 2;
        m0 += (m3 >> 2);
        m3 = m0 - (m3 >> 1);
        m1 = pres[1 << 2];
        m2 = pres[2 << 2];
        m1 >>= 2;
        m1 += (m2 >> 2);
        m2 = m1 - (m2 >> 1);
        pres[0] = (int16)(m0 + m1);
        pres[2 << 2] = (int16)(m0 - m1);
        pres[1 << 2] = (int16)(m2 + m3);
        pres[3 << 2] = (int16)(m3 - m2);
        pres += (4 << 4);
    }

    /* ... then along each column of four DCs, accumulating the cost */
    pres = res;
    for (k = 0; k < 4; k++)
    {
        d0 = pres[0];
        d1 = pres[1 << 6];
        d2 = pres[2 << 6];
        d3 = pres[3 << 6];

        m0 = d0 + d3;
        m3 = d0 - d3;
        m1 = d1 + d2;
        m2 = d1 - d2;

        d0 = m0 + m1;
        cost += ((d0 >= 0) ? d0 : -d0);
        d1 = m0 - m1;
        cost += ((d1 >= 0) ? d1 : -d1);
        d2 = m2 + m3;
        cost += ((d2 >= 0) ? d2 : -d2);
        d3 = m3 - m2;
        cost += ((d3 >= 0) ? d3 : -d3);
        pres += 4;

        if ((cost >> 1) > min_cost) /* early drop out */
        {
            return (cost >> 1);
        }
    }

    return (cost >> 1);
}
623 
624 
/* Intra 4x4 search for the whole macroblock.
 *
 * encvid   : encoder object.
 * min_cost : in/out; best cost so far.  If the accumulated I4 cost ends up
 *            strictly lower, it is stored here and currMB is switched to
 *            AVC_I4 / intra.
 *
 * The sixteen 4x4 blocks are visited 8x8 quadrant by 8x8 quadrant.  For each
 * one the best 4x4 mode is searched, then dct_luma() is run on the block with
 * the chosen prediction, and the coefficient count and coded-block bits are
 * recorded.  The search stops as soon as the accumulated cost exceeds
 * *min_cost.  currMB->mb_intra is forced to 1 while the blocks are processed
 * and put back to its previous value before leaving. */
void mb_intra4x4_search(AVCEncObject *encvid, int *min_cost)
{
    AVCCommonObj *video = encvid->common;
    AVCMacroblock *currMB = video->currMB;
    AVCPictureData *currPic = video->currPic;
    AVCFrameIO *currInput = encvid->currInput;
    int pitch = currPic->pitch;
    int org_pitch = currInput->pitch;
    int y = video->mb_y << 4;
    int x = video->mb_x << 4;
    uint8 *mb_rec = currPic->Sl + (y * pitch + x);            /* MB in reconstructed frame */
    uint8 *mb_org = currInput->YCbCr[0] + y * org_pitch + x;  /* MB in original frame */
    uint8 *comp, *org4;
    int saved_mb_intra = currMB->mb_intra; /* remember the caller's value */
    int b8, b4, blkidx, row, col;
    int numcoef;
    int dummy = 0;
    int cost;

    video->pred_pitch = 4;

    /* initial cost of choosing I4, derived from lambda_mode */
    cost = (int)(6.0 * encvid->lambda_mode + 0.4999);
    cost <<= 2;

    /* temporarily set to one to enable the IDCT operation inside dct_luma */
    currMB->mb_intra = 1;

    for (b8 = 0; b8 < 4; b8++)
    {
        for (b4 = 0; b4 < 4; b4++)
        {
            /* position of this 4x4 block inside the macroblock */
            col = ((b8 & 1) << 3) + ((b4 & 1) << 2);
            row = ((b8 >> 1) << 3) + ((b4 >> 1) << 2);
            comp = mb_rec + row * pitch + col;
            org4 = mb_org + row * org_pitch + col;

            blkidx = blkIdx2blkXY[b8][b4];
            cost += blk_intra4x4_search(encvid, blkidx, comp, org4);
            if (cost > *min_cost)
            {
                /* I4 can no longer win: give up early */
                currMB->mb_intra = saved_mb_intra;
                return ;
            }

            /* do residue, Xfrm, Q, invQ, invXfrm, recon and save the DCT coefs. */
            video->pred_block = encvid->pred_i4[currMB->i4Mode[blkidx]];
            numcoef = dct_luma(encvid, blkidx, comp, org4, &dummy);
            currMB->nz_coeff[blkidx] = numcoef;
            if (numcoef)
            {
                video->cbp4x4 |= (1 << blkidx);
                currMB->CBP |= (1 << b8);
            }
        }
    }

    currMB->mb_intra = saved_mb_intra;

    if (cost < *min_cost)
    {
        *min_cost = cost;
        currMB->mbMode = AVC_I4;
        currMB->mb_intra = 1;
    }

    return ;
}
717 
718 
719 /* search for i4 mode for a 4x4 block */
blk_intra4x4_search(AVCEncObject * encvid,int blkidx,uint8 * cur,uint8 * org)720 int blk_intra4x4_search(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org)
721 {
722     AVCCommonObj *video = encvid->common;
723     AVCNeighborAvailability availability;
724     AVCMacroblock *currMB = video->currMB;
725     bool top_left = FALSE;
726     int pitch = video->currPic->pitch;
727     uint8 mode_avail[AVCNumI4PredMode];
728     uint32 temp, DC;
729     uint8 *pred;
730     int org_pitch = encvid->currInput->pitch;
731     uint16 min_cost, cost;
732 
733     int P_x, Q_x, R_x, P_y, Q_y, R_y, D, D0, D1;
734     int P0, Q0, R0, S0, P1, Q1, R1, P2, Q2;
735     uint8 P_A, P_B, P_C, P_D, P_E, P_F, P_G, P_H, P_I, P_J, P_K, P_L, P_X;
736     int r0, r1, r2, r3, r4, r5, r6, r7;
737     int x0, x1, x2, x3, x4, x5;
738     uint32 temp1, temp2;
739 
740     int ipmode, mostProbableMode;
741     int fixedcost = 4 * encvid->lambda_mode;
742     int min_sad = 0x7FFF;
743 
744     availability.left = TRUE;
745     availability.top = TRUE;
746     if (blkidx <= 3) /* top row block  (!block_y) */
747     { /* check availability up */
748         availability.top = video->intraAvailB ;
749     }
750     if (!(blkidx&0x3)) /* left column block (!block_x)*/
751     { /* check availability left */
752         availability.left = video->intraAvailA ;
753     }
754     availability.top_right = BlkTopRight[blkidx];
755 
756     if (availability.top_right == 2)
757     {
758         availability.top_right = video->intraAvailB;
759     }
760     else if (availability.top_right == 3)
761     {
762         availability.top_right = video->intraAvailC;
763     }
764 
765     if (availability.top == TRUE)
766     {
767         temp = *(uint32*)(cur - pitch);
768         P_A = temp & 0xFF;
769         P_B = (temp >> 8) & 0xFF;
770         P_C = (temp >> 16) & 0xFF;
771         P_D = (temp >> 24) & 0xFF;
772     }
773     else
774     {
775         P_A = P_B = P_C = P_D = 128;
776     }
777 
778     if (availability.top_right == TRUE)
779     {
780         temp = *(uint32*)(cur - pitch + 4);
781         P_E = temp & 0xFF;
782         P_F = (temp >> 8) & 0xFF;
783         P_G = (temp >> 16) & 0xFF;
784         P_H = (temp >> 24) & 0xFF;
785     }
786     else
787     {
788         P_E = P_F = P_G = P_H = 128;
789     }
790 
791     if (availability.left == TRUE)
792     {
793         cur--;
794         P_I = *cur;
795         P_J = *(cur += pitch);
796         P_K = *(cur += pitch);
797         P_L = *(cur + pitch);
798         cur -= (pitch << 1);
799         cur++;
800     }
801     else
802     {
803         P_I = P_J = P_K = P_L = 128;
804     }
805 
806     /* check if top-left pixel is available */
807     if (((blkidx > 3) && (blkidx&0x3)) || ((blkidx > 3) && video->intraAvailA)
808             || ((blkidx&0x3) && video->intraAvailB)
809             || (video->intraAvailA && video->intraAvailD && video->intraAvailB))
810     {
811         top_left = TRUE;
812         P_X = *(cur - pitch - 1);
813     }
814     else
815     {
816         P_X = 128;
817     }
818 
819     //===== INTRA PREDICTION FOR 4x4 BLOCK =====
820     /* vertical */
821     mode_avail[AVC_I4_Vertical] = 0;
822     if (availability.top)
823     {
824         mode_avail[AVC_I4_Vertical] = 1;
825         pred = encvid->pred_i4[AVC_I4_Vertical];
826 
827         temp = (P_D << 24) | (P_C << 16) | (P_B << 8) | P_A ;
828         *((uint32*)pred) =  temp; /* write 4 at a time */
829         *((uint32*)(pred += 4)) =  temp;
830         *((uint32*)(pred += 4)) =  temp;
831         *((uint32*)(pred += 4)) =  temp;
832     }
833     /* horizontal */
834     mode_avail[AVC_I4_Horizontal] = 0;
835     mode_avail[AVC_I4_Horizontal_Up] = 0;
836     if (availability.left)
837     {
838         mode_avail[AVC_I4_Horizontal] = 1;
839         pred = encvid->pred_i4[AVC_I4_Horizontal];
840 
841         temp = P_I | (P_I << 8);
842         temp = temp | (temp << 16);
843         *((uint32*)pred) = temp;
844         temp = P_J | (P_J << 8);
845         temp = temp | (temp << 16);
846         *((uint32*)(pred += 4)) = temp;
847         temp = P_K | (P_K << 8);
848         temp = temp | (temp << 16);
849         *((uint32*)(pred += 4)) = temp;
850         temp = P_L | (P_L << 8);
851         temp = temp | (temp << 16);
852         *((uint32*)(pred += 4)) = temp;
853 
854         mode_avail[AVC_I4_Horizontal_Up] = 1;
855         pred = encvid->pred_i4[AVC_I4_Horizontal_Up];
856 
857         Q0 = (P_J + P_K + 1) >> 1;
858         Q1 = (P_J + (P_K << 1) + P_L + 2) >> 2;
859         P0 = ((P_I + P_J + 1) >> 1);
860         P1 = ((P_I + (P_J << 1) + P_K + 2) >> 2);
861 
862         temp = P0 | (P1 << 8);      // [P0 P1 Q0 Q1]
863         temp |= (Q0 << 16);     // [Q0 Q1 R0 DO]
864         temp |= (Q1 << 24);     // [R0 D0 D1 D1]
865         *((uint32*)pred) = temp;      // [D1 D1 D1 D1]
866 
867         D0 = (P_K + 3 * P_L + 2) >> 2;
868         R0 = (P_K + P_L + 1) >> 1;
869 
870         temp = Q0 | (Q1 << 8);
871         temp |= (R0 << 16);
872         temp |= (D0 << 24);
873         *((uint32*)(pred += 4)) = temp;
874 
875         D1 = P_L;
876 
877         temp = R0 | (D0 << 8);
878         temp |= (D1 << 16);
879         temp |= (D1 << 24);
880         *((uint32*)(pred += 4)) = temp;
881 
882         temp = D1 | (D1 << 8);
883         temp |= (temp << 16);
884         *((uint32*)(pred += 4)) = temp;
885     }
886     /* DC */
887     mode_avail[AVC_I4_DC] = 1;
888     pred = encvid->pred_i4[AVC_I4_DC];
889     if (availability.left)
890     {
891         DC = P_I + P_J + P_K + P_L;
892 
893         if (availability.top)
894         {
895             DC = (P_A + P_B + P_C + P_D + DC + 4) >> 3;
896         }
897         else
898         {
899             DC = (DC + 2) >> 2;
900 
901         }
902     }
903     else if (availability.top)
904     {
905         DC = (P_A + P_B + P_C + P_D + 2) >> 2;
906 
907     }
908     else
909     {
910         DC = 128;
911     }
912 
913     temp = DC | (DC << 8);
914     temp = temp | (temp << 16);
915     *((uint32*)pred) = temp;
916     *((uint32*)(pred += 4)) = temp;
917     *((uint32*)(pred += 4)) = temp;
918     *((uint32*)(pred += 4)) = temp;
919 
920     /* Down-left */
921     mode_avail[AVC_I4_Diagonal_Down_Left] = 0;
922 
923     if (availability.top)
924     {
925         mode_avail[AVC_I4_Diagonal_Down_Left] = 1;
926 
927         pred = encvid->pred_i4[AVC_I4_Diagonal_Down_Left];
928 
929         r0 = P_A;
930         r1 = P_B;
931         r2 = P_C;
932         r3 = P_D;
933 
934         r0 += (r1 << 1);
935         r0 += r2;
936         r0 += 2;
937         r0 >>= 2;
938         r1 += (r2 << 1);
939         r1 += r3;
940         r1 += 2;
941         r1 >>= 2;
942 
943         if (availability.top_right)
944         {
945             r4 = P_E;
946             r5 = P_F;
947             r6 = P_G;
948             r7 = P_H;
949 
950             r2 += (r3 << 1);
951             r2 += r4;
952             r2 += 2;
953             r2 >>= 2;
954             r3 += (r4 << 1);
955             r3 += r5;
956             r3 += 2;
957             r3 >>= 2;
958             r4 += (r5 << 1);
959             r4 += r6;
960             r4 += 2;
961             r4 >>= 2;
962             r5 += (r6 << 1);
963             r5 += r7;
964             r5 += 2;
965             r5 >>= 2;
966             r6 += (3 * r7);
967             r6 += 2;
968             r6 >>= 2;
969             temp = r0 | (r1 << 8);
970             temp |= (r2 << 16);
971             temp |= (r3 << 24);
972             *((uint32*)pred) = temp;
973 
974             temp = (temp >> 8) | (r4 << 24);
975             *((uint32*)(pred += 4)) = temp;
976 
977             temp = (temp >> 8) | (r5 << 24);
978             *((uint32*)(pred += 4)) = temp;
979 
980             temp = (temp >> 8) | (r6 << 24);
981             *((uint32*)(pred += 4)) = temp;
982         }
983         else
984         {
985             r2 += (r3 * 3);
986             r2 += 2;
987             r2 >>= 2;
988             r3 = ((r3 << 2) + 2);
989             r3 >>= 2;
990 
991             temp = r0 | (r1 << 8);
992             temp |= (r2 << 16);
993             temp |= (r3 << 24);
994             *((uint32*)pred) = temp;
995 
996             temp = (temp >> 8) | (r3 << 24);
997             *((uint32*)(pred += 4)) = temp;
998 
999             temp = (temp >> 8) | (r3 << 24);
1000             *((uint32*)(pred += 4)) = temp;
1001 
1002             temp = (temp >> 8) | (r3 << 24);
1003             *((uint32*)(pred += 4)) = temp;
1004 
1005         }
1006     }
1007 
1008     /* Down Right */
1009     mode_avail[AVC_I4_Diagonal_Down_Right] = 0;
1010     /* Diagonal Vertical Right */
1011     mode_avail[AVC_I4_Vertical_Right] = 0;
1012     /* Horizontal Down */
1013     mode_avail[AVC_I4_Horizontal_Down] = 0;
1014 
1015     if (top_left == TRUE)
1016     {
1017         /* Down Right */
1018         mode_avail[AVC_I4_Diagonal_Down_Right] = 1;
1019         pred = encvid->pred_i4[AVC_I4_Diagonal_Down_Right];
1020 
1021         Q_x = (P_A + 2 * P_B + P_C + 2) >> 2;
1022         R_x = (P_B + 2 * P_C + P_D + 2) >> 2;
1023         P_x = (P_X + 2 * P_A + P_B + 2) >> 2;
1024         D   = (P_A + 2 * P_X + P_I + 2) >> 2;
1025         P_y = (P_X + 2 * P_I + P_J + 2) >> 2;
1026         Q_y = (P_I + 2 * P_J + P_K + 2) >> 2;
1027         R_y = (P_J + 2 * P_K + P_L + 2) >> 2;
1028 
1029         /* we can pack these */
1030         temp =  D | (P_x << 8);   //[D   P_x Q_x R_x]
1031         //[P_y D   P_x Q_x]
1032         temp |= (Q_x << 16); //[Q_y P_y D   P_x]
1033         temp |= (R_x << 24);  //[R_y Q_y P_y D  ]
1034         *((uint32*)pred) = temp;
1035 
1036         temp =  P_y | (D << 8);
1037         temp |= (P_x << 16);
1038         temp |= (Q_x << 24);
1039         *((uint32*)(pred += 4)) = temp;
1040 
1041         temp =  Q_y | (P_y << 8);
1042         temp |= (D << 16);
1043         temp |= (P_x << 24);
1044         *((uint32*)(pred += 4)) = temp;
1045 
1046         temp = R_y | (Q_y << 8);
1047         temp |= (P_y << 16);
1048         temp |= (D << 24);
1049         *((uint32*)(pred += 4)) = temp;
1050 
1051 
1052         /* Diagonal Vertical Right */
1053         mode_avail[AVC_I4_Vertical_Right] = 1;
1054         pred = encvid->pred_i4[AVC_I4_Vertical_Right];
1055 
1056         Q0 = P_A + P_B + 1;
1057         R0 = P_B + P_C + 1;
1058         S0 = P_C + P_D + 1;
1059         P0 = P_X + P_A + 1;
1060         D = (P_I + 2 * P_X + P_A + 2) >> 2;
1061 
1062         P1 = (P0 + Q0) >> 2;
1063         Q1 = (Q0 + R0) >> 2;
1064         R1 = (R0 + S0) >> 2;
1065 
1066         P0 >>= 1;
1067         Q0 >>= 1;
1068         R0 >>= 1;
1069         S0 >>= 1;
1070 
1071         P2 = (P_X + 2 * P_I + P_J + 2) >> 2;
1072         Q2 = (P_I + 2 * P_J + P_K + 2) >> 2;
1073 
1074         temp =  P0 | (Q0 << 8);  //[P0 Q0 R0 S0]
1075         //[D  P1 Q1 R1]
1076         temp |= (R0 << 16); //[P2 P0 Q0 R0]
1077         temp |= (S0 << 24); //[Q2 D  P1 Q1]
1078         *((uint32*)pred) =  temp;
1079 
1080         temp =  D | (P1 << 8);
1081         temp |= (Q1 << 16);
1082         temp |= (R1 << 24);
1083         *((uint32*)(pred += 4)) =  temp;
1084 
1085         temp = P2 | (P0 << 8);
1086         temp |= (Q0 << 16);
1087         temp |= (R0 << 24);
1088         *((uint32*)(pred += 4)) =  temp;
1089 
1090         temp = Q2 | (D << 8);
1091         temp |= (P1 << 16);
1092         temp |= (Q1 << 24);
1093         *((uint32*)(pred += 4)) =  temp;
1094 
1095 
1096         /* Horizontal Down */
1097         mode_avail[AVC_I4_Horizontal_Down] = 1;
1098         pred = encvid->pred_i4[AVC_I4_Horizontal_Down];
1099 
1100 
1101         Q2 = (P_A + 2 * P_B + P_C + 2) >> 2;
1102         P2 = (P_X + 2 * P_A + P_B + 2) >> 2;
1103         D = (P_I + 2 * P_X + P_A + 2) >> 2;
1104         P0 = P_X + P_I + 1;
1105         Q0 = P_I + P_J + 1;
1106         R0 = P_J + P_K + 1;
1107         S0 = P_K + P_L + 1;
1108 
1109         P1 = (P0 + Q0) >> 2;
1110         Q1 = (Q0 + R0) >> 2;
1111         R1 = (R0 + S0) >> 2;
1112 
1113         P0 >>= 1;
1114         Q0 >>= 1;
1115         R0 >>= 1;
1116         S0 >>= 1;
1117 
1118 
1119         /* we can pack these */
1120         temp = P0 | (D << 8);   //[P0 D  P2 Q2]
1121         //[Q0 P1 P0 D ]
1122         temp |= (P2 << 16);  //[R0 Q1 Q0 P1]
1123         temp |= (Q2 << 24); //[S0 R1 R0 Q1]
1124         *((uint32*)pred) = temp;
1125 
1126         temp = Q0 | (P1 << 8);
1127         temp |= (P0 << 16);
1128         temp |= (D << 24);
1129         *((uint32*)(pred += 4)) = temp;
1130 
1131         temp = R0 | (Q1 << 8);
1132         temp |= (Q0 << 16);
1133         temp |= (P1 << 24);
1134         *((uint32*)(pred += 4)) = temp;
1135 
1136         temp = S0 | (R1 << 8);
1137         temp |= (R0 << 16);
1138         temp |= (Q1 << 24);
1139         *((uint32*)(pred += 4)) = temp;
1140 
1141     }
1142 
1143     /* vertical left */
1144     mode_avail[AVC_I4_Vertical_Left] = 0;
1145     if (availability.top)
1146     {
1147         mode_avail[AVC_I4_Vertical_Left] = 1;
1148         pred = encvid->pred_i4[AVC_I4_Vertical_Left];
1149 
1150         x0 = P_A + P_B + 1;
1151         x1 = P_B + P_C + 1;
1152         x2 = P_C + P_D + 1;
1153         if (availability.top_right)
1154         {
1155             x3 = P_D + P_E + 1;
1156             x4 = P_E + P_F + 1;
1157             x5 = P_F + P_G + 1;
1158         }
1159         else
1160         {
1161             x3 = x4 = x5 = (P_D << 1) + 1;
1162         }
1163 
1164         temp1 = (x0 >> 1);
1165         temp1 |= ((x1 >> 1) << 8);
1166         temp1 |= ((x2 >> 1) << 16);
1167         temp1 |= ((x3 >> 1) << 24);
1168 
1169         *((uint32*)pred) = temp1;
1170 
1171         temp2 = ((x0 + x1) >> 2);
1172         temp2 |= (((x1 + x2) >> 2) << 8);
1173         temp2 |= (((x2 + x3) >> 2) << 16);
1174         temp2 |= (((x3 + x4) >> 2) << 24);
1175 
1176         *((uint32*)(pred += 4)) = temp2;
1177 
1178         temp1 = (temp1 >> 8) | ((x4 >> 1) << 24);   /* rotate out old value */
1179         *((uint32*)(pred += 4)) = temp1;
1180 
1181         temp2 = (temp2 >> 8) | (((x4 + x5) >> 2) << 24); /* rotate out old value */
1182         *((uint32*)(pred += 4)) = temp2;
1183     }
1184 
1185     //===== LOOP OVER ALL 4x4 INTRA PREDICTION MODES =====
1186     // can re-order the search here instead of going in order
1187 
1188     // find most probable mode
1189     encvid->mostProbableI4Mode[blkidx] = mostProbableMode = FindMostProbableI4Mode(video, blkidx);
1190 
1191     min_cost = 0xFFFF;
1192 
1193     for (ipmode = 0; ipmode < AVCNumI4PredMode; ipmode++)
1194     {
1195         if (mode_avail[ipmode] == TRUE)
1196         {
1197             cost  = (ipmode == mostProbableMode) ? 0 : fixedcost;
1198             pred = encvid->pred_i4[ipmode];
1199 
1200             cost_i4(org, org_pitch, pred, &cost);
1201 
1202             if (cost < min_cost)
1203             {
1204                 currMB->i4Mode[blkidx] = (AVCIntra4x4PredMode)ipmode;
1205                 min_cost   = cost;
1206                 min_sad = cost - ((ipmode == mostProbableMode) ? 0 : fixedcost);
1207             }
1208         }
1209     }
1210 
1211     if (blkidx == 0)
1212     {
1213         encvid->i4_sad = min_sad;
1214     }
1215     else
1216     {
1217         encvid->i4_sad += min_sad;
1218     }
1219 
1220     return min_cost;
1221 }
1222 
/**
 * Predict the Intra4x4 mode of block `blkidx` from its left and upper
 * neighbouring 4x4 blocks.
 *
 * `blkidx` is used as a raster index inside the macroblock: `blkidx - 1` is
 * the block to the left and `blkidx - 4` the block above.  For blocks on the
 * left column / top row the neighbour lives in macroblock A (left) or
 * B (above) respectively.
 *
 * @param video   common codec state; reads currMB, mblock[], mbAddrA/B and
 *                intraAvailA/B.
 * @param blkidx  index of the 4x4 block inside the current macroblock.
 * @return the smaller of the two neighbour modes, or AVC_I4_DC as soon as a
 *         required neighbouring macroblock is flagged as not intra-available.
 */
int FindMostProbableI4Mode(AVCCommonObj *video, int blkidx)
{
    AVCMacroblock *currMB = video->currMB;
    int modeLeft;
    int modeAbove;

    /* ---- neighbour A: the 4x4 block to the left ---- */
    if ((blkidx & 0x3) != 0)
    {
        /* left neighbour is inside the current macroblock */
        modeLeft = currMB->i4Mode[blkidx - 1];
    }
    else if (!video->intraAvailA)
    {
        /* blocks 0, 4, 8, 12 with no usable left macroblock: DC only */
        return AVC_I4_DC;
    }
    else if (video->mblock[video->mbAddrA].mbMode == AVC_I4)
    {
        /* right-most block of the same row in the left macroblock */
        modeLeft = video->mblock[video->mbAddrA].i4Mode[blkidx + 3];
    }
    else
    {
        /* left macroblock is not coded as I4: treat it as DC */
        modeLeft = AVC_I4_DC;
    }

    /* ---- neighbour B: the 4x4 block above ---- */
    if ((blkidx >> 2) != 0)
    {
        /* upper neighbour is inside the current macroblock */
        modeAbove = currMB->i4Mode[blkidx - 4];
    }
    else if (!video->intraAvailB)
    {
        /* blocks 0, 1, 2, 3 with no usable upper macroblock: DC only */
        return AVC_I4_DC;
    }
    else if (video->mblock[video->mbAddrB].mbMode == AVC_I4)
    {
        /* bottom block of the same column in the upper macroblock */
        modeAbove = video->mblock[video->mbAddrB].i4Mode[blkidx + 12];
    }
    else
    {
        /* upper macroblock is not coded as I4: treat it as DC */
        modeAbove = AVC_I4_DC;
    }

    return AVC_MIN(modeLeft, modeAbove);
}
1288 
/**
 * Add the SATD of one 4x4 block to *cost.
 *
 * The residual (org - pred) is passed through a horizontal and then a
 * vertical 4-point add/subtract butterfly (all coefficients +/-1), the
 * absolute values of the 16 results are summed, and (sum + 1) >> 1 is added
 * to the value already stored in *cost.
 *
 * @param org        top-left sample of the source block.
 * @param org_pitch  distance in bytes between consecutive rows of `org`.
 * @param pred       16 prediction samples, stored contiguously row by row
 *                   (4 bytes per row).
 * @param cost       in/out accumulator; the caller's starting value is kept
 *                   and the block cost is added on top.
 */
void cost_i4(uint8 *org, int org_pitch, uint8 *pred, uint16 *cost)
{
    int16 res[16];
    int16 *p;
    int n;
    int a0, a1, a2, a3;
    int sum_outer, diff_outer, sum_inner, diff_inner;
    int coef;
    int satd = 0;

    /* Horizontal pass: one butterfly per row of the residual.
     * Sums and differences are formed directly instead of the former
     * "m0 - (m3 << 1)" trick, which left-shifted negative values. */
    p = res;
    for (n = 0; n < 4; n++)
    {
        a0 = org[0] - pred[0];
        a1 = org[1] - pred[1];
        a2 = org[2] - pred[2];
        a3 = org[3] - pred[3];

        sum_outer  = a0 + a3;
        diff_outer = a0 - a3;
        sum_inner  = a1 + a2;
        diff_inner = a1 - a2;

        p[0] = (int16)(sum_outer + sum_inner);
        p[1] = (int16)(diff_inner + diff_outer);
        p[2] = (int16)(sum_outer - sum_inner);
        p[3] = (int16)(diff_outer - diff_inner);

        org  += org_pitch;
        pred += 4;
        p    += 4;
    }

    /* Vertical pass: same butterfly down each column (stride 4). */
    for (n = 0; n < 4; n++)
    {
        p = res + n;

        a0 = p[0];
        a1 = p[4];
        a2 = p[8];
        a3 = p[12];

        sum_outer  = a0 + a3;
        diff_outer = a0 - a3;
        sum_inner  = a1 + a2;
        diff_inner = a1 - a2;

        p[0]  = (int16)(sum_outer + sum_inner);
        p[4]  = (int16)(diff_inner + diff_outer);
        p[8]  = (int16)(sum_outer - sum_inner);
        p[12] = (int16)(diff_outer - diff_inner);
    }

    /* Sum of absolute transformed differences. */
    for (n = 0; n < 16; n++)
    {
        coef = res[n];
        satd += (coef >= 0) ? coef : -coef;
    }

    /* Halve with rounding and accumulate into the caller's cost. */
    *cost = (uint16)(*cost + ((satd + 1) >> 1));
}
1362 
/**
 * Build the intra chroma predictions for the current macroblock and select
 * the cheapest one.
 *
 * Predictions are formed from the samples of video->currPic (Scb / Scr)
 * bordering the current 8x8 chroma blocks and are written to
 * encvid->pred_ic[mode].  Each prediction buffer is laid out as 8 rows of
 * 16 bytes: bytes 0-7 of a row hold Cb, bytes 8-15 hold Cr.
 *
 *  - DC is always generated (value 128 when neither neighbour is usable).
 *  - Horizontal needs intraAvailA, Vertical needs intraAvailB,
 *    Plane needs intraAvailA, intraAvailB and intraAvailD.
 *
 * Every generated prediction is then scored against the input frame with
 * SATDChroma(), and the winner is stored in
 * currMB->intra_chroma_pred_mode (first mode wins on ties, evaluated in the
 * order DC, Horizontal, Vertical, Plane).
 *
 * All sample accesses are byte-wise: no uint32 loads/stores through uint8
 * pointers, so the result does not depend on buffer alignment or on the
 * byte order of the target.
 *
 * @param encvid  encoder state; reads common, currInput and writes pred_ic[]
 *                and the current macroblock's intra_chroma_pred_mode.
 */
void chroma_intra_search(AVCEncObject *encvid)
{
    AVCCommonObj *video = encvid->common;
    AVCPictureData *currPic = video->currPic;
    AVCFrameIO *currInput = encvid->currInput;
    AVCMacroblock *currMB = video->currMB;

    int x_pos = video->mb_x << 3;
    int y_pos = video->mb_y << 3;
    int pitch = currPic->pitch >> 1;
    int offset = y_pos * pitch + x_pos;

    uint8 *cur[2];      /* top-left sample of the Cb / Cr block in currPic */
    uint8 *top, *left, *pred;
    uint8 *orgCb, *orgCr;
    int dc[2][4];       /* [component][quadrant: 0=TL 1=TR 2=BL 3=BR] */
    int sum_x0, sum_x1, sum_y0, sum_y1;
    int comp, row, col, i;
    int H, V, a_16, b, c, factor_c, value, topleft;
    int plane_ok;
    int org_pitch;
    int cost, mincost;

    cur[0] = currPic->Scb + offset;
    cur[1] = currPic->Scr + offset;

    plane_ok = (video->intraAvailA && video->intraAvailB && video->intraAvailD);

    /* ---------------- DC prediction values ---------------- */
    for (comp = 0; comp < 2; comp++)
    {
        if (video->intraAvailB & video->intraAvailA)
        {
            top  = cur[comp] - pitch;
            left = cur[comp] - 1;

            sum_x0 = top[0] + top[1] + top[2] + top[3];
            sum_x1 = top[4] + top[5] + top[6] + top[7];
            sum_y0 = left[0] + left[pitch] + left[2 * pitch] + left[3 * pitch];
            sum_y1 = left[4 * pitch] + left[5 * pitch] + left[6 * pitch] + left[7 * pitch];

            dc[comp][0] = (sum_y0 + sum_x0 + 4) >> 3;   /* both neighbours */
            dc[comp][1] = (sum_x1 + 2) >> 2;            /* top only        */
            dc[comp][2] = (sum_y1 + 2) >> 2;            /* left only       */
            dc[comp][3] = (sum_y1 + sum_x1 + 4) >> 3;   /* both neighbours */
        }
        else if (video->intraAvailA)
        {
            left = cur[comp] - 1;

            sum_y0 = left[0] + left[pitch] + left[2 * pitch] + left[3 * pitch];
            sum_y1 = left[4 * pitch] + left[5 * pitch] + left[6 * pitch] + left[7 * pitch];

            dc[comp][0] = dc[comp][1] = (sum_y0 + 2) >> 2;
            dc[comp][2] = dc[comp][3] = (sum_y1 + 2) >> 2;
        }
        else if (video->intraAvailB)
        {
            top = cur[comp] - pitch;

            sum_x0 = top[0] + top[1] + top[2] + top[3];
            sum_x1 = top[4] + top[5] + top[6] + top[7];

            dc[comp][0] = dc[comp][2] = (sum_x0 + 2) >> 2;
            dc[comp][1] = dc[comp][3] = (sum_x1 + 2) >> 2;
        }
        else
        {
            /* no neighbour available: mid-grey */
            dc[comp][0] = dc[comp][1] = dc[comp][2] = dc[comp][3] = 128;
        }
    }

    /* Fill the DC buffer: each 4x4 quadrant gets its own constant. */
    pred = encvid->pred_ic[AVC_IC_DC];
    for (row = 0; row < 8; row++)
    {
        for (col = 0; col < 16; col++)
        {
            /* col >> 3 selects Cb/Cr; the quadrant comes from the upper or
             * lower half of the rows and the left or right half of the
             * 8 columns of that component. */
            pred[col] = (uint8)dc[col >> 3][((row >> 2) << 1) + ((col >> 2) & 1)];
        }
        pred += 16;
    }

    /* ---------------- horizontal prediction ---------------- */
    if (video->intraAvailA)
    {
        for (comp = 0; comp < 2; comp++)
        {
            left = cur[comp] - 1;
            pred = encvid->pred_ic[AVC_IC_Horizontal];
            pred += comp << 3;

            for (row = 0; row < 8; row++)
            {
                /* every sample of the row repeats the left neighbour */
                for (col = 0; col < 8; col++)
                {
                    pred[col] = left[row * pitch];
                }
                pred += 16;
            }
        }
    }

    /* ---------------- vertical prediction ---------------- */
    if (video->intraAvailB)
    {
        for (comp = 0; comp < 2; comp++)
        {
            top = cur[comp] - pitch;
            pred = encvid->pred_ic[AVC_IC_Vertical];
            pred += comp << 3;

            for (row = 0; row < 8; row++)
            {
                /* every row repeats the row above the block */
                for (col = 0; col < 8; col++)
                {
                    pred[col] = top[col];
                }
                pred += 16;
            }
        }
    }

    /* ---------------- plane prediction ---------------- */
    if (plane_ok)
    {
        for (comp = 0; comp < 2; comp++)
        {
            top = cur[comp] - pitch;
            left = cur[comp] - 1;
            topleft = cur[comp][-pitch - 1];

            /* horizontal / vertical gradients around the block centre */
            H = 0;
            V = 0;
            for (i = 1; i < 4; i++)
            {
                H += i * (top[3 + i] - top[3 - i]);
                V += i * (left[(3 + i) * pitch] - left[(3 - i) * pitch]);
            }
            /* the outermost tap pairs with the top-left corner sample */
            H += 4 * (top[7] - topleft);
            V += 4 * (left[7 * pitch] - topleft);

            a_16 = ((top[7] + left[7 * pitch]) << 4) + 16;
            b = (17 * H + 16) >> 5;
            c = (17 * V + 16) >> 5;

            pred = encvid->pred_ic[AVC_IC_Plane];
            pred += comp << 3;

            for (row = 0; row < 8; row++)
            {
                /* value at column 0 of this row; +b per column afterwards.
                 * The row index is signed so (row - 3) is plain int math. */
                factor_c = a_16 + c * (row - 3) - 3 * b;

                for (col = 0; col < 8; col++)
                {
                    value = factor_c >> 5;
                    factor_c += b;
                    CLIP_RESULT(value)
                    pred[col] = (uint8)value;
                }
                pred += 16;
            }
        }
    }

    /* ---------------- pick the cheapest mode ---------------- */
    org_pitch = (currInput->pitch) >> 1;
    offset = x_pos + y_pos * org_pitch;

    orgCb = currInput->YCbCr[1] + offset;
    orgCr = currInput->YCbCr[2] + offset;

    mincost = 0x7fffffff;

    cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_DC], mincost);
    if (cost < mincost)
    {
        mincost = cost;
        currMB->intra_chroma_pred_mode = AVC_IC_DC;
    }

    if (video->intraAvailA)
    {
        cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Horizontal], mincost);
        if (cost < mincost)
        {
            mincost = cost;
            currMB->intra_chroma_pred_mode = AVC_IC_Horizontal;
        }
    }

    if (video->intraAvailB)
    {
        cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Vertical], mincost);
        if (cost < mincost)
        {
            mincost = cost;
            currMB->intra_chroma_pred_mode = AVC_IC_Vertical;
        }
    }

    if (plane_ok)
    {
        cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Plane], mincost);
        if (cost < mincost)
        {
            mincost = cost;
            currMB->intra_chroma_pred_mode = AVC_IC_Plane;
        }
    }

    return ;
}
1723 
1724 
/**
 * SATD between the source chroma blocks and one chroma prediction buffer.
 *
 * The residual of the 8x8 Cb and 8x8 Cr blocks is transformed in 4x4 units
 * with a horizontal and a vertical add/subtract butterfly, and the absolute
 * values of the 128 results are summed.
 *
 * @param orgCb      top-left sample of the source Cb block.
 * @param orgCr      top-left sample of the source Cr block.
 * @param org_pitch  distance in bytes between consecutive source rows
 *                   (same for Cb and Cr).
 * @param pred       prediction buffer, 8 rows of 16 bytes: bytes 0-7 of each
 *                   row are Cb, bytes 8-15 are Cr.
 * @param min_cost   best cost so far; used only for early termination.
 * @return the full SATD, or a partial sum that already exceeds `min_cost`
 *         (the running sum is checked after every 8 coefficients).
 */
int SATDChroma(uint8 *orgCb, uint8 *orgCr, int org_pitch, uint8 *pred, int min_cost)
{
    int16 res[128];     /* residual / coefficients, same layout as pred */
    int16 *p;
    uint8 *src;
    int row, plane, blk, half, col, i, n;
    int a0, a1, a2, a3;
    int sum_outer, diff_outer, sum_inner, diff_inner;
    int coef;
    int cost = 0;

    /* Horizontal pass: per row, two 4-sample groups of Cb then two of Cr.
     * Sums and differences are formed directly rather than through
     * "m0 - (m3 << 1)", which left-shifted negative values. */
    p = res;
    for (row = 0; row < 8; row++)
    {
        for (plane = 0; plane < 2; plane++)
        {
            src = (plane == 0) ? orgCb : orgCr;

            for (blk = 0; blk < 2; blk++)
            {
                a0 = src[0] - pred[0];
                a1 = src[1] - pred[1];
                a2 = src[2] - pred[2];
                a3 = src[3] - pred[3];

                sum_outer  = a0 + a3;
                diff_outer = a0 - a3;
                sum_inner  = a1 + a2;
                diff_inner = a1 - a2;

                p[0] = (int16)(sum_outer + sum_inner);
                p[1] = (int16)(diff_inner + diff_outer);
                p[2] = (int16)(sum_outer - sum_inner);
                p[3] = (int16)(diff_outer - diff_inner);

                src  += 4;
                pred += 4;
                p    += 4;
            }
        }
        orgCb += org_pitch;
        orgCr += org_pitch;
    }

    /* Vertical pass: rows 0-3 and rows 4-7 are transformed separately,
     * column by column (row stride is 16 coefficients). */
    for (half = 0; half < 2; half++)
    {
        for (col = 0; col < 16; col++)
        {
            p = res + (half << 6) + col;

            a0 = p[0];
            a1 = p[16];
            a2 = p[32];
            a3 = p[48];

            sum_outer  = a0 + a3;
            diff_outer = a0 - a3;
            sum_inner  = a1 + a2;
            diff_inner = a1 - a2;

            p[0]  = (int16)(sum_outer + sum_inner);
            p[16] = (int16)(diff_inner + diff_outer);
            p[32] = (int16)(sum_outer - sum_inner);
            p[48] = (int16)(diff_outer - diff_inner);
        }
    }

    /* Sum of absolute values, with an early exit every 8 coefficients. */
    for (i = 0; i < 128; i += 8)
    {
        for (n = 0; n < 8; n++)
        {
            coef = res[i + n];
            cost += (coef >= 0) ? coef : -coef;
        }
        if (cost > min_cost) /* early drop out */
        {
            return cost;
        }
    }

    return cost;
}
1840 
1841 
1842 
1843 ///////////////////////////////// old code, unused
1844 /* find the best intra mode based on original (unencoded) frame */
1845 /* output is
1846     currMB->mb_intra, currMB->mbMode,
1847     currMB->i16Mode  (if currMB->mbMode == AVC_I16)
1848     currMB->i4Mode[..] (if currMB->mbMode == AVC_I4) */
1849 
1850 #ifdef FIXED_INTRAPRED_MODE
/**
 * Fixed-mode variant of the intra search (compiled when
 * FIXED_INTRAPRED_MODE is defined).
 *
 * No cost evaluation is done.  The macroblock is forced to
 * FIXED_INTRAPRED_MODE with FIXED_I16_MODE / FIXED_I4_MODE and
 * FIXED_INTRA_CHROMA_MODE, and any of those that needs a neighbour which is
 * not intra-available is replaced by the corresponding DC mode.
 *
 * Side effects visible in this function:
 *  - video->intraAvailA..D are recomputed from mbAvailA..D (and, when
 *    constrained_intra_pred_flag is set, from the neighbours' mb_intra);
 *  - currMB->mb_intra, mbMode, i16Mode or i4Mode[], intra_chroma_pred_mode
 *    are written;
 *  - currMB->mvL0 is zeroed and ref_idx_L0[0..3] are set to -1.
 *
 * @param encvid  encoder state (only encvid->common is used).
 * @param currMB  macroblock to fill in.
 * @param mbNum   unused.
 */
void MBIntraSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum)
{
    AVCCommonObj *video = encvid->common;
    int indx, blk, k;
    int block_x, block_y;
    int use_dc;

    (void)(mbNum);

    /* ---- neighbour availability for intra prediction ---- */
    if (video->currPicParams->constrained_intra_pred_flag)
    {
        /* a neighbour counts only if it exists and is itself intra coded */
        video->intraAvailA = video->mbAvailA ? video->mblock[video->mbAddrA].mb_intra : 0;
        video->intraAvailB = video->mbAvailB ? video->mblock[video->mbAddrB].mb_intra : 0;
        video->intraAvailC = video->mbAvailC ? video->mblock[video->mbAddrC].mb_intra : 0;
        video->intraAvailD = video->mbAvailD ? video->mblock[video->mbAddrD].mb_intra : 0;
    }
    else
    {
        video->intraAvailA = video->mbAvailA;
        video->intraAvailB = video->mbAvailB;
        video->intraAvailC = video->mbAvailC;
        video->intraAvailD = video->mbAvailD;
    }

    currMB->mb_intra = TRUE;
    currMB->mbMode = FIXED_INTRAPRED_MODE;

    if (currMB->mbMode == AVC_I16)
    {
        /* ---- 16x16 luma mode ---- */
        switch (FIXED_I16_MODE)
        {
            case AVC_I16_Vertical:
                use_dc = !video->intraAvailB;
                break;
            case AVC_I16_Horizontal:
                use_dc = !video->intraAvailA;
                break;
            case AVC_I16_Plane:
                use_dc = !(video->intraAvailA && video->intraAvailB && video->intraAvailD);
                break;
            default:
                use_dc = 0;
                break;
        }

        currMB->i16Mode = FIXED_I16_MODE;
        if (use_dc)
        {
            currMB->i16Mode = AVC_I16_DC;
        }
    }
    else /* AVC_I4 */
    {
        /* ---- 4x4 luma modes, one per block ---- */
        for (indx = 0; indx < 16; indx++)
        {
            block_x = blkIdx2blkX[indx];
            block_y = blkIdx2blkY[indx];
            blk = (block_y << 2) + block_x;

            switch (FIXED_I4_MODE)
            {
                case AVC_I4_Vertical:
                    use_dc = !(block_y > 0 || video->intraAvailB);
                    break;

                case AVC_I4_Horizontal:
                case AVC_I4_Horizontal_Up:
                    use_dc = !(block_x || video->intraAvailA);
                    break;

                case AVC_I4_Diagonal_Down_Left:
                case AVC_I4_Vertical_Left:
                    use_dc = (block_y == 0 && !video->intraAvailB);
                    break;

                case AVC_I4_Diagonal_Down_Right:
                case AVC_I4_Vertical_Right:
                case AVC_I4_Horizontal_Down:
                    /* these need the left, upper and upper-left samples */
                    use_dc = !((block_y && block_x)
                               || (block_y && video->intraAvailA)
                               || (block_x && video->intraAvailB)
                               || (video->intraAvailA && video->intraAvailD && video->intraAvailB));
                    break;

                default:
                    use_dc = 0;
                    break;
            }

            currMB->i4Mode[blk] = FIXED_I4_MODE;
            if (use_dc)
            {
                currMB->i4Mode[blk] = AVC_I4_DC;
            }
        }
    }

    /* ---- chroma mode ---- */
    switch (FIXED_INTRA_CHROMA_MODE)
    {
        case AVC_IC_Horizontal:
            use_dc = !(video->intraAvailA);
            break;
        case AVC_IC_Vertical:
            use_dc = !(video->intraAvailB);
            break;
        case AVC_IC_Plane:
            use_dc = !(video->intraAvailA && video->intraAvailB && video->intraAvailD);
            break;
        default:
            use_dc = 0;
            break;
    }

    currMB->intra_chroma_pred_mode = FIXED_INTRA_CHROMA_MODE;
    if (use_dc)
    {
        currMB->intra_chroma_pred_mode = AVC_IC_DC;
    }

    /* ---- an intra macroblock carries no motion: clear MVs, mark ref indices ---- */
    memset(currMB->mvL0, 0, sizeof(int32)*16);
    for (k = 0; k < 4; k++)
    {
        currMB->ref_idx_L0[k] = -1;
    }

    /* On return currMB->mbMode, mb_intra, the luma mode field(s) and
     * intra_chroma_pred_mode have all been set. */
    return ;
}
2003 #else // faster combined prediction+SAD calculation
/**
 * Intra prediction mode search for the current macroblock
 * (faster variant: prediction and SAD are computed together by the helpers).
 *
 * Steps, in order:
 *   1. Derive video->intraAvailA/B/C/D from the neighbour availability flags,
 *      honouring constrained_intra_pred_flag.
 *   2. Evaluate the Intra16x16 luma modes (Vertical, Horizontal, DC, Plane)
 *      and record the cheapest one in currMB->i16Mode.
 *   3. Halve the best I16 SAD and, if it is below encvid->min_cost, mark the
 *      macroblock as intra (mb_intra = TRUE, mbMode = AVC_I16) and update
 *      encvid->min_cost.
 *   4. If currMB->mb_intra is set (by step 3 or already on entry), pick the
 *      cheapest chroma mode into currMB->intra_chroma_pred_mode and reset
 *      mvL0 / ref_idx_L0.
 *
 * Assumes InitNeighborAvailability has been called prior to this function.
 *
 * @param encvid  encoder object; common, currInput and min_cost are used.
 * @param currMB  macroblock whose mode fields are written.
 * @param mbNum   not referenced by this implementation.
 */
void MBIntraSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum)
{
    AVCCommonObj *video = encvid->common;
    AVCFrameIO *currInput = encvid->currInput;
    /* NOTE(review): the pitch is taken from video->currPic but is used to
       index the currInput planes -- presumably both share one pitch; confirm. */
    int pitch = video->currPic->pitch;
    /* offset of this macroblock's top-left sample inside the luma plane */
    int lumaOffset = (video->mb_y << 4) * pitch + (video->mb_x << 4);
    uint8 *curL = currInput->YCbCr[0] + lumaOffset;
    uint8 *predBase = video->pred_block + 84;
    uint8 *curCb, *curCr;
    int chromaOffset, chromaPitch;
    uint cost, bestI16, bestChroma;

    /* ---- intra availability of the neighbouring macroblocks ---- */
    if (video->currPicParams->constrained_intra_pred_flag)
    {
        /* constrained: an available neighbour counts only through its own
           mb_intra flag; an unavailable neighbour yields 0 */
        video->intraAvailA = video->mbAvailA ? video->mblock[video->mbAddrA].mb_intra : 0;
        video->intraAvailB = video->mbAvailB ? video->mblock[video->mbAddrB].mb_intra : 0;
        video->intraAvailC = video->mbAvailC ? video->mblock[video->mbAddrC].mb_intra : 0;
        video->intraAvailD = video->mbAvailD ? video->mblock[video->mbAddrD].mb_intra : 0;
    }
    else
    {
        /* unconstrained: plain availability is enough */
        video->intraAvailA = video->mbAvailA;
        video->intraAvailB = video->mbAvailB;
        video->intraAvailC = video->mbAvailC;
        video->intraAvailD = video->mbAvailD;
    }

    /* ---- Intra16x16 luma search (exhaustive over the allowed modes) ---- */
    video->pintra_pred_top = curL - pitch;
    video->pintra_pred_left = curL - 1;
    if (video->mb_y)
    {
        /* the top-left sample is only read when a row above exists */
        video->intra_pred_topleft = curL[-pitch - 1];
    }

    /* starting bound, also handed to the SAD helpers as the current best */
    bestI16 = 65536;

    /* Intra_16x16_Vertical: needs the top neighbour */
    if (video->intraAvailB)
    {
        cost = SAD_I16_Vert(video, curL, bestI16);
        if (cost < bestI16)
        {
            bestI16 = cost;
            currMB->i16Mode = AVC_I16_Vertical;
        }
    }

    /* Intra_16x16_Horizontal: needs the left neighbour */
    if (video->intraAvailA)
    {
        cost = SAD_I16_HorzDC(video, curL, AVC_I16_Horizontal, bestI16);
        if (cost < bestI16)
        {
            bestI16 = cost;
            currMB->i16Mode = AVC_I16_Horizontal;
        }
    }

    /* Intra_16x16_DC: default mode, evaluated unconditionally */
    cost = SAD_I16_HorzDC(video, curL, AVC_I16_DC, bestI16);
    if (cost < bestI16)
    {
        bestI16 = cost;
        currMB->i16Mode = AVC_I16_DC;
    }

    /* Intra_16x16_Plane: needs left, top and top-left neighbours */
    if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
    {
        cost = SAD_I16_Plane(video, curL, bestI16);
        if (cost < bestI16)
        {
            bestI16 = cost;
            currMB->i16Mode = AVC_I16_Plane;
        }
    }

    /* the I16 cost is halved before it is compared with the running minimum */
    bestI16 >>= 1;

    /* selection between intra16 and whatever produced encvid->min_cost */
    if (bestI16 < encvid->min_cost)
    {
        currMB->mb_intra = TRUE;
        currMB->mbMode = AVC_I16;
        encvid->min_cost = bestI16;
    }

    /* the chrominance search is only done once intra has been decided */
    if (!currMB->mb_intra)
    {
        return;
    }

    /* ---- chroma intra search ---- */
    /* Same value as (mb_y << 3) * (pitch >> 1) + (mb_x << 3) for an even pitch:
       the macroblock origin in a plane with half the luma pitch. */
    chromaOffset = (lumaOffset >> 2) + (video->mb_x << 2);
    curCb = currInput->YCbCr[1] + chromaOffset;
    curCr = currInput->YCbCr[2] + chromaOffset;
    chromaPitch = pitch >> 1;

    video->pintra_pred_top_cb = curCb - chromaPitch;
    video->pintra_pred_left_cb = curCb - 1;
    video->pintra_pred_top_cr = curCr - chromaPitch;
    video->pintra_pred_left_cr = curCr - 1;

    if (video->mb_y)
    {
        video->intra_pred_topleft_cb = curCb[-chromaPitch - 1];
        video->intra_pred_topleft_cr = curCr[-chromaPitch - 1];
    }

    /* Intra_Chroma_DC: always evaluated, serves as the baseline */
    bestChroma = SAD_Chroma_DC(video, curCb, curCr, 65536);
    currMB->intra_chroma_pred_mode = AVC_IC_DC;

    /* Intra_Chroma_Horizontal: needs the left neighbour */
    if (video->intraAvailA)
    {
        cost = SAD_Chroma_Horz(video, curCb, curCr, bestChroma);
        if (cost < bestChroma)
        {
            bestChroma = cost;
            currMB->intra_chroma_pred_mode = AVC_IC_Horizontal;
        }
    }

    /* Intra_Chroma_Vertical: needs the top neighbour */
    if (video->intraAvailB)
    {
        cost = SAD_Chroma_Vert(video, curCb, curCr, bestChroma);
        if (cost < bestChroma)
        {
            bestChroma = cost;
            currMB->intra_chroma_pred_mode = AVC_IC_Vertical;
        }
    }

    /* Intra_Chroma_Plane: needs left, top and top-left neighbours */
    if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
    {
        Intra_Chroma_Plane(video, chromaPitch);

        /* presumably Intra_Chroma_Plane leaves its prediction at this
           offset inside pred_block -- TODO confirm */
        cost = SADChroma(predBase + 452, curCb, curCr, chromaPitch);
        if (cost < bestChroma)
        {
            bestChroma = cost;
            currMB->intra_chroma_pred_mode = AVC_IC_Plane;
        }
    }

    /* set MV and Ref_Idx codes of Intra blocks in P-slices:
       zero motion vectors, every ref_idx_L0 byte set to 0xFF (i.e. -1) */
    memset(currMB->mvL0, 0, sizeof(int32)*16);
    memset(currMB->ref_idx_L0, -1, sizeof(int16)*4);

    /* On return this function may have written currMB->i16Mode, mb_intra,
       mbMode, intra_chroma_pred_mode, mvL0 and ref_idx_L0. */
    return;
}
2197 #endif
2198 
2199 
2200