1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 #include "avcenc_lib.h"
19 
20 #define MIN_GOP     1   /* minimum size of GOP, 1/23/01, need to be tested */
21 
22 #define DEFAULT_REF_IDX     0  /* always from the first frame in the reflist */
23 
24 #define ALL_CAND_EQUAL  10  /*  any number greater than 5 will work */
25 
26 
27 /* from TMN 3.2 */
28 #define PREF_NULL_VEC 129   /* zero vector bias */
29 #define PREF_16_VEC 129     /* 1MV bias versus 4MVs*/
30 #define PREF_INTRA  3024//512       /* bias for INTRA coding */
31 
/*
 * Exclusion table indexed as tab_exclude[last_loc][curr_loc].
 * It is consulted to decide whether a candidate position is skipped
 * ("continue") or evaluated ("compute"); presumably a 1 marks a position
 * that need not be recomputed -- confirm against the code that reads it.
 */
static const int tab_exclude[9][9] =
{
    /* last_loc 0 */ {0, 0, 0, 0, 0, 0, 0, 0, 0},
    /* last_loc 1 */ {0, 0, 0, 0, 1, 1, 1, 0, 0},
    /* last_loc 2 */ {0, 0, 0, 0, 1, 1, 1, 1, 1},
    /* last_loc 3 */ {0, 0, 0, 0, 0, 0, 1, 1, 1},
    /* last_loc 4 */ {0, 1, 1, 0, 0, 0, 1, 1, 1},
    /* last_loc 5 */ {0, 1, 1, 0, 0, 0, 0, 0, 1},
    /* last_loc 6 */ {0, 1, 1, 1, 1, 0, 0, 0, 1},
    /* last_loc 7 */ {0, 0, 1, 1, 1, 0, 0, 0, 0},
    /* last_loc 8 */ {0, 0, 1, 1, 1, 1, 1, 0, 0}
};
44 
/*
 * Step table indexed as refine_next[curr_k][increment]: one pair of signed
 * increments for each of the eight values of curr_k.
 */
static const int refine_next[8][2] =
{
    {  0,  0 },
    {  2,  0 },
    {  1,  1 },
    {  0,  2 },
    { -1,  1 },
    { -2,  0 },
    { -1, -1 },
    {  0, -2 }
};
49 
50 #ifdef _SAD_STAT
51 uint32 num_MB = 0;
52 uint32 num_cand = 0;
53 #endif
54 
55 /************************************************************************/
56 #define TH_INTER_2  100  /* temporary for now */
57 
58 //#define FIXED_INTERPRED_MODE  AVC_P16
59 #define FIXED_REF_IDX   0
60 #define FIXED_MVX 0
61 #define FIXED_MVY 0
62 
63 // only use when AVC_P8 or AVC_P8ref0
64 #define FIXED_SUBMB_MODE    AVC_4x4
65 /*************************************************************************/
66 
67 /* Initialize arrays necessary for motion search */
InitMotionSearchModule(AVCHandle * avcHandle)68 AVCEnc_Status InitMotionSearchModule(AVCHandle *avcHandle)
69 {
70     AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject;
71     AVCRateControl *rateCtrl = encvid->rateCtrl;
72     int search_range = rateCtrl->mvRange;
73     int number_of_subpel_positions = 4 * (2 * search_range + 3);
74     int max_mv_bits, max_mvd;
75     int temp_bits = 0;
76     uint8 *mvbits;
77     int bits, imax, imin, i;
78     uint8* subpel_pred = (uint8*) encvid->subpel_pred; // all 16 sub-pel positions
79 
80 
81     while (number_of_subpel_positions > 0)
82     {
83         temp_bits++;
84         number_of_subpel_positions >>= 1;
85     }
86 
87     max_mv_bits = 3 + 2 * temp_bits;
88     max_mvd  = (1 << (max_mv_bits >> 1)) - 1;
89 
90     encvid->mvbits_array = (uint8*) avcHandle->CBAVC_Malloc(encvid->avcHandle->userData,
91                            sizeof(uint8) * (2 * max_mvd + 1), DEFAULT_ATTR);
92 
93     if (encvid->mvbits_array == NULL)
94     {
95         return AVCENC_MEMORY_FAIL;
96     }
97 
98     mvbits = encvid->mvbits  = encvid->mvbits_array + max_mvd;
99 
100     mvbits[0] = 1;
101     for (bits = 3; bits <= max_mv_bits; bits += 2)
102     {
103         imax = 1    << (bits >> 1);
104         imin = imax >> 1;
105 
106         for (i = imin; i < imax; i++)   mvbits[-i] = mvbits[i] = bits;
107     }
108 
109     /* initialize half-pel search */
110     encvid->hpel_cand[0] = subpel_pred + REF_CENTER;
111     encvid->hpel_cand[1] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1 ;
112     encvid->hpel_cand[2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
113     encvid->hpel_cand[3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
114     encvid->hpel_cand[4] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
115     encvid->hpel_cand[5] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25;
116     encvid->hpel_cand[6] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
117     encvid->hpel_cand[7] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
118     encvid->hpel_cand[8] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
119 
120     /* For quarter-pel interpolation around best half-pel result */
121 
122     encvid->bilin_base[0][0] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
123     encvid->bilin_base[0][1] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
124     encvid->bilin_base[0][2] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
125     encvid->bilin_base[0][3] = subpel_pred + REF_CENTER;
126 
127 
128     encvid->bilin_base[1][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE;
129     encvid->bilin_base[1][1] = subpel_pred + REF_CENTER - 24;
130     encvid->bilin_base[1][2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
131     encvid->bilin_base[1][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
132 
133     encvid->bilin_base[2][0] = subpel_pred + REF_CENTER - 24;
134     encvid->bilin_base[2][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
135     encvid->bilin_base[2][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
136     encvid->bilin_base[2][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
137 
138     encvid->bilin_base[3][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
139     encvid->bilin_base[3][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
140     encvid->bilin_base[3][2] = subpel_pred + REF_CENTER;
141     encvid->bilin_base[3][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
142 
143     encvid->bilin_base[4][0] = subpel_pred + REF_CENTER;
144     encvid->bilin_base[4][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
145     encvid->bilin_base[4][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25;
146     encvid->bilin_base[4][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
147 
148     encvid->bilin_base[5][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
149     encvid->bilin_base[5][1] = subpel_pred + REF_CENTER;
150     encvid->bilin_base[5][2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
151     encvid->bilin_base[5][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25;
152 
153     encvid->bilin_base[6][0] = subpel_pred + REF_CENTER - 1;
154     encvid->bilin_base[6][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
155     encvid->bilin_base[6][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 24;
156     encvid->bilin_base[6][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
157 
158     encvid->bilin_base[7][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE;
159     encvid->bilin_base[7][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
160     encvid->bilin_base[7][2] = subpel_pred + REF_CENTER - 1;
161     encvid->bilin_base[7][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
162 
163     encvid->bilin_base[8][0] = subpel_pred + REF_CENTER - 25;
164     encvid->bilin_base[8][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE;
165     encvid->bilin_base[8][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE;
166     encvid->bilin_base[8][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
167 
168 
169     return AVCENC_SUCCESS;
170 }
171 
172 /* Clean-up memory */
CleanMotionSearchModule(AVCHandle * avcHandle)173 void CleanMotionSearchModule(AVCHandle *avcHandle)
174 {
175     AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject;
176 
177     if (encvid->mvbits_array)
178     {
179         avcHandle->CBAVC_Free(avcHandle->userData, encvid->mvbits_array);
180         encvid->mvbits = NULL;
181     }
182 
183     return ;
184 }
185 
186 
/*
 * Decide from the macroblock's border pixels whether intra coding is worth
 * trying.
 *
 * SBE is the sum of absolute differences between the 16 pixels of the first
 * row of the macroblock and the row above it, plus the 16 pixels of the first
 * column and the column to its left (so cur[-pitch] and cur[-1] must be
 * readable). The intra estimate is 8 * SBE.
 *
 *   min_cost  in: current (inter) cost; out: updated cost when true is returned
 *   cur       top-left pixel of the 16x16 macroblock
 *   pitch     distance in bytes between two rows
 *   ave       true: average the old cost with the estimate,
 *             false: replace the cost with the estimate
 *
 * Returns false (and leaves *min_cost alone) when 8 * SBE >= *min_cost,
 * i.e. intra cannot win; returns true otherwise.
 *
 * The original compared SBE / 32.0 against *min_cost / 256.0. Both sides are
 * exactly representable, so that test is identical to the integer one below.
 */
bool IntraDecisionABE(int *min_cost, uint8 *cur, int pitch, bool ave)
{
    const uint8 *above = cur - pitch;
    const uint8 *row;
    int sbe = 0;
    int intra_cost;
    int diff;
    int j;

    /* top neighbor */
    for (j = 0; j < 16; j++)
    {
        diff = above[j] - cur[j];
        sbe += (diff >= 0) ? diff : -diff;
    }

    /* left neighbor */
    for (j = 0; j < 16; j++)
    {
        row = cur + j * pitch;
        diff = row[-1] - row[0];
        sbe += (diff >= 0) ? diff : -diff;
    }

    intra_cost = sbe * 8; /* at most 32 * 255 * 8, cannot overflow */

    if (intra_cost >= *min_cost)
    {
        return false; /* no possibility of intra, just use inter */
    }

    if (ave)
    {
        *min_cost = (*min_cost + intra_cost) >> 1; /* possible intra: average the costs */
    }
    else
    {
        *min_cost = intra_cost;
    }

    return true;
}
234 
235 /******* main function for macroblock prediction for the entire frame ***/
236 /* if turns out to be IDR frame, set video->nal_unit_type to AVC_NALTYPE_IDR */
AVCMotionEstimation(AVCEncObject * encvid)237 void AVCMotionEstimation(AVCEncObject *encvid)
238 {
239     AVCCommonObj *video = encvid->common;
240     int slice_type = video->slice_type;
241     AVCFrameIO *currInput = encvid->currInput;
242     AVCPictureData *refPic = video->RefPicList0[0];
243     int i, j, k;
244     int mbwidth = video->PicWidthInMbs;
245     int mbheight = video->PicHeightInMbs;
246     int totalMB = video->PicSizeInMbs;
247     int pitch = currInput->pitch;
248     AVCMacroblock *currMB, *mblock = video->mblock;
249     AVCMV *mot_mb_16x16, *mot16x16 = encvid->mot16x16;
250     // AVCMV *mot_mb_16x8, *mot_mb_8x16, *mot_mb_8x8, etc;
251     AVCRateControl *rateCtrl = encvid->rateCtrl;
252     uint8 *intraSearch = encvid->intraSearch;
253     uint FS_en = encvid->fullsearch_enable;
254 
255     int NumIntraSearch, start_i, numLoop, incr_i;
256     int mbnum, offset;
257     uint8 *cur, *best_cand[5];
258     int totalSAD = 0;   /* average SAD for rate control */
259     int type_pred;
260     int abe_cost;
261 
262 #ifdef HTFM
263     /***** HYPOTHESIS TESTING ********/  /* 2/28/01 */
264     int collect = 0;
265     HTFM_Stat htfm_stat;
266     double newvar[16];
267     double exp_lamda[15];
268     /*********************************/
269 #endif
270     int hp_guess = 0;
271     uint32 mv_uint32;
272 
273     offset = 0;
274 
275     if (slice_type == AVC_I_SLICE)
276     {
277         /* cannot do I16 prediction here because it needs full decoding. */
278         for (i = 0; i < totalMB; i++)
279         {
280             encvid->min_cost[i] = 0x7FFFFFFF;  /* max value for int */
281         }
282 
283         memset(intraSearch, 1, sizeof(uint8)*totalMB);
284 
285         encvid->firstIntraRefreshMBIndx = 0; /* reset this */
286 
287         return ;
288     }
289     else   // P_SLICE
290     {
291         for (i = 0; i < totalMB; i++)
292         {
293             mblock[i].mb_intra = 0;
294         }
295         memset(intraSearch, 1, sizeof(uint8)*totalMB);
296     }
297 
298     if (refPic->padded == 0)
299     {
300         AVCPaddingEdge(refPic);
301         refPic->padded = 1;
302     }
303     /* Random INTRA update */
304     if (rateCtrl->intraMBRate)
305     {
306         AVCRasterIntraUpdate(encvid, mblock, totalMB, rateCtrl->intraMBRate);
307     }
308 
309     encvid->sad_extra_info = NULL;
310 #ifdef HTFM
311     /***** HYPOTHESIS TESTING ********/
312     InitHTFM(video, &htfm_stat, newvar, &collect);
313     /*********************************/
314 #endif
315 
316     if ((rateCtrl->scdEnable == 1)
317             && ((rateCtrl->frame_rate < 5.0) || (video->sliceHdr->frame_num > MIN_GOP)))
318         /* do not try to detect a new scene if low frame rate and too close to previous I-frame */
319     {
320         incr_i = 2;
321         numLoop = 2;
322         start_i = 1;
323         type_pred = 0; /* for initial candidate selection */
324     }
325     else
326     {
327         incr_i = 1;
328         numLoop = 1;
329         start_i = 0;
330         type_pred = 2;
331     }
332 
333     /* First pass, loop thru half the macroblock */
334     /* determine scene change */
335     /* Second pass, for the rest of macroblocks */
336     NumIntraSearch = 0; // to be intra searched in the encoding loop.
337     while (numLoop--)
338     {
339         for (j = 0; j < mbheight; j++)
340         {
341             if (incr_i > 1)
342                 start_i = (start_i == 0 ? 1 : 0) ; /* toggle 0 and 1 */
343 
344             offset = pitch * (j << 4) + (start_i << 4);
345 
346             mbnum = j * mbwidth + start_i;
347 
348             for (i = start_i; i < mbwidth; i += incr_i)
349             {
350                 video->mbNum = mbnum;
351                 video->currMB = currMB = mblock + mbnum;
352                 mot_mb_16x16 = mot16x16 + mbnum;
353 
354                 cur = currInput->YCbCr[0] + offset;
355 
356                 if (currMB->mb_intra == 0) /* for INTER mode */
357                 {
358 #if defined(HTFM)
359                     HTFMPrepareCurMB_AVC(encvid, &htfm_stat, cur, pitch);
360 #else
361                     AVCPrepareCurMB(encvid, cur, pitch);
362 #endif
363                     /************************************************************/
364                     /******** full-pel 1MV search **********************/
365 
366                     AVCMBMotionSearch(encvid, cur, best_cand, i << 4, j << 4, type_pred,
367                                       FS_en, &hp_guess);
368 
369                     abe_cost = encvid->min_cost[mbnum] = mot_mb_16x16->sad;
370 
371                     /* set mbMode and MVs */
372                     currMB->mbMode = AVC_P16;
373                     currMB->MBPartPredMode[0][0] = AVC_Pred_L0;
374                     mv_uint32 = ((mot_mb_16x16->y) << 16) | ((mot_mb_16x16->x) & 0xffff);
375                     for (k = 0; k < 32; k += 2)
376                     {
377                         currMB->mvL0[k>>1] = mv_uint32;
378                     }
379 
380                     /* make a decision whether it should be tested for intra or not */
381                     if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0)
382                     {
383                         if (false == IntraDecisionABE(&abe_cost, cur, pitch, true))
384                         {
385                             intraSearch[mbnum] = 0;
386                         }
387                         else
388                         {
389                             NumIntraSearch++;
390                             rateCtrl->MADofMB[mbnum] = abe_cost;
391                         }
392                     }
393                     else // boundary MBs, always do intra search
394                     {
395                         NumIntraSearch++;
396                     }
397 
398                     totalSAD += (int) rateCtrl->MADofMB[mbnum];//mot_mb_16x16->sad;
399                 }
400                 else    /* INTRA update, use for prediction */
401                 {
402                     mot_mb_16x16[0].x = mot_mb_16x16[0].y = 0;
403 
404                     /* reset all other MVs to zero */
405                     /* mot_mb_16x8, mot_mb_8x16, mot_mb_8x8, etc. */
406                     abe_cost = encvid->min_cost[mbnum] = 0x7FFFFFFF;  /* max value for int */
407 
408                     if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0)
409                     {
410                         IntraDecisionABE(&abe_cost, cur, pitch, false);
411 
412                         rateCtrl->MADofMB[mbnum] = abe_cost;
413                         totalSAD += abe_cost;
414                     }
415 
416                     NumIntraSearch++ ;
417                     /* cannot do I16 prediction here because it needs full decoding. */
418                     // intraSearch[mbnum] = 1;
419 
420                 }
421 
422                 mbnum += incr_i;
423                 offset += (incr_i << 4);
424 
425             } /* for i */
426         } /* for j */
427 
428         /* since we cannot do intra/inter decision here, the SCD has to be
429         based on other criteria such as motion vectors coherency or the SAD */
430         if (incr_i > 1 && numLoop) /* scene change on and first loop */
431         {
432             //if(NumIntraSearch > ((totalMB>>3)<<1) + (totalMB>>3)) /* 75% of 50%MBs */
433             if (NumIntraSearch*99 > (48*totalMB)) /* 20% of 50%MBs */
434                 /* need to do more investigation about this threshold since the NumIntraSearch
435                 only show potential intra MBs, not the actual one */
436             {
437                 /* we can choose to just encode I_SLICE without IDR */
438                 //video->nal_unit_type = AVC_NALTYPE_IDR;
439                 video->nal_unit_type = AVC_NALTYPE_SLICE;
440                 video->sliceHdr->slice_type = AVC_I_ALL_SLICE;
441                 video->slice_type = AVC_I_SLICE;
442                 memset(intraSearch, 1, sizeof(uint8)*totalMB);
443                 i = totalMB;
444                 while (i--)
445                 {
446                     mblock[i].mb_intra = 1;
447                     encvid->min_cost[i] = 0x7FFFFFFF;  /* max value for int */
448                 }
449 
450                 rateCtrl->totalSAD = totalSAD * 2;  /* SAD */
451 
452                 return ;
453             }
454         }
455         /******** no scene change, continue motion search **********************/
456         start_i = 0;
457         type_pred++; /* second pass */
458     }
459 
460     rateCtrl->totalSAD = totalSAD;  /* SAD */
461 
462 #ifdef HTFM
463     /***** HYPOTHESIS TESTING ********/
464     if (collect)
465     {
466         collect = 0;
467         UpdateHTFM(encvid, newvar, exp_lamda, &htfm_stat);
468     }
469     /*********************************/
470 #endif
471 
472     return ;
473 }
474 
475 /*=====================================================================
476     Function:   PaddingEdge
477     Date:       09/16/2000
478     Purpose:    Pad edge of a Vop
479 =====================================================================*/
480 
AVCPaddingEdge(AVCPictureData * refPic)481 void  AVCPaddingEdge(AVCPictureData *refPic)
482 {
483     uint8 *src, *dst;
484     int i;
485     int pitch, width, height;
486     uint32 temp1, temp2;
487 
488     width = refPic->width;
489     height = refPic->height;
490     pitch = refPic->pitch;
491 
492     /* pad top */
493     src = refPic->Sl;
494 
495     temp1 = *src; /* top-left corner */
496     temp2 = src[width-1]; /* top-right corner */
497     temp1 |= (temp1 << 8);
498     temp1 |= (temp1 << 16);
499     temp2 |= (temp2 << 8);
500     temp2 |= (temp2 << 16);
501 
502     dst = src - (pitch << 4);
503 
504     *((uint32*)(dst - 16)) = temp1;
505     *((uint32*)(dst - 12)) = temp1;
506     *((uint32*)(dst - 8)) = temp1;
507     *((uint32*)(dst - 4)) = temp1;
508 
509     memcpy(dst, src, width);
510 
511     *((uint32*)(dst += width)) = temp2;
512     *((uint32*)(dst + 4)) = temp2;
513     *((uint32*)(dst + 8)) = temp2;
514     *((uint32*)(dst + 12)) = temp2;
515 
516     dst = dst - width - 16;
517 
518     i = 15;
519     while (i--)
520     {
521         memcpy(dst + pitch, dst, pitch);
522         dst += pitch;
523     }
524 
525     /* pad sides */
526     dst += (pitch + 16);
527     src = dst;
528     i = height;
529     while (i--)
530     {
531         temp1 = *src;
532         temp2 = src[width-1];
533         temp1 |= (temp1 << 8);
534         temp1 |= (temp1 << 16);
535         temp2 |= (temp2 << 8);
536         temp2 |= (temp2 << 16);
537 
538         *((uint32*)(dst - 16)) = temp1;
539         *((uint32*)(dst - 12)) = temp1;
540         *((uint32*)(dst - 8)) = temp1;
541         *((uint32*)(dst - 4)) = temp1;
542 
543         *((uint32*)(dst += width)) = temp2;
544         *((uint32*)(dst + 4)) = temp2;
545         *((uint32*)(dst + 8)) = temp2;
546         *((uint32*)(dst + 12)) = temp2;
547 
548         src += pitch;
549         dst = src;
550     }
551 
552     /* pad bottom */
553     dst -= 16;
554     i = 16;
555     while (i--)
556     {
557         memcpy(dst, dst - pitch, pitch);
558         dst += pitch;
559     }
560 
561 
562     return ;
563 }
564 
565 /*===========================================================================
566     Function:   AVCRasterIntraUpdate
567     Date:       2/26/01
568     Purpose:    To raster-scan assign INTRA-update .
569                 N macroblocks are updated (also was programmable).
570 ===========================================================================*/
AVCRasterIntraUpdate(AVCEncObject * encvid,AVCMacroblock * mblock,int totalMB,int numRefresh)571 void AVCRasterIntraUpdate(AVCEncObject *encvid, AVCMacroblock *mblock, int totalMB, int numRefresh)
572 {
573     int indx, i;
574 
575     indx = encvid->firstIntraRefreshMBIndx;
576     for (i = 0; i < numRefresh && indx < totalMB; i++)
577     {
578         (mblock + indx)->mb_intra = 1;
579         encvid->intraSearch[indx++] = 1;
580     }
581 
582     /* if read the end of frame, reset and loop around */
583     if (indx >= totalMB - 1)
584     {
585         indx = 0;
586         while (i < numRefresh && indx < totalMB)
587         {
588             (mblock + indx)->mb_intra = 1;
589             encvid->intraSearch[indx++] = 1;
590             i++;
591         }
592     }
593 
594     encvid->firstIntraRefreshMBIndx = indx; /* update with a new value */
595 
596     return ;
597 }
598 
599 
600 #ifdef HTFM
/*
 * Select the HTFM SAD routines for this frame and fill the two 16-entry
 * pixel-offset tables.
 *
 * When frame_num % 30 == 1 the "collect" variants are installed, *collect is
 * set to 1, the statistics in htfm_stat and newvar[0..15] are cleared, and
 * the offset tables live in htfm_stat. Otherwise the regular HTFM variants
 * are installed and the tables live at nrmlz_th + 16 and nrmlz_th + 32.
 *
 * Each table entry is row * stride + col for a fixed (row, col) pattern
 * inside a 4x4 cell; the first table uses currPic->width as stride, the
 * second one currPic->pitch.
 *
 * NOTE(review): the first parameter is typed VideoEncData* while the other
 * HTFM routines in this file take AVCEncObject* -- confirm before enabling
 * HTFM.
 */
void InitHTFM(VideoEncData *encvid, HTFM_Stat *htfm_stat, double *newvar, int *collect)
{
    /* (row, col) of the sample taken at each of the 16 steps */
    static const unsigned char pos[16][2] =
    {
        {0, 0}, {2, 2}, {0, 2}, {2, 0},
        {1, 1}, {3, 3}, {1, 3}, {3, 1},
        {1, 0}, {3, 2}, {3, 0}, {1, 2},
        {0, 1}, {2, 3}, {2, 1}, {0, 3}
    };
    AVCCommonObj *video = encvid->common;
    const int cur_stride = video->currPic->width;
    const int ref_stride = video->currPic->pitch;
    int *cur_tab, *ref_tab;
    int k;

    /* 4/11/01, collect data every 30 frames, doesn't have to be base layer */
    if (((int)video->sliceHdr->frame_num) % 30 == 1)
    {
        *collect = 1;

        htfm_stat->countbreak = 0;
        htfm_stat->abs_dif_mad_avg = 0;

        for (k = 0; k < 16; k++)
        {
            newvar[k] = 0.0;
        }

        encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM_Collect;
        encvid->functionPointer->SAD_MB_HalfPel[0] = NULL;
        encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFM_Collectxh;
        encvid->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFM_Collectyh;
        encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFM_Collectxhyh;
        encvid->sad_extra_info = (void*)(htfm_stat);
        cur_tab = htfm_stat->offsetArray;
        ref_tab = htfm_stat->offsetRef;
    }
    else
    {
        encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM;
        encvid->functionPointer->SAD_MB_HalfPel[0] = NULL;
        encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFMxh;
        encvid->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFMyh;
        encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFMxhyh;
        encvid->sad_extra_info = (void*)(encvid->nrmlz_th);
        cur_tab = encvid->nrmlz_th + 16;
        ref_tab = encvid->nrmlz_th + 32;
    }

    for (k = 0; k < 16; k++)
    {
        cur_tab[k] = pos[k][0] * cur_stride + pos[k][1];
    }

    for (k = 0; k < 16; k++)
    {
        ref_tab[k] = pos[k][0] * ref_stride + pos[k][1];
    }

    return ;
}
686 
/*
 * Turn the statistics gathered during a "collect" frame into new HTFM
 * thresholds.
 *
 * newvar[0] becomes abs_dif_mad_avg / (countbreak * 16), with countbreak
 * forced to at least 1 and the result clamped to at least 0.001.
 * exp_lamda[0] is 1 / (newvar[0] * 1.4142136); exp_lamda[1..14] are fixed
 * multiples of exp_lamda[0]. The thresholds are then written into
 * encvid->nrmlz_th by CalcThreshold().
 */
void UpdateHTFM(AVCEncObject *encvid, double *newvar, double *exp_lamda, HTFM_Stat *htfm_stat)
{
    /* multipliers applied to exp_lamda[0] to obtain exp_lamda[1..14] */
    static const double ratio[14] =
    {
        1.5825, 2.1750, 3.5065, 3.1436, 3.5315, 3.7449, 4.5854,
        4.6191, 5.4041, 6.5974, 10.5341, 10.0719, 12.0516, 15.4552
    };
    int k;

    if (htfm_stat->countbreak == 0)
    {
        htfm_stat->countbreak = 1; /* avoid dividing by zero below */
    }

    newvar[0] = (double)(htfm_stat->abs_dif_mad_avg) / (htfm_stat->countbreak * 16.);
    if (newvar[0] < 0.001)
    {
        newvar[0] = 0.001; /* to prevent floating overflow */
    }

    exp_lamda[0] =  1 / (newvar[0] * 1.4142136);
    for (k = 1; k <= 14; k++)
    {
        exp_lamda[k] = exp_lamda[0] * ratio[k - 1];
    }

    CalcThreshold(HTFM_Pf, exp_lamda, encvid->nrmlz_th);
    return ;
}
717 
718 
/*
 * Compute the 16 normalized thresholds from the 15 lambda values.
 *
 * For i in 0..14:
 *   t = (pf < 0.5) ?  1 / exp_lamda[i] * LOG(2 * pf)
 *                  : -1 / exp_lamda[i] * LOG(2 * (1 - pf))
 *   nrmlz_th[i] = (int)(t * 16 * (i + 1) + 0.5)
 * nrmlz_th[15] is set to 0.
 */
void CalcThreshold(double pf, double exp_lamda[], int nrmlz_th[])
{
    double numer, log_term, t;
    int i;

    /* the branch on pf does not depend on i, so pick both factors once */
    if (pf < 0.5)
    {
        numer = 1;
        log_term = M4VENC_LOG(2 * pf);
    }
    else
    {
        numer = -1;
        log_term = M4VENC_LOG(2 * (1 - pf));
    }

    nrmlz_th[15] = 0;
    for (i = 0; i < 15; i++)
    {
        t = numer / exp_lamda[i] * log_term;
        nrmlz_th[i] = (int)(t * ((i + 1) << 4) + 0.5); /* scale up to no. of pixels */
    }

    return ;
}
741 
/*
 * Repack the current macroblock into encvid->currYMB in HTFM order.
 *
 * For each of the 16 entries of the offset table, four 32-bit words are
 * written; each word gathers the bytes at columns 0, 4, 8 and 12 (lowest
 * byte first) of one row, and consecutive words are 4 rows apart. That
 * gives 64 words in total.
 *
 * The offset table is htfm_stat->offsetArray when frame_num % 30 == 1 and
 * encvid->nrmlz_th + 16 otherwise, the same selection InitHTFM() makes.
 *
 * Each byte is widened to uint32 before shifting: shifting a promoted
 * (signed int) byte >= 128 left by 24 would overflow.
 */
void    HTFMPrepareCurMB_AVC(AVCEncObject *encvid, HTFM_Stat *htfm_stat, uint8 *cur, int pitch)
{
    AVCCommonObj *video = encvid->common;
    uint32 *htfmMB = (uint32*)(encvid->currYMB);
    const uint8 *ptr;
    int *offset;
    int i, r;

    if (((int)video->sliceHdr->frame_num) % 30 == 1)
    {
        offset = htfm_stat->offsetArray;
    }
    else
    {
        offset = encvid->nrmlz_th + 16;
    }

    for (i = 0; i < 16; i++)
    {
        for (r = 0; r < 4; r++)
        {
            ptr = cur + offset[i] + r * (pitch << 2);

            *htfmMB++ = (uint32) ptr[0]
                        | ((uint32) ptr[4] << 8)
                        | ((uint32) ptr[8] << 16)
                        | ((uint32) ptr[12] << 24);
        }
    }

    return ;
}
802 
803 
804 #endif // HTFM
805 
/*
 * Copy the 16x16 luma block starting at `cur` (row stride `pitch`) into the
 * contiguous 256-byte buffer encvid->currYMB, 16 bytes per row.
 *
 * The rows are copied with memcpy, so `cur` may have any alignment; the
 * source is the caller-supplied input frame, and reading it through a
 * casted 32-bit pointer is not safe on targets that fault on unaligned loads.
 */
void    AVCPrepareCurMB(AVCEncObject *encvid, uint8 *cur, int pitch)
{
    uint8 *dst = (uint8*)(void*)(encvid->currYMB);
    int row;

    for (row = 0; row < 16; row++)
    {
        memcpy(dst + 16 * row, cur + row * pitch, 16);
    }

    return ;
}
824 
825 #ifdef FIXED_INTERPRED_MODE
826 
/* Because deriving the predicted motion vector is complex, the decision to
skip a macroblock cannot be made here yet. */
/* This function finds the best motion vector and the best intra prediction mode for each block. */
830 /* output are
831     currMB->NumMbPart,  currMB->MbPartWidth, currMB->MbPartHeight,
832     currMB->NumSubMbPart[], currMB->SubMbPartWidth[], currMB->SubMbPartHeight,
833     currMB->MBPartPredMode[][] (L0 or L1 or BiPred)
834     currMB->RefIdx[], currMB->ref_idx_L0[],
835     currMB->mvL0[], currMB->mvL1[]
836     */
837 
AVCMBMotionSearch(AVCEncObject * encvid,AVCMacroblock * currMB,int mbNum,int num_pass)838 AVCEnc_Status AVCMBMotionSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum,
839                                 int num_pass)
840 {
841     AVCCommonObj *video = encvid->common;
842     int mbPartIdx, subMbPartIdx;
843     int16 *mv;
844     int i;
845     int SubMbPartHeight, SubMbPartWidth, NumSubMbPart;
846 
847     /* assign value to currMB->MBPartPredMode[][x],subMbMode[],NumSubMbPart[],SubMbPartWidth[],SubMbPartHeight[] */
848 
849     currMB->mbMode = FIXED_INTERPRED_MODE;
850     currMB->mb_intra = 0;
851 
852     if (currMB->mbMode == AVC_P16)
853     {
854         currMB->NumMbPart = 1;
855         currMB->MbPartWidth = 16;
856         currMB->MbPartHeight = 16;
857         currMB->SubMbPartHeight[0] = 16;
858         currMB->SubMbPartWidth[0] = 16;
859         currMB->NumSubMbPart[0] =  1;
860     }
861     else if (currMB->mbMode == AVC_P16x8)
862     {
863         currMB->NumMbPart = 2;
864         currMB->MbPartWidth = 16;
865         currMB->MbPartHeight = 8;
866         for (i = 0; i < 2; i++)
867         {
868             currMB->SubMbPartWidth[i] = 16;
869             currMB->SubMbPartHeight[i] = 8;
870             currMB->NumSubMbPart[i] = 1;
871         }
872     }
873     else if (currMB->mbMode == AVC_P8x16)
874     {
875         currMB->NumMbPart = 2;
876         currMB->MbPartWidth = 8;
877         currMB->MbPartHeight = 16;
878         for (i = 0; i < 2; i++)
879         {
880             currMB->SubMbPartWidth[i] = 8;
881             currMB->SubMbPartHeight[i] = 16;
882             currMB->NumSubMbPart[i] = 1;
883         }
884     }
885     else if (currMB->mbMode == AVC_P8 || currMB->mbMode == AVC_P8ref0)
886     {
887         currMB->NumMbPart = 4;
888         currMB->MbPartWidth = 8;
889         currMB->MbPartHeight = 8;
890         if (FIXED_SUBMB_MODE == AVC_8x8)
891         {
892             SubMbPartHeight = 8;
893             SubMbPartWidth = 8;
894             NumSubMbPart = 1;
895         }
896         else if (FIXED_SUBMB_MODE == AVC_8x4)
897         {
898             SubMbPartHeight = 4;
899             SubMbPartWidth = 8;
900             NumSubMbPart = 2;
901         }
902         else if (FIXED_SUBMB_MODE == AVC_4x8)
903         {
904             SubMbPartHeight = 8;
905             SubMbPartWidth = 4;
906             NumSubMbPart = 2;
907         }
908         else if (FIXED_SUBMB_MODE == AVC_4x4)
909         {
910             SubMbPartHeight = 4;
911             SubMbPartWidth = 4;
912             NumSubMbPart = 4;
913         }
914 
915         for (i = 0; i < 4; i++)
916         {
917             currMB->subMbMode[i] = FIXED_SUBMB_MODE;
918             currMB->SubMbPartHeight[i] = SubMbPartHeight;
919             currMB->SubMbPartWidth[i] = SubMbPartWidth;
920             currMB->NumSubMbPart[i] = NumSubMbPart;
921         }
922     }
923     else /* it's probably intra mode */
924     {
925         return AVCENC_SUCCESS;
926     }
927 
928     for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++)
929     {
930         currMB->MBPartPredMode[mbPartIdx][0]  = AVC_Pred_L0;
931         currMB->ref_idx_L0[mbPartIdx] = FIXED_REF_IDX;
932         currMB->RefIdx[mbPartIdx] = video->RefPicList0[FIXED_REF_IDX]->RefIdx;
933 
934         for (subMbPartIdx = 0; subMbPartIdx < 4; subMbPartIdx++)
935         {
936             mv = (int16*)(currMB->mvL0 + (mbPartIdx << 2) + subMbPartIdx);
937 
938             *mv++ = FIXED_MVX;
939             *mv = FIXED_MVY;
940         }
941     }
942 
943     encvid->min_cost = 0;
944 
945     return AVCENC_SUCCESS;
946 }
947 
948 #else /* perform the search */
949 
/* This option #1 search is very similar to PV's MPEG4 motion search algorithm.
  The search is done in a hierarchical manner, from the 16x16 MB down to smaller and smaller
  partitions. At each level, a decision can be made to stop the search if the expected
  prediction gain is not worth the computation. The decision can also be made at the finest
  level for more fullsearch-like behavior, at the price of heavier computation. */
/* 16x16 motion search for the current macroblock.
 *
 *   encvid     encoder object (reference picture, SAD function, rate control, ...).
 *   cur        ignored on entry; it is immediately replaced by encvid->currYMB.
 *   best_cand  out: best_cand[0] receives the pointer to the best integer-pel candidate
 *              inside the reference picture.
 *   i0, j0     pixel coordinates of the macroblock's top-left corner.
 *   type_pred  forwarded to AVCCandidateSelection (0: first pass, 1: second pass).
 *   FS_en      non-zero forces a full search.
 *   hp_guess   out: 0 after a full search ("no guess"), otherwise the index (1..8) of the
 *              cheapest neighbour around the final centre; passed on to AVCFindHalfPelMB.
 *
 * Side effects: initialises the 16x16 partition/reference fields of video->currMB, writes
 * mot16x16[mbnum] (cost and vector in quarter-pel units), rateCtrl->MADofMB[mbnum], and
 * copies the selected 16x16 prediction into currPic->Sl at (i0, j0).
 *
 * Changes relative to the previous revision:
 *   - with sub-pel search disabled, the prediction copy now walks the reference picture
 *     with its real pitch instead of the 16+8 step used for the sub-pel buffers;
 *   - with sub-pel search disabled, MADofMB is no longer overwritten with the untouched
 *     initial min_sad after a full search (AVCFullSearch has already stored it);
 *   - dn[] is initialised before it is first handed to AVCMoveNeighborSAD.
 */
void AVCMBMotionSearch(AVCEncObject *encvid, uint8 *cur, uint8 *best_cand[],
                       int i0, int j0, int type_pred, int FS_en, int *hp_guess)
{
    AVCCommonObj *video = encvid->common;
    AVCPictureData *currPic = video->currPic;
    AVCSeqParamSet *currSPS = video->currSeqParams;
    AVCRateControl *rateCtrl = encvid->rateCtrl;
    AVCMacroblock *currMB = video->currMB;
    uint8 *ref, *cand, *ncand;
    void *extra_info = encvid->sad_extra_info;
    int mbnum = video->mbNum;
    int width = currPic->width; /* 6/12/01, must be multiple of 16 */
    int height = currPic->height;
    AVCMV *mot16x16 = encvid->mot16x16;
    int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock;

    int range = rateCtrl->mvRange;

    int lx = currPic->pitch; /*  padding */
    int i, j, imin, jmin, ilow, ihigh, jlow, jhigh;
    int d, dmin, dn[9];
    int k;
    int mvx[5], mvy[5];
    int num_can, num_eval, center_again;
    int last_loc, new_loc = 0;
    int step, max_step = range >> 1;
    int next;

    int cmvx, cmvy; /* estimated predicted MV */
    int lev_idx;
    int lambda_motion = encvid->lambda_motion;
    /* not referenced by name below; presumably picked up by the MV_COST macro, so the
       identifier must stay -- TODO confirm against the macro definition */
    uint8 *mvbits = encvid->mvbits;
    int mvshift = 2;
    int mvcost;

    int min_sad = 65535;
    int use_full_search;
    int cand_skip;

    ref = video->RefPicList0[DEFAULT_REF_IDX]->Sl; /* origin of actual frame */

    /* have to initialize these params, necessary for interprediction part */
    currMB->NumMbPart = 1;
    currMB->SubMbPartHeight[0] = 16;
    currMB->SubMbPartWidth[0] = 16;
    currMB->NumSubMbPart[0] = 1;
    currMB->ref_idx_L0[0] = currMB->ref_idx_L0[1] =
                                currMB->ref_idx_L0[2] = currMB->ref_idx_L0[3] = DEFAULT_REF_IDX;
    currMB->ref_idx_L1[0] = currMB->ref_idx_L1[1] =
                                currMB->ref_idx_L1[2] = currMB->ref_idx_L1[3] = DEFAULT_REF_IDX;
    currMB->RefIdx[0] = currMB->RefIdx[1] =
                            currMB->RefIdx[2] = currMB->RefIdx[3] = video->RefPicList0[DEFAULT_REF_IDX]->RefIdx;

    cur = encvid->currYMB; /* use smaller memory space for current MB */

    /*  find limit of the search (adjusting search range)*/
    lev_idx = mapLev2Idx[currSPS->level_idc];

    /* we can make this part dynamic based on previous statistics */
    ilow = i0 - range;
    if (i0 - ilow > 2047) /* clip to conform with the standard */
    {
        ilow = i0 - 2047;
    }
    if (ilow < -13)  // change it from -15 to -13 because of 6-tap filter needs extra 2 lines.
    {
        ilow = -13;
    }

    ihigh = i0 + range - 1;
    if (ihigh - i0 > 2047) /* clip to conform with the standard */
    {
        ihigh = i0 + 2047;
    }
    if (ihigh > width - 3)
    {
        ihigh = width - 3;  // change from width-1 to width-3 for the same reason as above
    }

    jlow = j0 - range;
    if (j0 - jlow > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */
    {
        jlow = j0 - MaxVmvR[lev_idx] + 1;
    }
    if (jlow < -13)     // same reason as above
    {
        jlow = -13;
    }

    jhigh = j0 + range - 1;
    if (jhigh - j0 > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */
    {
        jhigh = j0 + MaxVmvR[lev_idx] - 1;
    }
    if (jhigh > height - 3) // same reason as above
    {
        jhigh = height - 3;
    }

    /* find initial motion vector & predicted MV*/
    AVCCandidateSelection(mvx, mvy, &num_can, i0 >> 4, j0 >> 4, encvid, type_pred, &cmvx, &cmvy);

    imin = i0;
    jmin = j0; /* needed for fullsearch */
    ncand = ref + i0 + j0 * lx;

    /* Full search is used when requested by the caller, and also for the first MBs of
       the top row (i0 <= 64) when PrevRefFrameNum is 0 and this is not the second pass. */
    use_full_search = FS_en ||
                      (video->PrevRefFrameNum == 0 && j0 == 0 && i0 <= 64 && type_pred != 1);

    if (use_full_search)
    {
        *hp_guess = 0; /* no guess for fast half-pel */

        /* AVCFullSearch also stores MADofMB for this macroblock */
        dmin =  AVCFullSearch(encvid, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh, cmvx, cmvy);

        ncand = ref + imin + jmin * lx;
    }
    else
    {
        /************** initialize candidate **************************/

        dmin = 65535;

        /* ALL_CAND_EQUAL is a marker, not a count: only the first candidate is evaluated */
        num_eval = (num_can == ALL_CAND_EQUAL) ? 1 : num_can;

        /************** evaluate unique candidates **********************/
        for (k = 0; k < num_eval; k++)
        {
            i = i0 + mvx[k];
            j = j0 + mvy[k];

            if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
            {
                cand = ref + i + j * lx;
                d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
                mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
                d +=  mvcost;

                if (d < dmin)
                {
                    dmin = d;
                    imin = i;
                    jmin = j;
                    ncand = cand;
                    min_sad = d - mvcost; // for rate control
                }
            }
        }

        /******************* local refinement ***************************/
        center_again = 0;
        last_loc = new_loc = 0;
        step = 0;

        /* neighbours start out as "not computed" (same sentinel AVCMoveNeighborSAD uses),
           so no indeterminate value is ever read */
        for (k = 1; k < 9; k++)
        {
            dn[k] = 65536;
        }
        dn[0] = dmin;

        while (!center_again && step <= max_step)
        {

            AVCMoveNeighborSAD(dn, last_loc);

            center_again = 1;
            i = imin;
            j = jmin - 1;
            cand = ref + i + j * lx;

            /*  starting from [0,-1] */
            /* spiral check one step at a time*/
            for (k = 2; k <= 8; k += 2)
            {
                if (!tab_exclude[last_loc][k]) /* exclude last step computation */
                {       /* not already computed */
                    if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
                    {
                        d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
                        mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
                        d += mvcost;

                        dn[k] = d; /* keep it for half pel use */

                        if (d < dmin)
                        {
                            ncand = cand;
                            dmin = d;
                            imin = i;
                            jmin = j;
                            center_again = 0;
                            new_loc = k;
                            min_sad = d - mvcost; // for rate control
                        }
                    }
                }
                if (k == 8)  /* end side search*/
                {
                    if (!center_again)
                    {
                        /* the loop increment turns -1 into 1, i.e. the odd (diagonal)
                           positions are visited next */
                        k = -1; /* start diagonal search */
                        cand -= lx;
                        j--;
                    }
                }
                else
                {
                    next = refine_next[k][0];
                    i += next;
                    cand += next;
                    next = refine_next[k][1];
                    j += next;
                    cand += lx * next;
                }
            }
            last_loc = new_loc;
            step ++;
        }
        if (!center_again)
            AVCMoveNeighborSAD(dn, last_loc);

        *hp_guess = AVCFindMin(dn);

        encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0;
    }

    /* vector is stored in quarter-pel units; multiply instead of shifting because the
       displacement may be negative */
    mot16x16[mbnum].sad = dmin;
    mot16x16[mbnum].x = (imin - i0) * 4;
    mot16x16[mbnum].y = (jmin - j0) * 4;
    best_cand[0] = ncand;

    if (rateCtrl->subPelEnable) // always enable half-pel search
    {
        /* find half-pel resolution motion vector */
        min_sad = AVCFindHalfPelMB(encvid, cur, mot16x16 + mbnum, best_cand[0], i0, j0, *hp_guess, cmvx, cmvy);

        encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0;


        if (encvid->best_qpel_pos == -1)
        {
            ncand = encvid->hpel_cand[encvid->best_hpel_pos];
        }
        else
        {
            ncand = encvid->qpel_cand[encvid->best_qpel_pos];
        }

        /* sub-pel candidate buffers are stepped by 16+8 per row, as before
           (presumably a 24-byte pitch -- TODO confirm against their allocation) */
        cand_skip = 8;
    }
    else
    {
        /* MADofMB was already stored by whichever integer-pel search ran above; the
           prediction is read straight from the reference picture, whose pitch is lx */
        cand_skip = lx - 16;
    }

    /** do motion comp here for now */
    ref = currPic->Sl + i0 + j0 * lx;
    /* copy from the best result to current Picture */
    for (j = 0; j < 16; j++)
    {
        for (i = 0; i < 16; i++)
        {
            *ref++ = *ncand++;
        }
        ref += (lx - 16);
        ncand += cand_skip;
    }

    return ;
}
1249 
1250 #endif
1251 
1252 /*===============================================================================
1253     Function:   AVCFullSearch
1254     Date:       09/16/2000
1255     Purpose:    Perform full-search motion estimation over the range of search
1256                 region in a spiral-outward manner.
    Input/Output:   VideoEncData, current Vol, previous Vop, pointer to the left corner of
                current VOP, current coord (also output), boundaries.
1259 ===============================================================================*/
/* Exhaustive integer-pel search around (*imin, *jmin).
 *
 *   prev            origin of the reference picture (pitch taken from currPic->pitch).
 *   cur             current macroblock pixels handed to the SAD function.
 *   imin, jmin      in: search centre; out: best position found (unchanged if the
 *                   centre is never beaten).
 *   ilow..jhigh     inclusive bounds a candidate must satisfy to be evaluated.
 *   cmvx, cmvy      predicted motion vector used for the MV cost term.
 *
 * Returns the best cost (SAD + MV cost).  Also stores SAD/256 of the best position in
 * rateCtrl->MADofMB[common->mbNum].  The centre itself is evaluated without a bounds test.
 */
int AVCFullSearch(AVCEncObject *encvid, uint8 *prev, uint8 *cur,
                  int *imin, int *jmin, int ilow, int ihigh, int jlow, int jhigh,
                  int cmvx, int cmvy)
{
    /* walking direction along the four sides of a ring: right, down, left, up */
    static const int side_dx[4] = { 1, 0, -1, 0 };
    static const int side_dy[4] = { 0, 1, 0, -1 };

    AVCPictureData *currPic = encvid->common->currPic;
    int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock;
    void *extra_info = encvid->sad_extra_info;
    int lambda_motion = encvid->lambda_motion;
    /* not referenced by name below; presumably consumed by the MV_COST macro -- keep */
    uint8 *mvbits = encvid->mvbits;
    int range = encvid->rateCtrl->mvRange;
    int lx = currPic->pitch; /* with padding */
    int mvshift = 2;
    int i0 = *imin; /* search centre */
    int j0 = *jmin;
    uint8 *cand;
    int i, j;
    int ring, side, n, side_len, cand_step;
    int d, dmin;
    int mvcost;
    int min_sad;

    /* cost of the centre position (zero displacement) */
    cand = prev + (i0 + j0 * lx);
    dmin  = (*SAD_Macroblock)(cand, cur, (65535 << 16) | lx, (void*)extra_info);
    mvcost = MV_COST(lambda_motion, mvshift, 0, 0, cmvx, cmvy);
    min_sad = dmin;
    dmin += mvcost;

    /* visit square rings of growing radius; each ring starts at its top-left corner */
    for (ring = 1; ring <= range; ring++)
    {
        i = i0 - ring;
        j = j0 - ring;
        cand = prev + i + j * lx;
        side_len = ring << 1;

        for (side = 0; side < 4; side++)
        {
            cand_step = side_dx[side] + side_dy[side] * lx;

            for (n = 0; n < side_len; n++)
            {
                /* ring positions can fall outside the allowed window; skip those */
                if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
                {
                    d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, (void*)extra_info);
                    mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
                    d +=  mvcost;

                    if (d < dmin)
                    {
                        dmin = d;
                        *imin = i;
                        *jmin = j;
                        min_sad = d - mvcost;
                    }
                }

                i += side_dx[side];
                j += side_dy[side];
                cand += cand_step;
            }
        }
    }

    encvid->rateCtrl->MADofMB[encvid->common->mbNum] = (min_sad / 256.0); // for rate control

    return dmin;
}
1344 
1345 /*===============================================================================
1346     Function:   AVCCandidateSelection
1347     Date:       09/16/2000
1348     Purpose:    Fill up the list of candidate using spatio-temporal correlation
1349                 among neighboring blocks.
1350     Input/Output:   type_pred = 0: first pass, 1: second pass, or no SCD
1351     Modified:   , 09/23/01, get rid of redundant candidates before passing back.
1352                 , 09/11/07, added return for modified predicted MV, this will be
1353                     needed for both fast search and fullsearch.
1354 ===============================================================================*/
1355 
AVCCandidateSelection(int * mvx,int * mvy,int * num_can,int imb,int jmb,AVCEncObject * encvid,int type_pred,int * cmvx,int * cmvy)1356 void AVCCandidateSelection(int *mvx, int *mvy, int *num_can, int imb, int jmb,
1357                            AVCEncObject *encvid, int type_pred, int *cmvx, int *cmvy)
1358 {
1359     AVCCommonObj *video = encvid->common;
1360     AVCMV *mot16x16 = encvid->mot16x16;
1361     AVCMV *pmot;
1362     int mbnum = video->mbNum;
1363     int mbwidth = video->PicWidthInMbs;
1364     int mbheight = video->PicHeightInMbs;
1365     int i, j, same, num1;
1366 
1367     /* this part is for predicted MV */
1368     int pmvA_x = 0, pmvA_y = 0, pmvB_x = 0, pmvB_y = 0, pmvC_x = 0, pmvC_y = 0;
1369     int availA = 0, availB = 0, availC = 0;
1370 
1371     *num_can = 0;
1372 
1373     if (video->PrevRefFrameNum != 0) // previous frame is an IDR frame
1374     {
1375         /* Spatio-Temporal Candidate (five candidates) */
1376         if (type_pred == 0) /* first pass */
1377         {
1378             pmot = &mot16x16[mbnum]; /* same coordinate previous frame */
1379             mvx[(*num_can)] = (pmot->x) >> 2;
1380             mvy[(*num_can)++] = (pmot->y) >> 2;
1381             if (imb >= (mbwidth >> 1) && imb > 0)  /*left neighbor previous frame */
1382             {
1383                 pmot = &mot16x16[mbnum-1];
1384                 mvx[(*num_can)] = (pmot->x) >> 2;
1385                 mvy[(*num_can)++] = (pmot->y) >> 2;
1386             }
1387             else if (imb + 1 < mbwidth)   /*right neighbor previous frame */
1388             {
1389                 pmot = &mot16x16[mbnum+1];
1390                 mvx[(*num_can)] = (pmot->x) >> 2;
1391                 mvy[(*num_can)++] = (pmot->y) >> 2;
1392             }
1393 
1394             if (jmb < mbheight - 1)  /*bottom neighbor previous frame */
1395             {
1396                 pmot = &mot16x16[mbnum+mbwidth];
1397                 mvx[(*num_can)] = (pmot->x) >> 2;
1398                 mvy[(*num_can)++] = (pmot->y) >> 2;
1399             }
1400             else if (jmb > 0)   /*upper neighbor previous frame */
1401             {
1402                 pmot = &mot16x16[mbnum-mbwidth];
1403                 mvx[(*num_can)] = (pmot->x) >> 2;
1404                 mvy[(*num_can)++] = (pmot->y) >> 2;
1405             }
1406 
1407             if (imb > 0 && jmb > 0)  /* upper-left neighbor current frame*/
1408             {
1409                 pmot = &mot16x16[mbnum-mbwidth-1];
1410                 mvx[(*num_can)] = (pmot->x) >> 2;
1411                 mvy[(*num_can)++] = (pmot->y) >> 2;
1412             }
1413             if (jmb > 0 && imb < mbheight - 1)  /* upper right neighbor current frame*/
1414             {
1415                 pmot = &mot16x16[mbnum-mbwidth+1];
1416                 mvx[(*num_can)] = (pmot->x) >> 2;
1417                 mvy[(*num_can)++] = (pmot->y) >> 2;
1418             }
1419         }
1420         else    /* second pass */
1421             /* original ST1 algorithm */
1422         {
1423             pmot = &mot16x16[mbnum]; /* same coordinate previous frame */
1424             mvx[(*num_can)] = (pmot->x) >> 2;
1425             mvy[(*num_can)++] = (pmot->y) >> 2;
1426 
1427             if (imb > 0)  /*left neighbor current frame */
1428             {
1429                 pmot = &mot16x16[mbnum-1];
1430                 mvx[(*num_can)] = (pmot->x) >> 2;
1431                 mvy[(*num_can)++] = (pmot->y) >> 2;
1432             }
1433             if (jmb > 0)  /*upper neighbor current frame */
1434             {
1435                 pmot = &mot16x16[mbnum-mbwidth];
1436                 mvx[(*num_can)] = (pmot->x) >> 2;
1437                 mvy[(*num_can)++] = (pmot->y) >> 2;
1438             }
1439             if (imb < mbwidth - 1)  /*right neighbor previous frame */
1440             {
1441                 pmot = &mot16x16[mbnum+1];
1442                 mvx[(*num_can)] = (pmot->x) >> 2;
1443                 mvy[(*num_can)++] = (pmot->y) >> 2;
1444             }
1445             if (jmb < mbheight - 1)  /*bottom neighbor previous frame */
1446             {
1447                 pmot = &mot16x16[mbnum+mbwidth];
1448                 mvx[(*num_can)] = (pmot->x) >> 2;
1449                 mvy[(*num_can)++] = (pmot->y) >> 2;
1450             }
1451         }
1452 
1453         /* get predicted MV */
1454         if (imb > 0)    /* get MV from left (A) neighbor either on current or previous frame */
1455         {
1456             availA = 1;
1457             pmot = &mot16x16[mbnum-1];
1458             pmvA_x = pmot->x;
1459             pmvA_y = pmot->y;
1460         }
1461 
1462         if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */
1463         {
1464             availB = 1;
1465             pmot = &mot16x16[mbnum-mbwidth];
1466             pmvB_x = pmot->x;
1467             pmvB_y = pmot->y;
1468 
1469             availC = 1;
1470 
1471             if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */
1472             {
1473                 pmot = &mot16x16[mbnum-mbwidth+1];
1474             }
1475             else /* get MV from top-left (D) neighbor of current frame */
1476             {
1477                 pmot = &mot16x16[mbnum-mbwidth-1];
1478             }
1479             pmvC_x = pmot->x;
1480             pmvC_y = pmot->y;
1481         }
1482 
1483     }
1484     else  /* only Spatial Candidate (four candidates)*/
1485     {
1486         if (type_pred == 0) /*first pass*/
1487         {
1488             if (imb > 1)  /* neighbor two blocks away to the left */
1489             {
1490                 pmot = &mot16x16[mbnum-2];
1491                 mvx[(*num_can)] = (pmot->x) >> 2;
1492                 mvy[(*num_can)++] = (pmot->y) >> 2;
1493             }
1494             if (imb > 0 && jmb > 0)  /* upper-left neighbor */
1495             {
1496                 pmot = &mot16x16[mbnum-mbwidth-1];
1497                 mvx[(*num_can)] = (pmot->x) >> 2;
1498                 mvy[(*num_can)++] = (pmot->y) >> 2;
1499             }
1500             if (jmb > 0 && imb < mbheight - 1)  /* upper right neighbor */
1501             {
1502                 pmot = &mot16x16[mbnum-mbwidth+1];
1503                 mvx[(*num_can)] = (pmot->x) >> 2;
1504                 mvy[(*num_can)++] = (pmot->y) >> 2;
1505             }
1506 
1507             /* get predicted MV */
1508             if (imb > 1)    /* get MV from 2nd left (A) neighbor either of current frame */
1509             {
1510                 availA = 1;
1511                 pmot = &mot16x16[mbnum-2];
1512                 pmvA_x = pmot->x;
1513                 pmvA_y = pmot->y;
1514             }
1515 
1516             if (jmb > 0 && imb > 0) /* get MV from top-left (B) neighbor of current frame */
1517             {
1518                 availB = 1;
1519                 pmot = &mot16x16[mbnum-mbwidth-1];
1520                 pmvB_x = pmot->x;
1521                 pmvB_y = pmot->y;
1522             }
1523 
1524             if (jmb > 0 && imb < mbwidth - 1)
1525             {
1526                 availC = 1;
1527                 pmot = &mot16x16[mbnum-mbwidth+1];
1528                 pmvC_x = pmot->x;
1529                 pmvC_y = pmot->y;
1530             }
1531         }
1532 //#ifdef SCENE_CHANGE_DETECTION
1533         /* second pass (ST2 algorithm)*/
1534         else
1535         {
1536             if (type_pred == 1) /*  4/7/01 */
1537             {
1538                 if (imb > 0)  /*left neighbor current frame */
1539                 {
1540                     pmot = &mot16x16[mbnum-1];
1541                     mvx[(*num_can)] = (pmot->x) >> 2;
1542                     mvy[(*num_can)++] = (pmot->y) >> 2;
1543                 }
1544                 if (jmb > 0)  /*upper neighbor current frame */
1545                 {
1546                     pmot = &mot16x16[mbnum-mbwidth];
1547                     mvx[(*num_can)] = (pmot->x) >> 2;
1548                     mvy[(*num_can)++] = (pmot->y) >> 2;
1549                 }
1550                 if (imb < mbwidth - 1)  /*right neighbor current frame */
1551                 {
1552                     pmot = &mot16x16[mbnum+1];
1553                     mvx[(*num_can)] = (pmot->x) >> 2;
1554                     mvy[(*num_can)++] = (pmot->y) >> 2;
1555                 }
1556                 if (jmb < mbheight - 1)  /*bottom neighbor current frame */
1557                 {
1558                     pmot = &mot16x16[mbnum+mbwidth];
1559                     mvx[(*num_can)] = (pmot->x) >> 2;
1560                     mvy[(*num_can)++] = (pmot->y) >> 2;
1561                 }
1562             }
1563             //#else
1564             else /* original ST1 algorithm */
1565             {
1566                 if (imb > 0)  /*left neighbor current frame */
1567                 {
1568                     pmot = &mot16x16[mbnum-1];
1569                     mvx[(*num_can)] = (pmot->x) >> 2;
1570                     mvy[(*num_can)++] = (pmot->y) >> 2;
1571 
1572                     if (jmb > 0)  /*upper-left neighbor current frame */
1573                     {
1574                         pmot = &mot16x16[mbnum-mbwidth-1];
1575                         mvx[(*num_can)] = (pmot->x) >> 2;
1576                         mvy[(*num_can)++] = (pmot->y) >> 2;
1577                     }
1578 
1579                 }
1580                 if (jmb > 0)  /*upper neighbor current frame */
1581                 {
1582                     pmot = &mot16x16[mbnum-mbwidth];
1583                     mvx[(*num_can)] = (pmot->x) >> 2;
1584                     mvy[(*num_can)++] = (pmot->y) >> 2;
1585 
1586                     if (imb < mbheight - 1)  /*upper-right neighbor current frame */
1587                     {
1588                         pmot = &mot16x16[mbnum-mbwidth+1];
1589                         mvx[(*num_can)] = (pmot->x) >> 2;
1590                         mvy[(*num_can)++] = (pmot->y) >> 2;
1591                     }
1592                 }
1593             }
1594 
1595             /* get predicted MV */
1596             if (imb > 0)    /* get MV from left (A) neighbor either on current or previous frame */
1597             {
1598                 availA = 1;
1599                 pmot = &mot16x16[mbnum-1];
1600                 pmvA_x = pmot->x;
1601                 pmvA_y = pmot->y;
1602             }
1603 
1604             if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */
1605             {
1606                 availB = 1;
1607                 pmot = &mot16x16[mbnum-mbwidth];
1608                 pmvB_x = pmot->x;
1609                 pmvB_y = pmot->y;
1610 
1611                 availC = 1;
1612 
1613                 if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */
1614                 {
1615                     pmot = &mot16x16[mbnum-mbwidth+1];
1616                 }
1617                 else /* get MV from top-left (D) neighbor of current frame */
1618                 {
1619                     pmot = &mot16x16[mbnum-mbwidth-1];
1620                 }
1621                 pmvC_x = pmot->x;
1622                 pmvC_y = pmot->y;
1623             }
1624         }
1625 //#endif
1626     }
1627 
1628     /*  3/23/01, remove redundant candidate (possible k-mean) */
1629     num1 = *num_can;
1630     *num_can = 1;
1631     for (i = 1; i < num1; i++)
1632     {
1633         same = 0;
1634         j = 0;
1635         while (!same && j < *num_can)
1636         {
1637 #if (CANDIDATE_DISTANCE==0)
1638             if (mvx[i] == mvx[j] && mvy[i] == mvy[j])
1639 #else
1640             // modified k-mean,  3/24/01, shouldn't be greater than 3
1641             if (AVC_ABS(mvx[i] - mvx[j]) + AVC_ABS(mvy[i] - mvy[j]) < CANDIDATE_DISTANCE)
1642 #endif
1643                 same = 1;
1644             j++;
1645         }
1646         if (!same)
1647         {
1648             mvx[*num_can] = mvx[i];
1649             mvy[*num_can] = mvy[i];
1650             (*num_can)++;
1651         }
1652     }
1653 
1654     if (num1 == 5 && *num_can == 1)
1655         *num_can = ALL_CAND_EQUAL; /* all are equal */
1656 
1657     /* calculate predicted MV */
1658 
1659     if (availA && !(availB || availC))
1660     {
1661         *cmvx = pmvA_x;
1662         *cmvy = pmvA_y;
1663     }
1664     else
1665     {
1666         *cmvx = AVC_MEDIAN(pmvA_x, pmvB_x, pmvC_x);
1667         *cmvy = AVC_MEDIAN(pmvA_y, pmvB_y, pmvC_y);
1668     }
1669 
1670     return ;
1671 }
1672 
1673 
1674 /*************************************************************
1675     Function:   AVCMoveNeighborSAD
1676     Date:       3/27/01
1677     Purpose:    Move neighboring SAD around when center has shifted
1678 *************************************************************/
1679 
/* Re-centre the 3x3 cost window dn[0..8] on position new_loc (0 = centre, 1..8 =
 * neighbours).  Costs of positions that are still inside the window after the shift are
 * carried over to their new slot; every other slot is reset to 65536 ("not computed").
 * dn[0] always receives the old cost of new_loc.  new_loc must be in 0..8.
 */
void AVCMoveNeighborSAD(int dn[], int new_loc)
{
    /* carry[new_loc][k]: old slot whose cost lands in new slot k, or -1 when the new
       slot k was not part of the old window (or is not carried over) */
    static const int carry[9][9] =
    {
        { 0, -1, -1, -1, -1, -1, -1, -1, -1},
        { 1, -1, -1, -1,  2,  0,  8, -1, -1},
        { 2, -1, -1, -1,  3,  4,  0,  8,  1},
        { 3, -1, -1, -1, -1, -1,  4,  0,  2},
        { 4,  2,  3, -1, -1, -1,  5,  6,  0},
        { 5,  0,  4, -1, -1, -1, -1, -1,  6},
        { 6,  8,  0,  4,  5, -1, -1, -1,  7},
        { 7, -1,  8,  0,  6, -1, -1, -1, -1},
        { 8, -1,  1,  2,  0,  6,  7, -1, -1}
    };
    int old_cost[9];
    int slot, src;

    /* snapshot first: slots are overwritten while they are still needed as sources */
    for (slot = 0; slot < 9; slot++)
    {
        old_cost[slot] = dn[slot];
    }

    for (slot = 0; slot < 9; slot++)
    {
        src = carry[new_loc][slot];
        if (src < 0)
        {
            dn[slot] = 65536;
        }
        else
        {
            dn[slot] = old_cost[src];
        }
    }

    return ;
}
1751 
1752 /*  3/28/01, find minimal of dn[9] */
1753 
/* Return the index (1..8) of the smallest neighbour cost in dn[1..8].
 * dn[0] (the centre) is not considered; on ties the lowest index wins.
 */
int AVCFindMin(int dn[])
{
    int best = 1;
    int k = 2;

    while (k < 9)
    {
        /* strict comparison keeps the earliest index among equal costs */
        if (dn[k] < dn[best])
        {
            best = k;
        }
        k++;
    }

    return best;
}
1772 
1773 
1774 
1775