/* ------------------------------------------------------------------
 * Copyright (C) 1998-2009 PacketVideo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 * -------------------------------------------------------------------
 */
18 #include "mp4def.h"
19 #include "mp4enc_lib.h"
20 #include "mp4lib_int.h"
21 #include "m4venc_oscl.h"
22 
/* Define PRINT_MV to make MotionEstimation append the motion vectors and SADs
   of every macroblock to a debug text file. Disabled by default. */
/* #define PRINT_MV */

#define MIN_GOP 1   /* minimum size of GOP,  1/23/01, need to be tested */

#define CANDIDATE_DISTANCE  0 /* distance candidate from one another to consider as a distinct one */
/* shouldn't be more than 3 */

#define ZERO_MV_PREF    0 /* 0: bias (0,0)MV before full-pel search, lowest complexity */
/* 1: bias (0,0)MV after full-pel search, before half-pel, highest complexity */
/* 2: bias (0,0)MV after half-pel, high complexity, better PSNR */

#define RASTER_REFRESH  /* instead of random INTRA refresh, do raster scan,  2/26/01 */

#ifdef RASTER_REFRESH
#define TARGET_REFRESH_PER_REGION 4 /* no. of MBs per frame to be INTRA refreshed */
#else
#define TARGET_REFRESH_PER_REGION 1 /* no. of MBs per region to be INTRA refreshed */
#endif

#define ALL_CAND_EQUAL  10  /* any number greater than 5 will work */

#define NumPixelMB  256     /* number of pixels used in SAD calculation */

#define DEF_8X8_WIN 3   /* search region for 8x8 MVs around the 16x16 MV */
#define MB_Nb  256

#define PREF_NULL_VEC 129   /* for zero vector bias */
#define PREF_16_VEC 129     /* 1MV bias versus 4MVs */
#define PREF_INTRA  512     /* bias for INTRA coding */
51 
52 const static Int tab_exclude[9][9] =  // [last_loc][curr_loc]
53 {
54     {0, 0, 0, 0, 0, 0, 0, 0, 0},
55     {0, 0, 0, 0, 1, 1, 1, 0, 0},
56     {0, 0, 0, 0, 1, 1, 1, 1, 1},
57     {0, 0, 0, 0, 0, 0, 1, 1, 1},
58     {0, 1, 1, 0, 0, 0, 1, 1, 1},
59     {0, 1, 1, 0, 0, 0, 0, 0, 1},
60     {0, 1, 1, 1, 1, 0, 0, 0, 1},
61     {0, 0, 1, 1, 1, 0, 0, 0, 0},
62     {0, 0, 1, 1, 1, 1, 1, 0, 0}
63 }; //to decide whether to continue or compute
64 
65 const static Int refine_next[8][2] =    /* [curr_k][increment] */
66 {
67     {0, 0}, {2, 0}, {1, 1}, {0, 2}, { -1, 1}, { -2, 0}, { -1, -1}, {0, -2}
68 };
69 
70 #ifdef __cplusplus
71 extern "C"
72 {
73 #endif
74 
75     void MBMotionSearch(VideoEncData *video, UChar *cur, UChar *best_cand[],
76     Int i0, Int j0, Int type_pred, Int fullsearch, Int *hp_guess);
77 
78     Int  fullsearch(VideoEncData *video, Vol *currVol, UChar *ref, UChar *cur,
79                     Int *imin, Int *jmin, Int ilow, Int ihigh, Int jlow, Int jhigh);
80     Int fullsearchBlk(VideoEncData *video, Vol *currVol, UChar *cent, UChar *cur,
81                       Int *imin, Int *jmin, Int ilow, Int ihigh, Int jlow, Int jhigh, Int range);
82     void CandidateSelection(Int *mvx, Int *mvy, Int *num_can, Int imb, Int jmb,
83                             VideoEncData *video, Int type_pred);
84     void RasterIntraUpdate(UChar *intraArray, UChar *Mode, Int totalMB, Int numRefresh);
85     void ResetIntraUpdate(UChar *intraArray, Int totalMB);
86     void ResetIntraUpdateRegion(UChar *intraArray, Int start_i, Int rwidth,
87                                 Int start_j, Int rheight, Int mbwidth, Int mbheight);
88 
89     void MoveNeighborSAD(Int dn[], Int new_loc);
90     Int FindMin(Int dn[]);
91     void PrepareCurMB(VideoEncData *video, UChar *cur);
92 
93 #ifdef __cplusplus
94 }
95 #endif
96 
/***************************************/
/*  2/28/01, for HYPOTHESIS TESTING */
#ifdef HTFM     /* defined in mp4def.h */
#ifdef __cplusplus
extern "C"
{
#endif
    void CalcThreshold(double pf, double exp_lamda[], Int nrmlz_th[]);
    void HTFMPrepareCurMB(VideoEncData *video, HTFM_Stat *htfm_stat, UChar *cur);
#ifdef __cplusplus
}
#endif


#define HTFM_Pf  0.25   /* 3/2/1, probability of false alarm, can be varied from 0 to 0.5 */
/***************************************/
#endif
114 
#ifdef _SAD_STAT
/* Global statistics counters, only built when _SAD_STAT is defined. */
ULong num_MB = 0;
ULong num_HP_MB = 0;
ULong num_Blk = 0;
ULong num_HP_Blk = 0;
ULong num_cand = 0;
ULong num_better_hp = 0;
ULong i_dist_from_guess = 0;
ULong j_dist_from_guess = 0;
ULong num_hp_not_zero = 0;
#endif
126 
127 
128 
129 /*==================================================================
130     Function:   MotionEstimation
131     Date:       10/3/2000
132     Purpose:    Go through all macroblock for motion search and
133                 determine scene change detection.
134 ====================================================================*/
135 
MotionEstimation(VideoEncData * video)136 void MotionEstimation(VideoEncData *video)
137 {
138     UChar use_4mv = video->encParams->MV8x8_Enabled;
139     Vol *currVol = video->vol[video->currLayer];
140     Vop *currVop = video->currVop;
141     VideoEncFrameIO *currFrame = video->input;
142     Int i, j, comp;
143     Int mbwidth = currVol->nMBPerRow;
144     Int mbheight = currVol->nMBPerCol;
145     Int totalMB = currVol->nTotalMB;
146     Int width = currFrame->pitch;
147     UChar *mode_mb, *Mode = video->headerInfo.Mode;
148     MOT *mot_mb, **mot = video->mot;
149     UChar *intraArray = video->intraArray;
150     Int FS_en = video->encParams->FullSearch_Enabled;
151     void (*ComputeMBSum)(UChar *, Int, MOT *) = video->functionPointer->ComputeMBSum;
152     void (*ChooseMode)(UChar*, UChar*, Int, Int) = video->functionPointer->ChooseMode;
153 
154     Int numIntra, start_i, numLoop, incr_i;
155     Int mbnum, offset;
156     UChar *cur, *best_cand[5];
157     Int sad8 = 0, sad16 = 0;
158     Int totalSAD = 0;   /* average SAD for rate control */
159     Int skip_halfpel_4mv;
160     Int f_code_p, f_code_n, max_mag = 0, min_mag = 0;
161     Int type_pred;
162     Int xh[5] = {0, 0, 0, 0, 0};
163     Int yh[5] = {0, 0, 0, 0, 0}; /* half-pel */
164     UChar hp_mem4MV[17*17*4];
165 
166 #ifdef HTFM
167     /***** HYPOTHESIS TESTING ********/  /* 2/28/01 */
168     Int collect = 0;
169     HTFM_Stat htfm_stat;
170     double newvar[16];
171     double exp_lamda[15];
172     /*********************************/
173 #endif
174     Int hp_guess = 0;
175 #ifdef PRINT_MV
176     FILE *fp_debug;
177 #endif
178 
179 //  FILE *fstat;
180 //  static int frame_num = 0;
181 
182     offset = 0;
183 
184     if (video->currVop->predictionType == I_VOP)
185     {   /* compute the SAV */
186         mbnum = 0;
187         cur = currFrame->yChan;
188 
189         for (j = 0; j < mbheight; j++)
190         {
191             for (i = 0; i < mbwidth; i++)
192             {
193                 video->mbnum = mbnum;
194                 mot_mb = mot[mbnum];
195 
196                 (*ComputeMBSum)(cur + (i << 4), width, mot_mb);
197 
198                 totalSAD += mot_mb[0].sad;
199 
200                 mbnum++;
201             }
202             cur += (width << 4);
203         }
204 
205         video->sumMAD = (float)totalSAD / (float)NumPixelMB;
206 
207         ResetIntraUpdate(intraArray, totalMB);
208 
209         return  ;
210     }
211 
212     /* 09/20/05 */
213     if (video->prevBaseVop->padded == 0 && !video->encParams->H263_Enabled)
214     {
215         PaddingEdge(video->prevBaseVop);
216         video->prevBaseVop->padded = 1;
217     }
218 
219     /* Random INTRA update */
220     /*  suggest to do it in CodeMB */
221     /*  2/21/2001 */
222     //if(video->encParams->RC_Type == CBR_1 || video->encParams->RC_Type == CBR_2)
223     if (video->currLayer == 0 && video->encParams->Refresh)
224     {
225         RasterIntraUpdate(intraArray, Mode, totalMB, video->encParams->Refresh);
226     }
227 
228     video->sad_extra_info = NULL;
229 
230 #ifdef HTFM
231     /***** HYPOTHESIS TESTING ********/  /* 2/28/01 */
232     InitHTFM(video, &htfm_stat, newvar, &collect);
233     /*********************************/
234 #endif
235 
236     if ((video->encParams->SceneChange_Det == 1) /*&& video->currLayer==0 */
237             && ((video->encParams->LayerFrameRate[0] < 5.0) || (video->numVopsInGOP > MIN_GOP)))
238         /* do not try to detect a new scene if low frame rate and too close to previous I-frame */
239     {
240         incr_i = 2;
241         numLoop = 2;
242         start_i = 1;
243         type_pred = 0; /* for initial candidate selection */
244     }
245     else
246     {
247         incr_i = 1;
248         numLoop = 1;
249         start_i = 0;
250         type_pred = 2;
251     }
252 
253     /* First pass, loop thru half the macroblock */
254     /* determine scene change */
255     /* Second pass, for the rest of macroblocks */
256     numIntra = 0;
257     while (numLoop--)
258     {
259         for (j = 0; j < mbheight; j++)
260         {
261             if (incr_i > 1)
262                 start_i = (start_i == 0 ? 1 : 0) ; /* toggle 0 and 1 */
263 
264             offset = width * (j << 4) + (start_i << 4);
265 
266             mbnum = j * mbwidth + start_i;
267 
268             for (i = start_i; i < mbwidth; i += incr_i)
269             {
270                 video->mbnum = mbnum;
271                 mot_mb = mot[mbnum];
272                 mode_mb = Mode + mbnum;
273 
274                 cur = currFrame->yChan + offset;
275 
276 
277                 if (*mode_mb != MODE_INTRA)
278                 {
279 #if defined(HTFM)
280                     HTFMPrepareCurMB(video, &htfm_stat, cur);
281 #else
282                     PrepareCurMB(video, cur);
283 #endif
284                     /************************************************************/
285                     /******** full-pel 1MV and 4MVs search **********************/
286 
287 #ifdef _SAD_STAT
288                     num_MB++;
289 #endif
290                     MBMotionSearch(video, cur, best_cand, i << 4, j << 4, type_pred,
291                                    FS_en, &hp_guess);
292 
293 #ifdef PRINT_MV
294                     fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
295                     fprintf(fp_debug, "#%d (%d,%d,%d) : ", mbnum, mot_mb[0].x, mot_mb[0].y, mot_mb[0].sad);
296                     fprintf(fp_debug, "(%d,%d,%d) : (%d,%d,%d) : (%d,%d,%d) : (%d,%d,%d) : ==>\n",
297                             mot_mb[1].x, mot_mb[1].y, mot_mb[1].sad,
298                             mot_mb[2].x, mot_mb[2].y, mot_mb[2].sad,
299                             mot_mb[3].x, mot_mb[3].y, mot_mb[3].sad,
300                             mot_mb[4].x, mot_mb[4].y, mot_mb[4].sad);
301                     fclose(fp_debug);
302 #endif
303                     sad16 = mot_mb[0].sad;
304 #ifdef NO_INTER4V
305                     sad8 = sad16;
306 #else
307                     sad8 = mot_mb[1].sad + mot_mb[2].sad + mot_mb[3].sad + mot_mb[4].sad;
308 #endif
309 
310                     /* choose between INTRA or INTER */
311                     (*ChooseMode)(mode_mb, cur, width, ((sad8 < sad16) ? sad8 : sad16));
312                 }
313                 else    /* INTRA update, use for prediction 3/23/01 */
314                 {
315                     mot_mb[0].x = mot_mb[0].y = 0;
316                 }
317 
318                 if (*mode_mb == MODE_INTRA)
319                 {
320                     numIntra++ ;
321 
322                     /* compute SAV for rate control and fast DCT, 11/28/00 */
323                     (*ComputeMBSum)(cur, width, mot_mb);
324 
325                     /* leave mot_mb[0] as it is for fast motion search */
326                     /* set the 4 MVs to zeros */
327                     for (comp = 1; comp <= 4; comp++)
328                     {
329                         mot_mb[comp].x = 0;
330                         mot_mb[comp].y = 0;
331                     }
332 #ifdef PRINT_MV
333                     fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
334                     fprintf(fp_debug, "\n");
335                     fclose(fp_debug);
336 #endif
337                 }
338                 else /* *mode_mb = MODE_INTER;*/
339                 {
340                     if (video->encParams->HalfPel_Enabled)
341                     {
342 #ifdef _SAD_STAT
343                         num_HP_MB++;
344 #endif
345                         /* find half-pel resolution motion vector */
346                         FindHalfPelMB(video, cur, mot_mb, best_cand[0],
347                                       i << 4, j << 4, xh, yh, hp_guess);
348 #ifdef PRINT_MV
349                         fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
350                         fprintf(fp_debug, "(%d,%d), %d\n", mot_mb[0].x, mot_mb[0].y, mot_mb[0].sad);
351                         fclose(fp_debug);
352 #endif
353                         skip_halfpel_4mv = ((sad16 - mot_mb[0].sad) <= (MB_Nb >> 1) + 1);
354                         sad16 = mot_mb[0].sad;
355 
356 #ifndef NO_INTER4V
357                         if (use_4mv && !skip_halfpel_4mv)
358                         {
359                             /* Also decide 1MV or 4MV !!!!!!!!*/
360                             sad8 = FindHalfPelBlk(video, cur, mot_mb, sad16,
361                                                   best_cand, mode_mb, i << 4, j << 4, xh, yh, hp_mem4MV);
362 
363 #ifdef PRINT_MV
364                             fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
365                             fprintf(fp_debug, " (%d,%d,%d) : (%d,%d,%d) : (%d,%d,%d) : (%d,%d,%d) \n",
366                                     mot_mb[1].x, mot_mb[1].y, mot_mb[1].sad,
367                                     mot_mb[2].x, mot_mb[2].y, mot_mb[2].sad,
368                                     mot_mb[3].x, mot_mb[3].y, mot_mb[3].sad,
369                                     mot_mb[4].x, mot_mb[4].y, mot_mb[4].sad);
370                             fclose(fp_debug);
371 #endif
372                         }
373 #endif /* NO_INTER4V */
374                     }
375                     else    /* HalfPel_Enabled ==0  */
376                     {
377 #ifndef NO_INTER4V
378                         //if(sad16 < sad8-PREF_16_VEC)
379                         if (sad16 - PREF_16_VEC > sad8)
380                         {
381                             *mode_mb = MODE_INTER4V;
382                         }
383 #endif
384                     }
385 #if (ZERO_MV_PREF==2)   /* use mot_mb[7].sad as d0 computed in MBMotionSearch*/
386                     /******************************************************/
387                     if (mot_mb[7].sad - PREF_NULL_VEC < sad16 && mot_mb[7].sad - PREF_NULL_VEC < sad8)
388                     {
389                         mot_mb[0].sad = mot_mb[7].sad - PREF_NULL_VEC;
390                         mot_mb[0].x = mot_mb[0].y = 0;
391                         *mode_mb = MODE_INTER;
392                     }
393                     /******************************************************/
394 #endif
395                     if (*mode_mb == MODE_INTER)
396                     {
397                         if (mot_mb[0].x == 0 && mot_mb[0].y == 0)   /* use zero vector */
398                             mot_mb[0].sad += PREF_NULL_VEC; /* add back the bias */
399 
400                         mot_mb[1].sad = mot_mb[2].sad = mot_mb[3].sad = mot_mb[4].sad = (mot_mb[0].sad + 2) >> 2;
401                         mot_mb[1].x = mot_mb[2].x = mot_mb[3].x = mot_mb[4].x = mot_mb[0].x;
402                         mot_mb[1].y = mot_mb[2].y = mot_mb[3].y = mot_mb[4].y = mot_mb[0].y;
403 
404                     }
405                 }
406 
407                 /* find maximum magnitude */
408                 /* compute average SAD for rate control, 11/28/00 */
409                 if (*mode_mb == MODE_INTER)
410                 {
411 #ifdef PRINT_MV
412                     fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
413                     fprintf(fp_debug, "%d MODE_INTER\n", mbnum);
414                     fclose(fp_debug);
415 #endif
416                     totalSAD += mot_mb[0].sad;
417                     if (mot_mb[0].x > max_mag)
418                         max_mag = mot_mb[0].x;
419                     if (mot_mb[0].y > max_mag)
420                         max_mag = mot_mb[0].y;
421                     if (mot_mb[0].x < min_mag)
422                         min_mag = mot_mb[0].x;
423                     if (mot_mb[0].y < min_mag)
424                         min_mag = mot_mb[0].y;
425                 }
426                 else if (*mode_mb == MODE_INTER4V)
427                 {
428 #ifdef PRINT_MV
429                     fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
430                     fprintf(fp_debug, "%d MODE_INTER4V\n", mbnum);
431                     fclose(fp_debug);
432 #endif
433                     totalSAD += sad8;
434                     for (comp = 1; comp <= 4; comp++)
435                     {
436                         if (mot_mb[comp].x > max_mag)
437                             max_mag = mot_mb[comp].x;
438                         if (mot_mb[comp].y > max_mag)
439                             max_mag = mot_mb[comp].y;
440                         if (mot_mb[comp].x < min_mag)
441                             min_mag = mot_mb[comp].x;
442                         if (mot_mb[comp].y < min_mag)
443                             min_mag = mot_mb[comp].y;
444                     }
445                 }
446                 else    /* MODE_INTRA */
447                 {
448 #ifdef PRINT_MV
449                     fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
450                     fprintf(fp_debug, "%d MODE_INTRA\n", mbnum);
451                     fclose(fp_debug);
452 #endif
453                     totalSAD += mot_mb[0].sad;
454                 }
455                 mbnum += incr_i;
456                 offset += (incr_i << 4);
457 
458             }
459         }
460 
461         if (incr_i > 1 && numLoop) /* scene change on and first loop */
462         {
463             //if(numIntra > ((totalMB>>3)<<1) + (totalMB>>3)) /* 75% of 50%MBs */
464             if (numIntra > (0.30*(totalMB / 2.0))) /* 15% of 50%MBs */
465             {
466                 /******** scene change detected *******************/
467                 currVop->predictionType = I_VOP;
468                 M4VENC_MEMSET(Mode, MODE_INTRA, sizeof(UChar)*totalMB); /* set this for MB level coding*/
469                 currVop->quantizer = video->encParams->InitQuantIvop[video->currLayer];
470 
471                 /* compute the SAV for rate control & fast DCT */
472                 totalSAD = 0;
473                 offset = 0;
474                 mbnum = 0;
475                 cur = currFrame->yChan;
476 
477                 for (j = 0; j < mbheight; j++)
478                 {
479                     for (i = 0; i < mbwidth; i++)
480                     {
481                         video->mbnum = mbnum;
482                         mot_mb = mot[mbnum];
483 
484 
485                         (*ComputeMBSum)(cur + (i << 4), width, mot_mb);
486                         totalSAD += mot_mb[0].sad;
487 
488                         mbnum++;
489                     }
490                     cur += (width << 4);
491                 }
492 
493                 video->sumMAD = (float)totalSAD / (float)NumPixelMB;
494                 ResetIntraUpdate(intraArray, totalMB);
495                 /* video->numVopsInGOP=0; 3/13/01 move it to vop.c*/
496 
497                 return ;
498             }
499         }
500         /******** no scene change, continue motion search **********************/
501         start_i = 0;
502         type_pred++; /* second pass */
503     }
504 
505     video->sumMAD = (float)totalSAD / (float)NumPixelMB;    /* avg SAD */
506 
507     /* find f_code , 10/27/2000 */
508     f_code_p = 1;
509     while ((max_mag >> (4 + f_code_p)) > 0)
510         f_code_p++;
511 
512     f_code_n = 1;
513     min_mag *= -1;
514     while ((min_mag - 1) >> (4 + f_code_n) > 0)
515         f_code_n++;
516 
517     currVop->fcodeForward = (f_code_p > f_code_n ? f_code_p : f_code_n);
518 
519 #ifdef HTFM
520     /***** HYPOTHESIS TESTING ********/  /* 2/28/01 */
521     if (collect)
522     {
523         collect = 0;
524         UpdateHTFM(video, newvar, exp_lamda, &htfm_stat);
525     }
526     /*********************************/
527 #endif
528 
529     return ;
530 }
531 
532 
533 #ifdef HTFM
/*==================================================================
    Function:   InitHTFM
    Purpose:    Select the HTFM SAD function pointers for this frame
                and fill the two 16-entry sampling-offset tables.

    When numVopsInGOP % 30 == 1 the "collect" variants are installed,
    *collect is set to 1, the statistics in htfm_stat and newvar[0..15]
    are cleared, and the offset tables live in htfm_stat. Otherwise the
    regular HTFM variants are installed and the tables live inside
    video->nrmlz_th (entries 16..31 and 32..47). *collect is left
    untouched in that case.
====================================================================*/
void InitHTFM(VideoEncData *video, HTFM_Stat *htfm_stat, double *newvar, Int *collect)
{
    /* Row and column of each of the 16 sampling positions inside a 4x4
       cell; an offset is row * stride + column. */
    static const Int sample_row[16] = {0, 2, 0, 2, 1, 3, 1, 3, 1, 3, 3, 1, 0, 2, 2, 0};
    static const Int sample_col[16] = {0, 2, 2, 0, 1, 3, 3, 1, 0, 2, 0, 2, 1, 3, 1, 3};

    Int i;
    Int lx = video->currVop->width;  /* stride used for the first table */
    Int rx = video->currVop->pitch;  /* stride used for the second table */
    Int *offset, *offset2;

    /* 4/11/01, collect data every 30 frames, doesn't have to be base layer */
    if (((Int)video->numVopsInGOP) % 30 == 1)
    {
        *collect = 1;

        htfm_stat->countbreak = 0;
        htfm_stat->abs_dif_mad_avg = 0;

        for (i = 0; i < 16; i++)
        {
            newvar[i] = 0.0;
        }

        video->functionPointer->SAD_Macroblock = &SAD_MB_HTFM_Collect;
        video->functionPointer->SAD_MB_HalfPel[0] = NULL;
        video->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFM_Collectxh;
        video->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFM_Collectyh;
        video->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFM_Collectxhyh;
        video->sad_extra_info = (void*)(htfm_stat);
        offset = htfm_stat->offsetArray;
        offset2 = htfm_stat->offsetRef;
    }
    else
    {
        video->functionPointer->SAD_Macroblock = &SAD_MB_HTFM;
        video->functionPointer->SAD_MB_HalfPel[0] = NULL;
        video->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFMxh;
        video->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFMyh;
        video->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFMxhyh;
        video->sad_extra_info = (void*)(video->nrmlz_th);
        offset = video->nrmlz_th + 16;
        offset2 = video->nrmlz_th + 32;
    }

    for (i = 0; i < 16; i++)
    {
        offset[i] = sample_row[i] * lx + sample_col[i];
        offset2[i] = sample_row[i] * rx + sample_col[i];
    }

    return ;
}
618 
/*==================================================================
    Function:   UpdateHTFM
    Purpose:    Turn the statistics collected in htfm_stat into the
                15 exp_lamda[] parameters and recompute the thresholds
                in video->nrmlz_th via CalcThreshold.

    newvar[0] is overwritten with the averaged statistic (clamped to
    at least 0.001); exp_lamda[0..14] are overwritten. A zero
    countbreak is replaced by 1 to avoid dividing by zero.
====================================================================*/
void UpdateHTFM(VideoEncData *video, double *newvar, double *exp_lamda, HTFM_Stat *htfm_stat)
{
    /* exp_lamda[k] = exp_lamda[0] * lamda_scale[k - 1] for k = 1..14 */
    static const double lamda_scale[14] =
    {
        1.5825, 2.1750, 3.5065, 3.1436, 3.5315, 3.7449, 4.5854,
        4.6191, 5.4041, 6.5974, 10.5341, 10.0719, 12.0516, 15.4552
    };
    Int k;

    if (htfm_stat->countbreak == 0)
        htfm_stat->countbreak = 1;

    newvar[0] = (double)(htfm_stat->abs_dif_mad_avg) / (htfm_stat->countbreak * 16.);

    if (newvar[0] < 0.001)
    {
        newvar[0] = 0.001; /* to prevent floating overflow */
    }

    exp_lamda[0] =  1 / (newvar[0] * 1.4142136);
    for (k = 1; k < 15; k++)
    {
        exp_lamda[k] = exp_lamda[0] * lamda_scale[k - 1];
    }

    CalcThreshold(HTFM_Pf, exp_lamda, video->nrmlz_th);
    return ;
}
649 
650 
/*==================================================================
    Function:   CalcThreshold
    Purpose:    Compute the 16 integer thresholds nrmlz_th[0..15] from
                the false-alarm probability pf and exp_lamda[0..14].

    For i = 0..14:
        pf <  0.5 :  t =  1/exp_lamda[i] * log(2*pf)
        otherwise :  t = -1/exp_lamda[i] * log(2*(1-pf))
        nrmlz_th[i] = (Int)(t * 16*(i+1) + 0.5)
    nrmlz_th[15] is always 0.
====================================================================*/
void CalcThreshold(double pf, double exp_lamda[], Int nrmlz_th[])
{
    Int i;
    Int numer;          /* +1 or -1, numerator of the 1/lamda factor */
    double log_term;    /* loop-invariant logarithm */
    double temp[15];

    /* parametric PREMODELling */
    if (pf < 0.5)
    {
        numer = 1;
        log_term = M4VENC_LOG(2 * pf);
    }
    else
    {
        numer = -1;
        log_term = M4VENC_LOG(2 * (1 - pf));
    }

    for (i = 0; i < 15; i++)
    {
        temp[i] = numer / exp_lamda[i] * log_term;
    }

    nrmlz_th[15] = 0;

    /* scale up to the number of pixels covered after stage i (16 per stage).
       NOTE(review): "+ 0.5" followed by truncation rounds to nearest only
       for non-negative values; temp[] looks negative whenever pf < 0.5 and
       exp_lamda[] is positive. Confirm this rounding is intended. */
    for (i = 0; i < 15; i++)
    {
        nrmlz_th[i] = (Int)(temp[i] * ((i + 1) << 4) + 0.5);
    }

    return ;
}
673 
/*==================================================================
    Function:   HTFMPrepareCurMB
    Purpose:    Repack the current macroblock at `cur` into
                video->currYMB as 64 words, ordered by the 16 sampling
                offsets.

    For each of the 16 offsets, four words are written, one per row
    at 0, 4, 8 and 12 lines below the offset position. Each word packs
    the pixels at columns +0, +4, +8 and +12 of that row, with the
    column +0 pixel in the least significant byte.

    The offset table comes from htfm_stat->offsetArray when
    numVopsInGOP % 30 == 1, otherwise from video->nrmlz_th + 16, the
    same selection made by InitHTFM.
====================================================================*/
void    HTFMPrepareCurMB(VideoEncData *video, HTFM_Stat *htfm_stat, UChar *cur)
{
    void* tmp = (void*)(video->currYMB);
    ULong *htfmMB = (ULong*)tmp;
    const UChar *row;
    Int *offset;
    Int i, r;
    Int rowStep = video->currVop->width << 2;   /* four picture lines */

    if (((Int)video->numVopsInGOP) % 30 == 1)
    {
        offset = htfm_stat->offsetArray;
    }
    else
    {
        offset = video->nrmlz_th + 16;
    }

    for (i = 0; i < 16; i++)
    {
        for (r = 0; r < 4; r++)
        {
            row = cur + offset[i] + r * rowStep;

            /* Cast before shifting: an unsigned char promotes to int, and
               shifting a value >= 128 left by 24 would overflow the sign
               bit (undefined behaviour, and sign-extension when ULong is
               wider than int). */
            *htfmMB++ = (ULong)row[0]
                        | ((ULong)row[4] << 8)
                        | ((ULong)row[8] << 16)
                        | ((ULong)row[12] << 24);
        }
    }

    return ;
}
735 
736 
737 #endif
738 
/*==================================================================
    Function:   PrepareCurMB
    Purpose:    Copy the 16x16 block starting at `cur` (line stride
                video->currVop->width) into video->currYMB as 16
                consecutive rows of 16 bytes.

    The copy is done byte by byte: reading the pixel rows through a
    ULong pointer would require `cur` to be suitably aligned, would
    break the effective-type (aliasing) rules, and would only copy the
    right amount when ULong is exactly 4 bytes wide. Rows are addressed
    by index so no pointer outside the block is ever formed.
====================================================================*/
void    PrepareCurMB(VideoEncData *video, UChar *cur)
{
    UChar *dst = (UChar *)(video->currYMB);
    const UChar *src;
    Int width = video->currVop->width;
    Int row, col;

    for (row = 0; row < 16; row++)
    {
        src = cur + row * width;

        for (col = 0; col < 16; col++)
        {
            dst[(row << 4) + col] = src[col];
        }
    }

    return ;
}
758 
759 
/*==================================================================
    Function:   MBMotionSearch
    Date:       09/06/2000
    Purpose:    Perform motion estimation for a macroblock.
                Find 1MV and 4MVs in half-pel resolution,
                using the ST1 algorithm provided by Chalidabhongse
                and Kuo, CSVT March'98.

==================================================================*/
769 
MBMotionSearch(VideoEncData * video,UChar * cur,UChar * best_cand[],Int i0,Int j0,Int type_pred,Int FS_en,Int * hp_guess)770 void MBMotionSearch(VideoEncData *video, UChar *cur, UChar *best_cand[],
771                     Int i0, Int j0, Int type_pred, Int FS_en, Int *hp_guess)
772 {
773     Vol *currVol = video->vol[video->currLayer];
774     UChar *ref, *cand, *ncand = NULL, *cur8;
775     void *extra_info = video->sad_extra_info;
776     Int mbnum = video->mbnum;
777     Int width = video->currVop->width; /* 6/12/01, must be multiple of 16 */
778     Int height = video->currVop->height;
779     MOT **mot = video->mot;
780     UChar use_4mv = video->encParams->MV8x8_Enabled;
781     UChar h263_mode = video->encParams->H263_Enabled;
782     Int(*SAD_Macroblock)(UChar*, UChar*, Int, void*) = video->functionPointer->SAD_Macroblock;
783     Int(*SAD_Block)(UChar*, UChar*, Int, Int, void*) = video->functionPointer->SAD_Block;
784     VideoEncParams *encParams = video->encParams;
785     Int range = encParams->SearchRange;
786 
787     Int lx = video->currVop->pitch; /* padding */
788     Int comp;
789     Int i, j, imin, jmin, ilow, ihigh, jlow, jhigh, iorg, jorg;
790     Int d, dmin, dn[9];
791 #if (ZERO_MV_PREF==1)   /* compute (0,0) MV at the end */
792     Int d0;
793 #endif
794     Int k;
795     Int mvx[5], mvy[5], imin0, jmin0;
796     Int num_can, center_again;
797     Int last_loc, new_loc = 0;
798     Int step, max_step = range >> 1;
799     Int next;
800 
801     ref = video->forwardRefVop->yChan; /* origin of actual frame */
802 
803     cur = video->currYMB; /* use smaller memory space for current MB */
804 
805     /*  find limit of the search (adjusting search range)*/
806 
807     if (!h263_mode)
808     {
809         ilow = i0 - range;
810         if (ilow < -15)
811             ilow = -15;
812         ihigh = i0 + range - 1;
813         if (ihigh > width - 1)
814             ihigh = width - 1;
815         jlow = j0 - range;
816         if (jlow < -15)
817             jlow = -15;
818         jhigh = j0 + range - 1;
819         if (jhigh > height - 1)
820             jhigh = height - 1;
821     }
822     else
823     {
824         ilow = i0 - range;
825         if (ilow < 0)
826             ilow = 0;
827         ihigh = i0 + range - 1;
828         if (ihigh > width - 16)
829             ihigh = width - 16;
830         jlow = j0 - range;
831         if (jlow < 0)
832             jlow = 0;
833         jhigh = j0 + range - 1;
834         if (jhigh > height - 16)
835             jhigh = height - 16;
836     }
837 
838     imin = i0;
839     jmin = j0; /* needed for fullsearch */
840     ncand = ref + imin + jmin * lx;
841 
842     /* for first row of MB, fullsearch can be used */
843     if (FS_en)
844     {
845         *hp_guess = 0; /* no guess for fast half-pel */
846 
847         dmin =  fullsearch(video, currVol, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh);
848 
849         ncand = ref + imin + jmin * lx;
850 
851         mot[mbnum][0].sad = dmin;
852         mot[mbnum][0].x = (imin - i0) << 1;
853         mot[mbnum][0].y = (jmin - j0) << 1;
854         imin0 = imin << 1;  /* 16x16 MV in half-pel resolution */
855         jmin0 = jmin << 1;
856         best_cand[0] = ncand;
857     }
858     else
859     {   /* 4/7/01, modified this testing for fullsearch the top row to only upto (0,3) MB */
860         /*            upto 30% complexity saving with the same complexity */
861         if (video->forwardRefVop->predictionType == I_VOP && j0 == 0 && i0 <= 64 && type_pred != 1)
862         {
863             *hp_guess = 0; /* no guess for fast half-pel */
864             dmin =  fullsearch(video, currVol, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh);
865             ncand = ref + imin + jmin * lx;
866         }
867         else
868         {
869             /************** initialize candidate **************************/
870             /* find initial motion vector */
871             CandidateSelection(mvx, mvy, &num_can, i0 >> 4, j0 >> 4, video, type_pred);
872 
873             dmin = 65535;
874 
875             /* check if all are equal */
876             if (num_can == ALL_CAND_EQUAL)
877             {
878                 i = i0 + mvx[0];
879                 j = j0 + mvy[0];
880 
881                 if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
882                 {
883                     cand = ref + i + j * lx;
884 
885                     d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
886 
887                     if (d < dmin)
888                     {
889                         dmin = d;
890                         imin = i;
891                         jmin = j;
892                         ncand = cand;
893                     }
894                 }
895             }
896             else
897             {
898                 /************** evaluate unique candidates **********************/
899                 for (k = 0; k < num_can; k++)
900                 {
901                     i = i0 + mvx[k];
902                     j = j0 + mvy[k];
903 
904                     if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
905                     {
906                         cand = ref + i + j * lx;
907                         d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
908 
909                         if (d < dmin)
910                         {
911                             dmin = d;
912                             imin = i;
913                             jmin = j;
914                             ncand = cand;
915                         }
916                         else if ((d == dmin) && PV_ABS(mvx[k]) + PV_ABS(mvy[k]) < PV_ABS(i0 - imin) + PV_ABS(j0 - jmin))
917                         {
918                             dmin = d;
919                             imin = i;
920                             jmin = j;
921                             ncand = cand;
922                         }
923                     }
924                 }
925             }
926             if (num_can == 0 || dmin == 65535) /* no candidate selected */
927             {
928                 ncand = ref + i0 + j0 * lx; /* use (0,0) MV as initial value */
929                 mot[mbnum][7].sad = dmin = (*SAD_Macroblock)(ncand, cur, (65535 << 16) | lx, extra_info);
930 #if (ZERO_MV_PREF==1)   /* compute (0,0) MV at the end */
931                 d0 = dmin;
932 #endif
933                 imin = i0;
934                 jmin = j0;
935             }
936 
937 #if (ZERO_MV_PREF==0)  /*  COMPUTE ZERO VECTOR FIRST !!!!!*/
938             dmin -= PREF_NULL_VEC;
939 #endif
940 
941             /******************* local refinement ***************************/
942             center_again = 0;
943             last_loc = new_loc = 0;
944             //          ncand = ref + jmin*lx + imin;  /* center of the search */
945             step = 0;
946             dn[0] = dmin;
947             while (!center_again && step <= max_step)
948             {
949 
950                 MoveNeighborSAD(dn, last_loc);
951 
952                 center_again = 1;
953                 i = imin;
954                 j = jmin - 1;
955                 cand = ref + i + j * lx;
956 
957                 /*  starting from [0,-1] */
958                 /* spiral check one step at a time*/
959                 for (k = 2; k <= 8; k += 2)
960                 {
961                     if (!tab_exclude[last_loc][k]) /* exclude last step computation */
962                     {       /* not already computed */
963                         if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
964                         {
965                             d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
966                             dn[k] = d; /* keep it for half pel use */
967 
968                             if (d < dmin)
969                             {
970                                 ncand = cand;
971                                 dmin = d;
972                                 imin = i;
973                                 jmin = j;
974                                 center_again = 0;
975                                 new_loc = k;
976                             }
977                             else if ((d == dmin) && PV_ABS(i0 - i) + PV_ABS(j0 - j) < PV_ABS(i0 - imin) + PV_ABS(j0 - jmin))
978                             {
979                                 ncand = cand;
980                                 imin = i;
981                                 jmin = j;
982                                 center_again = 0;
983                                 new_loc = k;
984                             }
985                         }
986                     }
987                     if (k == 8)  /* end side search*/
988                     {
989                         if (!center_again)
990                         {
991                             k = -1; /* start diagonal search */
992                             cand -= lx;
993                             j--;
994                         }
995                     }
996                     else
997                     {
998                         next = refine_next[k][0];
999                         i += next;
1000                         cand += next;
1001                         next = refine_next[k][1];
1002                         j += next;
1003                         cand += lx * next;
1004                     }
1005                 }
1006                 last_loc = new_loc;
1007                 step ++;
1008             }
1009             if (!center_again)
1010                 MoveNeighborSAD(dn, last_loc);
1011 
1012             *hp_guess = FindMin(dn);
1013 
1014         }
1015 
1016 #if (ZERO_MV_PREF==1)   /* compute (0,0) MV at the end */
1017         if (d0 - PREF_NULL_VEC < dmin)
1018         {
1019             ncand = ref + i0 + j0 * lx;
1020             dmin = d0;
1021             imin = i0;
1022             jmin = j0;
1023         }
1024 #endif
1025         mot[mbnum][0].sad = dmin;
1026         mot[mbnum][0].x = (imin - i0) << 1;
1027         mot[mbnum][0].y = (jmin - j0) << 1;
1028         imin0 = imin << 1;  /* 16x16 MV in half-pel resolution */
1029         jmin0 = jmin << 1;
1030         best_cand[0] = ncand;
1031     }
1032     /* imin and jmin is the best 1 MV */
1033 #ifndef NO_INTER4V
1034     /*******************  Find 4 motion vectors ****************************/
1035     if (use_4mv && !h263_mode)
1036     {
1037 #ifdef _SAD_STAT
1038         num_Blk += 4;
1039 #endif
1040         /* starting from the best 1MV */
1041         //offset = imin + jmin*lx;
1042         iorg = i0;
1043         jorg = j0;
1044 
1045         for (comp = 0; comp < 4; comp++)
1046         {
1047             i0 = iorg + ((comp & 1) << 3);
1048             j0 = jorg + ((comp & 2) << 2);
1049 
1050             imin = (imin0 >> 1) + ((comp & 1) << 3);    /* starting point from 16x16 MV */
1051             jmin = (jmin0 >> 1) + ((comp & 2) << 2);
1052             ncand = ref + imin + jmin * lx;
1053 
1054             cur8 = cur + ((comp & 1) << 3) + (((comp & 2) << 2) << 4) ; /* 11/30/05, smaller cache */
1055 
1056             /*  find limit of the search (adjusting search range)*/
1057             ilow = i0 - range;
1058             ihigh = i0 + range - 1 ;/* 4/9/01 */
1059             if (ilow < -15)
1060                 ilow = -15;
1061             if (ihigh > width - 1)
1062                 ihigh = width - 1;
1063             jlow = j0 - range;
1064             jhigh = j0 + range - 1 ;/* 4/9/01 */
1065             if (jlow < -15)
1066                 jlow = -15;
1067             if (jhigh > height - 1)
1068                 jhigh = height - 1;
1069 
1070             SAD_Block = video->functionPointer->SAD_Block;
1071 
1072             if (FS_en)  /* fullsearch enable, center around 16x16 MV */
1073             {
1074                 dmin =  fullsearchBlk(video, currVol, ncand, cur8, &imin, &jmin, ilow, ihigh, jlow, jhigh, range);
1075                 ncand = ref + imin + jmin * lx;
1076 
1077                 mot[mbnum][comp+1].sad = dmin;
1078                 mot[mbnum][comp+1].x = (imin - i0) << 1;
1079                 mot[mbnum][comp+1].y = (jmin - j0) << 1;
1080                 best_cand[comp+1] = ncand;
1081             }
1082             else    /* no fullsearch, do local search */
1083             {
1084                 /* starting point from 16x16 */
1085                 dmin = (*SAD_Block)(ncand, cur8, 65536, lx, extra_info);
1086 
1087                 /******************* local refinement ***************************/
1088                 center_again = 0;
1089                 last_loc = 0;
1090 
1091                 while (!center_again)
1092                 {
1093                     center_again = 1;
1094                     i = imin;
1095                     j = jmin - 1;
1096                     cand = ref + i + j * lx;
1097 
1098                     /*  starting from [0,-1] */
1099                     /* spiral check one step at a time*/
1100                     for (k = 2; k <= 8; k += 2)
1101                     {
1102                         if (!tab_exclude[last_loc][k]) /* exclude last step computation */
1103                         {       /* not already computed */
1104                             if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1105                             {
1106                                 d = (*SAD_Block)(cand, cur8, dmin, lx, extra_info);
1107 
1108                                 if (d < dmin)
1109                                 {
1110                                     ncand = cand;
1111                                     dmin = d;
1112                                     imin = i;
1113                                     jmin = j;
1114                                     center_again = 0;
1115                                     new_loc = k;
1116                                 }
1117                                 else if ((d == dmin) &&
1118                                          PV_ABS(i0 - i) + PV_ABS(j0 - j) < PV_ABS(i0 - imin) + PV_ABS(j0 - jmin))
1119                                 {
1120                                     ncand = cand;
1121                                     imin = i;
1122                                     jmin = j;
1123                                     center_again = 0;
1124                                     new_loc = k;
1125                                 }
1126                             }
1127                         }
1128                         if (k == 8)  /* end side search*/
1129                         {
1130                             if (!center_again)
1131                             {
1132                                 k = -1; /* start diagonal search */
1133                                 if (j <= height - 1 && j > 0)   cand -= lx;
1134                                 j--;
1135                             }
1136                         }
1137                         else
1138                         {
1139                             next = refine_next[k][0];
1140                             cand += next;
1141                             i += next;
1142                             next = refine_next[k][1];
1143                             cand += lx * next;
1144                             j += next;
1145                         }
1146                     }
1147                     last_loc = new_loc;
1148                 }
1149                 mot[mbnum][comp+1].sad = dmin;
1150                 mot[mbnum][comp+1].x = (imin - i0) << 1;
1151                 mot[mbnum][comp+1].y = (jmin - j0) << 1;
1152                 best_cand[comp+1] = ncand;
1153             }
1154             /********************************************/
1155         }
1156     }
1157     else
1158 #endif  /* NO_INTER4V */
1159     {
1160         mot[mbnum][1].sad = mot[mbnum][2].sad = mot[mbnum][3].sad = mot[mbnum][4].sad = (dmin + 2) >> 2;
1161         mot[mbnum][1].x = mot[mbnum][2].x = mot[mbnum][3].x = mot[mbnum][4].x = mot[mbnum][0].x;
1162         mot[mbnum][1].y = mot[mbnum][2].y = mot[mbnum][3].y = mot[mbnum][4].y = mot[mbnum][0].y;
1163         best_cand[1] = best_cand[2] = best_cand[3] = best_cand[4] = ncand;
1164 
1165     }
1166     return ;
1167 }
1168 
1169 
1170 /*===============================================================================
1171     Function:   fullsearch
1172     Date:       09/16/2000
1173     Purpose:    Perform full-search motion estimation over the range of search
1174                 region in a spiral-outward manner.
1175     Input/Output:   VideoEncData, current Vol, previou Vop, pointer to the left corner of
1176                 current VOP, current coord (also output), boundaries.
1177 ===============================================================================*/
1178 
fullsearch(VideoEncData * video,Vol * currVol,UChar * prev,UChar * cur,Int * imin,Int * jmin,Int ilow,Int ihigh,Int jlow,Int jhigh)1179 Int fullsearch(VideoEncData *video, Vol *currVol, UChar *prev, UChar *cur,
1180                Int *imin, Int *jmin, Int ilow, Int ihigh, Int jlow, Int jhigh)
1181 {
1182     Int range = video->encParams->SearchRange;
1183     UChar *cand;
1184     Int i, j, k, l;
1185     Int d, dmin;
1186     Int i0 = *imin; /* current position */
1187     Int j0 = *jmin;
1188     Int(*SAD_Macroblock)(UChar*, UChar*, Int, void*) = video->functionPointer->SAD_Macroblock;
1189     void *extra_info = video->sad_extra_info;
1190 //  UChar h263_mode = video->encParams->H263_Enabled;
1191     Int lx = video->currVop->pitch; /* with padding */
1192 
1193     Int offset = i0 + j0 * lx;
1194 
1195     OSCL_UNUSED_ARG(currVol);
1196 
1197     cand = prev + offset;
1198 
1199     dmin  = (*SAD_Macroblock)(cand, cur, (65535 << 16) | lx, (void*)extra_info) - PREF_NULL_VEC;
1200 
1201     /* perform spiral search */
1202     for (k = 1; k <= range; k++)
1203     {
1204 
1205         i = i0 - k;
1206         j = j0 - k;
1207 
1208         cand = prev + i + j * lx;
1209 
1210         for (l = 0; l < 8*k; l++)
1211         {
1212             /* no need for boundary checking again */
1213             if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1214             {
1215                 d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, (void*)extra_info);
1216 
1217                 if (d < dmin)
1218                 {
1219                     dmin = d;
1220                     *imin = i;
1221                     *jmin = j;
1222                 }
1223                 else if ((d == dmin) && PV_ABS(i0 - i) + PV_ABS(j0 - j) < PV_ABS(i0 - *imin) + PV_ABS(j0 - *jmin))
1224                 {
1225                     dmin = d;
1226                     *imin = i;
1227                     *jmin = j;
1228                 }
1229             }
1230 
1231             if (l < (k << 1))
1232             {
1233                 i++;
1234                 cand++;
1235             }
1236             else if (l < (k << 2))
1237             {
1238                 j++;
1239                 cand += lx;
1240             }
1241             else if (l < ((k << 2) + (k << 1)))
1242             {
1243                 i--;
1244                 cand--;
1245             }
1246             else
1247             {
1248                 j--;
1249                 cand -= lx;
1250             }
1251         }
1252     }
1253 
1254     return dmin;
1255 }
1256 
1257 #ifndef NO_INTER4V
1258 /*===============================================================================
1259     Function:   fullsearchBlk
1260     Date:       01/9/2001
1261     Purpose:    Perform full-search motion estimation of an 8x8 block over the range
1262                 of search region in a spiral-outward manner centered at the 16x16 MV.
1263     Input/Output:   VideoEncData, MB coordinate, pointer to the initial MV on the
1264                 reference, pointer to coor of current block, search range.
1265 ===============================================================================*/
fullsearchBlk(VideoEncData * video,Vol * currVol,UChar * cent,UChar * cur,Int * imin,Int * jmin,Int ilow,Int ihigh,Int jlow,Int jhigh,Int range)1266 Int fullsearchBlk(VideoEncData *video, Vol *currVol, UChar *cent, UChar *cur,
1267                   Int *imin, Int *jmin, Int ilow, Int ihigh, Int jlow, Int jhigh, Int range)
1268 {
1269     UChar *cand, *ref;
1270     Int i, j, k, l, istart, jstart;
1271     Int d, dmin;
1272     Int lx = video->currVop->pitch; /* with padding */
1273     Int(*SAD_Block)(UChar*, UChar*, Int, Int, void*) = video->functionPointer->SAD_Block;
1274     void *extra_info = video->sad_extra_info;
1275 
1276     OSCL_UNUSED_ARG(currVol);
1277 
1278     /* starting point centered at 16x16 MV */
1279     ref = cent;
1280     istart = *imin;
1281     jstart = *jmin;
1282 
1283     dmin = (*SAD_Block)(ref, cur, 65536, lx, (void*)extra_info);
1284 
1285     cand = ref;
1286     /* perform spiral search */
1287     for (k = 1; k <= range; k++)
1288     {
1289 
1290         i = istart - k;
1291         j = jstart - k;
1292         cand -= (lx + 1);  /* candidate region */
1293 
1294         for (l = 0; l < 8*k; l++)
1295         {
1296             /* no need for boundary checking again */
1297             if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1298             {
1299                 d = (*SAD_Block)(cand, cur, dmin, lx, (void*)extra_info);
1300 
1301                 if (d < dmin)
1302                 {
1303                     dmin = d;
1304                     *imin = i;
1305                     *jmin = j;
1306                 }
1307                 else if ((d == dmin) &&
1308                          PV_ABS(istart - i) + PV_ABS(jstart - j) < PV_ABS(istart - *imin) + PV_ABS(jstart - *jmin))
1309                 {
1310                     dmin = d;
1311                     *imin = i;
1312                     *jmin = j;
1313                 }
1314             }
1315 
1316             if (l < (k << 1))
1317             {
1318                 i++;
1319                 cand++;
1320             }
1321             else if (l < (k << 2))
1322             {
1323                 j++;
1324                 cand += lx;
1325             }
1326             else if (l < ((k << 2) + (k << 1)))
1327             {
1328                 i--;
1329                 cand--;
1330             }
1331             else
1332             {
1333                 j--;
1334                 cand -= lx;
1335             }
1336         }
1337     }
1338 
1339     return dmin;
1340 }
1341 #endif /* NO_INTER4V */
1342 
1343 /*===============================================================================
1344     Function:   CandidateSelection
1345     Date:       09/16/2000
1346     Purpose:    Fill up the list of candidate using spatio-temporal correlation
1347                 among neighboring blocks.
1348     Input/Output:   type_pred = 0: first pass, 1: second pass, or no SCD
1349     Modified:    09/23/01, get rid of redundant candidates before passing back.
1350 ===============================================================================*/
1351 
CandidateSelection(Int * mvx,Int * mvy,Int * num_can,Int imb,Int jmb,VideoEncData * video,Int type_pred)1352 void CandidateSelection(Int *mvx, Int *mvy, Int *num_can, Int imb, Int jmb,
1353                         VideoEncData *video, Int type_pred)
1354 {
1355     MOT **mot = video->mot;
1356     MOT *pmot;
1357     Int mbnum = video->mbnum;
1358     Vol *currVol = video->vol[video->currLayer];
1359     Int mbwidth = currVol->nMBPerRow;
1360     Int mbheight = currVol->nMBPerCol;
1361     Int i, j, same, num1;
1362 
1363     *num_can = 0;
1364 
1365     if (video->forwardRefVop->predictionType == P_VOP)
1366     {
1367         /* Spatio-Temporal Candidate (five candidates) */
1368         if (type_pred == 0) /* first pass */
1369         {
1370             pmot = &mot[mbnum][0]; /* same coordinate previous frame */
1371             mvx[(*num_can)] = (pmot->x) >> 1;
1372             mvy[(*num_can)++] = (pmot->y) >> 1;
1373             if (imb >= (mbwidth >> 1) && imb > 0)  /*left neighbor previous frame */
1374             {
1375                 pmot = &mot[mbnum-1][0];
1376                 mvx[(*num_can)] = (pmot->x) >> 1;
1377                 mvy[(*num_can)++] = (pmot->y) >> 1;
1378             }
1379             else if (imb + 1 < mbwidth)   /*right neighbor previous frame */
1380             {
1381                 pmot = &mot[mbnum+1][0];
1382                 mvx[(*num_can)] = (pmot->x) >> 1;
1383                 mvy[(*num_can)++] = (pmot->y) >> 1;
1384             }
1385 
1386             if (jmb < mbheight - 1)  /*bottom neighbor previous frame */
1387             {
1388                 pmot = &mot[mbnum+mbwidth][0];
1389                 mvx[(*num_can)] = (pmot->x) >> 1;
1390                 mvy[(*num_can)++] = (pmot->y) >> 1;
1391             }
1392             else if (jmb > 0)   /*upper neighbor previous frame */
1393             {
1394                 pmot = &mot[mbnum-mbwidth][0];
1395                 mvx[(*num_can)] = (pmot->x) >> 1;
1396                 mvy[(*num_can)++] = (pmot->y) >> 1;
1397             }
1398 
1399             if (imb > 0 && jmb > 0)  /* upper-left neighbor current frame*/
1400             {
1401                 pmot = &mot[mbnum-mbwidth-1][0];
1402                 mvx[(*num_can)] = (pmot->x) >> 1;
1403                 mvy[(*num_can)++] = (pmot->y) >> 1;
1404             }
1405             if (jmb > 0 && imb < mbheight - 1)  /* upper right neighbor current frame*/
1406             {
1407                 pmot = &mot[mbnum-mbwidth+1][0];
1408                 mvx[(*num_can)] = (pmot->x) >> 1;
1409                 mvy[(*num_can)++] = (pmot->y) >> 1;
1410             }
1411         }
1412         else    /* second pass */
1413             /* original ST1 algorithm */
1414         {
1415             pmot = &mot[mbnum][0]; /* same coordinate previous frame */
1416             mvx[(*num_can)] = (pmot->x) >> 1;
1417             mvy[(*num_can)++] = (pmot->y) >> 1;
1418 
1419             if (imb > 0)  /*left neighbor current frame */
1420             {
1421                 pmot = &mot[mbnum-1][0];
1422                 mvx[(*num_can)] = (pmot->x) >> 1;
1423                 mvy[(*num_can)++] = (pmot->y) >> 1;
1424             }
1425             if (jmb > 0)  /*upper neighbor current frame */
1426             {
1427                 pmot = &mot[mbnum-mbwidth][0];
1428                 mvx[(*num_can)] = (pmot->x) >> 1;
1429                 mvy[(*num_can)++] = (pmot->y) >> 1;
1430             }
1431             if (imb < mbwidth - 1)  /*right neighbor previous frame */
1432             {
1433                 pmot = &mot[mbnum+1][0];
1434                 mvx[(*num_can)] = (pmot->x) >> 1;
1435                 mvy[(*num_can)++] = (pmot->y) >> 1;
1436             }
1437             if (jmb < mbheight - 1)  /*bottom neighbor previous frame */
1438             {
1439                 pmot = &mot[mbnum+mbwidth][0];
1440                 mvx[(*num_can)] = (pmot->x) >> 1;
1441                 mvy[(*num_can)++] = (pmot->y) >> 1;
1442             }
1443         }
1444     }
1445     else  /* only Spatial Candidate (four candidates)*/
1446     {
1447         if (type_pred == 0) /*first pass*/
1448         {
1449             if (imb > 1)  /* neighbor two blocks away to the left */
1450             {
1451                 pmot = &mot[mbnum-2][0];
1452                 mvx[(*num_can)] = (pmot->x) >> 1;
1453                 mvy[(*num_can)++] = (pmot->y) >> 1;
1454             }
1455             if (imb > 0 && jmb > 0)  /* upper-left neighbor */
1456             {
1457                 pmot = &mot[mbnum-mbwidth-1][0];
1458                 mvx[(*num_can)] = (pmot->x) >> 1;
1459                 mvy[(*num_can)++] = (pmot->y) >> 1;
1460             }
1461             if (jmb > 0 && imb < mbheight - 1)  /* upper right neighbor */
1462             {
1463                 pmot = &mot[mbnum-mbwidth+1][0];
1464                 mvx[(*num_can)] = (pmot->x) >> 1;
1465                 mvy[(*num_can)++] = (pmot->y) >> 1;
1466             }
1467         }
1468 //#ifdef SCENE_CHANGE_DETECTION
1469         /* second pass (ST2 algorithm)*/
1470         else if (type_pred == 1) /* 4/7/01 */
1471         {
1472             if (imb > 0)  /*left neighbor current frame */
1473             {
1474                 pmot = &mot[mbnum-1][0];
1475                 mvx[(*num_can)] = (pmot->x) >> 1;
1476                 mvy[(*num_can)++] = (pmot->y) >> 1;
1477             }
1478             if (jmb > 0)  /*upper neighbor current frame */
1479             {
1480                 pmot = &mot[mbnum-mbwidth][0];
1481                 mvx[(*num_can)] = (pmot->x) >> 1;
1482                 mvy[(*num_can)++] = (pmot->y) >> 1;
1483             }
1484             if (imb < mbwidth - 1)  /*right neighbor current frame */
1485             {
1486                 pmot = &mot[mbnum+1][0];
1487                 mvx[(*num_can)] = (pmot->x) >> 1;
1488                 mvy[(*num_can)++] = (pmot->y) >> 1;
1489             }
1490             if (jmb < mbheight - 1)  /*bottom neighbor current frame */
1491             {
1492                 pmot = &mot[mbnum+mbwidth][0];
1493                 mvx[(*num_can)] = (pmot->x) >> 1;
1494                 mvy[(*num_can)++] = (pmot->y) >> 1;
1495             }
1496         }
1497 //#else
1498         else /* original ST1 algorithm */
1499         {
1500             if (imb > 0)  /*left neighbor current frame */
1501             {
1502                 pmot = &mot[mbnum-1][0];
1503                 mvx[(*num_can)] = (pmot->x) >> 1;
1504                 mvy[(*num_can)++] = (pmot->y) >> 1;
1505 
1506                 if (jmb > 0)  /*upper-left neighbor current frame */
1507                 {
1508                     pmot = &mot[mbnum-mbwidth-1][0];
1509                     mvx[(*num_can)] = (pmot->x) >> 1;
1510                     mvy[(*num_can)++] = (pmot->y) >> 1;
1511                 }
1512 
1513             }
1514             if (jmb > 0)  /*upper neighbor current frame */
1515             {
1516                 pmot = &mot[mbnum-mbwidth][0];
1517                 mvx[(*num_can)] = (pmot->x) >> 1;
1518                 mvy[(*num_can)++] = (pmot->y) >> 1;
1519 
1520                 if (imb < mbheight - 1)  /*upper-right neighbor current frame */
1521                 {
1522                     pmot = &mot[mbnum-mbwidth+1][0];
1523                     mvx[(*num_can)] = (pmot->x) >> 1;
1524                     mvy[(*num_can)++] = (pmot->y) >> 1;
1525                 }
1526             }
1527         }
1528 //#endif
1529     }
1530 
1531     /* 3/23/01, remove redundant candidate (possible k-mean) */
1532     num1 = *num_can;
1533     *num_can = 1;
1534     for (i = 1; i < num1; i++)
1535     {
1536         same = 0;
1537         j = 0;
1538         while (!same && j < *num_can)
1539         {
1540 #if (CANDIDATE_DISTANCE==0)
1541             if (mvx[i] == mvx[j] && mvy[i] == mvy[j])
1542 #else
1543             // modified k-mean, 3/24/01, shouldn't be greater than 3
1544             if (PV_ABS(mvx[i] - mvx[j]) + PV_ABS(mvy[i] - mvy[j]) < CANDIDATE_DISTANCE)
1545 #endif
1546                 same = 1;
1547             j++;
1548         }
1549         if (!same)
1550         {
1551             mvx[*num_can] = mvx[i];
1552             mvy[*num_can] = mvy[i];
1553             (*num_can)++;
1554         }
1555     }
1556 
1557 #ifdef _SAD_STAT
1558     num_cand += (*num_can);
1559 #endif
1560 
1561     if (num1 == 5 && *num_can == 1)
1562         *num_can = ALL_CAND_EQUAL; /* all are equal */
1563 
1564     return ;
1565 }
1566 
1567 /*===========================================================================
1568     Function:   RasterIntraUpdate
1569     Date:       2/26/01
1570     Purpose:    To raster-scan assign INTRA-update .
1571                 N macroblocks are updated (also was programmable).
1572 ===========================================================================*/
RasterIntraUpdate(UChar * intraArray,UChar * Mode,Int totalMB,Int numRefresh)1573 void RasterIntraUpdate(UChar *intraArray, UChar *Mode, Int totalMB, Int numRefresh)
1574 {
1575     Int indx, i;
1576 
1577     /* find the last refresh MB */
1578     indx = 0;
1579     while (indx < totalMB && intraArray[indx] == 1)
1580         indx++;
1581 
1582     /* add more  */
1583     for (i = 0; i < numRefresh && indx < totalMB; i++)
1584     {
1585         Mode[indx] = MODE_INTRA;
1586         intraArray[indx++] = 1;
1587     }
1588 
1589     /* if read the end of frame, reset and loop around */
1590     if (indx >= totalMB - 1)
1591     {
1592         ResetIntraUpdate(intraArray, totalMB);
1593         indx = 0;
1594         while (i < numRefresh && indx < totalMB)
1595         {
1596             intraArray[indx] = 1;
1597             Mode[indx++] = MODE_INTRA;
1598             i++;
1599         }
1600     }
1601 
1602     return ;
1603 }
1604 
1605 /*===========================================================================
1606     Function:   ResetIntraUpdate
1607     Date:       11/28/00
1608     Purpose:    Reset already intra updated flags to all zero
1609 ===========================================================================*/
1610 
ResetIntraUpdate(UChar * intraArray,Int totalMB)1611 void ResetIntraUpdate(UChar *intraArray, Int totalMB)
1612 {
1613     M4VENC_MEMSET(intraArray, 0, sizeof(UChar)*totalMB);
1614     return ;
1615 }
1616 
1617 /*===========================================================================
1618     Function:   ResetIntraUpdateRegion
1619     Date:       12/1/00
1620     Purpose:    Reset already intra updated flags in one region to all zero
1621 ===========================================================================*/
ResetIntraUpdateRegion(UChar * intraArray,Int start_i,Int rwidth,Int start_j,Int rheight,Int mbwidth,Int mbheight)1622 void ResetIntraUpdateRegion(UChar *intraArray, Int start_i, Int rwidth,
1623                             Int start_j, Int rheight, Int mbwidth, Int mbheight)
1624 {
1625     Int indx, j;
1626 
1627     if (start_i + rwidth >= mbwidth)
1628         rwidth = mbwidth - start_i;
1629     if (start_j + rheight >= mbheight)
1630         rheight = mbheight - start_j;
1631 
1632     for (j = start_j; j < start_j + rheight; j++)
1633     {
1634         indx = j * mbwidth;
1635         M4VENC_MEMSET(intraArray + indx + start_i, 0, sizeof(UChar)*rwidth);
1636     }
1637 
1638     return ;
1639 }
1640 
1641 /*************************************************************
1642     Function:   MoveNeighborSAD
1643     Date:       3/27/01
1644     Purpose:    Move neighboring SAD around when center has shifted
1645 *************************************************************/
1646 
MoveNeighborSAD(Int dn[],Int new_loc)1647 void MoveNeighborSAD(Int dn[], Int new_loc)
1648 {
1649     Int tmp[9];
1650     tmp[0] = dn[0];
1651     tmp[1] = dn[1];
1652     tmp[2] = dn[2];
1653     tmp[3] = dn[3];
1654     tmp[4] = dn[4];
1655     tmp[5] = dn[5];
1656     tmp[6] = dn[6];
1657     tmp[7] = dn[7];
1658     tmp[8] = dn[8];
1659     dn[0] = dn[1] = dn[2] = dn[3] = dn[4] = dn[5] = dn[6] = dn[7] = dn[8] = 65536;
1660 
1661     switch (new_loc)
1662     {
1663         case 0:
1664             break;
1665         case 1:
1666             dn[4] = tmp[2];
1667             dn[5] = tmp[0];
1668             dn[6] = tmp[8];
1669             break;
1670         case 2:
1671             dn[4] = tmp[3];
1672             dn[5] = tmp[4];
1673             dn[6] = tmp[0];
1674             dn[7] = tmp[8];
1675             dn[8] = tmp[1];
1676             break;
1677         case 3:
1678             dn[6] = tmp[4];
1679             dn[7] = tmp[0];
1680             dn[8] = tmp[2];
1681             break;
1682         case 4:
1683             dn[1] = tmp[2];
1684             dn[2] = tmp[3];
1685             dn[6] = tmp[5];
1686             dn[7] = tmp[6];
1687             dn[8] = tmp[0];
1688             break;
1689         case 5:
1690             dn[1] = tmp[0];
1691             dn[2] = tmp[4];
1692             dn[8] = tmp[6];
1693             break;
1694         case 6:
1695             dn[1] = tmp[8];
1696             dn[2] = tmp[0];
1697             dn[3] = tmp[4];
1698             dn[4] = tmp[5];
1699             dn[8] = tmp[7];
1700             break;
1701         case 7:
1702             dn[2] = tmp[8];
1703             dn[3] = tmp[0];
1704             dn[4] = tmp[6];
1705             break;
1706         case 8:
1707             dn[2] = tmp[1];
1708             dn[3] = tmp[2];
1709             dn[4] = tmp[0];
1710             dn[5] = tmp[6];
1711             dn[6] = tmp[7];
1712             break;
1713     }
1714     dn[0] = tmp[new_loc];
1715 
1716     return ;
1717 }
1718 
1719 /* 3/28/01, find minimal of dn[9] */
1720 
FindMin(Int dn[])1721 Int FindMin(Int dn[])
1722 {
1723     Int min, i;
1724     Int dmin;
1725 
1726     dmin = dn[1];
1727     min = 1;
1728     for (i = 2; i < 9; i++)
1729     {
1730         if (dn[i] < dmin)
1731         {
1732             dmin = dn[i];
1733             min = i;
1734         }
1735     }
1736 
1737     return min;
1738 }
1739 
1740 
1741 
1742