1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21  *******************************************************************************
22  * @file
23  *  ih264e_me.c
24  *
25  * @brief
26  *
27  *
28  * @author
29  *  Ittiam
30  *
31  * @par List of Functions:
32  *  -
33  *
34  * @remarks
35  *  None
36  *
37  *******************************************************************************
38  */
39 
40 /*****************************************************************************/
41 /* File Includes                                                             */
42 /*****************************************************************************/
43 
44 /* System include files */
45 #include <stdio.h>
46 #include <assert.h>
47 #include <limits.h>
48 #include <string.h>
49 
50 /* User include files */
51 #include "ime_typedefs.h"
52 #include "ime_distortion_metrics.h"
53 #include "ime_defs.h"
54 #include "ime_structs.h"
55 #include "ime.h"
56 #include "ime_macros.h"
57 #include "ime_statistics.h"
58 
59 /**
60 *******************************************************************************
61 *
62 * @brief Diamond Search
63 *
64 * @par Description:
65 *  This function computes the sad at vertices of several layers of diamond grid
66 *  at a time. The number of layers of diamond grid that would be evaluated is
67 *  configurable.The function computes the sad at vertices of a diamond grid. If
68 *  the sad at the center of the diamond grid is lesser than the sad at any other
69 *  point of the diamond grid, the function marks the candidate Mb partition as
70 *  mv.
71 *
* @param[in] ps_me_ctxt
*  pointer to me context; lambda motion, search range, number of layers and
*  the sad function are read from it
*
* @param[in] i4_reflist
*  index of the reference list whose mb partition context and reference
*  luma buffer are used
*
* @returns  none. The best mv pair and the corresponding distortion and cost
*  are written to the mb partition context when a cheaper position is found
85 *
86 * @remarks Diamond Srch, radius is 1
87 *
88 *******************************************************************************
89 */
void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
{
    /*
     * Radius-1 diamond refinement around the current mv of
     * ps_me_ctxt->as_mb_part[i4_reflist].
     *
     * Each layer evaluates the west, east, north and south neighbours of the
     * current centre and moves the centre to the cheapest of them. The search
     * ends when
     *   - the centre itself stays the cheapest point (u4_exit is set to 1),
     *   - any neighbour would fall outside the search range, or
     *   - u4_num_layers layers have been evaluated.
     * The partition's cost, distortion and mv are written back only when a
     * position cheaper than the incoming one was found.
     */

    /* vertex offsets; entry k is paired with entry k of the sad array */
    static const WORD32 ai4_dx[4] = {-1, 1, 0, 0};
    static const WORD32 ai4_dy[4] = {0, 0, -1, 1};

    /* MB partition info */
    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];

    /* lagrange parameter and mv bits table */
    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;

    /* predicted mv, subtracted from the candidate before the bits lookup */
    WORD32 i4_pred_x = ps_mb_part->s_mv_pred.i2_mvx;
    WORD32 i4_pred_y = ps_mb_part->s_mv_pred.i2_mvy;

    /* srch range */
    WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
    WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
    WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
    WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;

    /* src / ref macro block pointers and their strides */
    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
    UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;

    /* best cost / sad seen so far, seeded from the initial evaluation */
    WORD32 i4_cost_least = ps_mb_part->i4_mb_cost;
    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;

    /* current centre of the diamond */
    WORD16 i2_mvx = ps_mb_part->s_mv_curr.i2_mvx;
    WORD16 i2_mvy = ps_mb_part->s_mv_curr.i2_mvy;

    /* Diamond search Iteration Max Cnt */
    UWORD32 u4_layers_left = ps_me_ctxt->u4_num_layers;

    while (u4_layers_left--)
    {
        WORD32 i4_sad[4];
        WORD32 i4_best_vertex = -1;
        WORD32 k;
        UWORD8 *pu1_ref;

        /* FIXME : is this the right way to check for out of bounds ? */
        if ((i2_mvx - 1 < i4_srch_range_w) ||
            (i2_mvx + 1 > i4_srch_range_e) ||
            (i2_mvy - 1 < i4_srch_range_n) ||
            (i2_mvy + 1 > i4_srch_range_s))
        {
            break;
        }

        pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd);

        ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref,
                                                pu1_curr_mb,
                                                i4_ref_strd,
                                                i4_src_strd,
                                                i4_sad);

        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2);
        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2);
        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2);
        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2);

        /* evaluate the vertices in index order; ties keep the earlier best */
        for (k = 0; k < 4; k++)
        {
            WORD32 i4_cand_x = i2_mvx + ai4_dx[k];
            WORD32 i4_cand_y = i2_mvy + ai4_dy[k];

            /* Scale by multiplication: left-shifting a negative value is
             * undefined behaviour in C. */
            WORD32 i4_cost = i4_sad[k] + u4_lambda_motion *
                            (pu1_mv_bits[(i4_cand_x * 4) - i4_pred_x]
                           + pu1_mv_bits[(i4_cand_y * 4) - i4_pred_y]);

            if (i4_cost_least > i4_cost)
            {
                i4_cost_least = i4_cost;
                i4_distortion_least = i4_sad[k];
                i4_best_vertex = k;
            }
        }

        /* no vertex beat the centre: the search has converged */
        if (i4_best_vertex < 0)
        {
            ps_mb_part->u4_exit = 1;
            break;
        }

        /* re-centre the diamond on the winning vertex */
        i2_mvx = (WORD16)(i2_mvx + ai4_dx[i4_best_vertex]);
        i2_mvy = (WORD16)(i2_mvy + ai4_dy[i4_best_vertex]);
    }

    if (i4_cost_least < ps_mb_part->i4_mb_cost)
    {
        ps_mb_part->i4_mb_cost = i4_cost_least;
        ps_mb_part->i4_mb_distortion = i4_distortion_least;
        ps_mb_part->s_mv_curr.i2_mvx = i2_mvx;
        ps_mb_part->s_mv_curr.i2_mvy = i2_mvy;
    }
}
247 
248 
249 /**
250 *******************************************************************************
251 *
252 * @brief This function computes the best motion vector among the tentative mv
253 * candidates chosen.
254 *
255 * @par Description:
256 *  This function determines the position in the search window at which the motion
257 *  estimation should begin in order to minimise the number of search iterations.
258 *
* @param[in] ps_me_ctxt
*  pointer to me context; the candidate mv list, lambda motion and the fast
*  sad flag are read from it
*
* @param[in] i4_reflist
*  index of the reference list whose candidates, mb partition context and
*  reference luma buffer are used
*
* @returns  none. The best candidate's index, mv pair, distortion and cost
*  are written to the mb partition context when it beats the current cost
269 *
270 * @remarks none
271 *
272 *******************************************************************************
273 */
274 
void ime_evaluate_init_srchposn_16x16
        (
            me_ctxt_t *ps_me_ctxt,
            WORD32 i4_reflist
        )
{
    /*
     * Evaluates every distinct candidate mv in
     * ps_me_ctxt->as_mv_init_search[i4_reflist] with a 16x16 sad and keeps the
     * cheapest one (cost = sad + lambda * mv bits). The mb partition context
     * is updated only when that cost is lower than the cost it already holds.
     */

    /* mb partitions info */
    mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]);

    /* list of candidate mvs and its length */
    ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist];
    UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist];

    /* lagrange parameter and mv bits table */
    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;

    /* selects the entry of the sad function table */
    UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;

    /* src / ref macro block pointers and their strides */
    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
    UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;

    /* best candidate so far */
    WORD32 i4_mb_cost_least = INT_MAX;
    WORD32 i4_distortion_least = INT_MAX;
    WORD32 i4_srch_pos_idx = 0;

    UWORD32 i;

    /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */
    for (i = 0; i < u4_num_candidates; i++)
    {
        WORD32 i4_mvx = ps_mv_list[i].i2_mvx;
        WORD32 i4_mvy = ps_mv_list[i].i2_mvy;

        /* SAD (distortion metric) of the 16x16 block and the derived cost */
        WORD32 i4_mb_distortion;
        WORD32 i4_mb_cost;

        UWORD8 *pu1_ref;
        UWORD32 j;

        /* skip a candidate that repeats an earlier entry of the list */
        for (j = 0; j < i; j++)
        {
            if ((i4_mvx == ps_mv_list[j].i2_mvx) &&
                (i4_mvy == ps_mv_list[j].i2_mvy))
            {
                break;
            }
        }
        if (j < i)
        {
            continue;
        }

        /* adjust ref pointer */
        pu1_ref = pu1_ref_mb + i4_mvx + (i4_mvy * i4_ref_strd);

        /* compute distortion; the best cost so far is handed to the sad
         * function as its fifth argument */
        ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](
                        pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd,
                        i4_mb_cost_least, &i4_mb_distortion);

        DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3);

        /* Compute cost. Scale by multiplication: left-shifting a negative
         * value is undefined behaviour in C. */
        i4_mb_cost = i4_mb_distortion + u4_lambda_motion *
                        (pu1_mv_bits[(i4_mvx * 4) - ps_mb_part->s_mv_pred.i2_mvx]
                       + pu1_mv_bits[(i4_mvy * 4) - ps_mb_part->s_mv_pred.i2_mvy]);

        if (i4_mb_cost < i4_mb_cost_least)
        {
            i4_mb_cost_least = i4_mb_cost;
            i4_distortion_least = i4_mb_distortion;
            i4_srch_pos_idx = i;
        }
    }

    if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
    {
        ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
        ps_mb_part->i4_mb_cost = i4_mb_cost_least;
        ps_mb_part->i4_mb_distortion = i4_distortion_least;
        ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx;
        ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy;
    }
}
366 
367 /**
368 *******************************************************************************
369 *
370 * @brief Searches for the best matching full pixel predictor within the search
371 * range
372 *
373 * @par Description:
374 *  This function begins by computing the mv predict vector for the current mb.
375 *  This is used for cost computations. Further basing on the algo. chosen, it
376 *  looks through a set of candidate vectors that best represent the mb a least
377 *  cost and returns this information.
378 *
* @param[in] ps_me_ctxt
*  pointer to me context
*
* @param[in] i4_ref_list
*  index of the reference list being searched
*
* @returns  none. The search range in the me context is narrowed and the
*  mv pair, distortion and cost are updated by the selected search routine
386 *
387 * @remarks none
388 *
389 *******************************************************************************
390 */
void ime_full_pel_motion_estimation_16x16
    (
        me_ctxt_t *ps_me_ctxt,
        WORD32 i4_ref_list
    )
{
    /* mb part info for the selected reference list */
    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list];

    /* best initial candidate: the search window is re-centred on it */
    WORD32 i4_centre_x = ps_mb_part->s_mv_curr.i2_mvx;
    WORD32 i4_centre_y = ps_mb_part->s_mv_curr.i2_mvy;

    /* maximum horizontal / vertical distance from the centre */
    WORD32 i4_bound_x = ps_me_ctxt->ai2_srch_boundaries[0];
    WORD32 i4_bound_y = ps_me_ctxt->ai2_srch_boundaries[1];

    /*
     * Intersect the existing search range with the window
     * [centre - bound, centre + bound] in both directions.
     *
     * FIXME: The motion vectors in a way can become unbounded. It may so
     * happen that MV might exceed the limit of the profile configured.
     */
    ps_me_ctxt->i4_srch_range_w = MAX(ps_me_ctxt->i4_srch_range_w,
                                      i4_centre_x - i4_bound_x);
    ps_me_ctxt->i4_srch_range_e = MIN(ps_me_ctxt->i4_srch_range_e,
                                      i4_centre_x + i4_bound_x);
    ps_me_ctxt->i4_srch_range_n = MAX(ps_me_ctxt->i4_srch_range_n,
                                      i4_centre_y - i4_bound_y);
    ps_me_ctxt->i4_srch_range_s = MIN(ps_me_ctxt->i4_srch_range_s,
                                      i4_centre_y + i4_bound_y);

    /* traverse about the best initial candidate; diamond search is the only
     * preset handled here, anything else trips the assert */
    if (DMND_SRCH == ps_me_ctxt->u4_me_speed_preset)
    {
        ime_diamond_search_16x16(ps_me_ctxt, i4_ref_list);
    }
    else
    {
        assert(0);
    }
}
430 
431 /**
432 *******************************************************************************
433 *
434 * @brief Searches for the best matching sub pixel predictor within the search
435 * range
436 *
437 * @par Description:
438 *  This function begins by searching across all sub pixel sample points
439 *  around the full pel motion vector. The vector with least cost is chosen as
440 *  the mv for the current mb. If the skip mode is not evaluated while analysing
441 *  the initial search candidates then analyse it here and update the mv.
442 *
* @param[in] ps_me_ctxt
*  pointer to me context
*
* @param[in] i4_reflist
*  index of the reference list whose mb partition context is refined
448 *
449 * @returns none
450 *
451 * @remarks none
452 *
453 *******************************************************************************
454 */
void ime_sub_pel_motion_estimation_16x16
    (
        me_ctxt_t *ps_me_ctxt,
        WORD32 i4_reflist
    )
{
    /*
     * Half-pel refinement of ps_me_ctxt->as_mb_part[i4_reflist].
     *
     * The eight half-pel neighbours of the current mv are costed
     * (cost = sad + lambda * mv bits) from one call to
     * pf_ime_sub_pel_compute_sad_16x16. When one of them is cheaper than the
     * current cost, the partition's mv (written in the same units the
     * candidates are built in: full-pel * 4 +/- 2), cost, distortion, best
     * half-pel buffer and search position index are updated.
     */

    /**************************************/
    /* Half-pel candidates, entry k is    */
    /* paired with ai4_sad[k]             */
    /* 0 - half x =  1, half y =  0       */
    /* 1 - half x = -1, half y =  0       */
    /* 2 - half x =  0, half y =  1       */
    /* 3 - half x =  0, half y = -1       */
    /* 4 - half x =  1, half y =  1       */
    /* 5 - half x = -1, half y =  1       */
    /* 6 - half x =  1, half y = -1       */
    /* 7 - half x = -1, half y = -1       */
    /**************************************/
    static const WORD32 ai4_half_x[8] = { 1, -1,  0,  0,  1, -1,  1, -1};
    static const WORD32 ai4_half_y[8] = { 0,  0,  1, -1,  1,  1, -1, -1};
#ifndef HP_PL
    /* sub-pel plane of each candidate: 0 = half x, 1 = half y, 2 = half xy */
    static const WORD32 ai4_plane[8]  = { 0,  0,  1,  1,  2,  2,  2,  2};
#endif

    /* mb partitions info */
    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];

    /* pointer to src macro block */
    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;

    /* strides */
    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
    WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd;

    /* mv bits and lagrange constant */
    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;

    /* best cost / distortion so far, seeded from the current mv */
    WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost;
    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;

    /* best mv so far */
    WORD16 i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx;
    WORD16 i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy;

    /* Motion vectors in full-pel units */
    WORD16 mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2;
    WORD16 mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2;

    /* Best half pel buffer and the plane it came from */
    UWORD8 *pu1_best_hpel_buf = NULL;
    WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx;

    /* plane pointers as handed to the sad function */
    UWORD8 *apu1_plane_base[3];

    WORD32 ai4_sad[8];
    WORD32 k;

    /**************************************************************/
    /* The planes in the context point to the half pel pixel on   */
    /* the left (x), top (y) and top left (xy) of the full pel.   */
    /* pf_ime_sub_pel_compute_sad_16x16 expects the right, bottom */
    /* and bottom right positions, hence the adjustments here.    */
    /**************************************************************/
    apu1_plane_base[0] = ps_me_ctxt->apu1_subpel_buffs[0] + 1;
    apu1_plane_base[1] = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd;
    apu1_plane_base[2] = ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd;

    ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb,
                                                 apu1_plane_base[0],
                                                 apu1_plane_base[1],
                                                 apu1_plane_base[2],
                                                 i4_src_strd, i4_ref_strd,
                                                 ai4_sad);

    /* evaluate the candidates in index order; ties keep the earlier best */
    for (k = 0; k < 8; k++)
    {
        /* Candidate mv. Scale by multiplication: left-shifting a negative
         * value is undefined behaviour in C. */
        WORD32 mv_x_tmp = (mv_x * 4) + (2 * ai4_half_x[k]);
        WORD32 mv_y_tmp = (mv_y * 4) + (2 * ai4_half_y[k]);

        WORD32 i4_mb_distortion = ai4_sad[k];

        /* compute cost */
        WORD32 i4_mb_cost = i4_mb_distortion + u4_lambda_motion *
                        (pu1_mv_bits[mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
                       + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy]);

        if (i4_mb_cost < i4_mb_cost_least)
        {
            i4_mb_cost_least = i4_mb_cost;
            i4_distortion_least = i4_mb_distortion;

            i2_mv_u_x = mv_x_tmp;
            i2_mv_u_y = mv_y_tmp;

#ifndef HP_PL
            {
                /* pick the block inside the plane: one row up for a negative
                 * half y, one pixel left for a negative half x */
                WORD32 i4_plane = ai4_plane[k];
                UWORD8 *pu1_buf = apu1_plane_base[i4_plane];

                if (ai4_half_y[k] < 0)
                {
                    pu1_buf -= i4_ref_strd;
                }
                if (ai4_half_x[k] < 0)
                {
                    pu1_buf -= 1;
                }

                ps_me_ctxt->apu1_subpel_buffs[i4_plane] = pu1_buf;
                pu1_best_hpel_buf = pu1_buf;

                i4_srch_pos_idx = i4_plane;
            }
#endif
        }
    }

    if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
    {
        ps_mb_part->i4_mb_cost = i4_mb_cost_least;
        ps_mb_part->i4_mb_distortion = i4_distortion_least;
        ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
        ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
        ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
        ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
    }
}
672 
673 /**
674 *******************************************************************************
675 *
676 * @brief This function computes cost of skip macroblocks
677 *
678 * @par Description:
679 *
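* @param[in] ps_me_ctxt
*  pointer to me ctxt
*
* @param[in] ps_skip_mv
*  skip motion vector; it is rejected when its two low bits are set in
*  either component or when its full-pel value lies outside the search range
*
* @param[in,out] ps_smb_part_info
*  mb partition ctxt that receives the skip cost, distortion and mv when the
*  skip cost does not exceed its current cost
*
* @param[in] u4_use_stat_sad
*  1 selects pf_ime_compute_sad_stat_luma_16x16, any other value selects
*  pf_ime_compute_sad_16x16
*
* @param[in] i4_reflist
*  index of the reference luma buffer to use
*
* @param[in] i4_is_slice_type_b
*  multiplier applied to the second skip bias term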
682 *
683 *
684 * @returns  none
685 *
686 * @remarks
687 * NOTE: while computing the skip cost, do not enable early exit from compute
688 * sad function because, a negative bias gets added later
* Note that the last ME candidate in me ctxt is taken as skip motion vector
690 *
691 *******************************************************************************
692 */
void ime_compute_skip_cost
    (
         me_ctxt_t *ps_me_ctxt,
         ime_mv_t *ps_skip_mv,
         mb_part_ctxt *ps_smb_part_info,
         UWORD32 u4_use_stat_sad,
         WORD32 i4_reflist,
         WORD32 i4_is_slice_type_b
    )
{
    /* skip mv in full-pel units, and the same mv clamped to the srch range */
    ime_mv_t s_fpel_mv;
    ime_mv_t s_clamped_mv;

    /* reference block addressed by the skip mv */
    UWORD8 *pu1_ref_blk;

    /* SAD (distortion metric) of the mb and the biased skip cost */
    WORD32 i4_skip_sad;
    WORD32 i4_skip_cost;

    /* a skip mv with a sub-pel component is not evaluated */
    if ((ps_skip_mv->i2_mvx & 0x3) || (ps_skip_mv->i2_mvy & 0x3))
    {
        return;
    }

    s_fpel_mv.i2_mvx = (ps_skip_mv->i2_mvx + 2) >> 2;
    s_fpel_mv.i2_mvy = (ps_skip_mv->i2_mvy + 2) >> 2;

    /* a skip mv that clamping would alter lies outside the search range */
    s_clamped_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w,
                                ps_me_ctxt->i4_srch_range_e,
                                s_fpel_mv.i2_mvx);
    s_clamped_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n,
                                ps_me_ctxt->i4_srch_range_s,
                                s_fpel_mv.i2_mvy);

    if ((s_clamped_mv.i2_mvx != s_fpel_mv.i2_mvx) ||
        (s_clamped_mv.i2_mvy != s_fpel_mv.i2_mvy))
    {
        return;
    }

    /* adjust ref pointer */
    pu1_ref_blk = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]
                    + s_fpel_mv.i2_mvx
                    + (s_fpel_mv.i2_mvy * ps_me_ctxt->i4_rec_strd);

    if (1 == u4_use_stat_sad)
    {
        UWORD32 u4_is_nonzero;

        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(
                        ps_me_ctxt->pu1_src_buf_luma, pu1_ref_blk,
                        ps_me_ctxt->i4_src_strd, ps_me_ctxt->i4_rec_strd,
                        ps_me_ctxt->pu2_sad_thrsh,
                        &i4_skip_sad, &u4_is_nonzero);

        if (0 == u4_is_nonzero)
        {
            /* the sad function cleared the non-zero flag: record a min sad of 0 */
            ps_me_ctxt->u4_min_sad_reached = 1;
            ps_me_ctxt->i4_min_sad = 0;
        }
        else if (i4_skip_sad <= ps_me_ctxt->i4_min_sad)
        {
            ps_me_ctxt->u4_min_sad_reached = 1;
            ps_me_ctxt->i4_min_sad = i4_skip_sad;
        }
    }
    else
    {
        /* INT_MAX is passed in the slot where the init search passes its
         * best cost so far; see the early-exit note in the function header */
        ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad](
                        ps_me_ctxt->pu1_src_buf_luma, pu1_ref_blk,
                        ps_me_ctxt->i4_src_strd, ps_me_ctxt->i4_rec_strd,
                        INT_MAX, &i4_skip_sad);

        if (i4_skip_sad <= ps_me_ctxt->i4_min_sad)
        {
            ps_me_ctxt->u4_min_sad_reached = 1;
            ps_me_ctxt->i4_min_sad = i4_skip_sad;
        }
    }

    /* For skip mode cost & distortion are identical, but a bias is
     * subtracted to favor skip mode.
     * Doc. JVT B118 Suggests SKIP_BIAS as 16.
     * TODO : Empirical analysis of SKIP_BIAS is necessary */
    i4_skip_cost = i4_skip_sad - (ps_me_ctxt->u4_lambda_motion * (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1]  * i4_is_slice_type_b));

    /* ties go to skip: "<=" rather than "<" */
    if (i4_skip_cost <= ps_smb_part_info->i4_mb_cost)
    {
        ps_smb_part_info->i4_mb_cost = i4_skip_cost;
        ps_smb_part_info->i4_mb_distortion = i4_skip_sad;
        ps_smb_part_info->s_mv_curr.i2_mvx = s_fpel_mv.i2_mvx;
        ps_smb_part_info->s_mv_curr.i2_mvy = s_fpel_mv.i2_mvy;
    }
}
784 
785