1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22  *******************************************************************************
23  * @file
24  *  ih264e_me.c
25  *
26  * @brief
27  *  Contains definition of functions for motion estimation
28  *
29  * @author
30  *  ittiam
31  *
32  * @par List of Functions:
33  *  - ih264e_init_mv_bits()
34  *  - ih264e_skip_analysis_chroma()
35  *  - ih264e_skip_analysis_luma()
36  *  - ih264e_analyse_skip()
37  *  - ih264e_get_search_candidates()
38  *  - ih264e_find_skip_motion_vector()
39  *  - ih264e_get_mv_predictor()
40  *  - ih264e_mv_pred()
41  *  - ih264e_mv_pred_me()
42  *  - ih264e_init_me()
43  *  - ih264e_compute_me()
44  *  - ih264e_compute_me_nmb()
45  *
46  * @remarks
47  *  None
48  *
49  *******************************************************************************
50  */
51 
52 /*****************************************************************************/
53 /* File Includes                                                             */
54 /*****************************************************************************/
55 
56 /* System include files */
57 #include <stdio.h>
58 #include <assert.h>
59 #include <limits.h>
60 
61 /* User include files */
62 #include "ih264_typedefs.h"
63 #include "iv2.h"
64 #include "ive2.h"
65 #include "ithread.h"
66 #include "ih264_platform_macros.h"
67 #include "ih264_defs.h"
68 #include "ime_defs.h"
69 #include "ime_distortion_metrics.h"
70 #include "ime_structs.h"
71 #include "ih264_structs.h"
72 #include "ih264_trans_quant_itrans_iquant.h"
73 #include "ih264_inter_pred_filters.h"
74 #include "ih264_mem_fns.h"
75 #include "ih264_padding.h"
76 #include "ih264_intra_pred_filters.h"
77 #include "ih264_deblk_edge_filters.h"
78 #include "ih264_cabac_tables.h"
79 #include "ih264e_defs.h"
80 #include "ih264e_error.h"
81 #include "ih264e_bitstream.h"
82 #include "irc_cntrl_param.h"
83 #include "irc_frame_info_collector.h"
84 #include "ih264e_rate_control.h"
85 #include "ih264e_cabac_structs.h"
86 #include "ih264e_structs.h"
87 #include "ih264e_globals.h"
88 #include "ih264_macros.h"
89 #include "ih264e_me.h"
90 #include "ime.h"
91 #include "ih264_debug.h"
92 #include "ih264e_intra_modes_eval.h"
93 #include "ih264e_core_coding.h"
94 #include "ih264e_mc.h"
95 #include "ih264e_debug.h"
96 #include "ih264e_half_pel.h"
97 #include "ime_statistics.h"
98 #include "ih264e_platform_macros.h"
99 
100 
101 /*****************************************************************************/
102 /* Function Definitions                                                      */
103 /*****************************************************************************/
104 
105 /**
106 *******************************************************************************
107 *
108 * @brief
109 *  This function populates the length of the codewords for motion vectors in the
110 *  range (-search range, search range) in pixels
111 *
112 * @param[in] ps_me
113 *  Pointer to me ctxt
114 *
115 * @param[out] pu1_mv_bits
116 *  length of the codeword for all mv's
117 *
118 * @remarks The length of the code words are derived from signed exponential
119 * goloumb codes.
120 *
121 *******************************************************************************
122 */
ih264e_init_mv_bits(me_ctxt_t * ps_me_ctxt)123 void ih264e_init_mv_bits(me_ctxt_t *ps_me_ctxt)
124 {
125     /* temp var */
126     WORD32 i, codesize = 3, diff, limit;
127     UWORD32 u4_code_num, u4_range;
128     UWORD32 u4_uev_min, u4_uev_max, u4_sev_min, u4_sev_max;
129 
130     /* max srch range */
131     diff = MAX(DEFAULT_MAX_SRCH_RANGE_X, DEFAULT_MAX_SRCH_RANGE_Y);
132     /* sub pel */
133     diff <<= 2;
134     /* delta mv */
135     diff <<= 1;
136 
137     /* codeNum for positive integer     =  2x-1     : Table9-3  */
138     u4_code_num = (diff << 1);
139 
140     /* get range of the bit string and put using put_bits()                 */
141     GETRANGE(u4_range, u4_code_num);
142 
143     limit = 2*u4_range - 1;
144 
145     /* init mv bits */
146     ps_me_ctxt->pu1_mv_bits[0] = 1;
147 
148     while (codesize < limit)
149     {
150         u4_uev_min = (1 << (codesize >> 1));
151         u4_uev_max = 2*u4_uev_min - 1;
152 
153         u4_sev_min = u4_uev_min >> 1;
154         u4_sev_max = u4_uev_max >> 1;
155 
156         DEBUG("\n%d min, %d max %d codesize", u4_sev_min, u4_sev_max, codesize);
157 
158         for (i = u4_sev_min; i <= (WORD32)u4_sev_max; i++)
159         {
160             ps_me_ctxt->pu1_mv_bits[-i] = ps_me_ctxt->pu1_mv_bits[i] = codesize;
161         }
162 
163         codesize += 2;
164     }
165 }
166 
167 
168 
169 /**
170 *******************************************************************************
171 *
172 * @brief Determines the valid candidates for which the initial search shall happen.
173 * The best of these candidates is used to center the diamond pixel search.
174 *
175 * @par Description: The function sends the skip, (0,0), left, top and top-right
176 * neighbouring MBs MVs. The left, top and top-right MBs MVs are used because
177 * these are the same MVs that are used to form the MV predictor. This initial MV
178 * search candidates need not take care of slice boundaries and hence neighbor
179 * availability checks are not made here.
180 *
181 * @param[in] ps_left_mb_pu
182 *  pointer to left mb motion vector info
183 *
184 * @param[in] ps_top_mb_pu
185 *  pointer to top & top right mb motion vector info
186 *
187 * @param[in] ps_top_left_mb_pu
188 *  pointer to top left mb motion vector info
189 *
190 * @param[out] ps_skip_mv
191 *  pointer to skip motion vectors for the curr mb
192 *
193 * @param[in] i4_mb_x
194 *  mb index x
195 *
196 * @param[in] i4_mb_y
197 *  mb index y
198 *
199 * @param[in] i4_wd_mbs
200 *  pic width in mbs
201 *
202 * @param[in] ps_motionEst
203 *  pointer to me context
204 *
205 * @returns  The list of MVs to be used of priming the full pel search and the
206 * number of such MVs
207 *
208 * @remarks
209 *   Assumptions : 1. Assumes Only partition of size 16x16
210 *
211 *******************************************************************************
212 */
ih264e_get_search_candidates(process_ctxt_t * ps_proc,me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)213 static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
214                                          me_ctxt_t *ps_me_ctxt,
215                                          WORD32 i4_reflist)
216 {
217     /* curr mb indices */
218     WORD32 i4_mb_x = ps_proc->i4_mb_x;
219 
220     /* Motion vector */
221     mv_t *ps_left_mv, *ps_top_mv, *ps_top_left_mv, *ps_top_right_mv;
222 
223     /* Pred modes */
224     WORD32 i4_left_mode, i4_top_mode, i4_top_left_mode, i4_top_right_mode;
225 
226     /* mb part info */
227     mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
228 
229     /* mvs */
230     WORD32 mvx, mvy;
231 
232     /* ngbr availability */
233     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
234 
235     /* Current mode */
236     WORD32 i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
237 
238     /* srch range*/
239     WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
240     WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
241     WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
242     WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
243 
244     /* num of candidate search candidates */
245     UWORD32 u4_num_candidates = 0;
246 
247     ps_left_mv = &ps_proc->s_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
248     ps_top_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x)->s_me_info[i4_reflist].s_mv;
249     ps_top_left_mv = &ps_proc->s_top_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
250     ps_top_right_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->s_me_info[i4_reflist].s_mv;
251 
252     i4_left_mode = ps_proc->s_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
253     i4_top_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x)->b2_pred_mode != i4_cmpl_predmode;
254     i4_top_left_mode = ps_proc->s_top_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
255     i4_top_right_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->b2_pred_mode != i4_cmpl_predmode;
256 
257     /* Taking the Zero motion vector as one of the candidates   */
258     ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = 0;
259     ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = 0;
260 
261     u4_num_candidates++;
262 
263     /* Taking the Left MV Predictor as one of the candidates    */
264     if (ps_ngbr_avbl->u1_mb_a && i4_left_mode)
265     {
266         mvx      = (ps_left_mv->i2_mvx + 2) >> 2;
267         mvy      = (ps_left_mv->i2_mvy + 2) >> 2;
268 
269         mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
270         mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
271 
272         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
273         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
274 
275         u4_num_candidates ++;
276     }
277 
278     /* Taking the Top MV Predictor as one of the candidates     */
279     if (ps_ngbr_avbl->u1_mb_b && i4_top_mode)
280     {
281         mvx      = (ps_top_mv->i2_mvx + 2) >> 2;
282         mvy      = (ps_top_mv->i2_mvy + 2) >> 2;
283 
284         mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
285         mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
286 
287         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
288         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
289 
290         u4_num_candidates ++;
291 
292         /* Taking the TopRt MV Predictor as one of the candidates   */
293         if (ps_ngbr_avbl->u1_mb_c && i4_top_right_mode)
294         {
295             mvx      = (ps_top_right_mv->i2_mvx + 2) >> 2;
296             mvy      = (ps_top_right_mv->i2_mvy + 2)>> 2;
297 
298             mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
299             mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
300 
301             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
302             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
303 
304             u4_num_candidates ++;
305         }
306         /* Taking the TopLt MV Predictor as one of the candidates   */
307         else if(ps_ngbr_avbl->u1_mb_d && i4_top_left_mode)
308         {
309             mvx      = (ps_top_left_mv->i2_mvx + 2) >> 2;
310             mvy      = (ps_top_left_mv->i2_mvy + 2) >> 2;
311 
312             mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
313             mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
314 
315             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
316             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
317 
318             u4_num_candidates ++;
319         }
320     }
321 
322 
323     /********************************************************************/
324     /*                            MV Prediction                         */
325     /********************************************************************/
326     ih264e_mv_pred_me(ps_proc, i4_reflist);
327 
328     ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvx;
329     ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvy;
330 
331     /* Get the skip motion vector                               */
332     {
333         ps_me_ctxt->i4_skip_type = ps_proc->ps_codec->apf_find_skip_params_me
334                                     [ps_proc->i4_slice_type](ps_proc, i4_reflist);
335 
336         /* Taking the Skip motion vector as one of the candidates   */
337         mvx = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvx + 2) >> 2;
338         mvy = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvy + 2) >> 2;
339 
340         mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
341         mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
342 
343         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
344         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
345         u4_num_candidates++;
346 
347         if (ps_proc->i4_slice_type == BSLICE)
348         {
349             /* Taking the temporal Skip motion vector as one of the candidates   */
350             mvx = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvx + 2) >> 2;
351             mvy = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvy + 2) >> 2;
352 
353             mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
354             mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
355 
356             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
357             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
358             u4_num_candidates++;
359         }
360     }
361 
362     ASSERT(u4_num_candidates <= 6);
363 
364     ps_me_ctxt->u4_num_candidates[i4_reflist] = u4_num_candidates;
365 }
366 
367 /**
368 *******************************************************************************
369 *
370 * @brief The function computes parameters for a PSKIP MB
371 *
372 * @par Description:
373 *  The function updates the skip motion vector and checks if the current
374 *  MB can be a skip PSKIP mB or not
375 *
376 * @param[in] ps_proc
377 *  Pointer to process context
378 *
379 * @param[in] u4_for_me
380 *  Flag to indicate function is called for ME or not
381 *
382 * @param[out] i4_ref_list
383 *  Current active refernce list
384 *
385 * @returns Flag indicating if the current MB can be marked as skip
386 *
387 * @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
388 *   specification.
389 *
390 *******************************************************************************
391 */
ih264e_find_pskip_params(process_ctxt_t * ps_proc,WORD32 i4_reflist)392 WORD32 ih264e_find_pskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
393 {
394     /* left mb motion vector */
395     enc_pu_t *ps_left_mb_pu ;
396 
397     /* top mb motion vector */
398     enc_pu_t *ps_top_mb_pu ;
399 
400     /* Skip mv */
401     mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
402 
403     UNUSED(i4_reflist);
404 
405     ps_left_mb_pu = &ps_proc->s_left_mb_pu ;
406     ps_top_mb_pu = ps_proc->ps_top_row_pu + ps_proc->i4_mb_x;
407 
408     if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
409         (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
410         (
411           (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
412           (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
413           (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
414        ) ||
415        (
416           (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
417           (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
418           (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
419        )
420      )
421 
422     {
423         ps_skip_mv->i2_mvx = 0;
424         ps_skip_mv->i2_mvy = 0;
425     }
426     else
427     {
428         ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
429         ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
430     }
431 
432     if ( (ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx == ps_skip_mv->i2_mvx)
433      && (ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy == ps_skip_mv->i2_mvy))
434     {
435         return 1;
436     }
437 
438     return 0;
439 }
440 
441 /**
442 *******************************************************************************
443 *
444 * @brief The function computes parameters for a PSKIP MB
445 *
446 * @par Description:
447 *  The function updates the skip motion vector and checks if the current
448 *  MB can be a skip PSKIP mB or not
449 *
450 * @param[in] ps_proc
451 *  Pointer to process context
452 *
453 * @param[in] u4_for_me
454 *  Flag to dincate fucntion is called for ME or not
455 *
456 * @param[out] i4_ref_list
457 *  Current active refernce list
458 *
459 * @returns Flag indicating if the current MB can be marked as skip
460 *
461 * @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
462 *   specification.
463 *
464 *******************************************************************************
465 */
ih264e_find_pskip_params_me(process_ctxt_t * ps_proc,WORD32 i4_reflist)466 WORD32 ih264e_find_pskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
467 {
468     /* left mb motion vector */
469     enc_pu_t *ps_left_mb_pu ;
470 
471     /* top mb motion vector */
472     enc_pu_t *ps_top_mb_pu ;
473 
474     /* Skip mv */
475     mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
476 
477     UNUSED(i4_reflist);
478 
479     ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
480     ps_top_mb_pu = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
481 
482     if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
483         (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
484         (
485           (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
486           (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
487           (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
488         ) ||
489         (
490           (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
491           (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
492           (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
493         )
494      )
495 
496     {
497         ps_skip_mv->i2_mvx = 0;
498         ps_skip_mv->i2_mvy = 0;
499     }
500     else
501     {
502         ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
503         ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
504     }
505 
506     return PRED_L0;
507 }
508 
509 
510 /**
511 *******************************************************************************
512 *
513 * @brief motion vector predictor
514 *
515 * @par Description:
516 *  The routine calculates the motion vector predictor for a given block,
517 *  given the candidate MV predictors.
518 *
519 * @param[in] ps_left_mb_pu
520 *  pointer to left mb motion vector info
521 *
522 * @param[in] ps_top_row_pu
523 *  pointer to top & top right mb motion vector info
524 *
525 * @param[out] ps_pred_mv
526 *  pointer to candidate predictors for the current block
527 *
528 * @returns  The x & y components of the MV predictor.
529 *
530 * @remarks The code implements the logic as described in sec 8.4.1.3 in H264
531 *   specification.
532 *   Assumptions : 1. Assumes Single reference frame
533 *                 2. Assumes Only partition of size 16x16
534 *
535 *******************************************************************************
536 */
ih264e_get_mv_predictor(enc_pu_t * ps_left_mb_pu,enc_pu_t * ps_top_row_pu,enc_pu_mv_t * ps_pred_mv,WORD32 i4_ref_list)537 void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu,
538                              enc_pu_t *ps_top_row_pu,
539                              enc_pu_mv_t *ps_pred_mv,
540                              WORD32 i4_ref_list)
541 {
542 
543     /* Indicated the current ref */
544     WORD8 i1_ref_idx;
545 
546     /* For pred L0 */
547     i1_ref_idx = -1;
548     {
549         /* temp var */
550         WORD32 pred_algo = 3, a, b, c;
551 
552         /* If only one of the candidate blocks has a reference frame equal to
553          * the current block then use the same block as the final predictor */
554         a = (ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
555         b = (ps_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
556         c = (ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
557 
558         if (a == 0 && b == -1 && c == -1)
559             pred_algo = 0; /* LEFT */
560         else if(a == -1 && b == 0 && c == -1)
561             pred_algo = 1; /* TOP */
562         else if(a == -1 && b == -1 && c == 0)
563             pred_algo = 2; /* TOP RIGHT */
564 
565         switch (pred_algo)
566         {
567             case 0:
568                 /* left */
569                 ps_pred_mv->s_mv.i2_mvx = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx;
570                 ps_pred_mv->s_mv.i2_mvy = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy;
571                 break;
572             case 1:
573                 /* top */
574                 ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx;
575                 ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy;
576                 break;
577             case 2:
578                 /* top right */
579                 ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx;
580                 ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy;
581                 break;
582             case 3:
583                 /* median */
584                 MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx,
585                        ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx,
586                        ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx,
587                        ps_pred_mv->s_mv.i2_mvx);
588                 MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy,
589                        ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy,
590                        ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy,
591                        ps_pred_mv->s_mv.i2_mvy);
592 
593                 break;
594             default:
595                 break;
596         }
597     }
598 }
599 
600 /**
601 *******************************************************************************
602 *
603 * @brief This function performs MV prediction
604 *
605 * @par Description:
606 *
607 * @param[in] ps_proc
608 *  Process context corresponding to the job
609 *
610 * @returns  none
611 *
612 * @remarks none
613 *  This function will update the MB availability since intra inter decision
614 *  should be done before the call
615 *
616 *******************************************************************************
617 */
ih264e_mv_pred(process_ctxt_t * ps_proc,WORD32 i4_slice_type)618 void ih264e_mv_pred(process_ctxt_t *ps_proc, WORD32 i4_slice_type)
619 {
620 
621     /* left mb motion vector */
622     enc_pu_t *ps_left_mb_pu;
623 
624     /* top left mb motion vector */
625     enc_pu_t *ps_top_left_mb_pu;
626 
627     /* top row motion vector info */
628     enc_pu_t *ps_top_row_pu;
629 
630     /* predicted motion vector */
631     enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
632 
633     /* zero mv */
634     mv_t zero_mv = { 0, 0 };
635 
636     /*  mb neighbor availability */
637     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
638 
639     /* mb syntax elements of neighbors */
640     mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
641     mb_info_t *ps_top_left_syn;
642     UWORD32 u4_left_is_intra;
643 
644     /* Temp var */
645     WORD32 i4_reflist, max_reflist, i4_cmpl_predmode;
646 
647     ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ele);
648     u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
649     ps_left_mb_pu = &ps_proc->s_left_mb_pu;
650     ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
651     ps_top_row_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
652 
653     /* Number of ref lists to process */
654     max_reflist = (i4_slice_type == PSLICE) ? 1 : 2;
655 
656     for (i4_reflist = 0; i4_reflist < max_reflist; i4_reflist++)
657     {
658         i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
659 
660         /* Before performing mv prediction prepare the ngbr information and
661          * reset motion vectors basing on their availability */
662         if (!ps_ngbr_avbl->u1_mb_a || (u4_left_is_intra == 1)
663                         || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
664         {
665             /* left mv */
666             ps_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx = 0;
667             ps_left_mb_pu->s_me_info[i4_reflist].s_mv = zero_mv;
668         }
669         if (!ps_ngbr_avbl->u1_mb_b || ps_top_syn->u2_is_intra
670                         || (ps_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
671         {
672             /* top mv */
673             ps_top_row_pu[0].s_me_info[i4_reflist].i1_ref_idx = 0;
674             ps_top_row_pu[0].s_me_info[i4_reflist].s_mv = zero_mv;
675         }
676 
677         if (!ps_ngbr_avbl->u1_mb_c)
678         {
679             /* top right mv - When top right partition is not available for
680              * prediction if top left is available use it for prediction else
681              * set the mv information to -1 and (0, 0)
682              * */
683             if (!ps_ngbr_avbl->u1_mb_d || ps_top_left_syn->u2_is_intra
684                             || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
685             {
686                 ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
687                 ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
688             }
689             else
690             {
691                 ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = ps_top_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx;
692                 ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = ps_top_left_mb_pu->s_me_info[i4_reflist].s_mv;
693             }
694         }
695         else if(ps_top_syn[1].u2_is_intra
696                         || (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode))
697         {
698             ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
699             ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
700         }
701 
702         ih264e_get_mv_predictor(ps_left_mb_pu, ps_top_row_pu, &ps_pred_mv[i4_reflist], i4_reflist);
703     }
704 
705 }
706 
707 /**
708 *******************************************************************************
709 *
710 * @brief This function approximates Pred. MV
711 *
712 * @par Description:
713 *
714 * @param[in] ps_proc
715 *  Process context corresponding to the job
716 *
717 * @returns  none
718 *
719 * @remarks none
720 *  Motion estimation happens at nmb level. For cost calculations, mv is appro
721 *  ximated using this function
722 *
723 *******************************************************************************
724 */
ih264e_mv_pred_me(process_ctxt_t * ps_proc,WORD32 i4_ref_list)725 void ih264e_mv_pred_me(process_ctxt_t *ps_proc, WORD32 i4_ref_list)
726 {
727     /* left mb motion vector */
728     enc_pu_t *ps_left_mb_pu ;
729 
730     /* top left mb motion vector */
731     enc_pu_t *ps_top_left_mb_pu ;
732 
733     /* top row motion vector info */
734     enc_pu_t *ps_top_row_pu;
735 
736     enc_pu_t s_top_row_pu[2];
737 
738     /* predicted motion vector */
739     enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
740 
741     /* zero mv */
742     mv_t zero_mv = {0, 0};
743 
744     /* Complementary pred mode */
745     WORD32 i4_cmpl_predmode = (i4_ref_list == 0) ? PRED_L1 : PRED_L0;
746 
747     /*  mb neighbor availability */
748     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
749 
750     ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
751     ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
752     ps_top_row_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);
753 
754     s_top_row_pu[0] = ps_top_row_pu[0];
755     s_top_row_pu[1] = ps_top_row_pu[1];
756 
757     /*
758      * Before performing mv prediction prepare the ngbr information and
759      * reset motion vectors basing on their availability
760      */
761 
762     if (!ps_ngbr_avbl->u1_mb_a || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
763     {
764         /* left mv */
765         ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx = 0;
766         ps_left_mb_pu->s_me_info[i4_ref_list].s_mv = zero_mv;
767     }
768     if (!ps_ngbr_avbl->u1_mb_b || (s_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
769     {
770         /* top mv */
771         s_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx = 0;
772         s_top_row_pu[0].s_me_info[i4_ref_list].s_mv = zero_mv;
773 
774     }
775     if (!ps_ngbr_avbl->u1_mb_c)
776     {
777         /* top right mv - When top right partition is not available for
778          * prediction if top left is available use it for prediction else
779          * set the mv information to -1 and (0, 0)
780          * */
781         if (!ps_ngbr_avbl->u1_mb_d || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
782         {
783             s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
784             s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
785 
786             s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
787             s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
788         }
789         else
790         {
791             s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = ps_top_left_mb_pu->s_me_info[0].i1_ref_idx;
792             s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = ps_top_left_mb_pu->s_me_info[0].s_mv;
793         }
794     }
795     else if (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode)
796     {
797         ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
798         ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
799     }
800 
801     ih264e_get_mv_predictor(ps_left_mb_pu, &(s_top_row_pu[0]),
802                             &ps_pred_mv[i4_ref_list], i4_ref_list);
803 }
804 
805 /**
806 *******************************************************************************
807 *
808 * @brief This function initializes me ctxt
809 *
810 * @par Description:
811 *  Before dispatching the current job to me thread, the me context associated
812 *  with the job is initialized.
813 *
814 * @param[in] ps_proc
815 *  Process context corresponding to the job
816 *
817 * @returns  none
818 *
819 * @remarks none
820 *
821 *******************************************************************************
822 */
ih264e_init_me(process_ctxt_t * ps_proc)823 void ih264e_init_me(process_ctxt_t *ps_proc)
824 {
825     /* me ctxt */
826     me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
827 
828     /* codec context */
829     codec_t *ps_codec = ps_proc->ps_codec;
830 
831     ps_me_ctxt->i4_skip_bias[BSLICE] = SKIP_BIAS_B;
832 
833     if (ps_codec->s_cfg.u4_num_bframes == 0)
834     {
835        ps_me_ctxt->i4_skip_bias[PSLICE] = 4 * SKIP_BIAS_P;
836     }
837     else
838     {
839        ps_me_ctxt->i4_skip_bias[PSLICE] =  SKIP_BIAS_P;
840     }
841 
842     /* src ptr */
843     ps_me_ctxt->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma;
844     /* src stride */
845     ps_me_ctxt->i4_src_strd = ps_proc->i4_src_strd;
846 
847     /* ref ptrs and corresponding lagrange params */
848     ps_me_ctxt->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma[0];
849     ps_me_ctxt->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma[1];
850 
851     ps_me_ctxt->u4_lambda_motion = gu1_qp0[ps_me_ctxt->u1_mb_qp];
852 
853 
854 }
855 
856 
857 /**
858 *******************************************************************************
859 *
860 * @brief This function performs motion estimation for the current mb using
861 *   single reference list
862 *
863 * @par Description:
864 *  The current mb is compared with a list of mb's in the reference frame for
865 *  least cost. The mb that offers least cost is chosen as predicted mb and the
866 *  displacement of the predicted mb from index location of the current mb is
867 *  signaled as mv. The list of the mb's that are chosen in the reference frame
868 *  are dependent on the speed of the ME configured.
869 *
870 * @param[in] ps_proc
871 *  Process context corresponding to the job
872 *
873 * @returns  motion vector of the pred mb, sad, cost.
874 *
875 * @remarks none
876 *
877 *******************************************************************************
878 */
ih264e_compute_me_single_reflist(process_ctxt_t * ps_proc)879 void ih264e_compute_me_single_reflist(process_ctxt_t *ps_proc)
880 {
881     /* me ctxt */
882     me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
883 
884     /* codec context */
885     codec_t *ps_codec = ps_proc->ps_codec;
886 
887     /* recon stride */
888     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
889 
890     /* source buffer for halp pel generation functions */
891     UWORD8 *pu1_hpel_src;
892 
893     /* quantization parameters */
894     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
895 
896     /* Mb part ctxts for SKIP */
897     mb_part_ctxt s_skip_mbpart;
898 
899     /* Sad therholds */
900     ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
901 
902     {
903         WORD32 rows_above, rows_below, columns_left, columns_right;
904 
905         /* During evaluation for motion vectors do not search through padded regions */
906         /* Obtain number of rows and columns that are effective for computing for me evaluation */
907         rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
908         rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
909         columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
910         columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
911 
912         /* init srch range */
913         /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
914          * on all sides.
915          */
916         ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
917         ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
918         ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
919         ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
920 
921         /* this is to facilitate fast sub pel computation with minimal loads */
922         ps_me_ctxt->i4_srch_range_w += 1;
923         ps_me_ctxt->i4_srch_range_e -= 1;
924         ps_me_ctxt->i4_srch_range_n += 1;
925         ps_me_ctxt->i4_srch_range_s -= 1;
926     }
927 
928     /* Compute ME and store the MVs */
929 
930     /***********************************************************************
931      * Compute ME for list L0
932      ***********************************************************************/
933 
934     /* Init SATQD for the current list */
935     ps_me_ctxt->u4_min_sad_reached  = 0;
936     ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
937 
938     /* Get the seed motion vector candidates                    */
939     ih264e_get_search_candidates(ps_proc, ps_me_ctxt, PRED_L0);
940 
941     /* ****************************************************************
942      *Evaluate the SKIP for current list
943      * ****************************************************************/
944     s_skip_mbpart.s_mv_curr.i2_mvx = 0;
945     s_skip_mbpart.s_mv_curr.i2_mvy = 0;
946     s_skip_mbpart.i4_mb_cost = INT_MAX;
947     s_skip_mbpart.i4_mb_distortion = INT_MAX;
948 
949     ime_compute_skip_cost( ps_me_ctxt,
950                            (ime_mv_t *)(&ps_proc->ps_skip_mv[PRED_L0].s_mv),
951                            &s_skip_mbpart,
952                            ps_proc->ps_codec->s_cfg.u4_enable_satqd,
953                            PRED_L0,
954                            0 /* Not a Bslice */ );
955 
956     s_skip_mbpart.s_mv_curr.i2_mvx <<= 2;
957     s_skip_mbpart.s_mv_curr.i2_mvy <<= 2;
958 
959     /******************************************************************
960      * Evaluate ME For current list
961      *****************************************************************/
962     ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx = 0;
963     ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy = 0;
964     ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = INT_MAX;
965     ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = INT_MAX;
966 
967     /* Init Hpel */
968     ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf = NULL;
969 
970     /* In case we found out the minimum SAD, exit the ME eval */
971     if (!ps_me_ctxt->u4_min_sad_reached)
972     {
973         /* Evaluate search candidates for initial mv pt */
974         ime_evaluate_init_srchposn_16x16(ps_me_ctxt, PRED_L0);
975 
976         /********************************************************************/
977         /*                  full pel motion estimation                      */
978         /********************************************************************/
979         ime_full_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);
980 
981         /* Scale the MV to qpel resolution */
982         ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx <<= 2;
983         ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy <<= 2;
984 
985         if (ps_me_ctxt->u4_enable_hpel)
986         {
987             /* moving src pointer to the converged motion vector location*/
988             pu1_hpel_src =   ps_me_ctxt->apu1_ref_buf_luma[PRED_L0]
989                              + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx >> 2)
990                              + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy >> 2)* i4_rec_strd;
991 
992             ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
993             ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
994             ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
995 
996             ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
997 
998             /* half  pel search is done for both sides of full pel,
999              * hence half_x of width x height = 17x16 is created
1000              * starting from left half_x of converged full pel */
1001             pu1_hpel_src -= 1;
1002 
1003             /* computing half_x */
1004             ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
1005                                                   ps_me_ctxt->apu1_subpel_buffs[0],
1006                                                   i4_rec_strd,
1007                                                   ps_me_ctxt->u4_subpel_buf_strd);
1008 
1009             /*
1010              * Halfpel search is done for both sides of full pel,
1011              * hence half_y of width x height = 16x17 is created
1012              * starting from top half_y of converged full pel
1013              * for half_xy top_left is required
1014              * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
1015              */
1016             pu1_hpel_src -= i4_rec_strd;
1017 
1018             /* computing half_y , and half_xy*/
1019             ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
1020                             pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
1021                             ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
1022                             ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
1023                             ps_me_ctxt->u4_subpel_buf_strd);
1024 
1025             ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);
1026         }
1027     }
1028 
1029 
1030     /***********************************************************************
1031      * If a particular skiip Mv is giving better sad, copy to the corresponding
1032      * MBPART
1033      * In B slices this loop should go only to PREDL1: If we found min sad
1034      * we will go to the skip ref list only
1035      * Have to find a way to make it without too much change or new vars
1036      **********************************************************************/
1037     if (s_skip_mbpart.i4_mb_cost < ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost)
1038     {
1039         ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = s_skip_mbpart.i4_mb_cost;
1040         ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = s_skip_mbpart.i4_mb_distortion;
1041         ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = s_skip_mbpart.s_mv_curr;
1042     }
1043     else if (ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf)
1044     {
1045         /* Now we have to copy the buffers */
1046         ps_codec->pf_inter_pred_luma_copy(
1047                         ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf,
1048                         ps_proc->pu1_best_subpel_buf,
1049                         ps_me_ctxt->u4_subpel_buf_strd,
1050                         ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
1051                         NULL, 0);
1052     }
1053 
1054     /**********************************************************************
1055      * Now get the minimum of MB part sads by searching over all ref lists
1056      **********************************************************************/
1057     ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx;
1058     ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy;
1059     ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost;
1060     ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion;
1061     ps_proc->ps_cur_mb->u4_mb_type = P16x16;
1062     ps_proc->ps_pu->b2_pred_mode = PRED_L0 ;
1063 
1064     /* Mark the reflists */
1065     ps_proc->ps_pu->s_me_info[0].i1_ref_idx = -1;
1066     ps_proc->ps_pu->s_me_info[1].i1_ref_idx =  0;
1067 
1068     /* number of partitions */
1069     ps_proc->u4_num_sub_partitions = 1;
1070     *(ps_proc->pu4_mb_pu_cnt) = 1;
1071 
1072     /* position in-terms of PU */
1073     ps_proc->ps_pu->b4_pos_x = 0;
1074     ps_proc->ps_pu->b4_pos_y = 0;
1075 
1076     /* PU size */
1077     ps_proc->ps_pu->b4_wd = 3;
1078     ps_proc->ps_pu->b4_ht = 3;
1079 
1080     /* Update min sad conditions */
1081     if (ps_me_ctxt->u4_min_sad_reached == 1)
1082     {
1083         ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
1084         ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
1085     }
1086 
1087 }
1088 
1089 /**
1090 *******************************************************************************
1091 *
1092 * @brief This function performs motion estimation for the current NMB
1093 *
1094 * @par Description:
1095 * Intializes input and output pointers required by the function ih264e_compute_me
1096 * and calls the function ih264e_compute_me in a loop to process NMBs.
1097 *
1098 * @param[in] ps_proc
1099 *  Process context corresponding to the job
1100 *
1101 * @returns
1102 *
1103 * @remarks none
1104 *
1105 *******************************************************************************
1106 */
ih264e_compute_me_nmb(process_ctxt_t * ps_proc,UWORD32 u4_nmb_count)1107 void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
1108 {
1109     /* pic pu */
1110     enc_pu_t *ps_pu_begin = ps_proc->ps_pu;
1111 
1112     /* ME map */
1113     UWORD8 *pu1_me_map = ps_proc->pu1_me_map + (ps_proc->i4_mb_y * ps_proc->i4_wd_mbs);
1114 
1115     /* temp var */
1116     UWORD32 u4_i;
1117 
1118     ps_proc->s_me_ctxt.u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
1119     ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->s_left_mb_syntax_ele.u2_mb_type == PSKIP);
1120 
1121     for (u4_i = 0; u4_i < u4_nmb_count; u4_i++)
1122     {
1123         /* Wait for ME map */
1124         if (ps_proc->i4_mb_y > 0)
1125         {
1126             /* Wait for top right ME to be done */
1127             UWORD8 *pu1_me_map_tp_rw = ps_proc->pu1_me_map + (ps_proc->i4_mb_y - 1) * ps_proc->i4_wd_mbs;
1128 
1129             while (1)
1130             {
1131                 volatile UWORD8 *pu1_buf;
1132                 WORD32 idx = ps_proc->i4_mb_x + u4_i + 1;
1133 
1134                 idx = MIN(idx, (ps_proc->i4_wd_mbs - 1));
1135                 pu1_buf =  pu1_me_map_tp_rw + idx;
1136                 if(*pu1_buf)
1137                     break;
1138                 ithread_yield();
1139             }
1140         }
1141 
1142         ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].as_skip_mv[0]);
1143         ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl);
1144         ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].as_pred_mv[0]);
1145 
1146         ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]);
1147 
1148         ps_proc->ps_cur_mb->u4_min_sad = ps_proc->u4_min_sad;
1149         ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1150 
1151         ps_proc->ps_cur_mb->i4_mb_cost = INT_MAX;
1152         ps_proc->ps_cur_mb->i4_mb_distortion = SHRT_MAX;
1153 
1154         /* Set the best subpel buf to the correct mb so that the buffer can be copied */
1155         ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_i].pu1_best_sub_pel_buf;
1156         ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_i].u4_bst_spel_buf_strd;
1157 
1158         /* Set the min sad conditions */
1159         ps_proc->ps_cur_mb->u4_min_sad = ps_proc->ps_codec->u4_min_sad;
1160         ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1161 
1162         /* Derive neighbor availability for the current macroblock */
1163         ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
1164 
1165         /* init me */
1166         ih264e_init_me(ps_proc);
1167 
1168         /* Compute ME according to slice type */
1169         ps_proc->ps_codec->apf_compute_me[ps_proc->i4_slice_type](ps_proc);
1170 
1171         /* update top and left structs */
1172         {
1173             mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1174             mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME);
1175             enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
1176             enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
1177             enc_pu_t *ps_top_mv = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
1178 
1179             *ps_top_left_syn = *ps_top_syn;
1180 
1181             *ps_top_left_mb_pu = *ps_top_mv;
1182             *ps_left_mb_pu = *ps_proc->ps_pu;
1183         }
1184 
1185         ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1186 
1187         /* Copy the min sad reached info */
1188         ps_proc->ps_nmb_info[u4_i].u4_min_sad_reached = ps_proc->ps_cur_mb->u4_min_sad_reached;
1189         ps_proc->ps_nmb_info[u4_i].u4_min_sad   = ps_proc->ps_cur_mb->u4_min_sad;
1190 
1191         /*
1192          * To make sure that the MV map is properly sync to the
1193          * cache we need to do a DDB
1194          */
1195         {
1196             DATA_SYNC();
1197 
1198             pu1_me_map[ps_proc->i4_mb_x] = 1;
1199         }
1200         ps_proc->i4_mb_x++;
1201 
1202         ps_proc->s_me_ctxt.u4_left_is_intra = 0;
1203         ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->ps_cur_mb->u4_mb_type  == PSKIP);
1204 
1205         /* update buffers pointers */
1206         ps_proc->pu1_src_buf_luma += MB_SIZE;
1207         ps_proc->pu1_rec_buf_luma += MB_SIZE;
1208         ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1209         ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1210 
1211         /*
1212          * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1213          * the stride per MB is MB_SIZE
1214          */
1215         ps_proc->pu1_src_buf_chroma += MB_SIZE;
1216         ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1217         ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1218         ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1219 
1220 
1221         ps_proc->pu4_mb_pu_cnt += 1;
1222     }
1223 
1224 
1225     ps_proc->ps_pu = ps_pu_begin;
1226     ps_proc->i4_mb_x = ps_proc->i4_mb_x - u4_nmb_count;
1227 
1228     /* update buffers pointers */
1229     ps_proc->pu1_src_buf_luma -= MB_SIZE * u4_nmb_count;
1230     ps_proc->pu1_rec_buf_luma -= MB_SIZE * u4_nmb_count;
1231     ps_proc->apu1_ref_buf_luma[0] -= MB_SIZE * u4_nmb_count;
1232     ps_proc->apu1_ref_buf_luma[1] -= MB_SIZE * u4_nmb_count;
1233 
1234     /*
1235      * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1236      * the stride per MB is MB_SIZE
1237      */
1238     ps_proc->pu1_src_buf_chroma -= MB_SIZE * u4_nmb_count;
1239     ps_proc->pu1_rec_buf_chroma -= MB_SIZE * u4_nmb_count;
1240     ps_proc->apu1_ref_buf_chroma[0] -= MB_SIZE * u4_nmb_count;
1241     ps_proc->apu1_ref_buf_chroma[1] -= MB_SIZE * u4_nmb_count;
1242 
1243 
1244     ps_proc->pu4_mb_pu_cnt -= u4_nmb_count;
1245 }
1246 
1247 
1248 /**
1249 *******************************************************************************
1250 *
1251 * @brief The function computes parameters for a BSKIP MB
1252 *
1253 * @par Description:
1254 *  The function updates the skip motion vector for B Mb, check if the Mb can be
1255 *  marked as skip and returns it
1256 *
1257 * @param[in] ps_proc
1258 *  Pointer to process context
1259 *
1260 * @param[in] u4_for_me
1261 *  Dummy
1262 *
1263 * @param[in] i4_reflist
1264 *  Dummy
1265 *
1266 * @returns Flag indicating if the current Mb can be skip or not
1267 *
1268 * @remarks
1269 *   The code implements the logic as described in sec 8.4.1.2.2
1270 *   It also computes co-located MB parmas according to sec 8.4.1.2.1
1271 *
1272 *   Need to add condition for this fucntion to be used in ME
1273 *
1274 *******************************************************************************/
ih264e_find_bskip_params_me(process_ctxt_t * ps_proc,WORD32 i4_reflist)1275 WORD32 ih264e_find_bskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
1276 {
1277     /* Colzero for co-located MB */
1278     WORD32 i4_colzeroflag;
1279 
1280     /* motion vectors for neighbouring MBs */
1281     enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
1282 
1283     /* Variables to check if a particular mB is available */
1284     WORD32 i4_a, i4_b, i4_c, i4_c_avail;
1285 
1286     /* Mode availability, init to no modes available     */
1287     WORD32 i4_mode_avail;
1288 
1289     /*  mb neighbor availability */
1290     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1291 
1292     /* Temp var */
1293     WORD32 i, i4_cmpl_mode, i4_skip_type = -1;
1294 
1295     /*
1296      * Colocated motion vector
1297      */
1298     mv_t s_mvcol;
1299 
1300     /*
1301      * Colocated picture idx
1302      */
1303     WORD32 i4_refidxcol;
1304 
1305     UNUSED(i4_reflist);
1306 
1307     /**************************************************************************
1308      *Find co-located MB parameters
1309      *      See sec 8.4.1.2.1  for reference
1310      **************************************************************************/
1311     {
1312         /*
1313          * Find the co-located Mb and update the skip and pred appropriately
1314          * 1) Default colpic is forward ref : Table 8-6
1315          * 2) Default mb col is current MB : Table 8-8
1316          */
1317 
1318         if (ps_proc->ps_colpu->b1_intra_flag)
1319         {
1320             s_mvcol.i2_mvx = 0;
1321             s_mvcol.i2_mvy = 0;
1322             i4_refidxcol = -1;
1323         }
1324         else
1325         {
1326             if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1)
1327             {
1328                 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
1329                 i4_refidxcol = 0;
1330             }
1331             else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0)
1332             {
1333                 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
1334                 i4_refidxcol = 0;
1335             }
1336         }
1337 
1338         /* RefPicList1[ 0 ]  is marked as  "used for short-term reference", as default */
1339         i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1)
1340                         && (ABS(s_mvcol.i2_mvy) <= 1));
1341 
1342     }
1343 
1344     /***************************************************************************
1345      * Evaluating skip params : Spatial Skip
1346      **************************************************************************/
1347     {
1348     /* Get the neighbouring MBS according to Section 8.4.1.2.2 */
1349     ps_a_pu = &ps_proc->s_left_mb_pu_ME;
1350     ps_b_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);
1351 
1352     i4_c_avail = 0;
1353     if (ps_ngbr_avbl->u1_mb_c)
1354     {
1355         ps_c_pu = &((ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x)[1]);
1356         i4_c_avail = 1;
1357     }
1358     else
1359     {
1360         ps_c_pu = &ps_proc->s_top_left_mb_pu_ME;
1361         i4_c_avail = ps_ngbr_avbl->u1_mb_d;
1362     }
1363 
1364     i4_a = ps_ngbr_avbl->u1_mb_a;
1365     i4_b = ps_ngbr_avbl->u1_mb_b;
1366     i4_c = i4_c_avail;
1367 
1368     /* Init to no mode avail */
1369     i4_mode_avail = 0;
1370     for (i = 0; i < 2; i++)
1371     {
1372         i4_cmpl_mode = (i == 0) ? PRED_L1 : PRED_L0;
1373 
1374         i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1375         i4_mode_avail |= (i4_b && (ps_b_pu->b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1376         i4_mode_avail |= (i4_c && (ps_c_pu->b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1377     }
1378 
1379     if (i4_mode_avail == 0x3 || i4_mode_avail == 0x0)
1380     {
1381         i4_skip_type= PRED_BI;
1382     }
1383     else if(i4_mode_avail == 0x1)
1384     {
1385         i4_skip_type = PRED_L0;
1386     }
1387     else if(i4_mode_avail == 0x2)
1388     {
1389         i4_skip_type = PRED_L1;
1390     }
1391 
1392     /* Update skip MV for L0 */
1393     if ((i4_mode_avail & 0x1) && (!i4_colzeroflag))
1394     {
1395         ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
1396         ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
1397     }
1398     else
1399     {
1400         ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
1401         ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
1402     }
1403 
1404     /* Update skip MV for L1 */
1405     if ((i4_mode_avail & 0x2) && (!i4_colzeroflag))
1406     {
1407         ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
1408         ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
1409     }
1410     else
1411     {
1412         ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
1413         ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
1414     }
1415 
1416     }
1417 
1418     /***************************************************************************
1419      * Evaluating skip params : Temporal skip
1420      **************************************************************************/
1421     {
1422         pic_buf_t *  ps_ref_pic[MAX_REF_PIC_CNT];
1423         WORD32 i4_td, i4_tx, i4_tb, i4_dist_scale_factor;
1424         enc_pu_mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[2];
1425 
1426         ps_ref_pic[PRED_L0] = ps_proc->aps_ref_pic[PRED_L0];
1427         ps_ref_pic[PRED_L1] = ps_proc->aps_ref_pic[PRED_L1];
1428 
1429         i4_tb = ps_proc->ps_codec->i4_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;
1430         i4_td = ps_ref_pic[PRED_L1]->i4_abs_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;
1431 
1432         i4_tb = CLIP3(-128, 127, i4_tb);
1433         i4_td = CLIP3(-128, 127, i4_td);
1434 
1435         i4_tx = ( 16384 + ABS( i4_td / 2 ) ) / i4_td ;
1436         i4_dist_scale_factor =  CLIP3( -1024, 1023, ( i4_tb * i4_tx + 32 ) >> 6 );
1437 
1438         /* Motion vectors taken in full pel resolution , hence  -> (& 0xfffc) operation */
1439         ps_skip_mv[PRED_L0].s_mv.i2_mvx = (( i4_dist_scale_factor * s_mvcol.i2_mvx + 128 ) >> 8) & 0xfffc;
1440         ps_skip_mv[PRED_L0].s_mv.i2_mvy = (( i4_dist_scale_factor * s_mvcol.i2_mvy + 128 ) >> 8) & 0xfffc;
1441 
1442         ps_skip_mv[PRED_L1].s_mv.i2_mvx = (ps_skip_mv[PRED_L0].s_mv.i2_mvx - s_mvcol.i2_mvx) & 0xfffc;
1443         ps_skip_mv[PRED_L1].s_mv.i2_mvy = (ps_skip_mv[PRED_L0].s_mv.i2_mvy - s_mvcol.i2_mvy) & 0xfffc;
1444 
1445     }
1446 
1447     return i4_skip_type;
1448 }
1449 
1450 /**
1451 *******************************************************************************
1452 *
1453 * @brief The function computes the skip motion vectoe for B mb
1454 *
1455 * @par Description:
1456 *  The function gives the skip motion vector for B Mb, check if the Mb can be
1457 *  marked as skip
1458 *
1459 * @param[in] ps_proc
1460 *  Pointer to process context
1461 *
1462 * @param[in] u4_for_me
1463 *  Dummy
1464 *
1465 * @param[in] u4_for_me
1466 *  Dummy
1467 *
1468 * @returns Flag indicating if the current Mb can be skip or not
1469 *
1470 * @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
1471 *   specification. It also computes co-located MB parmas according to sec 8.4.1.2.1
1472 *
1473 *******************************************************************************/
ih264e_find_bskip_params(process_ctxt_t * ps_proc,WORD32 i4_reflist)1474 WORD32 ih264e_find_bskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
1475 {
1476     WORD32 i4_colzeroflag;
1477 
1478     /* motion vectors */
1479     enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
1480 
1481     /* Syntax elem */
1482     mb_info_t *ps_a_syn, *ps_b_syn, *ps_c_syn;
1483 
1484     /* Variables to check if a particular mB is available */
1485     WORD32 i4_a, i4_b, i4_c, i4_c_avail;
1486 
1487     /* Mode availability, init to no modes available     */
1488     WORD32 i4_mode_avail;
1489 
1490     /*  mb neighbor availability */
1491     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1492 
1493     /* Temp var */
1494     WORD32 i, i4_cmpl_mode;
1495 
1496     UNUSED(i4_reflist);
1497 
1498     /**************************************************************************
1499      *Find co-locates parameters
1500      *      See sec 8.4.1.2.1  for reference
1501      **************************************************************************/
1502     {
1503         /*
1504          * Find the co-located Mb and update the skip and pred appropriately
1505          * 1) Default colpic is forward ref : Table 8-6
1506          * 2) Default mb col is current MB : Table 8-8
1507          */
1508 
1509         mv_t s_mvcol;
1510         WORD32 i4_refidxcol;
1511 
1512         if (ps_proc->ps_colpu->b1_intra_flag)
1513         {
1514             s_mvcol.i2_mvx = 0;
1515             s_mvcol.i2_mvy = 0;
1516             i4_refidxcol = -1;
1517         }
1518         else
1519         {
1520             if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1)
1521             {
1522                 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
1523                 i4_refidxcol = 0;
1524             }
1525             else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0)
1526             {
1527                 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
1528                 i4_refidxcol = 0;
1529             }
1530         }
1531 
1532         /* RefPicList1[ 0 ]  is marked as  "used for short-term reference", as default */
1533         i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1)
1534                         && (ABS(s_mvcol.i2_mvy) <= 1));
1535 
1536     }
1537 
1538     /***************************************************************************
1539      * Evaluating skip params
1540      **************************************************************************/
1541     /* Section 8.4.1.2.2 */
1542     ps_a_syn = &ps_proc->s_left_mb_syntax_ele;
1543     ps_a_pu = &ps_proc->s_left_mb_pu;
1544 
1545     ps_b_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1546     ps_b_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
1547 
1548     i4_c_avail = 0;
1549     if (ps_ngbr_avbl->u1_mb_c)
1550     {
1551         ps_c_syn = &((ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x)[1]);
1552         ps_c_pu = &((ps_proc->ps_top_row_pu + ps_proc->i4_mb_x)[1]);
1553         i4_c_avail = 1;
1554     }
1555     else
1556     {
1557         ps_c_syn = &(ps_proc->s_top_left_mb_syntax_ele);
1558         ps_c_pu = &ps_proc->s_top_left_mb_pu;
1559         i4_c_avail = ps_ngbr_avbl->u1_mb_d;
1560     }
1561 
1562 
1563     i4_a = ps_ngbr_avbl->u1_mb_a;
1564     i4_a &= !ps_a_syn->u2_is_intra;
1565 
1566     i4_b = ps_ngbr_avbl->u1_mb_b;
1567     i4_b &= !ps_b_syn->u2_is_intra;
1568 
1569     i4_c = i4_c_avail;
1570     i4_c &= !ps_c_syn->u2_is_intra;
1571 
1572     /* Init to no mode avail */
1573     i4_mode_avail = 0;
1574     for (i = 0; i < 2; i++)
1575     {
1576         i4_cmpl_mode = (i == 0) ? PRED_L1 : PRED_L0;
1577 
1578         i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1579         i4_mode_avail |= (i4_b && (ps_b_pu->b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1580         i4_mode_avail |= (i4_c && (ps_c_pu->b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1581     }
1582 
1583     /* Update skip MV for L0 */
1584     if ((i4_mode_avail & 0x1) && (!i4_colzeroflag))
1585     {
1586         ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
1587         ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
1588     }
1589     else
1590     {
1591         ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
1592         ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
1593     }
1594 
1595     /* Update skip MV for L1 */
1596     if ((i4_mode_avail & 0x2) && (!i4_colzeroflag))
1597     {
1598         ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
1599         ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
1600     }
1601     else
1602     {
1603         ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
1604         ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
1605     }
1606 
1607     /* Now see if the ME information matches the SKIP information */
1608     switch (ps_proc->ps_pu->b2_pred_mode)
1609     {
1610         case PRED_BI:
1611             if (  (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx)
1612                && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy)
1613                && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx)
1614                && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy)
1615                && (i4_mode_avail ==  0x3 || i4_mode_avail == 0x0))
1616             {
1617                 return 1;
1618             }
1619             break;
1620 
1621         case PRED_L0:
1622             if ( (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx)
1623               && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy)
1624               && (i4_mode_avail == 0x1))
1625             {
1626                 return 1;
1627             }
1628             break;
1629 
1630         case PRED_L1:
1631             if (  (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx)
1632                && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy)
1633                && (i4_mode_avail == 0x2))
1634             {
1635                 return 1;
1636             }
1637             break;
1638     }
1639 
1640     return 0;
1641 }
1642 
1643 
1644 /**
1645 *******************************************************************************
1646 *
1647 * @brief This function computes the best motion vector among the tentative mv
1648 * candidates chosen.
1649 *
1650 * @par Description:
1651 *  This function determines the position in the search window at which the motion
1652 *  estimation should begin in order to minimise the number of search iterations.
1653 *
1654 * @param[in] ps_mb_part
1655 *  pointer to current mb partition ctxt with respect to ME
1656 *
1657 * @param[in] u4_lambda_motion
1658 *  lambda motion
1659 *
1660 * @param[in] u4_fast_flag
1661 *  enable/disable fast sad computation
1662 *
1663 * @returns  mv pair & corresponding distortion and cost
1664 *
1665 * @remarks Currently onyl 4 search candiates are supported
1666 *
1667 *******************************************************************************
1668 */
ih264e_evaluate_bipred(me_ctxt_t * ps_me_ctxt,process_ctxt_t * ps_proc,mb_part_ctxt * ps_mb_ctxt_bi)1669 void ih264e_evaluate_bipred(me_ctxt_t *ps_me_ctxt,
1670                             process_ctxt_t *ps_proc,
1671                             mb_part_ctxt *ps_mb_ctxt_bi)
1672 {
1673 
1674     UWORD32 i, u4_fast_sad;
1675 
1676     WORD32 i4_dest_buff;
1677 
1678     mv_t *ps_l0_pred_mv, *ps_l1_pred_mv, s_l0_mv, s_l1_mv;
1679 
1680     UWORD8 *pu1_ref_mb_l0, *pu1_ref_mb_l1;
1681 
1682     UWORD8 *pu1_dst_buf;
1683 
1684     WORD32 i4_ref_l0_stride, i4_ref_l1_stride;
1685 
1686     WORD32 i4_mb_distortion, i4_mb_cost;
1687 
1688     u4_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
1689 
1690     i4_dest_buff = 0;
1691     for (i = 0; i < ps_me_ctxt->u4_num_candidates[PRED_BI]; i += 2)
1692     {
1693         pu1_dst_buf = ps_me_ctxt->apu1_subpel_buffs[i4_dest_buff];
1694 
1695         s_l0_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx >> 2;
1696         s_l0_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy >> 2;
1697         s_l1_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx >> 2;
1698         s_l1_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy >> 2;
1699 
1700         ps_l0_pred_mv = &ps_proc->ps_pred_mv[PRED_L0].s_mv;
1701         ps_l1_pred_mv = &ps_proc->ps_pred_mv[PRED_L1].s_mv;
1702 
1703         if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx & 0x3)||
1704                         (ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy & 0x3))
1705         {
1706             pu1_ref_mb_l0 = ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf;
1707             i4_ref_l0_stride = ps_me_ctxt->u4_subpel_buf_strd;
1708         }
1709         else
1710         {
1711             pu1_ref_mb_l0 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L0] + (s_l0_mv.i2_mvx) + ((s_l0_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
1712             i4_ref_l0_stride = ps_me_ctxt->i4_rec_strd;
1713         }
1714 
1715 
1716         if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx & 0x3) ||
1717                         (ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy & 0x3))
1718         {
1719             pu1_ref_mb_l1 = ps_me_ctxt->as_mb_part[PRED_L1].pu1_best_hpel_buf;
1720             i4_ref_l1_stride = ps_me_ctxt->u4_subpel_buf_strd;
1721         }
1722         else
1723         {
1724             pu1_ref_mb_l1 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L1] + (s_l1_mv.i2_mvx) + ((s_l1_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
1725             i4_ref_l1_stride = ps_me_ctxt->i4_rec_strd;
1726         }
1727 
1728         ps_proc->ps_codec->pf_inter_pred_luma_bilinear(
1729                         pu1_ref_mb_l0, pu1_ref_mb_l1, pu1_dst_buf,
1730                         i4_ref_l0_stride, i4_ref_l1_stride,
1731                         ps_me_ctxt->u4_subpel_buf_strd, MB_SIZE, MB_SIZE);
1732 
1733         ps_me_ctxt->pf_ime_compute_sad_16x16[u4_fast_sad](
1734                         ps_me_ctxt->pu1_src_buf_luma, pu1_dst_buf,
1735                         ps_me_ctxt->i4_src_strd, ps_me_ctxt->u4_subpel_buf_strd,
1736                         INT_MAX, &i4_mb_distortion);
1737 
1738         /* compute cost */
1739         i4_mb_cost =  ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx - ps_l0_pred_mv->i2_mvx];
1740         i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy - ps_l0_pred_mv->i2_mvy];
1741         i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx - ps_l1_pred_mv->i2_mvx];
1742         i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy - ps_l1_pred_mv->i2_mvy];
1743 
1744         i4_mb_cost -= (ps_me_ctxt->i4_skip_bias[BSLICE]) * (ps_me_ctxt->i4_skip_type == PRED_BI) * (i == 0);
1745 
1746 
1747         i4_mb_cost *= ps_me_ctxt->u4_lambda_motion;
1748         i4_mb_cost += i4_mb_distortion;
1749 
1750         if (i4_mb_cost < ps_mb_ctxt_bi->i4_mb_cost)
1751         {
1752             ps_mb_ctxt_bi->i4_srch_pos_idx = (i>>1);
1753             ps_mb_ctxt_bi->i4_mb_cost = i4_mb_cost;
1754             ps_mb_ctxt_bi->i4_mb_distortion = i4_mb_distortion;
1755             ps_mb_ctxt_bi->pu1_best_hpel_buf = pu1_dst_buf;
1756             i4_dest_buff = (i4_dest_buff + 1) % 2;
1757         }
1758     }
1759 
1760 }
1761 
1762 /**
1763 *******************************************************************************
1764 *
1765 * @brief This function performs motion estimation for the current mb
1766 *
1767 * @par Description:
1768 *  The current mb is compared with a list of mb's in the reference frame for
1769 *  least cost. The mb that offers least cost is chosen as predicted mb and the
1770 *  displacement of the predicted mb from index location of the current mb is
1771 *  signaled as mv. The list of the mb's that are chosen in the reference frame
1772 *  are dependent on the speed of the ME configured.
1773 *
1774 * @param[in] ps_proc
1775 *  Process context corresponding to the job
1776 *
1777 * @returns  motion vector of the pred mb, sad, cost.
1778 *
1779 * @remarks none
1780 *
1781 *******************************************************************************
1782 */
ih264e_compute_me_multi_reflist(process_ctxt_t * ps_proc)1783 void ih264e_compute_me_multi_reflist(process_ctxt_t *ps_proc)
1784 {
1785     /* me ctxt */
1786     me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
1787 
1788     /* codec context */
1789     codec_t *ps_codec = ps_proc->ps_codec;
1790 
1791     /* Temp variables for looping over ref lists */
1792     WORD32 i4_reflist, i4_max_reflist;
1793 
1794     /* recon stride */
1795     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1796 
1797     /* source buffer for halp pel generation functions */
1798     UWORD8 *pu1_hpel_src;
1799 
1800     /* quantization parameters */
1801     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1802 
1803     /* Mb part ctxts for SKIP */
1804     mb_part_ctxt as_skip_mbpart[2];
1805 
1806     /* Sad therholds */
1807     ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
1808 
1809     {
1810         WORD32 rows_above, rows_below, columns_left, columns_right;
1811 
1812         /* During evaluation for motion vectors do not search through padded regions */
1813         /* Obtain number of rows and columns that are effective for computing for me evaluation */
1814         rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
1815         rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
1816         columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
1817         columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
1818 
1819         /* init srch range */
1820         /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
1821          * on all sides.
1822          */
1823         ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1824         ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1825         ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1826         ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1827 
1828         /* this is to facilitate fast sub pel computation with minimal loads */
1829         if (ps_me_ctxt->u4_enable_hpel)
1830         {
1831             ps_me_ctxt->i4_srch_range_w += 1;
1832             ps_me_ctxt->i4_srch_range_e -= 1;
1833             ps_me_ctxt->i4_srch_range_n += 1;
1834             ps_me_ctxt->i4_srch_range_s -= 1;
1835         }
1836     }
1837 
1838     /* Compute ME and store the MVs */
1839     {
1840         /***********************************************************************
1841          * Compute ME for lists L0 and L1
1842          *  For L0 -> L0 skip + L0
1843          *  for L1 -> L0 skip + L0 + L1 skip + L1
1844          ***********************************************************************/
1845         i4_max_reflist = (ps_proc->i4_slice_type == PSLICE) ? PRED_L0 : PRED_L1;
1846 
1847         /* Init SATQD for the current list */
1848         ps_me_ctxt->u4_min_sad_reached  = 0;
1849         ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
1850 
1851         for (i4_reflist = PRED_L0; i4_reflist <= i4_max_reflist; i4_reflist++)
1852         {
1853 
1854             /* Get the seed motion vector candidates                    */
1855             ih264e_get_search_candidates(ps_proc, ps_me_ctxt, i4_reflist);
1856 
1857             /* ****************************************************************
1858              *Evaluate the SKIP for current list
1859              * ****************************************************************/
1860             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx = 0;
1861             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy = 0;
1862             as_skip_mbpart[i4_reflist].i4_mb_cost = INT_MAX;
1863             as_skip_mbpart[i4_reflist].i4_mb_distortion = INT_MAX;
1864 
1865             if (ps_me_ctxt->i4_skip_type == i4_reflist)
1866             {
1867                 ime_compute_skip_cost( ps_me_ctxt,
1868                                        (ime_mv_t *)(&ps_proc->ps_skip_mv[i4_reflist].s_mv),
1869                                        &as_skip_mbpart[i4_reflist],
1870                                        ps_proc->ps_codec->s_cfg.u4_enable_satqd,
1871                                        i4_reflist,
1872                                        (ps_proc->i4_slice_type == BSLICE) );
1873             }
1874 
1875             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx <<= 2;
1876             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy <<= 2;
1877 
1878             /******************************************************************
1879              * Evaluate ME For current list
1880              *****************************************************************/
1881             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx = 0;
1882             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy = 0;
1883             ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = INT_MAX;
1884             ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = INT_MAX;
1885 
1886             /* Init Hpel */
1887             ps_me_ctxt->as_mb_part[i4_reflist].pu1_best_hpel_buf = NULL;
1888 
1889             /* In case we found out the minimum SAD, exit the ME eval */
1890             if (ps_me_ctxt->u4_min_sad_reached)
1891             {
1892                 i4_max_reflist = i4_reflist;
1893                 break;
1894             }
1895 
1896 
1897             /* Evaluate search candidates for initial mv pt */
1898             ime_evaluate_init_srchposn_16x16(ps_me_ctxt, i4_reflist);
1899 
1900             /********************************************************************/
1901             /*                  full pel motion estimation                      */
1902             /********************************************************************/
1903             ime_full_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
1904 
1905             DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx >> 2),
1906                                    (ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy >> 2));
1907 
1908             DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 1);
1909 
1910             /* Scale the MV to qpel resolution */
1911             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx <<= 2;
1912             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy <<= 2;
1913 
1914             if (ps_me_ctxt->u4_enable_hpel)
1915             {
1916                 /* moving src pointer to the converged motion vector location */
1917                 pu1_hpel_src =   ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]
1918                                + (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2)
1919                                + ((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2)* i4_rec_strd);
1920 
1921                 ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
1922                 ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
1923                 ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
1924 
1925                 /* Init the search position to an invalid number */
1926                 ps_me_ctxt->as_mb_part[i4_reflist].i4_srch_pos_idx = 3;
1927 
1928                 /* Incase a buffer is still in use by L0, replace it with spare buff */
1929                 ps_me_ctxt->apu1_subpel_buffs[ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx] =
1930                                 ps_proc->apu1_subpel_buffs[3];
1931 
1932 
1933                 ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
1934 
1935                 /* half  pel search is done for both sides of full pel,
1936                  * hence half_x of width x height = 17x16 is created
1937                  * starting from left half_x of converged full pel */
1938                 pu1_hpel_src -= 1;
1939 
1940                 /* computing half_x */
1941                 ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
1942                                                       ps_me_ctxt->apu1_subpel_buffs[0],
1943                                                       i4_rec_strd,
1944                                                       ps_me_ctxt->u4_subpel_buf_strd);
1945 
1946                 /*
1947                  * Halfpel search is done for both sides of full pel,
1948                  * hence half_y of width x height = 16x17 is created
1949                  * starting from top half_y of converged full pel
1950                  * for half_xy top_left is required
1951                  * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
1952                  */
1953                 pu1_hpel_src -= i4_rec_strd;
1954 
1955                 /* computing half_y and half_xy */
1956                 ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
1957                                 pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
1958                                 ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
1959                                 ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
1960                                 ps_me_ctxt->u4_subpel_buf_strd);
1961 
1962                 ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
1963 
1964             }
1965         }
1966 
1967         /***********************************************************************
1968          * If a particular skiip Mv is giving better sad, copy to the corresponding
1969          * MBPART
1970          * In B slices this loop should go only to PREDL1: If we found min sad
1971          * we will go to the skip ref list only
1972          * Have to find a way to make it without too much change or new vars
1973          **********************************************************************/
1974         for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
1975         {
1976             if (as_skip_mbpart[i4_reflist].i4_mb_cost < ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost)
1977             {
1978                 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = as_skip_mbpart[i4_reflist].i4_mb_cost;
1979                 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = as_skip_mbpart[i4_reflist].i4_mb_distortion;
1980                 ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr = as_skip_mbpart[i4_reflist].s_mv_curr;
1981             }
1982         }
1983 
1984         /***********************************************************************
1985          * Compute ME for BI
1986          *  In case of BI we do ME for two candidates
1987          *   1) The best L0 and L1 Mvs
1988          *   2) Skip L0 and L1 MVs
1989          *
1990          *   TODO
1991          *   one of the search candidates is skip. Hence it may be duplicated
1992          ***********************************************************************/
1993         if (i4_max_reflist == PRED_L1 && ps_me_ctxt->u4_min_sad_reached == 0)
1994         {
1995             WORD32 i, j = 0;
1996             WORD32 l0_srch_pos_idx, l1_srch_pos_idx;
1997             WORD32 i4_l0_skip_mv_idx, i4_l1_skip_mv_idx;
1998 
1999             /* Get the free buffers */
2000             l0_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx;
2001             l1_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L1].i4_srch_pos_idx;
2002 
2003             /* Search for the two free buffers in subpel list */
2004             for (i = 0; i < SUBPEL_BUFF_CNT; i++)
2005             {
2006                 if (i != l0_srch_pos_idx && i != l1_srch_pos_idx)
2007                 {
2008                     ps_me_ctxt->apu1_subpel_buffs[j] = ps_proc->apu1_subpel_buffs[i];
2009                     j++;
2010                 }
2011             }
2012             ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
2013 
2014             /* Copy the statial SKIP MV of each list */
2015             i4_l0_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L0] - 2;
2016             i4_l1_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L1] - 2;
2017             ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
2018             ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
2019             ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
2020             ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2021 
2022             /* Copy the SKIP MV temporal of each list */
2023             i4_l0_skip_mv_idx++;
2024             i4_l1_skip_mv_idx++;
2025             ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
2026             ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
2027             ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
2028             ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2029 
2030             /* Copy the best MV after ME */
2031             ps_me_ctxt->as_mv_init_search[PRED_BI][4] = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr;
2032             ps_me_ctxt->as_mv_init_search[PRED_BI][5] = ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr;
2033 
2034             ps_me_ctxt->u4_num_candidates[PRED_BI] = 6;
2035 
2036             ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_cost = INT_MAX;
2037             ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_distortion = INT_MAX;
2038 
2039             ih264e_evaluate_bipred(ps_me_ctxt, ps_proc,
2040                                    &ps_me_ctxt->as_mb_part[PRED_BI]);
2041 
2042             i4_max_reflist = PRED_BI;
2043         }
2044 
2045         /**********************************************************************
2046          * Now get the minimum of MB part sads by searching over all ref lists
2047          **********************************************************************/
2048         ps_proc->ps_pu->b2_pred_mode = 0x3;
2049 
2050         for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
2051         {
2052             if (ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
2053             {
2054                 ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost;
2055                 ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion;
2056                 ps_proc->ps_cur_mb->u4_mb_type = (ps_proc->i4_slice_type == PSLICE) ? P16x16 : B16x16;
2057                 ps_proc->ps_pu->b2_pred_mode = i4_reflist ;
2058             }
2059         }
2060 
2061         /**********************************************************************
2062          * In case we have a BI MB, we have to copy the buffers and set proer MV's
2063          *  1)In case its BI, we need to get the best MVs given by BI and update
2064          *    to their corresponding MB part
2065          *  2)We also need to copy the buffer in which bipred buff is populated
2066          *
2067          *  Not that if we have
2068          **********************************************************************/
2069         if (ps_proc->ps_pu->b2_pred_mode == PRED_BI)
2070         {
2071             WORD32 i4_srch_pos = ps_me_ctxt->as_mb_part[PRED_BI].i4_srch_pos_idx;
2072             UWORD8 *pu1_bi_buf = ps_me_ctxt->as_mb_part[PRED_BI].pu1_best_hpel_buf;
2073 
2074             ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][i4_srch_pos << 1];
2075             ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][(i4_srch_pos << 1) + 1];
2076 
2077             /* Now we have to copy the buffers */
2078             ps_codec->pf_inter_pred_luma_copy(pu1_bi_buf,
2079                                               ps_proc->pu1_best_subpel_buf,
2080                                               ps_me_ctxt->u4_subpel_buf_strd,
2081                                               ps_proc->u4_bst_spel_buf_strd,
2082                                               MB_SIZE, MB_SIZE, NULL, 0);
2083 
2084         }
2085         else if (ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf)
2086         {
2087             /* Now we have to copy the buffers */
2088             ps_codec->pf_inter_pred_luma_copy(
2089                             ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf,
2090                             ps_proc->pu1_best_subpel_buf,
2091                             ps_me_ctxt->u4_subpel_buf_strd,
2092                             ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
2093                             NULL, 0);
2094         }
2095     }
2096 
2097     /**************************************************************************
2098      *Now copy the MVs to the current PU with qpel scaling
2099      ***************************************************************************/
2100     ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx);
2101     ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy);
2102     ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvx);
2103     ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvy);
2104 
2105 
2106     ps_proc->ps_pu->s_me_info[0].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L1)? -1:0;
2107     ps_proc->ps_pu->s_me_info[1].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L0)? -1:0;
2108 
2109     /* number of partitions */
2110     ps_proc->u4_num_sub_partitions = 1;
2111     *(ps_proc->pu4_mb_pu_cnt) = 1;
2112 
2113     /* position in-terms of PU */
2114     ps_proc->ps_pu->b4_pos_x = 0;
2115     ps_proc->ps_pu->b4_pos_y = 0;
2116 
2117     /* PU size */
2118     ps_proc->ps_pu->b4_wd = 3;
2119     ps_proc->ps_pu->b4_ht = 3;
2120 
2121     /* Update min sad conditions */
2122     if (ps_me_ctxt->u4_min_sad_reached == 1)
2123     {
2124         ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
2125         ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
2126     }
2127 }
2128 
2129