1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 ******************************************************************************
22 * @file hme_refine.c
23 *
24 * @brief
25 *    Contains the implementation of the refinement layer searches and related
26 *    functionality like CU merge.
27 *
28 * @author
29 *    Ittiam
30 *
31 *
32 * List of Functions
33 *
34 *
35 ******************************************************************************
36 */
37 
38 /*****************************************************************************/
39 /* File Includes                                                             */
40 /*****************************************************************************/
41 /* System include files */
42 #include <stdio.h>
43 #include <string.h>
44 #include <stdlib.h>
45 #include <assert.h>
46 #include <stdarg.h>
47 #include <math.h>
48 #include <limits.h>
49 
50 /* User include files */
51 #include "ihevc_typedefs.h"
52 #include "itt_video_api.h"
53 #include "ihevce_api.h"
54 
55 #include "rc_cntrl_param.h"
56 #include "rc_frame_info_collector.h"
57 #include "rc_look_ahead_params.h"
58 
59 #include "ihevc_defs.h"
60 #include "ihevc_structs.h"
61 #include "ihevc_platform_macros.h"
62 #include "ihevc_deblk.h"
63 #include "ihevc_itrans_recon.h"
64 #include "ihevc_chroma_itrans_recon.h"
65 #include "ihevc_chroma_intra_pred.h"
66 #include "ihevc_intra_pred.h"
67 #include "ihevc_inter_pred.h"
68 #include "ihevc_mem_fns.h"
69 #include "ihevc_padding.h"
70 #include "ihevc_weighted_pred.h"
71 #include "ihevc_sao.h"
72 #include "ihevc_resi_trans.h"
73 #include "ihevc_quant_iquant_ssd.h"
74 #include "ihevc_cabac_tables.h"
75 
76 #include "ihevce_defs.h"
77 #include "ihevce_lap_enc_structs.h"
78 #include "ihevce_multi_thrd_structs.h"
79 #include "ihevce_multi_thrd_funcs.h"
80 #include "ihevce_me_common_defs.h"
81 #include "ihevce_had_satd.h"
82 #include "ihevce_error_codes.h"
83 #include "ihevce_bitstream.h"
84 #include "ihevce_cabac.h"
85 #include "ihevce_rdoq_macros.h"
86 #include "ihevce_function_selector.h"
87 #include "ihevce_enc_structs.h"
88 #include "ihevce_entropy_structs.h"
89 #include "ihevce_cmn_utils_instr_set_router.h"
90 #include "ihevce_enc_loop_structs.h"
91 #include "ihevce_bs_compute_ctb.h"
92 #include "ihevce_global_tables.h"
93 #include "ihevce_dep_mngr_interface.h"
94 #include "hme_datatype.h"
95 #include "hme_interface.h"
96 #include "hme_common_defs.h"
97 #include "hme_defs.h"
98 #include "ihevce_me_instr_set_router.h"
99 #include "hme_globals.h"
100 #include "hme_utils.h"
101 #include "hme_coarse.h"
102 #include "hme_fullpel.h"
103 #include "hme_subpel.h"
104 #include "hme_refine.h"
105 #include "hme_err_compute.h"
106 #include "hme_common_utils.h"
107 #include "hme_search_algo.h"
108 #include "ihevce_stasino_helpers.h"
109 #include "ihevce_common_utils.h"
110 
111 /*****************************************************************************/
112 /* Globals                                                                   */
113 /*****************************************************************************/
114 
115 /* brief: mapping buffer to convert raster scan indices into z-scan oder in a ctb */
116 UWORD8 gau1_raster_scan_to_ctb[4][4] = {
117     { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 }
118 };
119 
120 /*****************************************************************************/
/* Extern Function declaration                                               */
122 /*****************************************************************************/
123 extern ctb_boundary_attrs_t *
124     get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);
125 
126 typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
127     search_node_t *ps_search_node,
128     layer_ctxt_t *ps_curr_layer,
129     layer_ctxt_t *ps_coarse_layer,
130     S32 i4_pos_x,
131     S32 i4_pos_y,
132     S08 i1_ref_id,
133     S32 i4_result_id);
134 
135 typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
136     search_node_t *ps_search_node,
137     layer_ctxt_t *ps_curr_layer,
138     layer_ctxt_t *ps_coarse_layer,
139     S32 i4_pos_x,
140     S32 i4_pos_y,
141     S32 i4_num_act_ref_l0,
142     U08 u1_pred_dir,
143     U08 u1_default_ref_id,
144     S32 i4_result_id);
145 
146 /*****************************************************************************/
147 /* Function Definitions                                                      */
148 /*****************************************************************************/
149 
/**
********************************************************************************
*  @fn   void ihevce_no_wt_copy(coarse_me_ctxt_t *ps_ctxt,
*                               layer_ctxt_t *ps_curr_layer,
*                               pu_t *ps_pu,
*                               UWORD8 *pu1_temp_pred,
*                               WORD32 temp_stride,
*                               WORD32 blk_x,
*                               WORD32 blk_y)
*
*  @brief  Copies a temp_stride x temp_stride block of samples, displaced by
*          the PU's motion vector, from one of the layer's ppu1_list_inp[]
*          buffers into pu1_temp_pred. No weighting is applied.
*
*  @param[in] ps_ctxt : Not referenced by this function
*
*  @param[in] ps_curr_layer : Supplies ppu1_list_inp[] (indexed by the PU's
*              reference index) and i4_inp_stride (source row stride)
*
*  @param[in] ps_pu : PU whose b2_pred_mode selects whether the L0 or the L1
*              reference index and motion vector are used. Expected to be
*              PRED_L0 or PRED_L1; any other mode takes the L1 path when the
*              ASSERT is compiled out
*
*  @param[out] pu1_temp_pred : Destination buffer, rows temp_stride apart
*
*  @param[in] temp_stride : Destination row stride; also used as the width
*              and the height of the copied block
*
*  @param[in] blk_x : Horizontal block index; multiplied by 8 to get the
*              sample column
*
*  @param[in] blk_y : Vertical block index; multiplied by 8 to get the
*              sample row
*
*  @return None
********************************************************************************
*/
void ihevce_no_wt_copy(
    coarse_me_ctxt_t *ps_ctxt,
    layer_ctxt_t *ps_curr_layer,
    pu_t *ps_pu,
    UWORD8 *pu1_temp_pred,
    WORD32 temp_stride,
    WORD32 blk_x,
    WORD32 blk_y)
{
    UWORD8 *pu1_src;
    UWORD8 *pu1_dst = pu1_temp_pred;
    WORD32 i4_src_stride, i4_src_offset;
    WORD32 i4_mv_x, i4_mv_y;
    WORD32 i4_x, i4_y, i4_sample;
    WORD8 i1_ref_idx;

    ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1));

    /* Pick the reference index and motion vector of the active list */
    if(PRED_L0 == ps_pu->b2_pred_mode)
    {
        i1_ref_idx = ps_pu->mv.i1_l0_ref_idx;
        i4_mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
        i4_mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
    }
    else
    {
        i1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
        i4_mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
        i4_mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
    }

    /* The MV is added to the sample position as is, i.e. it is treated as a */
    /* full-sample displacement                                              */
    i4_src_stride = ps_curr_layer->i4_inp_stride;
    i4_src_offset = ((blk_y << 3) + i4_mv_y) * i4_src_stride;
    i4_src_offset += (blk_x << 3) + i4_mv_x;

    pu1_src = ps_curr_layer->ppu1_list_inp[i1_ref_idx] + i4_src_offset;

    for(i4_y = 0; i4_y < temp_stride; i4_y++)
    {
        for(i4_x = 0; i4_x < temp_stride; i4_x++)
        {
            i4_sample = pu1_src[i4_x];
            pu1_dst[i4_x] = CLIP_U8(i4_sample);
        }

        pu1_src += i4_src_stride;
        pu1_dst += temp_stride;
    }
}
218 
/**
********************************************************************************
*  @fn   static WORD32 hme_add_clustered_mvs_as_merge_cands(
*                                    cluster_data_t *ps_cluster_base,
*                                    search_node_t *ps_merge_cand,
*                                    range_prms_t **pps_range_prms,
*                                    U08 *pu1_refid_to_pred_dir_list,
*                                    WORD32 i4_num_clusters,
*                                    U08 u1_pred_dir)
*
*  @brief  Appends the MVs stored in the given clusters to the merge candidate
*          list. Each MV is clipped to the range of its reference and is only
*          kept if no identical (mvx, mvy, ref idx) entry is already present.
*
*  @param[in] ps_cluster_base : Array of i4_num_clusters clusters
*
*  @param[out] ps_merge_cand : Candidate list, filled from index 0. The entry
*               at index 'number of candidates' is used as scratch space, so
*               the array needs room for one entry more than is returned.
*               No capacity check is performed here
*
*  @param[in] pps_range_prms : MV range per reference id, used for clipping
*
*  @param[in] pu1_refid_to_pred_dir_list : Per reference id flag. A cluster is
*              used when the logical NOT of its flag equals u1_pred_dir
*
*  @param[in] i4_num_clusters : Number of entries in ps_cluster_base
*
*  @param[in] u1_pred_dir : Prediction direction being populated (0 or 1)
*
*  @return Number of unique candidates written to ps_merge_cand
********************************************************************************
*/
static WORD32 hme_add_clustered_mvs_as_merge_cands(
    cluster_data_t *ps_cluster_base,
    search_node_t *ps_merge_cand,
    range_prms_t **pps_range_prms,
    U08 *pu1_refid_to_pred_dir_list,
    WORD32 i4_num_clusters,
    U08 u1_pred_dir)
{
    WORD32 i, j, k;
    WORD32 i4_num_cands_added = 0;
    WORD32 i4_num_mvs_in_cluster;

    for(i = 0; i < i4_num_clusters; i++)
    {
        cluster_data_t *ps_data = &ps_cluster_base[i];

        /* Skip clusters that belong to the other prediction direction */
        if(u1_pred_dir != !pu1_refid_to_pred_dir_list[ps_data->ref_id])
        {
            continue;
        }

        i4_num_mvs_in_cluster = ps_data->num_mvs;

        for(j = 0; j < i4_num_mvs_in_cluster; j++)
        {
            /* Build the candidate in the first free slot; it is only */
            /* committed (count incremented) if it proves to be unique */
            search_node_t *ps_new_cand = &ps_merge_cand[i4_num_cands_added];

            ps_new_cand->s_mv.i2_mvx = ps_data->as_mv[j].mvx;
            ps_new_cand->s_mv.i2_mvy = ps_data->as_mv[j].mvy;
            ps_new_cand->i1_ref_idx = ps_data->ref_id;

            CLIP_MV_WITHIN_RANGE(
                ps_new_cand->s_mv.i2_mvx,
                ps_new_cand->s_mv.i2_mvy,
                pps_range_prms[ps_data->ref_id],
                0,
                0,
                0);

            /* The list holds clipped MVs, so the uniqueness check has to   */
            /* use the clipped MV as well. Comparing against the unclipped  */
            /* cluster MV would never match for an MV that got clipped and  */
            /* would let duplicates into the list                           */
            for(k = 0; k < i4_num_cands_added; k++)
            {
                if((ps_merge_cand[k].s_mv.i2_mvx == ps_new_cand->s_mv.i2_mvx) &&
                   (ps_merge_cand[k].s_mv.i2_mvy == ps_new_cand->s_mv.i2_mvy) &&
                   (ps_merge_cand[k].i1_ref_idx == ps_data->ref_id))
                {
                    break;
                }
            }

            if(k == i4_num_cands_added)
            {
                i4_num_cands_added++;
            }
        }
    }

    return i4_num_cands_added;
}
273 
hme_add_me_best_as_merge_cands(search_results_t ** pps_child_data_array,inter_cu_results_t * ps_8x8cu_results,search_node_t * ps_merge_cand,range_prms_t ** pps_range_prms,U08 * pu1_refid_to_pred_dir_list,S08 * pi1_past_list,S08 * pi1_future_list,BLK_SIZE_T e_blk_size,ME_QUALITY_PRESETS_T e_quality_preset,S32 i4_num_cands_added,U08 u1_pred_dir)274 static WORD32 hme_add_me_best_as_merge_cands(
275     search_results_t **pps_child_data_array,
276     inter_cu_results_t *ps_8x8cu_results,
277     search_node_t *ps_merge_cand,
278     range_prms_t **pps_range_prms,
279     U08 *pu1_refid_to_pred_dir_list,
280     S08 *pi1_past_list,
281     S08 *pi1_future_list,
282     BLK_SIZE_T e_blk_size,
283     ME_QUALITY_PRESETS_T e_quality_preset,
284     S32 i4_num_cands_added,
285     U08 u1_pred_dir)
286 {
287     WORD32 i, j, k;
288     WORD32 i4_max_cands_to_add;
289 
290     WORD32 i4_result_id = 0;
291 
292     ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size));
293     ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size));
294     ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size));
295     ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size));
296 
297     switch(e_quality_preset)
298     {
299     case ME_PRISTINE_QUALITY:
300     {
301         i4_max_cands_to_add = MAX_MERGE_CANDTS;
302 
303         break;
304     }
305     case ME_HIGH_QUALITY:
306     {
307         /* All 4 children are split and each grandchild contributes an MV */
308         /* and 2 best results per grandchild */
309         i4_max_cands_to_add = 4 * 4 * 2;
310 
311         break;
312     }
313     case ME_MEDIUM_SPEED:
314     {
315         i4_max_cands_to_add = 4 * 2 * 2;
316 
317         break;
318     }
319     case ME_HIGH_SPEED:
320     case ME_XTREME_SPEED:
321     case ME_XTREME_SPEED_25:
322     {
323         i4_max_cands_to_add = 4 * 2 * 1;
324 
325         break;
326     }
327     }
328 
329     while(i4_result_id < 4)
330     {
331         for(i = 0; i < 4; i++)
332         {
333             inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results;
334             inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2];
335 
336             if(!pps_child_data_array[i]->u1_split_flag)
337             {
338                 part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id];
339 
340                 if(ps_child_data->u1_num_best_results <= i4_result_id)
341                 {
342                     continue;
343                 }
344 
345                 if(ps_data->as_pu_results->pu.b1_intra_flag)
346                 {
347                     continue;
348                 }
349 
350                 for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++)
351                 {
352                     mv_t *ps_mv;
353 
354                     S08 i1_ref_idx;
355 
356                     pu_t *ps_pu = &ps_data->as_pu_results[j].pu;
357 
358                     if(u1_pred_dir !=
359                        ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
360                     {
361                         continue;
362                     }
363 
364                     if(u1_pred_dir)
365                     {
366                         ps_mv = &ps_pu->mv.s_l1_mv;
367                         i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
368                     }
369                     else
370                     {
371                         ps_mv = &ps_pu->mv.s_l0_mv;
372                         i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
373                     }
374 
375                     if(-1 == i1_ref_idx)
376                     {
377                         continue;
378                     }
379 
380                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
381                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
382                     ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
383 
384                     CLIP_MV_WITHIN_RANGE(
385                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
386                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
387                         pps_range_prms[i1_ref_idx],
388                         0,
389                         0,
390                         0);
391 
392                     for(k = 0; k < i4_num_cands_added; k++)
393                     {
394                         if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
395                            (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
396                            (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
397                         {
398                             break;
399                         }
400                     }
401 
402                     if(k == i4_num_cands_added)
403                     {
404                         i4_num_cands_added++;
405 
406                         if(i4_max_cands_to_add <= i4_num_cands_added)
407                         {
408                             return i4_num_cands_added;
409                         }
410                     }
411                 }
412             }
413             else
414             {
415                 for(j = 0; j < 4; j++)
416                 {
417                     mv_t *ps_mv;
418 
419                     S08 i1_ref_idx;
420 
421                     part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results;
422                     pu_t *ps_pu = &ps_data->as_pu_results[0].pu;
423 
424                     ASSERT(ps_data->u1_part_type == PRT_2Nx2N);
425 
426                     if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id)
427                     {
428                         continue;
429                     }
430 
431                     if(ps_data->as_pu_results->pu.b1_intra_flag)
432                     {
433                         continue;
434                     }
435 
436                     if(u1_pred_dir !=
437                        ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
438                     {
439                         continue;
440                     }
441 
442                     if(u1_pred_dir)
443                     {
444                         ps_mv = &ps_pu->mv.s_l1_mv;
445                         i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
446                     }
447                     else
448                     {
449                         ps_mv = &ps_pu->mv.s_l0_mv;
450                         i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
451                     }
452 
453                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
454                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
455                     ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
456 
457                     CLIP_MV_WITHIN_RANGE(
458                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
459                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
460                         pps_range_prms[i1_ref_idx],
461                         0,
462                         0,
463                         0);
464 
465                     for(k = 0; k < i4_num_cands_added; k++)
466                     {
467                         if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
468                            (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
469                            (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
470                         {
471                             break;
472                         }
473                     }
474 
475                     if(k == i4_num_cands_added)
476                     {
477                         i4_num_cands_added++;
478 
479                         if(i4_max_cands_to_add <= i4_num_cands_added)
480                         {
481                             return i4_num_cands_added;
482                         }
483                     }
484                 }
485             }
486         }
487 
488         i4_result_id++;
489     }
490 
491     return i4_num_cands_added;
492 }
493 
hme_add_cands_for_merge_eval(ctb_cluster_info_t * ps_cluster_info,search_results_t ** pps_child_data_array,inter_cu_results_t * ps_8x8cu_results,range_prms_t ** pps_range_prms,search_node_t * ps_merge_cand,U08 * pu1_refid_to_pred_dir_list,S08 * pi1_past_list,S08 * pi1_future_list,ME_QUALITY_PRESETS_T e_quality_preset,BLK_SIZE_T e_blk_size,U08 u1_pred_dir,U08 u1_blk_id)494 WORD32 hme_add_cands_for_merge_eval(
495     ctb_cluster_info_t *ps_cluster_info,
496     search_results_t **pps_child_data_array,
497     inter_cu_results_t *ps_8x8cu_results,
498     range_prms_t **pps_range_prms,
499     search_node_t *ps_merge_cand,
500     U08 *pu1_refid_to_pred_dir_list,
501     S08 *pi1_past_list,
502     S08 *pi1_future_list,
503     ME_QUALITY_PRESETS_T e_quality_preset,
504     BLK_SIZE_T e_blk_size,
505     U08 u1_pred_dir,
506     U08 u1_blk_id)
507 {
508     WORD32 i4_num_cands_added = 0;
509 
510     if(ME_PRISTINE_QUALITY == e_quality_preset)
511     {
512         cluster_data_t *ps_cluster_primo;
513 
514         WORD32 i4_num_clusters;
515 
516         if(BLK_32x32 == e_blk_size)
517         {
518             ps_cluster_primo = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data;
519             i4_num_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters;
520         }
521         else
522         {
523             ps_cluster_primo = ps_cluster_info->ps_64x64_blk->as_cluster_data;
524             i4_num_clusters = ps_cluster_info->ps_64x64_blk->num_clusters;
525         }
526 
527         i4_num_cands_added = hme_add_clustered_mvs_as_merge_cands(
528             ps_cluster_primo,
529             ps_merge_cand,
530             pps_range_prms,
531             pu1_refid_to_pred_dir_list,
532             i4_num_clusters,
533             u1_pred_dir);
534     }
535 
536     i4_num_cands_added = hme_add_me_best_as_merge_cands(
537         pps_child_data_array,
538         ps_8x8cu_results,
539         ps_merge_cand,
540         pps_range_prms,
541         pu1_refid_to_pred_dir_list,
542         pi1_past_list,
543         pi1_future_list,
544         e_blk_size,
545         e_quality_preset,
546         i4_num_cands_added,
547         u1_pred_dir);
548 
549     return i4_num_cands_added;
550 }
551 
/**
********************************************************************************
*  @fn   WORD32 hme_pick_eval_merge_candts(hme_merge_prms_t *ps_merge_prms,
*                                          hme_subpel_prms_t *ps_subpel_prms,
*                                          S32 i4_search_idx,
*                                          S32 i4_best_part_type,
*                                          S32 i4_is_vert,
*                                          wgt_pred_ctxt_t *ps_wt_inp_prms,
*                                          S32 i4_frm_qstep,
*                                          ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
*                                          ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
*
*  @brief  Given a target partition orientation in the merged CU, and the
*          partition type of the most likely partition, this fxn picks up
*          candidates from the 4 constituent CUs and does a refinement search
*          to identify the best results for the merge CU across active partitions
*
*  @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of
*                  these params, the search result structure is also derived and
*                  updated during the search
*
*  @param[in] ps_subpel_prms : Supplies the SATD / SAD selection (i4_use_satd)
*              and the input stride used for the error computation
*
*  @param[in] i4_search_idx : ID of the buffer within the search results to update.
*               Will be 0 if all refidx collapsed to one buf, else it'll be 0/1
*
*  @param[in] i4_best_part_type : partition type of potential partition in the
*              merged CU, -1 if the merge process has not yet been able to
*              determine this.
*
*  @param[in] i4_is_vert : Whether target partition of merged CU is vertical
*             orientation or horizontal orientation.
*
*  @param[in] ps_cmn_utils_optimised_function_list : Function list handed on
*              to the error computation parameters
*
*  @return Number of merge candidates
********************************************************************************
*/
hme_pick_eval_merge_candts(hme_merge_prms_t * ps_merge_prms,hme_subpel_prms_t * ps_subpel_prms,S32 i4_search_idx,S32 i4_best_part_type,S32 i4_is_vert,wgt_pred_ctxt_t * ps_wt_inp_prms,S32 i4_frm_qstep,ihevce_cmn_opt_func_t * ps_cmn_utils_optimised_function_list,ihevce_me_optimised_function_list_t * ps_me_optimised_function_list)581 WORD32 hme_pick_eval_merge_candts(
582     hme_merge_prms_t *ps_merge_prms,
583     hme_subpel_prms_t *ps_subpel_prms,
584     S32 i4_search_idx,
585     S32 i4_best_part_type,
586     S32 i4_is_vert,
587     wgt_pred_ctxt_t *ps_wt_inp_prms,
588     S32 i4_frm_qstep,
589     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
590     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
591 {
592     S32 x_off, y_off;
593     search_node_t *ps_search_node;
594     S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
595     S32 i4_num_valid_parts;
596     pred_ctxt_t *ps_pred_ctxt;
597 
598     search_node_t as_merge_unique_node[MAX_MERGE_CANDTS];
599     S32 num_unique_nodes_cu_merge = 0;
600 
601     search_results_t *ps_search_results = ps_merge_prms->ps_results_merge;
602     CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
603     S32 i4_part_mask = ps_search_results->i4_part_mask;
604 
605     search_results_t *aps_child_results[4];
606     layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt;
607 
608     S32 i4_ref_stride, i, j;
609     result_upd_prms_t s_result_prms;
610 
611     BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
612     S32 i4_offset;
613 
614     /*************************************************************************/
615     /* Function pointer for SAD/SATD, array and prms structure to pass to    */
616     /* This function                                                         */
617     /*************************************************************************/
618     PF_SAD_FXN_T pf_err_compute;
619     S32 ai4_sad_grid[9][17];
620     err_prms_t s_err_prms;
621 
622     /*************************************************************************/
623     /* Allowed MV RANGE                                                      */
624     /*************************************************************************/
625     range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range;
626     PF_INTERP_FXN_T pf_qpel_interp;
627     PF_MV_COST_FXN pf_mv_cost_compute;
628     WORD32 pred_lx;
629     U08 *apu1_hpel_ref[4];
630 
631     interp_prms_t s_interp_prms;
632     S32 i4_interp_buf_id;
633 
634     S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
635     S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
636 
637     /* Sanity checks */
638     ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32));
639 
640     s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
641 
642     /* Initialize all the ptrs to child CUs for merge decision */
643     aps_child_results[0] = ps_merge_prms->ps_results_tl;
644     aps_child_results[1] = ps_merge_prms->ps_results_tr;
645     aps_child_results[2] = ps_merge_prms->ps_results_bl;
646     aps_child_results[3] = ps_merge_prms->ps_results_br;
647 
648     num_unique_nodes_cu_merge = 0;
649 
650     pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
651 
652     if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
653     {
654         num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval(
655             ps_merge_prms->ps_cluster_info,
656             aps_child_results,
657             ps_merge_prms->ps_8x8_cu_results,
658             pps_range_prms,
659             as_merge_unique_node,
660             ps_search_results->pu1_is_past,
661             ps_merge_prms->pi1_past_list,
662             ps_merge_prms->pi1_future_list,
663             ps_merge_prms->e_quality_preset,
664             e_blk_size,
665             i4_search_idx,
666             (ps_merge_prms->ps_results_merge->u1_x_off >> 5) +
667                 (ps_merge_prms->ps_results_merge->u1_y_off >> 4));
668     }
669     else
670     {
671         /*************************************************************************/
672         /* Populate the list of unique search nodes in the child CUs for merge   */
673         /* evaluation                                                            */
674         /*************************************************************************/
675         for(i = 0; i < 4; i++)
676         {
677             search_node_t s_search_node;
678 
679             PART_TYPE_T e_part_type;
680             PART_ID_T e_part_id;
681 
682             WORD32 part_num;
683 
684             search_results_t *ps_child = aps_child_results[i];
685 
686             if(ps_child->ps_cu_results->u1_num_best_results)
687             {
688                 if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
689                      (1 == ps_child->ps_cu_results->u1_num_best_results)))
690                 {
691                     e_part_type =
692                         (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type;
693 
694                     ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
695 
696                     /* Insert mvs of NxN partitions. */
697                     for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
698                         part_num++)
699                     {
700                         e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
701 
702                         if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1)
703                         {
704                             s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id];
705                             if(s_search_node.s_mv.i2_mvx != INTRA_MV)
706                             {
707                                 CLIP_MV_WITHIN_RANGE(
708                                     s_search_node.s_mv.i2_mvx,
709                                     s_search_node.s_mv.i2_mvy,
710                                     pps_range_prms[s_search_node.i1_ref_idx],
711                                     0,
712                                     0,
713                                     0);
714 
715                                 INSERT_NEW_NODE_NOMAP(
716                                     as_merge_unique_node,
717                                     num_unique_nodes_cu_merge,
718                                     s_search_node,
719                                     1);
720                             }
721                         }
722                     }
723                 }
724             }
725             else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)]
726                            .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
727                       (1 == ps_merge_prms->ps_results_grandchild[(i << 2)]
728                                 .ps_cu_results->u1_num_best_results)))
729             {
730                 search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)];
731 
732                 for(j = 0; j < 4; j++)
733                 {
734                     e_part_type = (PART_TYPE_T)ps_results_root[j]
735                                       .ps_cu_results->ps_best_results[0]
736                                       .u1_part_type;
737 
738                     ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
739 
740                     /* Insert mvs of NxN partitions. */
741                     for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
742                         part_num++)
743                     {
744                         e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
745 
746                         if((ps_results_root[j]
747                                 .aps_part_results[i4_search_idx][e_part_id]
748                                 ->i1_ref_idx != -1) &&
749                            (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu
750                                  .b1_intra_flag))
751                         {
752                             s_search_node =
753                                 *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id];
754                             if(s_search_node.s_mv.i2_mvx != INTRA_MV)
755                             {
756                                 CLIP_MV_WITHIN_RANGE(
757                                     s_search_node.s_mv.i2_mvx,
758                                     s_search_node.s_mv.i2_mvy,
759                                     pps_range_prms[s_search_node.i1_ref_idx],
760                                     0,
761                                     0,
762                                     0);
763 
764                                 INSERT_NEW_NODE_NOMAP(
765                                     as_merge_unique_node,
766                                     num_unique_nodes_cu_merge,
767                                     s_search_node,
768                                     1);
769                             }
770                         }
771                     }
772                 }
773             }
774         }
775     }
776 
777     if(0 == num_unique_nodes_cu_merge)
778     {
779         return 0;
780     }
781 
782     /*************************************************************************/
783     /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
784     /* fixed through this subpel refinement for this partition.              */
785     /* Note, we do not enable grid sads since one pt is evaluated per node   */
786     /* Hence, part mask is also nearly dont care and we use 2Nx2N enabled.   */
787     /*************************************************************************/
788     i4_part_mask = ps_search_results->i4_part_mask;
789 
790     /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */
791     if(ps_subpel_prms->i4_use_satd)
792     {
793         if(BLK_32x32 == e_blk_size)
794         {
795             pf_err_compute = hme_evalsatd_pt_pu_32x32;
796         }
797         else
798         {
799             pf_err_compute = hme_evalsatd_pt_pu_64x64;
800         }
801     }
802     else
803     {
804         pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM;
805     }
806 
807     i4_ref_stride = ps_curr_layer->i4_rec_stride;
808 
809     x_off = ps_merge_prms->ps_results_tl->u1_x_off;
810     y_off = ps_merge_prms->ps_results_tl->u1_y_off;
811     i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride);
812 
813     /*************************************************************************/
814     /* This array stores the ids of the partitions whose                     */
815     /* SADs are updated. Since the partitions whose SADs are updated may not */
816     /* be in contiguous order, we supply another level of indirection.       */
817     /*************************************************************************/
818     i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
819 
820     /* Initialize result params used for partition update */
821     s_result_prms.pf_mv_cost_compute = NULL;
822     s_result_prms.ps_search_results = ps_search_results;
823     s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
824     s_result_prms.i1_ref_idx = i4_search_idx;
825     s_result_prms.i4_part_mask = i4_part_mask;
826     s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
827     s_result_prms.i4_grid_mask = 1;
828 
829     /* One time Initialization of error params used for SAD/SATD compute */
830     s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride;
831     s_err_prms.i4_ref_stride = i4_ref_stride;
832     s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
833     s_err_prms.i4_grid_mask = 1;
834     s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
835     s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
836     s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
837     s_err_prms.i4_step = 1;
838 
839     /*************************************************************************/
840     /* One time preparation of non changing interpolation params.            */
841     /*************************************************************************/
842     s_interp_prms.i4_ref_stride = i4_ref_stride;
843     s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
844     s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
845     s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem;
846     s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size];
847     i4_interp_buf_id = 0;
848 
849     pf_qpel_interp = ps_subpel_prms->pf_qpel_interp;
850 
851     /***************************************************************************/
852     /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */
853     /* results                                                                 */
854     /***************************************************************************/
855     for(i = 0; i < num_unique_nodes_cu_merge; i++)
856     {
857         WORD8 i1_ref_idx;
858         ps_search_node = &as_merge_unique_node[i];
859 
860         /*********************************************************************/
861         /* Compute the base pointer for input, interpolated buffers          */
862         /* The base pointers point as follows:                               */
863         /* fx fy : 0, 0 :: fx, hy : 0, 0.5, hx, fy: 0.5, 0, hx, hy: 0.5, 0.5 */
864         /* To these, we need to add the offset of the current node           */
865         /*********************************************************************/
866         i1_ref_idx = ps_search_node->i1_ref_idx;
867         apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
868         apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
869         apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
870         apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;
871 
872         s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];
873 
874         pf_qpel_interp(
875             &s_interp_prms,
876             ps_search_node->s_mv.i2_mvx,
877             ps_search_node->s_mv.i2_mvy,
878             i4_interp_buf_id);
879 
880         pred_lx = i4_search_idx;
881         ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
882 
883         s_result_prms.u1_pred_lx = pred_lx;
884         s_result_prms.ps_search_node_base = ps_search_node;
885         s_err_prms.pu1_inp =
886             ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride;
887         s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
888         s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
889 
890         /* Carry out the SAD/SATD. This call also does the TU RECURSION.
891         Here the tu recursion logic is restricted with the size of the PU*/
892         pf_err_compute(&s_err_prms);
893 
894         if(ps_subpel_prms->u1_is_cu_noisy &&
895            ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
896         {
897             ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts(
898                 s_err_prms.pu1_ref,
899                 s_err_prms.i4_ref_stride,
900                 ai4_valid_part_ids,
901                 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX,
902                 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
903                 s_err_prms.pi4_sad_grid,
904                 ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier,
905                 ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx],
906                 ps_wt_inp_prms->ai4_shift_val[i1_ref_idx],
907                 i4_num_valid_parts,
908                 ps_wt_inp_prms->wpred_log_wdc,
909                 (BLK_32x32 == e_blk_size) ? 32 : 64);
910         }
911 
912         /* Update the mv's */
913         s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx;
914         s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy;
915 
916         /* Update best results */
917         hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms);
918     }
919 
920     /************************************************************************/
921     /* Update mv cost and total cost for each valid partition in the CU     */
922     /************************************************************************/
923     for(i = 0; i < TOT_NUM_PARTS; i++)
924     {
925         if(i4_part_mask & (1 << i))
926         {
927             WORD32 j;
928             WORD32 i4_mv_cost;
929 
930             ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i];
931 
932             for(j = 0;
933                 j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge);
934                 j++)
935             {
936                 if(ps_search_node->i1_ref_idx != -1)
937                 {
938                     pred_lx = i4_search_idx;
939                     ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
940 
941                     /* Prediction context should now deal with qpel units */
942                     HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);
943 
944                     ps_search_node->u1_subpel_done = 1;
945                     ps_search_node->u1_is_avail = 1;
946 
947                     i4_mv_cost =
948                         pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL);
949 
950                     ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad;
951                     ps_search_node->i4_mv_cost = i4_mv_cost;
952 
953                     ps_search_node++;
954                 }
955             }
956         }
957     }
958 
959     return num_unique_nodes_cu_merge;
960 }
961 
962 #define CU_MERGE_MAX_INTRA_PARTS 4
963 
964 /**
965 ********************************************************************************
966 *  @fn     hme_try_merge_high_speed
967 *
968 *  @brief  Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
969 entity or with partititons for high speed preset
970 *
971 *  @param[in,out]  hme_merge_prms_t: Params for CU merge
972 *
973 *  @return MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT)
974 ********************************************************************************
975 */
hme_try_merge_high_speed(me_ctxt_t * ps_thrd_ctxt,me_frm_ctxt_t * ps_ctxt,ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb,hme_subpel_prms_t * ps_subpel_prms,hme_merge_prms_t * ps_merge_prms,inter_pu_results_t * ps_pu_results,pu_result_t * ps_pu_result)976 CU_MERGE_RESULT_T hme_try_merge_high_speed(
977     me_ctxt_t *ps_thrd_ctxt,
978     me_frm_ctxt_t *ps_ctxt,
979     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
980     hme_subpel_prms_t *ps_subpel_prms,
981     hme_merge_prms_t *ps_merge_prms,
982     inter_pu_results_t *ps_pu_results,
983     pu_result_t *ps_pu_result)
984 {
985     search_results_t *ps_results_tl, *ps_results_tr;
986     search_results_t *ps_results_bl, *ps_results_br;
987 
988     S32 i;
989     S32 i4_search_idx;
990     S32 i4_cost_parent;
991     S32 intra_cu_size;
992     ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
993 
994     search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
995     wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
996 
997     S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
998     S32 is_vert = 0, i4_best_part_type = -1;
999     S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
1000     S32 i4_cost_children = 0;
1001     S32 i4_frm_qstep = ps_ctxt->frm_qstep;
1002     S32 i4_num_merge_cands_evaluated = 0;
1003     U08 u1_x_off = ps_results_merge->u1_x_off;
1004     U08 u1_y_off = ps_results_merge->u1_y_off;
1005     S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);
1006 
1007     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
1008         ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
1009     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
1010         ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
1011     ps_results_tl = ps_merge_prms->ps_results_tl;
1012     ps_results_tr = ps_merge_prms->ps_results_tr;
1013     ps_results_bl = ps_merge_prms->ps_results_bl;
1014     ps_results_br = ps_merge_prms->ps_results_br;
1015 
1016     if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
1017     {
1018         i4_part_mask &= ~ENABLE_AMP;
1019     }
1020 
1021     if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
1022     {
1023         i4_part_mask &= ~ENABLE_AMP;
1024 
1025         i4_part_mask &= ~ENABLE_SMP;
1026     }
1027 
1028     ps_merge_prms->i4_num_pred_dir_actual = 0;
1029 
1030     /*************************************************************************/
1031     /* The logic for High speed CU merge goes as follows:                    */
1032     /*                                                                       */
1033     /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
1034     /*    exceed 7                                                           */
1035     /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
1036     /*    are identical                                                      */
1037     /* 3. Find the all unique mvs of best partitions of children CUs and     */
1038     /*    evaluate partial SATDs (all 17 partitions) for each unique mv. If  */
1039     /*    best parent cost is lower than sum of the best children costs      */
1040     /*    return CU_MERGE after seeding the best results else return CU_SPLIT*/
1041     /*                                                                       */
1042     /*************************************************************************/
1043 
1044     /* Count the number of best partitions in child CUs, early exit if > 7 */
1045     if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1046        (CU_32x32 == ps_results_merge->e_cu_size))
1047     {
1048         S32 num_parts_in_32x32 = 0;
1049         WORD32 i4_part_type;
1050 
1051         if(ps_results_tl->u1_split_flag)
1052         {
1053             num_parts_in_32x32 += 4;
1054 
1055 #define COST_INTERCHANGE 0
1056             i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
1057                                ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
1058                                ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
1059                                ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
1060         }
1061         else
1062         {
1063             i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
1064             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1065             i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1066         }
1067 
1068         if(ps_results_tr->u1_split_flag)
1069         {
1070             num_parts_in_32x32 += 4;
1071 
1072             i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
1073                                 ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
1074                                 ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
1075                                 ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
1076         }
1077         else
1078         {
1079             i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
1080             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1081             i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
1082         }
1083 
1084         if(ps_results_bl->u1_split_flag)
1085         {
1086             num_parts_in_32x32 += 4;
1087 
1088             i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
1089                                 ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
1090                                 ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
1091                                 ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
1092         }
1093         else
1094         {
1095             i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
1096             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1097             i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1098         }
1099 
1100         if(ps_results_br->u1_split_flag)
1101         {
1102             num_parts_in_32x32 += 4;
1103 
1104             i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
1105                                 ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
1106                                 ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
1107                                 ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
1108         }
1109         else
1110         {
1111             i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
1112             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1113             i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
1114         }
1115 
1116         if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
1117         {
1118             return CU_SPLIT;
1119         }
1120 
1121         if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
1122            (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
1123         {
1124             return CU_SPLIT;
1125         }
1126     }
1127 
1128     /* Accumulate intra percentage before merge for early CU_SPLIT decision     */
1129     /* Note : Each intra part represent a NxN unit of the children CUs          */
1130     /* This is essentially 1/16th of the CUsize under consideration for merge   */
1131     if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
1132     {
1133         if(CU_64x64 == ps_results_merge->e_cu_size)
1134         {
1135             i4_intra_parts =
1136                 (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
1137                     ? 16
1138                     : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
1139         }
1140         else
1141         {
1142             switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
1143             {
1144             case 0:
1145             {
1146                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
1147                                        ->u1_inter_eval_enable)
1148                                      ? 16
1149                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1150                                             ->ps_child_node_tl->u1_intra_eval_enable);
1151 
1152                 break;
1153             }
1154             case 1:
1155             {
1156                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
1157                                        ->u1_inter_eval_enable)
1158                                      ? 16
1159                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1160                                             ->ps_child_node_tr->u1_intra_eval_enable);
1161 
1162                 break;
1163             }
1164             case 2:
1165             {
1166                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
1167                                        ->u1_inter_eval_enable)
1168                                      ? 16
1169                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1170                                             ->ps_child_node_bl->u1_intra_eval_enable);
1171 
1172                 break;
1173             }
1174             case 3:
1175             {
1176                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
1177                                        ->u1_inter_eval_enable)
1178                                      ? 16
1179                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1180                                             ->ps_child_node_br->u1_intra_eval_enable);
1181 
1182                 break;
1183             }
1184             }
1185         }
1186     }
1187     else
1188     {
1189         for(i = 0; i < 4; i++)
1190         {
1191             search_results_t *ps_results =
1192                 (i == 0) ? ps_results_tl
1193                          : ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));
1194 
1195             part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];
1196 
1197             if(ps_results->u1_split_flag)
1198             {
1199                 U08 u1_x_off = ps_results->u1_x_off;
1200                 U08 u1_y_off = ps_results->u1_y_off;
1201                 U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
1202                                       2;
1203 
1204                 /* Special case to handle 8x8 CUs when 16x16 is split */
1205                 ASSERT(ps_results->e_cu_size == CU_16x16);
1206 
1207                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];
1208 
1209                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1210                     i4_intra_parts += 1;
1211 
1212                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];
1213 
1214                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1215                     i4_intra_parts += 1;
1216 
1217                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];
1218 
1219                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1220                     i4_intra_parts += 1;
1221 
1222                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];
1223 
1224                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1225                     i4_intra_parts += 1;
1226             }
1227             else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
1228             {
1229                 i4_intra_parts += 4;
1230             }
1231         }
1232     }
1233 
1234     /* Determine the max intra CU size indicated by IPE */
1235     intra_cu_size = CU_64x64;
1236     if(ps_cur_ipe_ctb->u1_split_flag)
1237     {
1238         intra_cu_size = CU_32x32;
1239         if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
1240         {
1241             intra_cu_size = CU_16x16;
1242         }
1243     }
1244 
1245     if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
1246         (intra_cu_size < ps_results_merge->e_cu_size) &&
1247         (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
1248        (i4_intra_parts == 16))
1249     {
1250         S32 i4_merge_outcome;
1251 
1252         i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
1253                                ? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
1254                                   ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
1255                                : (!ps_cur_ipe_ctb->u1_split_flag);
1256 
1257         i4_merge_outcome = i4_merge_outcome ||
1258                            (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);
1259 
1260         i4_merge_outcome = i4_merge_outcome &&
1261                            !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);
1262 
1263         if(i4_merge_outcome)
1264         {
1265             inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1266             part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
1267             pu_t *ps_pu = &ps_best_result->as_pu_results->pu;
1268 
1269             ps_cu_results->u1_num_best_results = 1;
1270             ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
1271             ps_cu_results->u1_x_off = u1_x_off;
1272             ps_cu_results->u1_y_off = u1_y_off;
1273 
1274             ps_best_result->u1_part_type = PRT_2Nx2N;
1275             ps_best_result->ai4_tu_split_flag[0] = 0;
1276             ps_best_result->ai4_tu_split_flag[1] = 0;
1277             ps_best_result->ai4_tu_split_flag[2] = 0;
1278             ps_best_result->ai4_tu_split_flag[3] = 0;
1279             ps_best_result->i4_tot_cost =
1280                 (CU_64x64 == ps_results_merge->e_cu_size)
1281                     ? ps_cur_ipe_ctb->i4_best64x64_intra_cost
1282                     : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];
1283 
1284             ps_pu->b1_intra_flag = 1;
1285             ps_pu->b4_pos_x = u1_x_off >> 2;
1286             ps_pu->b4_pos_y = u1_y_off >> 2;
1287             ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
1288             ps_pu->b4_ht = ps_pu->b4_wd;
1289             ps_pu->mv.i1_l0_ref_idx = -1;
1290             ps_pu->mv.i1_l1_ref_idx = -1;
1291             ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
1292             ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
1293             ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
1294             ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;
1295 
1296             return CU_MERGED;
1297         }
1298         else
1299         {
1300             return CU_SPLIT;
1301         }
1302     }
1303 
1304     if(i4_intra_parts)
1305     {
1306         i4_part_mask = ENABLE_2Nx2N;
1307     }
1308 
1309     ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1;
1310 
1311     hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);
1312 
1313     ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref;
1314     ps_merge_prms->i4_num_pred_dir_actual = 0;
1315 
1316     if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
1317     {
1318         S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
1319         S32 i4_num_valid_parts;
1320         S32 i4_sigma_array_offset;
1321 
1322         i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
1323 
1324         /*********************************************************************************************************************************************/
1325         /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values  */
1326         /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
1327         /* increment as there will be 256 4x4 blocks in a CTB                                                                                        */
1328         /*********************************************************************************************************************************************/
1329         i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
1330                                 (ps_merge_prms->ps_results_merge->u1_y_off * 4);
1331 
1332         for(i = 0; i < i4_num_valid_parts; i++)
1333         {
1334             S32 i4_part_id = ai4_valid_part_ids[i];
1335 
1336             hme_compute_final_sigma_of_pu_from_base_blocks(
1337                 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
1338                 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
1339                 au8_final_src_sigmaX,
1340                 au8_final_src_sigmaXSquared,
1341                 (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
1342                 4,
1343                 i4_part_id,
1344                 16);
1345         }
1346 
1347         ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
1348         ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
1349     }
1350 
1351     /*************************************************************************/
1352     /* Loop through all ref idx and pick the merge candts and refine based   */
1353     /* on the active partitions. At this stage num ref will be 1 or 2        */
1354     /*************************************************************************/
1355     for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
1356     {
1357         S32 i4_cands;
1358         U08 u1_pred_dir = 0;
1359 
1360         if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
1361         {
1362             u1_pred_dir = i4_search_idx;
1363         }
1364         else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
1365         {
1366             u1_pred_dir = 1;
1367         }
1368         else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
1369         {
1370             u1_pred_dir = 0;
1371         }
1372         else
1373         {
1374             ASSERT(0);
1375         }
1376 
1377         /* call the function to pick and evaluate the merge candts, given */
1378         /* a ref id and a part mask.                                      */
1379         i4_cands = hme_pick_eval_merge_candts(
1380             ps_merge_prms,
1381             ps_subpel_prms,
1382             u1_pred_dir,
1383             i4_best_part_type,
1384             is_vert,
1385             ps_wt_inp_prms,
1386             i4_frm_qstep,
1387             ps_cmn_utils_optimised_function_list,
1388             ps_me_optimised_function_list);
1389 
1390         if(i4_cands)
1391         {
1392             ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
1393                 u1_pred_dir;
1394             ps_merge_prms->i4_num_pred_dir_actual++;
1395         }
1396 
1397         i4_num_merge_cands_evaluated += i4_cands;
1398     }
1399 
1400     /* Call the decide_part_types function here */
1401     /* Populate the new PU struct with the results post subpel refinement*/
1402     if(i4_num_merge_cands_evaluated)
1403     {
1404         inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1405 
1406         hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
1407 
1408         ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
1409         ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
1410 
1411         hme_populate_pus(
1412             ps_thrd_ctxt,
1413             ps_ctxt,
1414             ps_subpel_prms,
1415             ps_results_merge,
1416             ps_cu_results,
1417             ps_pu_results,
1418             ps_pu_result,
1419             ps_merge_prms->ps_inter_ctb_prms,
1420             &ps_ctxt->s_wt_pred,
1421             ps_merge_prms->ps_layer_ctxt,
1422             ps_merge_prms->au1_pred_dir_searched,
1423             ps_merge_prms->i4_num_pred_dir_actual);
1424 
1425         ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
1426 
1427         hme_decide_part_types(
1428             ps_cu_results,
1429             ps_pu_results,
1430             ps_merge_prms->ps_inter_ctb_prms,
1431             ps_ctxt,
1432             ps_cmn_utils_optimised_function_list,
1433             ps_me_optimised_function_list
1434 
1435         );
1436 
1437         /*****************************************************************/
1438         /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL.                    */
1439         /*****************************************************************/
1440 #if DISABLE_INTRA_IN_BPICS
1441         if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
1442                  (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
1443 #endif
1444         {
1445             if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
1446             {
1447                 hme_insert_intra_nodes_post_bipred(
1448                     ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
1449             }
1450         }
1451     }
1452     else
1453     {
1454         return CU_SPLIT;
1455     }
1456 
1457     /* We check the best result of ref idx 0 and compare for parent vs child */
1458     if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1459        (CU_32x32 == ps_results_merge->e_cu_size))
1460     {
1461         i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
1462         /*********************************************************************/
1463         /* Add the cost of signaling the CU tree bits.                       */
1464         /* Assuming parent is not split, then we signal 1 bit for this parent*/
1465         /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
1466         /* So, 4*lambda is extra for children cost. :Lokesh                  */
1467         /*********************************************************************/
1468         {
1469             pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];
1470 
1471             i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
1472         }
1473 
1474         if(i4_cost_parent < i4_cost_children)
1475         {
1476             return CU_MERGED;
1477         }
1478 
1479         return CU_SPLIT;
1480     }
1481     else
1482     {
1483         return CU_MERGED;
1484     }
1485 }
1486 
/**
 * Copies one search result into an mv bank slot: the mv components of
 * ps_search_node, each right shifted by shift, are written to *ps_mv and the
 * reference index of the node is written to *pi1_ref_idx.
 *
 * Every argument is expanded more than once, so arguments must be free of
 * side effects. The body is wrapped in do { } while(0) so that an invocation
 * together with its trailing semicolon forms exactly one statement; this keeps
 * the macro usable as the body of an unbraced if / else.
 */
#define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift)                              \
    do                                                                                             \
    {                                                                                              \
        (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift);                               \
        (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift);                               \
        *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx;                                             \
    } while(0)
1493 
1494 /**
1495 ********************************************************************************
1496 *  @fn     hme_update_mv_bank_noencode(search_results_t *ps_search_results,
1497 *                               layer_mv_t *ps_layer_mv,
1498 *                               S32 i4_search_blk_x,
1499 *                               S32 i4_search_blk_y,
1500 *                               mvbank_update_prms_t *ps_prms)
1501 *
1502 *  @brief  Updates the mv bank in case there is no further encodign to be done
1503 *
1504 *  @param[in]  ps_search_results: contains results for the block just searched
1505 *
1506 *  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
1507 *
1508 *  @param[in] i4_search_blk_x  : col num of blk being searched
1509 *
1510 *  @param[in] i4_search_blk_y : row num of blk being searched
1511 *
1512 *  @param[in] ps_prms : contains certain parameters which govern how updatedone
1513 *
1514 *  @return None
1515 ********************************************************************************
1516 */
1517 
hme_update_mv_bank_noencode(search_results_t * ps_search_results,layer_mv_t * ps_layer_mv,S32 i4_search_blk_x,S32 i4_search_blk_y,mvbank_update_prms_t * ps_prms)1518 void hme_update_mv_bank_noencode(
1519     search_results_t *ps_search_results,
1520     layer_mv_t *ps_layer_mv,
1521     S32 i4_search_blk_x,
1522     S32 i4_search_blk_y,
1523     mvbank_update_prms_t *ps_prms)
1524 {
1525     hme_mv_t *ps_mv;
1526     hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1527     S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1528     S32 i4_blk_x, i4_blk_y, i4_offset;
1529     S32 i4_j, i4_ref_id;
1530     search_node_t *ps_search_node;
1531     search_node_t *ps_search_node_8x8, *ps_search_node_4x4_1;
1532     search_node_t *ps_search_node_4x4_2, *ps_search_node_4x4_3;
1533     search_node_t *ps_search_node_4x4_4;
1534 
1535     i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1536     i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1537     i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1538 
1539     i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1540 
1541     /* Identify the correct offset in the mvbank and the reference id buf */
1542     ps_mv = ps_layer_mv->ps_mv + i4_offset;
1543     pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1544 
1545     /*************************************************************************/
1546     /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
1547     /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
1548     /* do a straightforward single update of results. This will have a 1-1   */
1549     /* correspondence.                                                       */
1550     /*************************************************************************/
1551     if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1552     {
1553         for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1554         {
1555             ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1556             for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1557             {
1558                 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, 0);
1559                 ps_mv++;
1560                 pi1_ref_idx++;
1561                 ps_search_node++;
1562             }
1563         }
1564         return;
1565     }
1566 
1567     /*************************************************************************/
1568     /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1569     /* case, we need to have NxN partitions enabled in search.               */
1570     /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1571     /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1572     /*************************************************************************/
1573     ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1574     ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1575     ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1576 
1577     /*************************************************************************/
1578     /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
1579     /* hence the below check.                                                */
1580     /*************************************************************************/
1581     ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
1582 
1583     ps_mv1 = ps_mv;
1584     ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1585     ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1586     ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1587     pi1_ref_idx1 = pi1_ref_idx;
1588     pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1589     pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1590     pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1591 
1592     for(i4_ref_id = 0; i4_ref_id < (S32)ps_search_results->u1_num_active_ref; i4_ref_id++)
1593     {
1594         ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1595 
1596         ps_search_node_4x4_1 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL];
1597 
1598         ps_search_node_4x4_2 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TR];
1599 
1600         ps_search_node_4x4_3 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BL];
1601 
1602         ps_search_node_4x4_4 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BR];
1603 
1604         COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1605         ps_mv1++;
1606         pi1_ref_idx1++;
1607         ps_search_node_4x4_1++;
1608         COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1609         ps_mv2++;
1610         pi1_ref_idx2++;
1611         ps_search_node_4x4_2++;
1612         COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1613         ps_mv3++;
1614         pi1_ref_idx3++;
1615         ps_search_node_4x4_3++;
1616         COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1617         ps_mv4++;
1618         pi1_ref_idx4++;
1619         ps_search_node_4x4_4++;
1620 
1621         if(ps_layer_mv->i4_num_mvs_per_ref > 1)
1622         {
1623             COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_8x8, 0);
1624             ps_mv1++;
1625             pi1_ref_idx1++;
1626             COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_8x8, 0);
1627             ps_mv2++;
1628             pi1_ref_idx2++;
1629             COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_8x8, 0);
1630             ps_mv3++;
1631             pi1_ref_idx3++;
1632             COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_8x8, 0);
1633             ps_mv4++;
1634             pi1_ref_idx4++;
1635         }
1636 
1637         for(i4_j = 2; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1638         {
1639             COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1640             ps_mv1++;
1641             pi1_ref_idx1++;
1642             ps_search_node_4x4_1++;
1643             COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1644             ps_mv2++;
1645             pi1_ref_idx2++;
1646             ps_search_node_4x4_2++;
1647             COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1648             ps_mv3++;
1649             pi1_ref_idx3++;
1650             ps_search_node_4x4_3++;
1651             COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1652             ps_mv4++;
1653             pi1_ref_idx4++;
1654             ps_search_node_4x4_4++;
1655         }
1656     }
1657 }
1658 
/**
********************************************************************************
*  @fn     hme_update_mv_bank_encode(search_results_t *ps_search_results,
*                               layer_mv_t *ps_layer_mv,
*                               S32 i4_search_blk_x,
*                               S32 i4_search_blk_y,
*                               mvbank_update_prms_t *ps_prms,
*                               U08 *pu1_pred_dir_searched,
*                               S32 i4_num_act_ref_l0)
*
*  @brief  Updates the four 8x8 mv bank entries covered by a searched 16x16
*          blk, using the partitions of the best partition type of the blk
*
*  @param[in]  ps_search_results : contains results for the block just
*              searched; the part type of its best cu result selects the
*              partitions whose results are stored
*
*  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
*
*  @param[in] i4_search_blk_x : col num of blk being searched
*
*  @param[in] i4_search_blk_y : row num of blk being searched
*
*  @param[in] ps_prms : contains certain parameters which govern how the
*                       update is done
*
*  @param[in] pu1_pred_dir_searched : entry i is used as the first index into
*              aps_part_results for the i-th reference processed
*
*  @param[in] i4_num_act_ref_l0 : not referenced by this function
*
*  @return None
********************************************************************************
*/
void hme_update_mv_bank_encode(
    search_results_t *ps_search_results,
    layer_mv_t *ps_layer_mv,
    S32 i4_search_blk_x,
    S32 i4_search_blk_y,
    mvbank_update_prms_t *ps_prms,
    U08 *pu1_pred_dir_searched,
    S32 i4_num_act_ref_l0)
{
    /* Index 0..3 of the arrays below: top left, top right, bottom left, */
    /* bottom right 8x8 entry of the mv bank                             */
    hme_mv_t *aps_dst_mv[4];
    S08 *api1_dst_ref_idx[4];
    search_node_t *aps_node[4];
    S32 ai4_part_id_delta[4];
    S32 i4_bank_offset;
    S32 i4_ref, i4_mv, i4_res, i4_blk;
    S32 i4_slot = 0;
    WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;

    /* Position of the searched blk, expressed in units of mv bank entries */
    i4_bank_offset = (i4_search_blk_x << ps_prms->i4_shift) +
                     (i4_search_blk_y << ps_prms->i4_shift) * ps_layer_mv->i4_num_blks_per_row;
    i4_bank_offset *= ps_layer_mv->i4_num_mvs_per_blk;

    ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
    ASSERT(ps_prms->e_search_blk_size == BLK_16x16);

    /* The bank may not ask for more mvs per reference than best results kept */
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);

    /* Entries 0/1 are neighbours in one bank row, 2/3 lie one row below */
    aps_dst_mv[0] = ps_layer_mv->ps_mv + i4_bank_offset;
    aps_dst_mv[1] = aps_dst_mv[0] + ps_layer_mv->i4_num_mvs_per_blk;
    aps_dst_mv[2] = aps_dst_mv[0] + (ps_layer_mv->i4_num_mvs_per_row);
    aps_dst_mv[3] = aps_dst_mv[2] + (ps_layer_mv->i4_num_mvs_per_blk);

    api1_dst_ref_idx[0] = ps_layer_mv->pi1_ref_idx + i4_bank_offset;
    api1_dst_ref_idx[1] = api1_dst_ref_idx[0] + ps_layer_mv->i4_num_mvs_per_blk;
    api1_dst_ref_idx[2] = api1_dst_ref_idx[0] + (ps_layer_mv->i4_num_mvs_per_row);
    api1_dst_ref_idx[3] = api1_dst_ref_idx[2] + (ps_layer_mv->i4_num_mvs_per_blk);

    /* A split 16x16 cu with NxN searched is stored as NxN, whatever won */
    if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
       (ps_search_results->i4_part_mask & ENABLE_NxN))
    {
        i4_part_type = PRT_NxN;
    }

    for(i4_ref = 0; i4_ref < ps_prms->i4_num_ref; i4_ref++)
    {
        for(i4_mv = 0; i4_mv < ps_layer_mv->i4_num_mvs_per_ref; i4_mv++)
        {
            WORD32 i4_first_part_id = ge_part_type_to_part_id[i4_part_type][0];
            S32 i4_num_parts = gau1_num_parts_in_part_type[i4_part_type];
            S32 i4_dir = pu1_pred_dir_searched[i4_ref];

            /*****************************************************************/
            /* Work out, for each 8x8 entry, which partition of the part     */
            /* type supplies its result (as a delta from the first part id). */
            /*****************************************************************/
            ai4_part_id_delta[0] = 0;

            if(1 == i4_num_parts)
            {
                /* Single partition: all four entries share its result */
                ai4_part_id_delta[1] = 0;
                ai4_part_id_delta[2] = 0;
                ai4_part_id_delta[3] = 0;
            }
            else if(2 == i4_num_parts)
            {
                /* For vertically oriented partitions, tl, bl pt to same result */
                /* For horizontally oriented partition, tl, tr pt to same result */
                /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
                /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
                /* and right 2 8x8 have 12x16R partition */
                if(gau1_is_vert_part[i4_part_type])
                {
                    ai4_part_id_delta[1] = 1;
                    ai4_part_id_delta[2] = 0;
                }
                else
                {
                    ai4_part_id_delta[1] = 0;
                    ai4_part_id_delta[2] = 1;
                }
                ai4_part_id_delta[3] = 1;
            }
            else
            {
                /* 4 unique results */
                ai4_part_id_delta[1] = 1;
                ai4_part_id_delta[2] = 2;
                ai4_part_id_delta[3] = 3;
            }

            /* Result 0 is always stored; result 1 only if more than one */
            /* result is to be stored                                    */
            for(i4_res = 0; i4_res < 2; i4_res++)
            {
                if((i4_res > 0) && (ps_prms->i4_num_results_to_store <= 1))
                {
                    break;
                }

                for(i4_blk = 0; i4_blk < 4; i4_blk++)
                {
                    aps_node[i4_blk] =
                        &ps_search_results->aps_part_results[i4_dir][i4_first_part_id +
                                                                     ai4_part_id_delta[i4_blk]]
                                                            [i4_res];

                    /* A node flagged as intra is skipped in favour of the next one */
                    if(aps_node[i4_blk]->s_mv.i2_mvx == INTRA_MV)
                    {
                        aps_node[i4_blk]++;
                    }
                }

                for(i4_blk = 0; i4_blk < 4; i4_blk++)
                {
                    COPY_SEARCH_RESULT(
                        &aps_dst_mv[i4_blk][i4_slot],
                        &api1_dst_ref_idx[i4_blk][i4_slot],
                        aps_node[i4_blk],
                        0);
                }

                i4_slot++;
            }
        }
    }
}
1866 
1867 /**
1868 ********************************************************************************
1869 *  @fn     hme_update_mv_bank_noencode(search_results_t *ps_search_results,
1870 *                               layer_mv_t *ps_layer_mv,
1871 *                               S32 i4_search_blk_x,
1872 *                               S32 i4_search_blk_y,
1873 *                               mvbank_update_prms_t *ps_prms)
1874 *
1875 *  @brief  Updates the mv bank in case there is no further encodign to be done
1876 *
1877 *  @param[in]  ps_search_results: contains results for the block just searched
1878 *
1879 *  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
1880 *
1881 *  @param[in] i4_search_blk_x  : col num of blk being searched
1882 *
1883 *  @param[in] i4_search_blk_y : row num of blk being searched
1884 *
1885 *  @param[in] ps_prms : contains certain parameters which govern how updatedone
1886 *
1887 *  @return None
1888 ********************************************************************************
1889 */
1890 
hme_update_mv_bank_in_l1_me(search_results_t * ps_search_results,layer_mv_t * ps_layer_mv,S32 i4_search_blk_x,S32 i4_search_blk_y,mvbank_update_prms_t * ps_prms)1891 void hme_update_mv_bank_in_l1_me(
1892     search_results_t *ps_search_results,
1893     layer_mv_t *ps_layer_mv,
1894     S32 i4_search_blk_x,
1895     S32 i4_search_blk_y,
1896     mvbank_update_prms_t *ps_prms)
1897 {
1898     hme_mv_t *ps_mv;
1899     hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1900     S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1901     S32 i4_blk_x, i4_blk_y, i4_offset;
1902     S32 i4_j, i4_ref_id;
1903     search_node_t *ps_search_node;
1904     search_node_t *ps_search_node_8x8, *ps_search_node_4x4;
1905 
1906     i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1907     i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1908     i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1909 
1910     i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1911 
1912     /* Identify the correct offset in the mvbank and the reference id buf */
1913     ps_mv = ps_layer_mv->ps_mv + i4_offset;
1914     pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1915 
1916     /*************************************************************************/
1917     /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
1918     /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
1919     /* do a straightforward single update of results. This will have a 1-1   */
1920     /* correspondence.                                                       */
1921     /*************************************************************************/
1922     if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1923     {
1924         search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2];
1925 
1926         hme_mv_t *ps_mv_l0_root = ps_mv;
1927         hme_mv_t *ps_mv_l1_root =
1928             ps_mv + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1929 
1930         U32 u4_num_l0_results_updated = 0;
1931         U32 u4_num_l1_results_updated = 0;
1932 
1933         S08 *pi1_ref_idx_l0_root = pi1_ref_idx;
1934         S08 *pi1_ref_idx_l1_root =
1935             pi1_ref_idx_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1936 
1937         for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1938         {
1939             U32 *pu4_num_results_updated;
1940             search_node_t **pps_result_nodes;
1941 
1942             U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
1943 
1944             if(u1_pred_dir_of_cur_ref)
1945             {
1946                 pu4_num_results_updated = &u4_num_l1_results_updated;
1947                 pps_result_nodes = &aps_result_nodes_sorted[1][0];
1948             }
1949             else
1950             {
1951                 pu4_num_results_updated = &u4_num_l0_results_updated;
1952                 pps_result_nodes = &aps_result_nodes_sorted[0][0];
1953             }
1954 
1955             ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1956 
1957             for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1958             {
1959                 hme_add_new_node_to_a_sorted_array(
1960                     &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0);
1961 
1962                 ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id);
1963                 (*pu4_num_results_updated)++;
1964             }
1965         }
1966 
1967         for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
1968         {
1969             COPY_SEARCH_RESULT(
1970                 &ps_mv_l0_root[i4_j],
1971                 &pi1_ref_idx_l0_root[i4_j],
1972                 aps_result_nodes_sorted[0][i4_j],
1973                 0);
1974         }
1975 
1976         for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
1977         {
1978             COPY_SEARCH_RESULT(
1979                 &ps_mv_l1_root[i4_j],
1980                 &pi1_ref_idx_l1_root[i4_j],
1981                 aps_result_nodes_sorted[1][i4_j],
1982                 0);
1983         }
1984 
1985         return;
1986     }
1987 
1988     /*************************************************************************/
1989     /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1990     /* case, we need to have NxN partitions enabled in search.               */
1991     /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1992     /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1993     /*************************************************************************/
1994     ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1995     ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1996     ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1997 
1998     /*************************************************************************/
1999     /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
2000     /* hence the below check.                                                */
2001     /*************************************************************************/
2002     ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
2003 
2004     ps_mv1 = ps_mv;
2005     ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
2006     ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
2007     ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
2008     pi1_ref_idx1 = pi1_ref_idx;
2009     pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
2010     pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
2011     pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
2012 
2013     {
2014         search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 4];
2015         U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * 4];
2016 
2017         S32 i;
2018 
2019         hme_mv_t *ps_mv1_l0_root = ps_mv1;
2020         hme_mv_t *ps_mv1_l1_root =
2021             ps_mv1 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2022         hme_mv_t *ps_mv2_l0_root = ps_mv2;
2023         hme_mv_t *ps_mv2_l1_root =
2024             ps_mv2 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2025         hme_mv_t *ps_mv3_l0_root = ps_mv3;
2026         hme_mv_t *ps_mv3_l1_root =
2027             ps_mv3 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2028         hme_mv_t *ps_mv4_l0_root = ps_mv4;
2029         hme_mv_t *ps_mv4_l1_root =
2030             ps_mv4 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2031 
2032         U32 u4_num_l0_results_updated = 0;
2033         U32 u4_num_l1_results_updated = 0;
2034 
2035         S08 *pi1_ref_idx1_l0_root = pi1_ref_idx1;
2036         S08 *pi1_ref_idx1_l1_root = pi1_ref_idx1_l0_root + (ps_prms->i4_num_active_ref_l0 *
2037                                                             ps_layer_mv->i4_num_mvs_per_ref);
2038         S08 *pi1_ref_idx2_l0_root = pi1_ref_idx2;
2039         S08 *pi1_ref_idx2_l1_root = pi1_ref_idx2_l0_root + (ps_prms->i4_num_active_ref_l0 *
2040                                                             ps_layer_mv->i4_num_mvs_per_ref);
2041         S08 *pi1_ref_idx3_l0_root = pi1_ref_idx3;
2042         S08 *pi1_ref_idx3_l1_root = pi1_ref_idx3_l0_root + (ps_prms->i4_num_active_ref_l0 *
2043                                                             ps_layer_mv->i4_num_mvs_per_ref);
2044         S08 *pi1_ref_idx4_l0_root = pi1_ref_idx4;
2045         S08 *pi1_ref_idx4_l1_root = pi1_ref_idx4_l0_root + (ps_prms->i4_num_active_ref_l0 *
2046                                                             ps_layer_mv->i4_num_mvs_per_ref);
2047 
2048         for(i = 0; i < 4; i++)
2049         {
2050             hme_mv_t *ps_mv_l0_root;
2051             hme_mv_t *ps_mv_l1_root;
2052 
2053             S08 *pi1_ref_idx_l0_root;
2054             S08 *pi1_ref_idx_l1_root;
2055 
2056             for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++)
2057             {
2058                 U32 *pu4_num_results_updated;
2059                 search_node_t **pps_result_nodes;
2060                 U08 *pu1_cost_shifts_for_sorted_node;
2061 
2062                 U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
2063 
2064                 if(u1_pred_dir_of_cur_ref)
2065                 {
2066                     pu4_num_results_updated = &u4_num_l1_results_updated;
2067                     pps_result_nodes = &aps_result_nodes_sorted[1][0];
2068                     pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2069                 }
2070                 else
2071                 {
2072                     pu4_num_results_updated = &u4_num_l0_results_updated;
2073                     pps_result_nodes = &aps_result_nodes_sorted[0][0];
2074                     pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2075                 }
2076 
2077                 ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
2078 
2079                 ps_search_node_4x4 =
2080                     ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i];
2081 
2082                 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
2083                 {
2084                     hme_add_new_node_to_a_sorted_array(
2085                         &ps_search_node_4x4[i4_j],
2086                         pps_result_nodes,
2087                         pu1_cost_shifts_for_sorted_node,
2088                         *pu4_num_results_updated,
2089                         0);
2090 
2091                     (*pu4_num_results_updated)++;
2092 
2093                     hme_add_new_node_to_a_sorted_array(
2094                         &ps_search_node_8x8[i4_j],
2095                         pps_result_nodes,
2096                         pu1_cost_shifts_for_sorted_node,
2097                         *pu4_num_results_updated,
2098                         2);
2099 
2100                     (*pu4_num_results_updated)++;
2101                 }
2102             }
2103 
2104             switch(i)
2105             {
2106             case 0:
2107             {
2108                 ps_mv_l0_root = ps_mv1_l0_root;
2109                 ps_mv_l1_root = ps_mv1_l1_root;
2110 
2111                 pi1_ref_idx_l0_root = pi1_ref_idx1_l0_root;
2112                 pi1_ref_idx_l1_root = pi1_ref_idx1_l1_root;
2113 
2114                 break;
2115             }
2116             case 1:
2117             {
2118                 ps_mv_l0_root = ps_mv2_l0_root;
2119                 ps_mv_l1_root = ps_mv2_l1_root;
2120 
2121                 pi1_ref_idx_l0_root = pi1_ref_idx2_l0_root;
2122                 pi1_ref_idx_l1_root = pi1_ref_idx2_l1_root;
2123 
2124                 break;
2125             }
2126             case 2:
2127             {
2128                 ps_mv_l0_root = ps_mv3_l0_root;
2129                 ps_mv_l1_root = ps_mv3_l1_root;
2130 
2131                 pi1_ref_idx_l0_root = pi1_ref_idx3_l0_root;
2132                 pi1_ref_idx_l1_root = pi1_ref_idx3_l1_root;
2133 
2134                 break;
2135             }
2136             case 3:
2137             {
2138                 ps_mv_l0_root = ps_mv4_l0_root;
2139                 ps_mv_l1_root = ps_mv4_l1_root;
2140 
2141                 pi1_ref_idx_l0_root = pi1_ref_idx4_l0_root;
2142                 pi1_ref_idx_l1_root = pi1_ref_idx4_l1_root;
2143 
2144                 break;
2145             }
2146             }
2147 
2148             u4_num_l0_results_updated =
2149                 MIN((S32)u4_num_l0_results_updated,
2150                     ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2151 
2152             u4_num_l1_results_updated =
2153                 MIN((S32)u4_num_l1_results_updated,
2154                     ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref);
2155 
2156             for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
2157             {
2158                 COPY_SEARCH_RESULT(
2159                     &ps_mv_l0_root[i4_j],
2160                     &pi1_ref_idx_l0_root[i4_j],
2161                     aps_result_nodes_sorted[0][i4_j],
2162                     0);
2163             }
2164 
2165             for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
2166             {
2167                 COPY_SEARCH_RESULT(
2168                     &ps_mv_l1_root[i4_j],
2169                     &pi1_ref_idx_l1_root[i4_j],
2170                     aps_result_nodes_sorted[1][i4_j],
2171                     0);
2172             }
2173         }
2174     }
2175 }
2176 
/**
******************************************************************************
*  @brief Scales a motion vector component projected from a different layer
*         of the same picture (so no ref id related delta poc scaling is
*         required). The component is multiplied by dim_c, the term
*         (SIGN(component) * dim_p) >> 1 is added, and the sum is divided
*         by dim_p.
*
*  @note  mvcomp_p is expanded twice; pass an expression without side effects
******************************************************************************
*/

#define SCALE_MV_COMP_RES(mvcomp_p, dim_c, dim_p)                                                  \
    (((((SIGN((mvcomp_p))) * (dim_p)) >> 1) + ((mvcomp_p) * (dim_c))) / (dim_p))
2186 /**
2187 ********************************************************************************
2188 *  @fn     hme_project_coloc_candt(search_node_t *ps_search_node,
2189 *                                   layer_ctxt_t *ps_curr_layer,
2190 *                                   layer_ctxt_t *ps_coarse_layer,
2191 *                                   S32 i4_pos_x,
2192 *                                   S32 i4_pos_y,
2193 *                                   S08 i1_ref_id,
2194 *                                   S08 i1_result_id)
2195 *
2196 *  @brief  From a coarser layer, projects a candidated situated at "colocated"
2197 *          position in the picture (e.g. given x, y it will be x/2, y/2 dyadic
2198 *
2199 *  @param[out]  ps_search_node : contains the projected result
2200 *
2201 *  @param[in]   ps_curr_layer : current layer context
2202 *
2203 *  @param[in]   ps_coarse_layer  : coarser layer context
2204 *
2205 *  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
2206 *
2207 *  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
2208 *
2209 *  @param[in]   i1_ref_id : reference id for which the candidate required
2210 *
2211 *  @param[in]   i4_result_id : result id for which the candidate required
2212 *                              (0 : best result, 1 : next best)
2213 *
2214 *  @return None
2215 ********************************************************************************
2216 */
2217 
hme_project_coloc_candt(search_node_t * ps_search_node,layer_ctxt_t * ps_curr_layer,layer_ctxt_t * ps_coarse_layer,S32 i4_pos_x,S32 i4_pos_y,S08 i1_ref_id,S32 i4_result_id)2218 void hme_project_coloc_candt(
2219     search_node_t *ps_search_node,
2220     layer_ctxt_t *ps_curr_layer,
2221     layer_ctxt_t *ps_coarse_layer,
2222     S32 i4_pos_x,
2223     S32 i4_pos_y,
2224     S08 i1_ref_id,
2225     S32 i4_result_id)
2226 {
2227     S32 wd_c, ht_c, wd_p, ht_p;
2228     S32 blksize_p, blk_x, blk_y, i4_offset;
2229     layer_mv_t *ps_layer_mvbank;
2230     hme_mv_t *ps_mv;
2231     S08 *pi1_ref_idx;
2232 
2233     /* Width and ht of current and prev layers */
2234     wd_c = ps_curr_layer->i4_wd;
2235     ht_c = ps_curr_layer->i4_ht;
2236     wd_p = ps_coarse_layer->i4_wd;
2237     ht_p = ps_coarse_layer->i4_ht;
2238 
2239     ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2240     blksize_p = (S32)gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
2241 
2242     /* Safety check to avoid uninitialized access across temporal layers */
2243     i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2244     i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2245 
2246     /* Project the positions to prev layer */
2247     /* TODO: convert these to scale factors at pic level */
2248     blk_x = (i4_pos_x * wd_p) / (wd_c * blksize_p);
2249     blk_y = (i4_pos_y * ht_p) / (ht_c * blksize_p);
2250 
2251     /* Pick up the mvs from the location */
2252     i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2253     i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2254 
2255     ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2256     pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2257 
2258     ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2259     pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2260 
2261     ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_x, wd_c, wd_p);
2262     ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_y, ht_c, ht_p);
2263     ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2264     ps_search_node->u1_subpel_done = 0;
2265     if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2266     {
2267         ps_search_node->i1_ref_idx = i1_ref_id;
2268         ps_search_node->s_mv.i2_mvx = 0;
2269         ps_search_node->s_mv.i2_mvy = 0;
2270     }
2271 }
2272 
2273 /**
2274 ********************************************************************************
2275 *  @fn     hme_project_coloc_candt_dyadic(search_node_t *ps_search_node,
2276 *                                   layer_ctxt_t *ps_curr_layer,
2277 *                                   layer_ctxt_t *ps_coarse_layer,
2278 *                                   S32 i4_pos_x,
2279 *                                   S32 i4_pos_y,
2280 *                                   S08 i1_ref_id,
2281 *                                   S08 i1_result_id)
2282 *
2283 *  @brief  From a coarser layer, projects a candidated situated at "colocated"
2284 *          position in the picture when the ratios are dyadic
2285 *
2286 *  @param[out]  ps_search_node : contains the projected result
2287 *
2288 *  @param[in]   ps_curr_layer : current layer context
2289 *
2290 *  @param[in]   ps_coarse_layer  : coarser layer context
2291 *
2292 *  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
2293 *
2294 *  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
2295 *
2296 *  @param[in]   i1_ref_id : reference id for which the candidate required
2297 *
2298 *  @param[in]   i4_result_id : result id for which the candidate required
2299 *                              (0 : best result, 1 : next best)
2300 *
2301 *  @return None
2302 ********************************************************************************
2303 */
2304 
hme_project_coloc_candt_dyadic(search_node_t * ps_search_node,layer_ctxt_t * ps_curr_layer,layer_ctxt_t * ps_coarse_layer,S32 i4_pos_x,S32 i4_pos_y,S08 i1_ref_id,S32 i4_result_id)2305 void hme_project_coloc_candt_dyadic(
2306     search_node_t *ps_search_node,
2307     layer_ctxt_t *ps_curr_layer,
2308     layer_ctxt_t *ps_coarse_layer,
2309     S32 i4_pos_x,
2310     S32 i4_pos_y,
2311     S08 i1_ref_id,
2312     S32 i4_result_id)
2313 {
2314     S32 wd_c, ht_c, wd_p, ht_p;
2315     S32 blksize_p, blk_x, blk_y, i4_offset;
2316     layer_mv_t *ps_layer_mvbank;
2317     hme_mv_t *ps_mv;
2318     S08 *pi1_ref_idx;
2319 
2320     /* Width and ht of current and prev layers */
2321     wd_c = ps_curr_layer->i4_wd;
2322     ht_c = ps_curr_layer->i4_ht;
2323     wd_p = ps_coarse_layer->i4_wd;
2324     ht_p = ps_coarse_layer->i4_ht;
2325 
2326     ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2327     /* blksize_p = log2(wd) + 1 */
2328     blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2329 
2330     /* ASSERT for valid sizes */
2331     ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2332 
2333     /* Safety check to avoid uninitialized access across temporal layers */
2334     i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2335     i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2336 
2337     /* Project the positions to prev layer */
2338     /* TODO: convert these to scale factors at pic level */
2339     blk_x = i4_pos_x >> blksize_p;  // (2 * blksize_p);
2340     blk_y = i4_pos_y >> blksize_p;  // (2 * blksize_p);
2341 
2342     /* Pick up the mvs from the location */
2343     i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2344     i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2345 
2346     ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2347     pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2348 
2349     ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2350     pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2351 
2352     ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2353     ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2354     ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2355     if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2356     {
2357         ps_search_node->i1_ref_idx = i1_ref_id;
2358         ps_search_node->s_mv.i2_mvx = 0;
2359         ps_search_node->s_mv.i2_mvy = 0;
2360     }
2361 }
2362 
/**
********************************************************************************
*  @fn     hme_project_coloc_candt_dyadic_implicit(search_node_t *ps_search_node,
*                                   layer_ctxt_t *ps_curr_layer,
*                                   layer_ctxt_t *ps_coarse_layer,
*                                   S32 i4_pos_x,
*                                   S32 i4_pos_y,
*                                   S32 i4_num_act_ref_l0,
*                                   U08 u1_pred_dir,
*                                   U08 u1_default_ref_id,
*                                   S32 i4_result_id)
*
*  @brief  Dyadic colocated projection from the coarser layer where the
*          result is picked per prediction direction instead of per
*          reference id. The ref idx of the projected node is the one stored
*          in the coarse layer's bank.
*
*  @param[out]  ps_search_node : receives the projected mv and ref idx
*
*  @param[in]   ps_curr_layer : current layer context
*
*  @param[in]   ps_coarse_layer  : coarser layer context
*
*  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
*
*  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
*
*  @param[in]   i4_num_act_ref_l0 : number of references' worth of results
*                                   skipped in the bank when u1_pred_dir is 1
*
*  @param[in]   u1_pred_dir : when 1, results are read after the first
*                             i4_num_act_ref_l0 references; any other value
*                             reads from the start of the block's results
*
*  @param[in]   u1_default_ref_id : ref idx written to the node when the
*                                   stored entry is rejected
*
*  @param[in]   i4_result_id : result id for which the candidate is required
*
*  @return None
********************************************************************************
*/

void hme_project_coloc_candt_dyadic_implicit(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S32 i4_num_act_ref_l0,
    U08 u1_pred_dir,
    U08 u1_default_ref_id,
    S32 i4_result_id)
{
    S32 wd_c, ht_c;
    S32 i4_blk_shift, blk_x, blk_y, i4_offset;
    layer_mv_t *ps_layer_mvbank;
    hme_mv_t *ps_mv;
    S08 *pi1_ref_idx;

    /* Width and ht of the current layer, needed only for the position clip */
    wd_c = ps_curr_layer->i4_wd;
    ht_c = ps_curr_layer->i4_ht;

    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
    /* Shift taking a current layer position to a coarse layer block index */
    i4_blk_shift = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];

    /* ASSERT for valid sizes */
    ASSERT((i4_blk_shift == 3) || (i4_blk_shift == 4) || (i4_blk_shift == 5));

    /* Safety check to avoid uninitialized access across temporal layers */
    /* NOTE(review): the upper bound subtracts the shift value, not a block */
    /* width in pels - confirm this is intended                             */
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - i4_blk_shift));
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - i4_blk_shift));

    /* Project the positions to prev layer */
    /* TODO: convert these to scale factors at pic level */
    blk_x = i4_pos_x >> i4_blk_shift;
    blk_y = i4_pos_y >> i4_blk_shift;

    /* Pick up the mvs from the location */
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

    /* For direction 1, skip the results of the first i4_num_act_ref_l0 refs */
    if(u1_pred_dir == 1)
    {
        ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
        pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
    }

    /* Double the mv with a multiply: left shifting a negative component */
    /* is undefined behaviour in C                                        */
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x * 2;
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y * 2;
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];

    /* Fall back to a zero mv on the default reference */
    /* NOTE(review): INTRA_MV is compared against the doubled x component, */
    /* not against the value stored in the bank - confirm this is intended */
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
    {
        ps_search_node->i1_ref_idx = u1_default_ref_id;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
    }
}
2423 
/**
******************************************************************************
*  @brief Writes into prm1 the four range limits of prm2 (i2_min_x, i2_max_x,
*         i2_min_y, i2_max_y), each multiplied by 2^shift. prm1 and prm2 are
*         structure lvalues. Every argument is parenthesised in the
*         expansion, and the scaling is a multiply because left shifting a
*         negative limit is undefined behaviour in C.
*
*  @note  Expands to a brace block, not a single statement: the caller's
*         trailing ';' is an empty statement, so an unbraced if/else around
*         the macro will not compile. Each argument is expanded several
*         times; pass expressions without side effects.
******************************************************************************
*/
#define SCALE_RANGE_PRMS(prm1, prm2, shift)                                                        \
    {                                                                                              \
        (prm1).i2_min_x = (prm2).i2_min_x * (1 << (shift));                                        \
        (prm1).i2_max_x = (prm2).i2_max_x * (1 << (shift));                                        \
        (prm1).i2_min_y = (prm2).i2_min_y * (1 << (shift));                                        \
        (prm1).i2_max_y = (prm2).i2_max_y * (1 << (shift));                                        \
    }

/**
******************************************************************************
*  @brief Same as SCALE_RANGE_PRMS, with prm1 and prm2 given as pointers to
*         the range structures
******************************************************************************
*/
#define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift)                                               \
    {                                                                                              \
        (prm1)->i2_min_x = (prm2)->i2_min_x * (1 << (shift));                                      \
        (prm1)->i2_max_x = (prm2)->i2_max_x * (1 << (shift));                                      \
        (prm1)->i2_min_y = (prm2)->i2_min_y * (1 << (shift));                                      \
        (prm1)->i2_max_y = (prm2)->i2_max_y * (1 << (shift));                                      \
    }
2439 
2440 /**
2441 ********************************************************************************
2442 *  @fn   void hme_refine_frm_init(me_ctxt_t *ps_ctxt,
2443 *                       refine_layer_prms_t *ps_refine_prms)
2444 *
2445 *  @brief  Frame init of refinemnet layers in ME
2446 *
2447 *  @param[in,out]  ps_ctxt: ME Handle
2448 *
2449 *  @param[in]  ps_refine_prms : refinement layer prms
2450 *
2451 *  @return None
2452 ********************************************************************************
2453 */
hme_refine_frm_init(layer_ctxt_t * ps_curr_layer,refine_prms_t * ps_refine_prms,layer_ctxt_t * ps_coarse_layer)2454 void hme_refine_frm_init(
2455     layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
2456 {
2457     /* local variables */
2458     BLK_SIZE_T e_result_blk_size = BLK_8x8;
2459     S32 i4_num_ref_fpel, i4_num_ref_prev_layer;
2460 
2461     i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
2462 
2463     if(ps_refine_prms->explicit_ref)
2464     {
2465         i4_num_ref_fpel = i4_num_ref_prev_layer;
2466     }
2467     else
2468     {
2469         i4_num_ref_fpel = 2;
2470     }
2471 
2472     if(ps_refine_prms->i4_enable_4x4_part)
2473     {
2474         e_result_blk_size = BLK_4x4;
2475     }
2476 
2477     i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
2478 
2479     hme_init_mv_bank(
2480         ps_curr_layer,
2481         e_result_blk_size,
2482         i4_num_ref_fpel,
2483         ps_refine_prms->i4_num_mvbank_results,
2484         ps_refine_prms->i4_layer_id > 0 ? 0 : 1);
2485 }
2486 
2487 #if 1  //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION
2488 /**
2489 ********************************************************************************
2490 *  @fn   void hme_init_clusters_16x16
2491 *               (
2492 *                   cluster_16x16_blk_t *ps_cluster_blk_16x16
2493 *               )
2494 *
2495 *  @brief  Intialisations for the structs used in clustering algorithm
2496 *
2497 *  @param[in/out]  ps_cluster_blk_16x16: pointer to structure containing clusters
2498 *                                        of 16x16 block
2499 *
2500 *  @return None
2501 ********************************************************************************
2502 */
2503 static __inline void
hme_init_clusters_16x16(cluster_16x16_blk_t * ps_cluster_blk_16x16,S32 bidir_enabled)2504     hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled)
2505 {
2506     S32 i;
2507 
2508     ps_cluster_blk_16x16->num_clusters = 0;
2509     ps_cluster_blk_16x16->intra_mv_area = 0;
2510     ps_cluster_blk_16x16->best_inter_cost = 0;
2511 
2512     for(i = 0; i < MAX_NUM_CLUSTERS_16x16; i++)
2513     {
2514         ps_cluster_blk_16x16->as_cluster_data[i].max_dist_from_centroid =
2515             bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16;
2516 
2517         ps_cluster_blk_16x16->as_cluster_data[i].is_valid_cluster = 0;
2518 
2519         ps_cluster_blk_16x16->as_cluster_data[i].bi_mv_pixel_area = 0;
2520         ps_cluster_blk_16x16->as_cluster_data[i].uni_mv_pixel_area = 0;
2521     }
2522     for(i = 0; i < MAX_NUM_REF; i++)
2523     {
2524         ps_cluster_blk_16x16->au1_num_clusters[i] = 0;
2525     }
2526 }
2527 
2528 /**
2529 ********************************************************************************
2530 *  @fn   void hme_init_clusters_32x32
2531 *               (
2532 *                   cluster_32x32_blk_t *ps_cluster_blk_32x32
2533 *               )
2534 *
2535 *  @brief  Intialisations for the structs used in clustering algorithm
2536 *
2537 *  @param[in/out]  ps_cluster_blk_32x32: pointer to structure containing clusters
2538 *                                        of 32x32 block
2539 *
2540 *  @return None
2541 ********************************************************************************
2542 */
2543 static __inline void
hme_init_clusters_32x32(cluster_32x32_blk_t * ps_cluster_blk_32x32,S32 bidir_enabled)2544     hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled)
2545 {
2546     S32 i;
2547 
2548     ps_cluster_blk_32x32->num_clusters = 0;
2549     ps_cluster_blk_32x32->intra_mv_area = 0;
2550     ps_cluster_blk_32x32->best_alt_ref = -1;
2551     ps_cluster_blk_32x32->best_uni_ref = -1;
2552     ps_cluster_blk_32x32->best_inter_cost = 0;
2553     ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0;
2554 
2555     for(i = 0; i < MAX_NUM_CLUSTERS_32x32; i++)
2556     {
2557         ps_cluster_blk_32x32->as_cluster_data[i].max_dist_from_centroid =
2558             bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32;
2559         ps_cluster_blk_32x32->as_cluster_data[i].is_valid_cluster = 0;
2560 
2561         ps_cluster_blk_32x32->as_cluster_data[i].bi_mv_pixel_area = 0;
2562         ps_cluster_blk_32x32->as_cluster_data[i].uni_mv_pixel_area = 0;
2563     }
2564     for(i = 0; i < MAX_NUM_REF; i++)
2565     {
2566         ps_cluster_blk_32x32->au1_num_clusters[i] = 0;
2567     }
2568 }
2569 
2570 /**
2571 ********************************************************************************
2572 *  @fn   void hme_init_clusters_64x64
2573 *               (
2574 *                   cluster_64x64_blk_t *ps_cluster_blk_64x64
2575 *               )
2576 *
2577 *  @brief  Intialisations for the structs used in clustering algorithm
2578 *
2579 *  @param[in/out]  ps_cluster_blk_64x64: pointer to structure containing clusters
2580 *                                        of 64x64 block
2581 *
2582 *  @return None
2583 ********************************************************************************
2584 */
2585 static __inline void
hme_init_clusters_64x64(cluster_64x64_blk_t * ps_cluster_blk_64x64,S32 bidir_enabled)2586     hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled)
2587 {
2588     S32 i;
2589 
2590     ps_cluster_blk_64x64->num_clusters = 0;
2591     ps_cluster_blk_64x64->intra_mv_area = 0;
2592     ps_cluster_blk_64x64->best_alt_ref = -1;
2593     ps_cluster_blk_64x64->best_uni_ref = -1;
2594     ps_cluster_blk_64x64->best_inter_cost = 0;
2595 
2596     for(i = 0; i < MAX_NUM_CLUSTERS_64x64; i++)
2597     {
2598         ps_cluster_blk_64x64->as_cluster_data[i].max_dist_from_centroid =
2599             bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64;
2600         ps_cluster_blk_64x64->as_cluster_data[i].is_valid_cluster = 0;
2601 
2602         ps_cluster_blk_64x64->as_cluster_data[i].bi_mv_pixel_area = 0;
2603         ps_cluster_blk_64x64->as_cluster_data[i].uni_mv_pixel_area = 0;
2604     }
2605     for(i = 0; i < MAX_NUM_REF; i++)
2606     {
2607         ps_cluster_blk_64x64->au1_num_clusters[i] = 0;
2608     }
2609 }
2610 
2611 /**
2612 ********************************************************************************
2613 *  @fn   void hme_sort_and_assign_top_ref_ids_areawise
2614 *               (
2615 *                   ctb_cluster_info_t *ps_ctb_cluster_info
2616 *               )
2617 *
2618 *  @brief  Finds best_uni_ref and best_alt_ref
2619 *
2620 *  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2621 *
2622 *  @param[in]  bidir_enabled: flag that indicates whether or not bi-pred is
2623 *                             enabled
2624 *
2625 *  @param[in]  block_width: width of the block in pels
2626 *
2627 *  @param[in]  e_cu_pos: position of the block within the CTB
2628 *
2629 *  @return None
2630 ********************************************************************************
2631 */
hme_sort_and_assign_top_ref_ids_areawise(ctb_cluster_info_t * ps_ctb_cluster_info,S32 bidir_enabled,S32 block_width,CU_POS_T e_cu_pos)2632 void hme_sort_and_assign_top_ref_ids_areawise(
2633     ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
2634 {
2635     cluster_32x32_blk_t *ps_32x32 = NULL;
2636     cluster_64x64_blk_t *ps_64x64 = NULL;
2637     cluster_data_t *ps_data;
2638 
2639     S32 j, k;
2640 
2641     S32 ai4_uni_area[MAX_NUM_REF];
2642     S32 ai4_bi_area[MAX_NUM_REF];
2643     S32 ai4_ref_id_found[MAX_NUM_REF];
2644     S32 ai4_ref_id[MAX_NUM_REF];
2645 
2646     S32 best_uni_ref = -1, best_alt_ref = -1;
2647     S32 num_clusters;
2648     S32 num_ref = 0;
2649     S32 num_clusters_evaluated = 0;
2650     S32 is_cur_blk_valid;
2651 
2652     if(32 == block_width)
2653     {
2654         is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) || 0;
2655         ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
2656         num_clusters = ps_32x32->num_clusters;
2657         ps_data = &ps_32x32->as_cluster_data[0];
2658     }
2659     else
2660     {
2661         is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf);
2662         ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
2663         num_clusters = ps_64x64->num_clusters;
2664         ps_data = &ps_64x64->as_cluster_data[0];
2665     }
2666 
2667 #if !ENABLE_4CTB_EVALUATION
2668     if((num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
2669     {
2670         return;
2671     }
2672 #endif
2673     if(num_clusters == 0)
2674     {
2675         return;
2676     }
2677     else if(!is_cur_blk_valid)
2678     {
2679         return;
2680     }
2681 
2682     memset(ai4_uni_area, 0, sizeof(S32) * MAX_NUM_REF);
2683     memset(ai4_bi_area, 0, sizeof(S32) * MAX_NUM_REF);
2684     memset(ai4_ref_id_found, 0, sizeof(S32) * MAX_NUM_REF);
2685     memset(ai4_ref_id, -1, sizeof(S32) * MAX_NUM_REF);
2686 
2687     for(j = 0; num_clusters_evaluated < num_clusters; j++, ps_data++)
2688     {
2689         S32 ref_id;
2690 
2691         if(!ps_data->is_valid_cluster)
2692         {
2693             continue;
2694         }
2695 
2696         ref_id = ps_data->ref_id;
2697 
2698         num_clusters_evaluated++;
2699 
2700         ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area;
2701         ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area;
2702 
2703         if(!ai4_ref_id_found[ref_id])
2704         {
2705             ai4_ref_id[ref_id] = ref_id;
2706             ai4_ref_id_found[ref_id] = 1;
2707             num_ref++;
2708         }
2709     }
2710 
2711     {
2712         S32 ai4_ref_id_temp[MAX_NUM_REF];
2713 
2714         memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(S32) * MAX_NUM_REF);
2715 
2716         for(k = 1; k < MAX_NUM_REF; k++)
2717         {
2718             if(ai4_uni_area[k] > ai4_uni_area[0])
2719             {
2720                 SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
2721                 SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32);
2722             }
2723         }
2724 
2725         best_uni_ref = ai4_ref_id_temp[0];
2726     }
2727 
2728     if(bidir_enabled)
2729     {
2730         for(k = 1; k < MAX_NUM_REF; k++)
2731         {
2732             if(ai4_bi_area[k] > ai4_bi_area[0])
2733             {
2734                 SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
2735                 SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
2736             }
2737         }
2738 
2739         if(!ai4_bi_area[0])
2740         {
2741             best_alt_ref = -1;
2742 
2743             if(32 == block_width)
2744             {
2745                 SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2746             }
2747             else
2748             {
2749                 SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2750             }
2751 
2752             return;
2753         }
2754 
2755         if(best_uni_ref == ai4_ref_id[0])
2756         {
2757             for(k = 2; k < MAX_NUM_REF; k++)
2758             {
2759                 if(ai4_bi_area[k] > ai4_bi_area[1])
2760                 {
2761                     SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
2762                     SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
2763                 }
2764             }
2765 
2766             best_alt_ref = ai4_ref_id[1];
2767         }
2768         else
2769         {
2770             best_alt_ref = ai4_ref_id[0];
2771         }
2772     }
2773 
2774     if(32 == block_width)
2775     {
2776         SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2777     }
2778     else
2779     {
2780         SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2781     }
2782 }
2783 
2784 /**
2785 ********************************************************************************
2786 *  @fn   void hme_find_top_ref_ids
2787 *               (
2788 *                   ctb_cluster_info_t *ps_ctb_cluster_info
2789 *               )
2790 *
2791 *  @brief  Finds best_uni_ref and best_alt_ref
2792 *
2793 *  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2794 *
2795 *  @return None
2796 ********************************************************************************
2797 */
hme_find_top_ref_ids(ctb_cluster_info_t * ps_ctb_cluster_info,S32 bidir_enabled,S32 block_width)2798 void hme_find_top_ref_ids(
2799     ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width)
2800 {
2801     S32 i;
2802 
2803     if(32 == block_width)
2804     {
2805         for(i = 0; i < 4; i++)
2806         {
2807             hme_sort_and_assign_top_ref_ids_areawise(
2808                 ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)i);
2809         }
2810     }
2811     else if(64 == block_width)
2812     {
2813         hme_sort_and_assign_top_ref_ids_areawise(
2814             ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA);
2815     }
2816 }
2817 
2818 /**
2819 ********************************************************************************
2820 *  @fn   void hme_boot_out_outlier
2821 *               (
2822 *                   ctb_cluster_info_t *ps_ctb_cluster_info
2823 *               )
2824 *
2825 *  @brief  Removes outlier clusters before CU tree population
2826 *
2827 *  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2828 *
2829 *  @return None
2830 ********************************************************************************
2831 */
hme_boot_out_outlier(ctb_cluster_info_t * ps_ctb_cluster_info,S32 blk_width)2832 void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width)
2833 {
2834     cluster_32x32_blk_t *ps_32x32;
2835 
2836     S32 i;
2837 
2838     cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0];
2839 
2840     S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold;
2841 
2842     if(32 == blk_width)
2843     {
2844         /* 32x32 clusters */
2845         for(i = 0; i < 4; i++)
2846         {
2847             ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
2848 
2849             if(ps_32x32->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2850             {
2851                 BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold);
2852             }
2853         }
2854     }
2855     else if(64 == blk_width)
2856     {
2857         /* 64x64 clusters */
2858         if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2859         {
2860             BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold);
2861         }
2862     }
2863 }
2864 
2865 /**
2866 ********************************************************************************
2867 *  @fn   void hme_update_cluster_attributes
2868 *               (
2869 *                   cluster_data_t *ps_cluster_data,
2870 *                   S32 mvx,
2871 *                   S32 mvy,
2872 *                   PART_ID_T e_part_id
2873 *               )
2874 *
2875 *  @brief  Implementation fo the clustering algorithm
2876 *
2877 *  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
2878 *
2879 *  @param[in]  mvx : x co-ordinate of the motion vector
2880 *
2881 *  @param[in]  mvy : y co-ordinate of the motion vector
2882 *
2883 *  @param[in]  ref_idx : ref_id of the motion vector
2884 *
2885 *  @param[in]  e_part_id : partition id of the motion vector
2886 *
2887 *  @return None
2888 ********************************************************************************
2889 */
static __inline void hme_update_cluster_attributes(
    cluster_data_t *ps_cluster_data,
    S32 mvx,
    S32 mvy,
    S32 mvdx,
    S32 mvdy,
    S32 ref_id,
    S32 sdi,
    U08 is_part_of_bi,
    PART_ID_T e_part_id)
{
    /* Adds the MV (mvx, mvy) to the cluster 'ps_cluster_data': extends the  */
    /* cluster's bounding box, appends the MV to as_mv[], recomputes the Q8  */
    /* centroid as the running mean of all MVs and accumulates pixel areas.  */
    /* (mvdx, mvdy) is the offset of the cluster centroid from the MV, i.e.  */
    /* a positive mvdx means the MV lies on the 'min' side of the centroid.  */

    /* Pixel area of the partition that produced this MV */
    S32 part_area = gai4_partition_area[e_part_id];

    /* MV count before the insertion; doubles as the index of the free */
    /* slot in as_mv[]                                                  */
    S32 num_mvs_prev = ps_cluster_data->num_mvs;

    /* Sum of all MVs of the cluster in Q8, including the new one. The new */
    /* MV is scaled with a multiplication because MV components may be     */
    /* negative and left-shifting a negative value is undefined in C       */
    LWORD64 i8_mvx_sum_q8 =
        (LWORD64)ps_cluster_data->s_centroid.i4_pos_x_q8 * num_mvs_prev + ((LWORD64)mvx * 256);
    LWORD64 i8_mvy_sum_q8 =
        (LWORD64)ps_cluster_data->s_centroid.i4_pos_y_q8 * num_mvs_prev + ((LWORD64)mvy * 256);

    /* The reference index is part of the interface but is not needed here */
    (void)ref_id;

    /* Bounding box update along x : only the side on which the MV lies */
    /* relative to the centroid can move                                */
    if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
    {
        ps_cluster_data->min_x = mvx;
    }
    else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
    {
        ps_cluster_data->max_x = mvx;
    }

    /* Bounding box update along y */
    if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
    {
        ps_cluster_data->min_y = mvy;
    }
    else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
    {
        ps_cluster_data->max_y = mvy;
    }

    /* Record the MV itself */
    ps_cluster_data->as_mv[num_mvs_prev].pixel_count = part_area;
    ps_cluster_data->as_mv[num_mvs_prev].mvx = mvx;
    ps_cluster_data->as_mv[num_mvs_prev].mvy = mvy;
    ps_cluster_data->as_mv[num_mvs_prev].is_uni = !is_part_of_bi;
    ps_cluster_data->as_mv[num_mvs_prev].sdi = sdi;

    /* Updation of centroid : mean of all MVs, new one included */
    ps_cluster_data->num_mvs++;

    ps_cluster_data->s_centroid.i4_pos_x_q8 = (WORD32)(i8_mvx_sum_q8 / ps_cluster_data->num_mvs);
    ps_cluster_data->s_centroid.i4_pos_y_q8 = (WORD32)(i8_mvy_sum_q8 / ps_cluster_data->num_mvs);

    /* Area bookkeeping : total area plus the uni/bi split */
    ps_cluster_data->area_in_pixels += part_area;

    if(is_part_of_bi)
    {
        ps_cluster_data->bi_mv_pixel_area += part_area;
    }
    else
    {
        ps_cluster_data->uni_mv_pixel_area += part_area;
    }
}
2962 
2963 /**
2964 ********************************************************************************
2965 *  @fn   void hme_try_cluster_merge
2966 *               (
2967 *                   cluster_data_t *ps_cluster_data,
2968 *                   S32 *pi4_num_clusters,
2969 *                   S32 idx_of_updated_cluster
2970 *               )
2971 *
2972 *  @brief  Implementation fo the clustering algorithm
2973 *
2974 *  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
2975 *
2976 *  @param[in/out]  pi4_num_clusters : pointer to number of clusters
2977 *
2978 *  @param[in]  idx_of_updated_cluster : index of the cluster most recently
2979 *                                       updated
2980 *
2981 *  @return Nothing
2982 ********************************************************************************
2983 */
hme_try_cluster_merge(cluster_data_t * ps_cluster_data,U08 * pu1_num_clusters,S32 idx_of_updated_cluster)2984 void hme_try_cluster_merge(
2985     cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
2986 {
2987     centroid_t *ps_centroid;
2988 
2989     S32 cur_pos_x_q8;
2990     S32 cur_pos_y_q8;
2991     S32 i;
2992     S32 max_dist_from_centroid;
2993     S32 mvd;
2994     S32 mvdx_q8;
2995     S32 mvdx;
2996     S32 mvdy_q8;
2997     S32 mvdy;
2998     S32 num_clusters, num_clusters_evaluated;
2999     S32 other_pos_x_q8;
3000     S32 other_pos_y_q8;
3001 
3002     cluster_data_t *ps_root = ps_cluster_data;
3003     cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
3004     centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;
3005 
3006     /* Merge is superfluous if num_clusters is 1 */
3007     if(*pu1_num_clusters == 1)
3008     {
3009         return;
3010     }
3011 
3012     cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
3013     cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;
3014 
3015     max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;
3016 
3017     num_clusters = *pu1_num_clusters;
3018     num_clusters_evaluated = 0;
3019 
3020     for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
3021     {
3022         if(!ps_cluster_data->is_valid_cluster)
3023         {
3024             continue;
3025         }
3026         if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
3027         {
3028             num_clusters_evaluated++;
3029             continue;
3030         }
3031 
3032         ps_centroid = &ps_cluster_data->s_centroid;
3033 
3034         other_pos_x_q8 = ps_centroid->i4_pos_x_q8;
3035         other_pos_y_q8 = ps_centroid->i4_pos_y_q8;
3036 
3037         mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8);
3038         mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8);
3039         mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3040         mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3041 
3042         mvd = ABS(mvdx) + ABS(mvdy);
3043 
3044         if(mvd <= (max_dist_from_centroid >> 1))
3045         {
3046             /* 0 => no updates */
3047             /* 1 => min updated */
3048             /* 2 => max updated */
3049             S32 minmax_x_update_id;
3050             S32 minmax_y_update_id;
3051 
3052             LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs;
3053             LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs;
3054             LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs;
3055             LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs;
3056 
3057             (*pu1_num_clusters)--;
3058 
3059             ps_cluster_data->is_valid_cluster = 0;
3060 
3061             memcpy(
3062                 &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs],
3063                 ps_cluster_data->as_mv,
3064                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3065 
3066             ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs;
3067             ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels;
3068             ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3069             ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3070             i8_mv_x_sum_self += i8_mv_x_sum_cousin;
3071             i8_mv_y_sum_self += i8_mv_y_sum_cousin;
3072 
3073             ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs);
3074             ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs);
3075 
3076             minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x)
3077                                      ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2)
3078                                      : 1;
3079             minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y)
3080                                      ? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2)
3081                                      : 1;
3082 
3083             /* Updation of centroid spread */
3084             switch(minmax_x_update_id + (minmax_y_update_id << 2))
3085             {
3086             case 1:
3087             {
3088                 S32 mvd, mvd_q8;
3089 
3090                 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3091 
3092                 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3093                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3094 
3095                 if(mvd > (max_dist_from_centroid))
3096                 {
3097                     ps_cluster_data->max_dist_from_centroid = mvd;
3098                 }
3099                 break;
3100             }
3101             case 2:
3102             {
3103                 S32 mvd, mvd_q8;
3104 
3105                 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3106 
3107                 mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3108                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3109 
3110                 if(mvd > (max_dist_from_centroid))
3111                 {
3112                     ps_cluster_data->max_dist_from_centroid = mvd;
3113                 }
3114                 break;
3115             }
3116             case 4:
3117             {
3118                 S32 mvd, mvd_q8;
3119 
3120                 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3121 
3122                 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3123                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3124 
3125                 if(mvd > (max_dist_from_centroid))
3126                 {
3127                     ps_cluster_data->max_dist_from_centroid = mvd;
3128                 }
3129                 break;
3130             }
3131             case 5:
3132             {
3133                 S32 mvd;
3134                 S32 mvdx, mvdx_q8;
3135                 S32 mvdy, mvdy_q8;
3136 
3137                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3138                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3139 
3140                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3141                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3142 
3143                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3144 
3145                 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3146                 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3147 
3148                 if(mvd > max_dist_from_centroid)
3149                 {
3150                     ps_cluster_data->max_dist_from_centroid = mvd;
3151                 }
3152                 break;
3153             }
3154             case 6:
3155             {
3156                 S32 mvd;
3157                 S32 mvdx, mvdx_q8;
3158                 S32 mvdy, mvdy_q8;
3159 
3160                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3161                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3162 
3163                 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3164                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3165 
3166                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3167 
3168                 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3169                 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3170 
3171                 if(mvd > max_dist_from_centroid)
3172                 {
3173                     ps_cluster_data->max_dist_from_centroid = mvd;
3174                 }
3175                 break;
3176             }
3177             case 8:
3178             {
3179                 S32 mvd, mvd_q8;
3180 
3181                 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3182 
3183                 mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3184                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3185 
3186                 if(mvd > (max_dist_from_centroid))
3187                 {
3188                     ps_cluster_data->max_dist_from_centroid = mvd;
3189                 }
3190                 break;
3191             }
3192             case 9:
3193             {
3194                 S32 mvd;
3195                 S32 mvdx, mvdx_q8;
3196                 S32 mvdy, mvdy_q8;
3197 
3198                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3199                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3200 
3201                 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3202                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3203 
3204                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3205 
3206                 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3207                 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3208 
3209                 if(mvd > max_dist_from_centroid)
3210                 {
3211                     ps_cluster_data->max_dist_from_centroid = mvd;
3212                 }
3213                 break;
3214             }
3215             case 10:
3216             {
3217                 S32 mvd;
3218                 S32 mvdx, mvdx_q8;
3219                 S32 mvdy, mvdy_q8;
3220 
3221                 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3222                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3223 
3224                 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3225                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3226 
3227                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3228 
3229                 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3230                 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3231 
3232                 if(mvd > ps_cluster_data->max_dist_from_centroid)
3233                 {
3234                     ps_cluster_data->max_dist_from_centroid = mvd;
3235                 }
3236                 break;
3237             }
3238             default:
3239             {
3240                 break;
3241             }
3242             }
3243 
3244             hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster);
3245 
3246             return;
3247         }
3248 
3249         num_clusters_evaluated++;
3250     }
3251 }
3252 
3253 /**
3254 ********************************************************************************
3255 *  @fn   void hme_find_and_update_clusters
3256 *               (
3257 *                   cluster_data_t *ps_cluster_data,
3258 *                   S32 *pi4_num_clusters,
3259 *                   S32 mvx,
3260 *                   S32 mvy,
3261 *                   S32 ref_idx,
3262 *                   PART_ID_T e_part_id
3263 *               )
3264 *
3265 *  @brief  Implementation fo the clustering algorithm
3266 *
3267 *  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
3268 *
3269 *  @param[in/out]  pi4_num_clusters : pointer to number of clusters
3270 *
3271 *  @param[in]  mvx : x co-ordinate of the motion vector
3272 *
3273 *  @param[in]  mvy : y co-ordinate of the motion vector
3274 *
3275 *  @param[in]  ref_idx : ref_id of the motion vector
3276 *
3277 *  @param[in]  e_part_id : partition id of the motion vector
3278 *
3279 *  @return None
3280 ********************************************************************************
3281 */
hme_find_and_update_clusters(cluster_data_t * ps_cluster_data,U08 * pu1_num_clusters,S16 i2_mv_x,S16 i2_mv_y,U08 i1_ref_idx,S32 i4_sdi,PART_ID_T e_part_id,U08 is_part_of_bi)3282 void hme_find_and_update_clusters(
3283     cluster_data_t *ps_cluster_data,
3284     U08 *pu1_num_clusters,
3285     S16 i2_mv_x,
3286     S16 i2_mv_y,
3287     U08 i1_ref_idx,
3288     S32 i4_sdi,
3289     PART_ID_T e_part_id,
3290     U08 is_part_of_bi)
3291 {
3292     S32 i;
3293     S32 min_mvd_cluster_id = -1;
3294     S32 mvd, mvd_limit, mvdx, mvdy;
3295     S32 min_mvdx, min_mvdy;
3296 
3297     S32 min_mvd = MAX_32BIT_VAL;
3298     S32 num_clusters = *pu1_num_clusters;
3299 
3300     S32 mvx = i2_mv_x;
3301     S32 mvy = i2_mv_y;
3302     S32 ref_idx = i1_ref_idx;
3303     S32 sdi = i4_sdi;
3304     S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16;
3305 
3306     if(num_clusters == 0)
3307     {
3308         cluster_data_t *ps_data = &ps_cluster_data[num_clusters];
3309 
3310         ps_data->num_mvs = 1;
3311         ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3312         ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3313         ps_data->ref_id = ref_idx;
3314         ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3315         ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3316         ps_data->as_mv[0].mvx = mvx;
3317         ps_data->as_mv[0].mvy = mvy;
3318 
3319         /***************************/
3320         ps_data->as_mv[0].is_uni = !is_part_of_bi;
3321         ps_data->as_mv[0].sdi = sdi;
3322         if(is_part_of_bi)
3323         {
3324             ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3325         }
3326         else
3327         {
3328             ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3329         }
3330         /**************************/
3331         ps_data->max_x = mvx;
3332         ps_data->min_x = mvx;
3333         ps_data->max_y = mvy;
3334         ps_data->min_y = mvy;
3335 
3336         ps_data->is_valid_cluster = 1;
3337 
3338         *pu1_num_clusters = 1;
3339     }
3340     else
3341     {
3342         S32 num_clusters_evaluated = 0;
3343 
3344         for(i = 0; num_clusters_evaluated < num_clusters; i++)
3345         {
3346             cluster_data_t *ps_data = &ps_cluster_data[i];
3347 
3348             centroid_t *ps_centroid;
3349 
3350             S32 mvx_q8;
3351             S32 mvy_q8;
3352             S32 posx_q8;
3353             S32 posy_q8;
3354             S32 mvdx_q8;
3355             S32 mvdy_q8;
3356 
3357             /* In anticipation of a possible merging of clusters */
3358             if(ps_data->is_valid_cluster == 0)
3359             {
3360                 new_cluster_idx = i;
3361                 continue;
3362             }
3363 
3364             if(ref_idx != ps_data->ref_id)
3365             {
3366                 num_clusters_evaluated++;
3367                 continue;
3368             }
3369 
3370             ps_centroid = &ps_data->s_centroid;
3371             posx_q8 = ps_centroid->i4_pos_x_q8;
3372             posy_q8 = ps_centroid->i4_pos_y_q8;
3373 
3374             mvx_q8 = mvx << 8;
3375             mvy_q8 = mvy << 8;
3376 
3377             mvdx_q8 = posx_q8 - mvx_q8;
3378             mvdy_q8 = posy_q8 - mvy_q8;
3379 
3380             mvdx = (((mvdx_q8 + (1 << 7)) >> 8));
3381             mvdy = (((mvdy_q8 + (1 << 7)) >> 8));
3382 
3383             mvd = ABS(mvdx) + ABS(mvdy);
3384 
3385             if(mvd < min_mvd)
3386             {
3387                 min_mvd = mvd;
3388                 min_mvdx = mvdx;
3389                 min_mvdy = mvdy;
3390                 min_mvd_cluster_id = i;
3391             }
3392 
3393             num_clusters_evaluated++;
3394         }
3395 
3396         mvd_limit = (min_mvd_cluster_id == -1)
3397                         ? ps_cluster_data[0].max_dist_from_centroid
3398                         : ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid;
3399 
3400         /* This condition implies that min_mvd has been updated */
3401         if(min_mvd <= mvd_limit)
3402         {
3403             hme_update_cluster_attributes(
3404                 &ps_cluster_data[min_mvd_cluster_id],
3405                 mvx,
3406                 mvy,
3407                 min_mvdx,
3408                 min_mvdy,
3409                 ref_idx,
3410                 sdi,
3411                 is_part_of_bi,
3412                 e_part_id);
3413 
3414             if(PRT_NxN == ge_part_id_to_part_type[e_part_id])
3415             {
3416                 hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id);
3417             }
3418         }
3419         else
3420         {
3421             cluster_data_t *ps_data = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16)
3422                                           ? &ps_cluster_data[num_clusters]
3423                                           : &ps_cluster_data[new_cluster_idx];
3424 
3425             ps_data->num_mvs = 1;
3426             ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3427             ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3428             ps_data->ref_id = ref_idx;
3429             ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3430             ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3431             ps_data->as_mv[0].mvx = mvx;
3432             ps_data->as_mv[0].mvy = mvy;
3433 
3434             /***************************/
3435             ps_data->as_mv[0].is_uni = !is_part_of_bi;
3436             ps_data->as_mv[0].sdi = sdi;
3437             if(is_part_of_bi)
3438             {
3439                 ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3440             }
3441             else
3442             {
3443                 ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3444             }
3445             /**************************/
3446             ps_data->max_x = mvx;
3447             ps_data->min_x = mvx;
3448             ps_data->max_y = mvy;
3449             ps_data->min_y = mvy;
3450 
3451             ps_data->is_valid_cluster = 1;
3452 
3453             num_clusters++;
3454             *pu1_num_clusters = num_clusters;
3455         }
3456     }
3457 }
3458 
3459 /**
3460 ********************************************************************************
3461 *  @fn   void hme_update_32x32_cluster_attributes
3462 *               (
3463 *                   cluster_32x32_blk_t *ps_blk_32x32,
3464 *                   cluster_data_t *ps_cluster_data
3465 *               )
3466 *
3467 *  @brief  Updates attributes for 32x32 clusters based on the attributes of
3468 *          the constituent 16x16 clusters
3469 *
3470 *  @param[out]  ps_blk_32x32: structure containing 32x32 block results
3471 *
3472 *  @param[in]  ps_cluster_data : structure containing 16x16 block results
3473 *
3474 *  @return None
3475 ********************************************************************************
3476 */
hme_update_32x32_cluster_attributes(cluster_32x32_blk_t * ps_blk_32x32,cluster_data_t * ps_cluster_data)3477 void hme_update_32x32_cluster_attributes(
3478     cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data)
3479 {
3480     cluster_data_t *ps_cur_cluster_32;
3481 
3482     S32 i;
3483     S32 mvd_limit;
3484 
3485     S32 num_clusters = ps_blk_32x32->num_clusters;
3486 
3487     if(0 == num_clusters)
3488     {
3489         ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3490 
3491         ps_blk_32x32->num_clusters++;
3492         ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3493 
3494         ps_cur_cluster_32->is_valid_cluster = 1;
3495 
3496         ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3497         ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3498         ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3499 
3500         memcpy(
3501             ps_cur_cluster_32->as_mv,
3502             ps_cluster_data->as_mv,
3503             sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3504 
3505         ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3506 
3507         ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3508 
3509         ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3510         ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3511         ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3512         ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3513 
3514         ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3515     }
3516     else
3517     {
3518         centroid_t *ps_centroid;
3519 
3520         S32 cur_posx_q8, cur_posy_q8;
3521         S32 min_mvd_cluster_id = -1;
3522         S32 mvd;
3523         S32 mvdx;
3524         S32 mvdy;
3525         S32 mvdx_min;
3526         S32 mvdy_min;
3527         S32 mvdx_q8;
3528         S32 mvdy_q8;
3529 
3530         S32 num_clusters_evaluated = 0;
3531 
3532         S32 mvd_min = MAX_32BIT_VAL;
3533 
3534         S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3535         S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3536 
3537         for(i = 0; num_clusters_evaluated < num_clusters; i++)
3538         {
3539             ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i];
3540 
3541             if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id)
3542             {
3543                 num_clusters_evaluated++;
3544                 continue;
3545             }
3546             if(!ps_cluster_data->is_valid_cluster)
3547             {
3548                 continue;
3549             }
3550 
3551             num_clusters_evaluated++;
3552 
3553             ps_centroid = &ps_cur_cluster_32->s_centroid;
3554 
3555             cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3556             cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3557 
3558             mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3559             mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3560 
3561             mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3562             mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3563 
3564             mvd = ABS(mvdx) + ABS(mvdy);
3565 
3566             if(mvd < mvd_min)
3567             {
3568                 mvd_min = mvd;
3569                 mvdx_min = mvdx;
3570                 mvdy_min = mvdy;
3571                 min_mvd_cluster_id = i;
3572             }
3573         }
3574 
3575         ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3576 
3577         mvd_limit = (min_mvd_cluster_id == -1)
3578                         ? ps_cur_cluster_32[0].max_dist_from_centroid
3579                         : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid;
3580 
3581         if(mvd_min <= mvd_limit)
3582         {
3583             LWORD64 i8_updated_posx;
3584             LWORD64 i8_updated_posy;
3585             WORD32 minmax_updated_x = 0;
3586             WORD32 minmax_updated_y = 0;
3587 
3588             ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id];
3589 
3590             ps_centroid = &ps_cur_cluster_32->s_centroid;
3591 
3592             ps_cur_cluster_32->is_valid_cluster = 1;
3593 
3594             ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels;
3595             ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3596             ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3597 
3598             memcpy(
3599                 &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs],
3600                 ps_cluster_data->as_mv,
3601                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3602 
3603             if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8))
3604             {
3605                 ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8;
3606                 minmax_updated_x = 1;
3607             }
3608             else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8))
3609             {
3610                 ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3611                 minmax_updated_x = 2;
3612             }
3613 
3614             if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8))
3615             {
3616                 ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3617                 minmax_updated_y = 1;
3618             }
3619             else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8))
3620             {
3621                 ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3622                 minmax_updated_y = 2;
3623             }
3624 
3625             switch((minmax_updated_y << 2) + minmax_updated_x)
3626             {
3627             case 1:
3628             {
3629                 S32 mvd, mvd_q8;
3630 
3631                 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3632                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3633 
3634                 if(mvd > (mvd_limit))
3635                 {
3636                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3637                 }
3638                 break;
3639             }
3640             case 2:
3641             {
3642                 S32 mvd, mvd_q8;
3643 
3644                 mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3645                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3646 
3647                 if(mvd > (mvd_limit))
3648                 {
3649                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3650                 }
3651                 break;
3652             }
3653             case 4:
3654             {
3655                 S32 mvd, mvd_q8;
3656 
3657                 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3658                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3659 
3660                 if(mvd > (mvd_limit))
3661                 {
3662                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3663                 }
3664                 break;
3665             }
3666             case 5:
3667             {
3668                 S32 mvd;
3669                 S32 mvdx, mvdx_q8;
3670                 S32 mvdy, mvdy_q8;
3671 
3672                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3673                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3674 
3675                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3676                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3677 
3678                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3679 
3680                 if(mvd > mvd_limit)
3681                 {
3682                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3683                 }
3684                 break;
3685             }
3686             case 6:
3687             {
3688                 S32 mvd;
3689                 S32 mvdx, mvdx_q8;
3690                 S32 mvdy, mvdy_q8;
3691 
3692                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3693                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3694 
3695                 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3696                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3697 
3698                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3699 
3700                 if(mvd > mvd_limit)
3701                 {
3702                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3703                 }
3704                 break;
3705             }
3706             case 8:
3707             {
3708                 S32 mvd, mvd_q8;
3709 
3710                 mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3711                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3712 
3713                 if(mvd > (mvd_limit))
3714                 {
3715                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3716                 }
3717                 break;
3718             }
3719             case 9:
3720             {
3721                 S32 mvd;
3722                 S32 mvdx, mvdx_q8;
3723                 S32 mvdy, mvdy_q8;
3724 
3725                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3726                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3727 
3728                 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3729                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3730 
3731                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3732 
3733                 if(mvd > mvd_limit)
3734                 {
3735                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3736                 }
3737                 break;
3738             }
3739             case 10:
3740             {
3741                 S32 mvd;
3742                 S32 mvdx, mvdx_q8;
3743                 S32 mvdy, mvdy_q8;
3744 
3745                 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3746                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3747 
3748                 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3749                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3750 
3751                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3752 
3753                 if(mvd > ps_cur_cluster_32->max_dist_from_centroid)
3754                 {
3755                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3756                 }
3757                 break;
3758             }
3759             default:
3760             {
3761                 break;
3762             }
3763             }
3764 
3765             i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) +
3766                               ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
3767             i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) +
3768                               ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
3769 
3770             ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs;
3771 
3772             ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs);
3773             ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs);
3774         }
3775         else if(num_clusters < MAX_NUM_CLUSTERS_32x32)
3776         {
3777             ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters];
3778 
3779             ps_blk_32x32->num_clusters++;
3780             ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3781 
3782             ps_cur_cluster_32->is_valid_cluster = 1;
3783 
3784             ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3785             ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3786             ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3787 
3788             memcpy(
3789                 ps_cur_cluster_32->as_mv,
3790                 ps_cluster_data->as_mv,
3791                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3792 
3793             ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3794 
3795             ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3796 
3797             ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3798             ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3799             ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3800             ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3801 
3802             ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3803         }
3804     }
3805 }
3806 
3807 /**
3808 ********************************************************************************
3809 *  @fn   void hme_update_64x64_cluster_attributes
3810 *               (
3811 *                   cluster_64x64_blk_t *ps_blk_32x32,
3812 *                   cluster_data_t *ps_cluster_data
3813 *               )
3814 *
3815 *  @brief  Updates attributes for 64x64 clusters based on the attributes of
3816 *          the constituent 16x16 clusters
3817 *
3818 *  @param[out]  ps_blk_64x64: structure containing 64x64 block results
3819 *
3820 *  @param[in]  ps_cluster_data : structure containing 32x32 block results
3821 *
3822 *  @return None
3823 ********************************************************************************
3824 */
hme_update_64x64_cluster_attributes(cluster_64x64_blk_t * ps_blk_64x64,cluster_data_t * ps_cluster_data)3825 void hme_update_64x64_cluster_attributes(
3826     cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
3827 {
3828     cluster_data_t *ps_cur_cluster_64;
3829 
3830     S32 i;
3831     S32 mvd_limit;
3832 
3833     S32 num_clusters = ps_blk_64x64->num_clusters;
3834 
3835     if(0 == num_clusters)
3836     {
3837         ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[0];
3838 
3839         ps_blk_64x64->num_clusters++;
3840         ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
3841 
3842         ps_cur_cluster_64->is_valid_cluster = 1;
3843 
3844         ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
3845         ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3846         ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3847 
3848         memcpy(
3849             ps_cur_cluster_64->as_mv,
3850             ps_cluster_data->as_mv,
3851             sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3852 
3853         ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
3854 
3855         ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
3856 
3857         ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
3858         ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
3859         ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
3860         ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
3861 
3862         ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
3863     }
3864     else
3865     {
3866         centroid_t *ps_centroid;
3867 
3868         S32 cur_posx_q8, cur_posy_q8;
3869         S32 min_mvd_cluster_id = -1;
3870         S32 mvd;
3871         S32 mvdx;
3872         S32 mvdy;
3873         S32 mvdx_min;
3874         S32 mvdy_min;
3875         S32 mvdx_q8;
3876         S32 mvdy_q8;
3877 
3878         S32 num_clusters_evaluated = 0;
3879 
3880         S32 mvd_min = MAX_32BIT_VAL;
3881 
3882         S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3883         S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3884 
3885         for(i = 0; num_clusters_evaluated < num_clusters; i++)
3886         {
3887             ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i];
3888 
3889             if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id)
3890             {
3891                 num_clusters_evaluated++;
3892                 continue;
3893             }
3894 
3895             if(!ps_cur_cluster_64->is_valid_cluster)
3896             {
3897                 continue;
3898             }
3899 
3900             num_clusters_evaluated++;
3901 
3902             ps_centroid = &ps_cur_cluster_64->s_centroid;
3903 
3904             cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3905             cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3906 
3907             mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3908             mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3909 
3910             mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3911             mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3912 
3913             mvd = ABS(mvdx) + ABS(mvdy);
3914 
3915             if(mvd < mvd_min)
3916             {
3917                 mvd_min = mvd;
3918                 mvdx_min = mvdx;
3919                 mvdy_min = mvdy;
3920                 min_mvd_cluster_id = i;
3921             }
3922         }
3923 
3924         ps_cur_cluster_64 = ps_blk_64x64->as_cluster_data;
3925 
3926         mvd_limit = (min_mvd_cluster_id == -1)
3927                         ? ps_cur_cluster_64[0].max_dist_from_centroid
3928                         : ps_cur_cluster_64[min_mvd_cluster_id].max_dist_from_centroid;
3929 
3930         if(mvd_min <= mvd_limit)
3931         {
3932             LWORD64 i8_updated_posx;
3933             LWORD64 i8_updated_posy;
3934             WORD32 minmax_updated_x = 0;
3935             WORD32 minmax_updated_y = 0;
3936 
3937             ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id];
3938 
3939             ps_centroid = &ps_cur_cluster_64->s_centroid;
3940 
3941             ps_cur_cluster_64->is_valid_cluster = 1;
3942 
3943             ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels;
3944             ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3945             ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3946 
3947             memcpy(
3948                 &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs],
3949                 ps_cluster_data->as_mv,
3950                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3951 
3952             if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8))
3953             {
3954                 ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3955                 minmax_updated_x = 1;
3956             }
3957             else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8))
3958             {
3959                 ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3960                 minmax_updated_x = 2;
3961             }
3962 
3963             if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8))
3964             {
3965                 ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3966                 minmax_updated_y = 1;
3967             }
3968             else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8))
3969             {
3970                 ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3971                 minmax_updated_y = 2;
3972             }
3973 
3974             switch((minmax_updated_y << 2) + minmax_updated_x)
3975             {
3976             case 1:
3977             {
3978                 S32 mvd, mvd_q8;
3979 
3980                 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
3981                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3982 
3983                 if(mvd > (mvd_limit))
3984                 {
3985                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
3986                 }
3987                 break;
3988             }
3989             case 2:
3990             {
3991                 S32 mvd, mvd_q8;
3992 
3993                 mvd_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
3994                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3995 
3996                 if(mvd > (mvd_limit))
3997                 {
3998                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
3999                 }
4000                 break;
4001             }
4002             case 4:
4003             {
4004                 S32 mvd, mvd_q8;
4005 
4006                 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4007                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4008 
4009                 if(mvd > (mvd_limit))
4010                 {
4011                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4012                 }
4013                 break;
4014             }
4015             case 5:
4016             {
4017                 S32 mvd;
4018                 S32 mvdx, mvdx_q8;
4019                 S32 mvdy, mvdy_q8;
4020 
4021                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4022                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4023 
4024                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4025                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4026 
4027                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4028 
4029                 if(mvd > mvd_limit)
4030                 {
4031                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4032                 }
4033                 break;
4034             }
4035             case 6:
4036             {
4037                 S32 mvd;
4038                 S32 mvdx, mvdx_q8;
4039                 S32 mvdy, mvdy_q8;
4040 
4041                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4042                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4043 
4044                 mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4045                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4046 
4047                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4048 
4049                 if(mvd > mvd_limit)
4050                 {
4051                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4052                 }
4053                 break;
4054             }
4055             case 8:
4056             {
4057                 S32 mvd, mvd_q8;
4058 
4059                 mvd_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4060                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4061 
4062                 if(mvd > (mvd_limit))
4063                 {
4064                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4065                 }
4066                 break;
4067             }
4068             case 9:
4069             {
4070                 S32 mvd;
4071                 S32 mvdx, mvdx_q8;
4072                 S32 mvdy, mvdy_q8;
4073 
4074                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4075                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4076 
4077                 mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4078                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4079 
4080                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4081 
4082                 if(mvd > mvd_limit)
4083                 {
4084                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4085                 }
4086                 break;
4087             }
4088             case 10:
4089             {
4090                 S32 mvd;
4091                 S32 mvdx, mvdx_q8;
4092                 S32 mvdy, mvdy_q8;
4093 
4094                 mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4095                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4096 
4097                 mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4098                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4099 
4100                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4101 
4102                 if(mvd > ps_cur_cluster_64->max_dist_from_centroid)
4103                 {
4104                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4105                 }
4106                 break;
4107             }
4108             default:
4109             {
4110                 break;
4111             }
4112             }
4113 
4114             i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) +
4115                               ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
4116             i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) +
4117                               ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
4118 
4119             ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs;
4120 
4121             ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs);
4122             ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs);
4123         }
4124         else if(num_clusters < MAX_NUM_CLUSTERS_64x64)
4125         {
4126             ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters];
4127 
4128             ps_blk_64x64->num_clusters++;
4129             ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
4130 
4131             ps_cur_cluster_64->is_valid_cluster = 1;
4132 
4133             ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
4134             ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
4135             ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
4136 
4137             memcpy(
4138                 &ps_cur_cluster_64->as_mv[0],
4139                 ps_cluster_data->as_mv,
4140                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
4141 
4142             ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
4143 
4144             ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
4145 
4146             ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
4147             ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
4148             ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
4149             ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
4150 
4151             ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
4152         }
4153     }
4154 }
4155 
4156 /**
4157 ********************************************************************************
4158 *  @fn   void hme_update_32x32_clusters
4159 *               (
4160 *                   cluster_32x32_blk_t *ps_blk_32x32,
4161 *                   cluster_16x16_blk_t *ps_blk_16x16
4162 *               )
4163 *
4164 *  @brief  Updates attributes for 32x32 clusters based on the attributes of
4165 *          the constituent 16x16 clusters
4166 *
4167 *  @param[out]  ps_blk_32x32: structure containing 32x32 block results
4168 *
4169 *  @param[in]  ps_blk_16x16 : structure containing 16x16 block results
4170 *
4171 *  @return None
4172 ********************************************************************************
4173 */
4174 static __inline void
hme_update_32x32_clusters(cluster_32x32_blk_t * ps_blk_32x32,cluster_16x16_blk_t * ps_blk_16x16)4175     hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
4176 {
4177     cluster_16x16_blk_t *ps_blk_16x16_cur;
4178     cluster_data_t *ps_cur_cluster;
4179 
4180     S32 i, j;
4181     S32 num_clusters_cur_16x16_blk;
4182 
4183     for(i = 0; i < 4; i++)
4184     {
4185         S32 num_clusters_evaluated = 0;
4186 
4187         ps_blk_16x16_cur = &ps_blk_16x16[i];
4188 
4189         num_clusters_cur_16x16_blk = ps_blk_16x16_cur->num_clusters;
4190 
4191         ps_blk_32x32->intra_mv_area += ps_blk_16x16_cur->intra_mv_area;
4192 
4193         ps_blk_32x32->best_inter_cost += ps_blk_16x16_cur->best_inter_cost;
4194 
4195         for(j = 0; num_clusters_evaluated < num_clusters_cur_16x16_blk; j++)
4196         {
4197             ps_cur_cluster = &ps_blk_16x16_cur->as_cluster_data[j];
4198 
4199             if(!ps_cur_cluster->is_valid_cluster)
4200             {
4201                 continue;
4202             }
4203 
4204             hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cur_cluster);
4205 
4206             num_clusters_evaluated++;
4207         }
4208     }
4209 }
4210 
4211 /**
4212 ********************************************************************************
4213 *  @fn   void hme_update_64x64_clusters
4214 *               (
4215 *                   cluster_64x64_blk_t *ps_blk_64x64,
4216 *                   cluster_32x32_blk_t *ps_blk_32x32
4217 *               )
4218 *
4219 *  @brief  Updates attributes for 64x64 clusters based on the attributes of
4220 *          the constituent 16x16 clusters
4221 *
4222 *  @param[out]  ps_blk_64x64: structure containing 32x32 block results
4223 *
4224 *  @param[in]  ps_blk_32x32 : structure containing 16x16 block results
4225 *
4226 *  @return None
4227 ********************************************************************************
4228 */
4229 static __inline void
hme_update_64x64_clusters(cluster_64x64_blk_t * ps_blk_64x64,cluster_32x32_blk_t * ps_blk_32x32)4230     hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
4231 {
4232     cluster_32x32_blk_t *ps_blk_32x32_cur;
4233     cluster_data_t *ps_cur_cluster;
4234 
4235     S32 i, j;
4236     S32 num_clusters_cur_32x32_blk;
4237 
4238     for(i = 0; i < 4; i++)
4239     {
4240         S32 num_clusters_evaluated = 0;
4241 
4242         ps_blk_32x32_cur = &ps_blk_32x32[i];
4243 
4244         num_clusters_cur_32x32_blk = ps_blk_32x32_cur->num_clusters;
4245 
4246         ps_blk_64x64->intra_mv_area += ps_blk_32x32_cur->intra_mv_area;
4247         ps_blk_64x64->best_inter_cost += ps_blk_32x32_cur->best_inter_cost;
4248 
4249         for(j = 0; num_clusters_evaluated < num_clusters_cur_32x32_blk; j++)
4250         {
4251             ps_cur_cluster = &ps_blk_32x32_cur->as_cluster_data[j];
4252 
4253             if(!ps_cur_cluster->is_valid_cluster)
4254             {
4255                 continue;
4256             }
4257 
4258             hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cur_cluster);
4259 
4260             num_clusters_evaluated++;
4261         }
4262     }
4263 }
4264 
4265 /**
4266 ********************************************************************************
4267 *  @fn   void hme_try_merge_clusters_blksize_gt_16
4268 *               (
4269 *                   cluster_data_t *ps_cluster_data,
4270 *                   S32 num_clusters
4271 *               )
4272 *
4273 *  @brief  Merging clusters from blocks of size 32x32 and greater
4274 *
4275 *  @param[in/out]  ps_cluster_data: structure containing cluster data
4276 *
4277 *  @param[in/out]  pi4_num_clusters : pointer to number of clusters
4278 *
4279 *  @return Success or failure
4280 ********************************************************************************
4281 */
hme_try_merge_clusters_blksize_gt_16(cluster_data_t * ps_cluster_data,S32 num_clusters)4282 S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
4283 {
4284     centroid_t *ps_cur_centroid;
4285     cluster_data_t *ps_cur_cluster;
4286 
4287     S32 i, mvd;
4288     S32 mvdx, mvdy, mvdx_q8, mvdy_q8;
4289 
4290     centroid_t *ps_centroid = &ps_cluster_data->s_centroid;
4291 
4292     S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
4293     S32 ref_id = ps_cluster_data->ref_id;
4294 
4295     S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
4296     S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
4297     S32 num_clusters_evaluated = 1;
4298     S32 ret_value = 0;
4299 
4300     if(1 >= num_clusters)
4301     {
4302         return ret_value;
4303     }
4304 
4305     for(i = 1; num_clusters_evaluated < num_clusters; i++)
4306     {
4307         S32 cur_posx_q8;
4308         S32 cur_posy_q8;
4309 
4310         ps_cur_cluster = &ps_cluster_data[i];
4311 
4312         if((ref_id != ps_cur_cluster->ref_id))
4313         {
4314             num_clusters_evaluated++;
4315             continue;
4316         }
4317 
4318         if((!ps_cur_cluster->is_valid_cluster))
4319         {
4320             continue;
4321         }
4322 
4323         num_clusters_evaluated++;
4324 
4325         ps_cur_centroid = &ps_cur_cluster->s_centroid;
4326 
4327         cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
4328         cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;
4329 
4330         mvdx_q8 = cur_posx_q8 - node0_posx_q8;
4331         mvdy_q8 = cur_posy_q8 - node0_posy_q8;
4332 
4333         mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4334         mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4335 
4336         mvd = ABS(mvdx) + ABS(mvdy);
4337 
4338         if(mvd <= (mvd_limit >> 1))
4339         {
4340             LWORD64 i8_updated_posx;
4341             LWORD64 i8_updated_posy;
4342             WORD32 minmax_updated_x = 0;
4343             WORD32 minmax_updated_y = 0;
4344 
4345             ps_cur_cluster->is_valid_cluster = 0;
4346 
4347             ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
4348             ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
4349             ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;
4350 
4351             memcpy(
4352                 &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
4353                 ps_cur_cluster->as_mv,
4354                 sizeof(mv_data_t) * ps_cur_cluster->num_mvs);
4355 
4356             if(mvdx > 0)
4357             {
4358                 ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
4359                 minmax_updated_x = 1;
4360             }
4361             else
4362             {
4363                 ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
4364                 minmax_updated_x = 2;
4365             }
4366 
4367             if(mvdy > 0)
4368             {
4369                 ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
4370                 minmax_updated_y = 1;
4371             }
4372             else
4373             {
4374                 ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
4375                 minmax_updated_y = 2;
4376             }
4377 
4378             switch((minmax_updated_y << 2) + minmax_updated_x)
4379             {
4380             case 1:
4381             {
4382                 S32 mvd, mvd_q8;
4383 
4384                 mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4385                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4386 
4387                 if(mvd > (mvd_limit))
4388                 {
4389                     ps_cluster_data->max_dist_from_centroid = mvd;
4390                 }
4391                 break;
4392             }
4393             case 2:
4394             {
4395                 S32 mvd, mvd_q8;
4396 
4397                 mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4398                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4399 
4400                 if(mvd > (mvd_limit))
4401                 {
4402                     ps_cluster_data->max_dist_from_centroid = mvd;
4403                 }
4404                 break;
4405             }
4406             case 4:
4407             {
4408                 S32 mvd, mvd_q8;
4409 
4410                 mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4411                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4412 
4413                 if(mvd > (mvd_limit))
4414                 {
4415                     ps_cluster_data->max_dist_from_centroid = mvd;
4416                 }
4417                 break;
4418             }
4419             case 5:
4420             {
4421                 S32 mvd;
4422                 S32 mvdx, mvdx_q8;
4423                 S32 mvdy, mvdy_q8;
4424 
4425                 mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4426                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4427 
4428                 mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4429                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4430 
4431                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4432 
4433                 if(mvd > mvd_limit)
4434                 {
4435                     ps_cluster_data->max_dist_from_centroid = mvd;
4436                 }
4437                 break;
4438             }
4439             case 6:
4440             {
4441                 S32 mvd;
4442                 S32 mvdx, mvdx_q8;
4443                 S32 mvdy, mvdy_q8;
4444 
4445                 mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4446                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4447 
4448                 mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4449                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4450 
4451                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4452 
4453                 if(mvd > mvd_limit)
4454                 {
4455                     ps_cluster_data->max_dist_from_centroid = mvd;
4456                 }
4457                 break;
4458             }
4459             case 8:
4460             {
4461                 S32 mvd, mvd_q8;
4462 
4463                 mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4464                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4465 
4466                 if(mvd > (mvd_limit))
4467                 {
4468                     ps_cluster_data->max_dist_from_centroid = mvd;
4469                 }
4470                 break;
4471             }
4472             case 9:
4473             {
4474                 S32 mvd;
4475                 S32 mvdx, mvdx_q8;
4476                 S32 mvdy, mvdy_q8;
4477 
4478                 mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4479                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4480 
4481                 mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4482                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4483 
4484                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4485 
4486                 if(mvd > mvd_limit)
4487                 {
4488                     ps_cluster_data->max_dist_from_centroid = mvd;
4489                 }
4490                 break;
4491             }
4492             case 10:
4493             {
4494                 S32 mvd;
4495                 S32 mvdx, mvdx_q8;
4496                 S32 mvdy, mvdy_q8;
4497 
4498                 mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4499                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4500 
4501                 mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4502                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4503 
4504                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4505 
4506                 if(mvd > ps_cluster_data->max_dist_from_centroid)
4507                 {
4508                     ps_cluster_data->max_dist_from_centroid = mvd;
4509                 }
4510                 break;
4511             }
4512             default:
4513             {
4514                 break;
4515             }
4516             }
4517 
4518             i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
4519                               ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
4520             i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
4521                               ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);
4522 
4523             ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;
4524 
4525             ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
4526             ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);
4527 
4528             if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
4529             {
4530                 num_clusters--;
4531                 num_clusters_evaluated = 1;
4532                 i = 0;
4533                 ret_value++;
4534             }
4535             else
4536             {
4537                 ret_value++;
4538 
4539                 return ret_value;
4540             }
4541         }
4542     }
4543 
4544     if(ret_value)
4545     {
4546         for(i = 1; i < (num_clusters + ret_value); i++)
4547         {
4548             if(ps_cluster_data[i].is_valid_cluster)
4549             {
4550                 break;
4551             }
4552         }
4553         if(i == (num_clusters + ret_value))
4554         {
4555             return ret_value;
4556         }
4557     }
4558     else
4559     {
4560         i = 1;
4561     }
4562 
4563     return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
4564            ret_value;
4565 }
4566 
4567 /**
4568 ********************************************************************************
4569 *  @fn   S32 hme_determine_validity_32x32
4570 *               (
4571 *                   ctb_cluster_info_t *ps_ctb_cluster_info
4572 *               )
4573 *
4574 *  @brief  Determines whther current 32x32 block needs to be evaluated in enc_loop
4575 *           while recursing through the CU tree or not
4576 *
4577 *  @param[in]  ps_cluster_data: structure containing cluster data
4578 *
4579 *  @return Success or failure
4580 ********************************************************************************
4581 */
hme_determine_validity_32x32(ctb_cluster_info_t * ps_ctb_cluster_info,S32 * pi4_children_nodes_required,S32 blk_validity_wrt_pic_bndry,S32 parent_blk_validity_wrt_pic_bndry)4582 __inline S32 hme_determine_validity_32x32(
4583     ctb_cluster_info_t *ps_ctb_cluster_info,
4584     S32 *pi4_children_nodes_required,
4585     S32 blk_validity_wrt_pic_bndry,
4586     S32 parent_blk_validity_wrt_pic_bndry)
4587 {
4588     cluster_data_t *ps_data;
4589 
4590     cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4591     cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4592 
4593     S32 num_clusters = ps_32x32_blk->num_clusters;
4594     S32 num_clusters_parent = ps_64x64_blk->num_clusters;
4595 
4596     if(!blk_validity_wrt_pic_bndry)
4597     {
4598         *pi4_children_nodes_required = 1;
4599         return 0;
4600     }
4601 
4602     if(!parent_blk_validity_wrt_pic_bndry)
4603     {
4604         *pi4_children_nodes_required = 1;
4605         return 1;
4606     }
4607 
4608     if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4609     {
4610         *pi4_children_nodes_required = 1;
4611         return 0;
4612     }
4613 
4614     if(num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4615     {
4616         *pi4_children_nodes_required = 1;
4617 
4618         return 1;
4619     }
4620     else if(num_clusters_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4621     {
4622         *pi4_children_nodes_required = 0;
4623 
4624         return 1;
4625     }
4626     else
4627     {
4628         if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4629         {
4630             *pi4_children_nodes_required = 0;
4631             return 1;
4632         }
4633         else
4634         {
4635             S32 i;
4636 
4637             S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 4;
4638             S32 min_area = MAX_32BIT_VAL;
4639             S32 num_clusters_evaluated = 0;
4640 
4641             for(i = 0; num_clusters_evaluated < num_clusters; i++)
4642             {
4643                 ps_data = &ps_32x32_blk->as_cluster_data[i];
4644 
4645                 if(!ps_data->is_valid_cluster)
4646                 {
4647                     continue;
4648                 }
4649 
4650                 num_clusters_evaluated++;
4651 
4652                 if(ps_data->area_in_pixels < min_area)
4653                 {
4654                     min_area = ps_data->area_in_pixels;
4655                 }
4656             }
4657 
4658             if((min_area << 4) < area_of_parent)
4659             {
4660                 *pi4_children_nodes_required = 1;
4661                 return 0;
4662             }
4663             else
4664             {
4665                 *pi4_children_nodes_required = 0;
4666                 return 1;
4667             }
4668         }
4669     }
4670 }
4671 
4672 /**
4673 ********************************************************************************
4674 *  @fn   S32 hme_determine_validity_16x16
4675 *               (
4676 *                   ctb_cluster_info_t *ps_ctb_cluster_info
4677 *               )
4678 *
4679 *  @brief  Determines whther current 16x16 block needs to be evaluated in enc_loop
4680 *           while recursing through the CU tree or not
4681 *
4682 *  @param[in]  ps_cluster_data: structure containing cluster data
4683 *
4684 *  @return Success or failure
4685 ********************************************************************************
4686 */
hme_determine_validity_16x16(ctb_cluster_info_t * ps_ctb_cluster_info,S32 * pi4_children_nodes_required,S32 blk_validity_wrt_pic_bndry,S32 parent_blk_validity_wrt_pic_bndry)4687 __inline S32 hme_determine_validity_16x16(
4688     ctb_cluster_info_t *ps_ctb_cluster_info,
4689     S32 *pi4_children_nodes_required,
4690     S32 blk_validity_wrt_pic_bndry,
4691     S32 parent_blk_validity_wrt_pic_bndry)
4692 {
4693     cluster_data_t *ps_data;
4694 
4695     cluster_16x16_blk_t *ps_16x16_blk = ps_ctb_cluster_info->ps_16x16_blk;
4696     cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4697     cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4698 
4699     S32 num_clusters = ps_16x16_blk->num_clusters;
4700     S32 num_clusters_parent = ps_32x32_blk->num_clusters;
4701     S32 num_clusters_grandparent = ps_64x64_blk->num_clusters;
4702 
4703     if(!blk_validity_wrt_pic_bndry)
4704     {
4705         *pi4_children_nodes_required = 1;
4706         return 0;
4707     }
4708 
4709     if(!parent_blk_validity_wrt_pic_bndry)
4710     {
4711         *pi4_children_nodes_required = 1;
4712         return 1;
4713     }
4714 
4715     if((num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
4716        (num_clusters_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
4717     {
4718         *pi4_children_nodes_required = 1;
4719         return 1;
4720     }
4721 
4722     /* Implies nc_64 <= 3 when num_clusters_parent > 3 & */
4723     /* implies nc_64 > 3 when num_clusters_parent < 3 & */
4724     if(num_clusters_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4725     {
4726         if(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4727         {
4728             *pi4_children_nodes_required = 0;
4729 
4730             return 1;
4731         }
4732         else
4733         {
4734             *pi4_children_nodes_required = 1;
4735 
4736             return 0;
4737         }
4738     }
4739     /* Implies nc_64 >= 3 */
4740     else
4741     {
4742         if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4743         {
4744             *pi4_children_nodes_required = 0;
4745             return 1;
4746         }
4747         else if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4748         {
4749             *pi4_children_nodes_required = 1;
4750             return 0;
4751         }
4752         else
4753         {
4754             S32 i;
4755 
4756             S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 2;
4757             S32 min_area = MAX_32BIT_VAL;
4758             S32 num_clusters_evaluated = 0;
4759 
4760             for(i = 0; num_clusters_evaluated < num_clusters; i++)
4761             {
4762                 ps_data = &ps_16x16_blk->as_cluster_data[i];
4763 
4764                 if(!ps_data->is_valid_cluster)
4765                 {
4766                     continue;
4767                 }
4768 
4769                 num_clusters_evaluated++;
4770 
4771                 if(ps_data->area_in_pixels < min_area)
4772                 {
4773                     min_area = ps_data->area_in_pixels;
4774                 }
4775             }
4776 
4777             if((min_area << 4) < area_of_parent)
4778             {
4779                 *pi4_children_nodes_required = 1;
4780                 return 0;
4781             }
4782             else
4783             {
4784                 *pi4_children_nodes_required = 0;
4785                 return 1;
4786             }
4787         }
4788     }
4789 }
4790 
4791 /**
4792 ********************************************************************************
4793 *  @fn   void hme_build_cu_tree
4794 *               (
4795 *                   ctb_cluster_info_t *ps_ctb_cluster_info,
4796 *                   cur_ctb_cu_tree_t *ps_cu_tree,
4797 *                   S32 tree_depth,
4798 *                   CU_POS_T e_grand_parent_blk_pos,
4799 *                   CU_POS_T e_parent_blk_pos,
4800 *                   CU_POS_T e_cur_blk_pos
4801 *               )
4802 *
4803 *  @brief  Recursive function for CU tree initialisation
4804 *
4805 *  @param[in]  ps_ctb_cluster_info: structure containing pointers to clusters
4806 *                                   corresponding to all block sizes from 64x64
4807 *                                   to 16x16
4808 *
4809 *  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
4810 *                                applicable
4811 *
4812 *  @param[in]  e_cur_blk_pos: position of current block wrt parent
4813 *
4814 *  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
4815 *
4816 *  @param[in]  tree_depth : specifies depth of the CU tree
4817 *
4818 *  @return Nothing
4819 ********************************************************************************
4820 */
hme_build_cu_tree(ctb_cluster_info_t * ps_ctb_cluster_info,cur_ctb_cu_tree_t * ps_cu_tree,S32 tree_depth,CU_POS_T e_grandparent_blk_pos,CU_POS_T e_parent_blk_pos,CU_POS_T e_cur_blk_pos)4821 void hme_build_cu_tree(
4822     ctb_cluster_info_t *ps_ctb_cluster_info,
4823     cur_ctb_cu_tree_t *ps_cu_tree,
4824     S32 tree_depth,
4825     CU_POS_T e_grandparent_blk_pos,
4826     CU_POS_T e_parent_blk_pos,
4827     CU_POS_T e_cur_blk_pos)
4828 {
4829     ihevce_cu_tree_init(
4830         ps_cu_tree,
4831         ps_ctb_cluster_info->ps_cu_tree_root,
4832         &ps_ctb_cluster_info->nodes_created_in_cu_tree,
4833         tree_depth,
4834         e_grandparent_blk_pos,
4835         e_parent_blk_pos,
4836         e_cur_blk_pos);
4837 }
4838 
4839 /**
4840 ********************************************************************************
4841 *  @fn   S32 hme_sdi_based_cluster_spread_eligibility
4842 *               (
4843 *                   cluster_32x32_blk_t *ps_blk_32x32
4844 *               )
4845 *
4846 *  @brief  Determines whether the spread of high SDI MV's around each cluster
4847 *          center is below a pre-determined threshold
4848 *
4849 *  @param[in]  ps_blk_32x32: structure containing pointers to clusters
4850 *                                   corresponding to all block sizes from 64x64
4851 *                                   to 16x16
4852 *
4853 *  @return 1 if the spread is constrained, else 0
4854 ********************************************************************************
4855 */
4856 __inline S32
hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t * ps_blk_32x32,S32 sdi_threshold)4857     hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
4858 {
4859     S32 cumulative_mv_distance;
4860     S32 i, j;
4861     S32 num_high_sdi_mvs;
4862 
4863     S32 num_clusters = ps_blk_32x32->num_clusters;
4864 
4865     for(i = 0; i < num_clusters; i++)
4866     {
4867         cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i];
4868 
4869         num_high_sdi_mvs = 0;
4870         cumulative_mv_distance = 0;
4871 
4872         for(j = 0; j < ps_data->num_mvs; j++)
4873         {
4874             mv_data_t *ps_mv = &ps_data->as_mv[j];
4875 
4876             if(ps_mv->sdi >= sdi_threshold)
4877             {
4878                 num_high_sdi_mvs++;
4879 
4880                 COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance);
4881             }
4882         }
4883 
4884         if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs))
4885         {
4886             return 0;
4887         }
4888     }
4889 
4890     return 1;
4891 }
4892 
4893 /**
4894 ********************************************************************************
4895 *  @fn   S32 hme_populate_cu_tree
4896 *               (
4897 *                   ctb_cluster_info_t *ps_ctb_cluster_info,
4898 *                   ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
4899 *                   cur_ctb_cu_tree_t *ps_cu_tree,
4900 *                   S32 tree_depth,
4901 *                   CU_POS_T e_parent_blk_pos,
4902 *                   CU_POS_T e_cur_blk_pos
4903 *               )
4904 *
4905 *  @brief  Recursive function for CU tree population based on output of
4906 *          clustering algorithm
4907 *
4908 *  @param[in]  ps_ctb_cluster_info: structure containing pointers to clusters
4909 *                                   corresponding to all block sizes from 64x64
4910 *                                   to 16x16
4911 *
4912 *  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
4913 applicable
4914 *
4915 *  @param[in]  e_cur_blk_pos: position of current block wrt parent
4916 *
4917 *  @param[in]  ps_cur_ipe_ctb : output container for ipe analyses
4918 *
4919 *  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
4920 *
4921 *  @param[in]  tree_depth : specifies depth of the CU tree
4922 *
4923 *  @param[in]  ipe_decision_precedence : specifies whether precedence should
4924 *               be given to decisions made either by IPE(1) or clustering algos.
4925 *
4926 *  @return 1 if re-evaluation of parent node's validity is not required,
4927 else 0
4928 ********************************************************************************
4929 */
hme_populate_cu_tree(ctb_cluster_info_t * ps_ctb_cluster_info,cur_ctb_cu_tree_t * ps_cu_tree,S32 tree_depth,ME_QUALITY_PRESETS_T e_quality_preset,CU_POS_T e_grandparent_blk_pos,CU_POS_T e_parent_blk_pos,CU_POS_T e_cur_blk_pos)4930 void hme_populate_cu_tree(
4931     ctb_cluster_info_t *ps_ctb_cluster_info,
4932     cur_ctb_cu_tree_t *ps_cu_tree,
4933     S32 tree_depth,
4934     ME_QUALITY_PRESETS_T e_quality_preset,
4935     CU_POS_T e_grandparent_blk_pos,
4936     CU_POS_T e_parent_blk_pos,
4937     CU_POS_T e_cur_blk_pos)
4938 {
4939     S32 area_of_cur_blk;
4940     S32 area_limit_for_me_decision_precedence;
4941     S32 children_nodes_required;
4942     S32 intra_mv_area;
4943     S32 intra_eval_enable;
4944     S32 inter_eval_enable;
4945     S32 ipe_decision_precedence;
4946     S32 node_validity;
4947     S32 num_clusters;
4948 
4949     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb;
4950 
4951     if(NULL == ps_cu_tree)
4952     {
4953         return;
4954     }
4955 
4956     switch(tree_depth)
4957     {
4958     case 0:
4959     {
4960         /* 64x64 block */
4961         S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
4962 
4963         cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
4964 
4965         area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4;
4966         area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
4967         children_nodes_required = 0;
4968         intra_mv_area = ps_blk_64x64->intra_mv_area;
4969 
4970         ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
4971 
4972         intra_eval_enable = ipe_decision_precedence;
4973         inter_eval_enable = !!ps_blk_64x64->num_clusters;
4974 
4975 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4976         if(e_quality_preset >= ME_HIGH_QUALITY)
4977         {
4978             inter_eval_enable = 1;
4979             node_validity = (blk_32x32_mask == 0xf);
4980 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
4981             ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
4982 #endif
4983             break;
4984         }
4985 #endif
4986 
4987 #if ENABLE_4CTB_EVALUATION
4988         node_validity = (blk_32x32_mask == 0xf);
4989 
4990         break;
4991 #else
4992         {
4993             S32 i;
4994 
4995             num_clusters = ps_blk_64x64->num_clusters;
4996 
4997             node_validity = (ipe_decision_precedence)
4998                                 ? (!ps_cur_ipe_ctb->u1_split_flag)
4999                                 : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK);
5000 
5001             for(i = 0; i < MAX_NUM_REF; i++)
5002             {
5003                 node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <=
5004                                                   MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5005             }
5006 
5007             node_validity = node_validity && (blk_32x32_mask == 0xf);
5008         }
5009         break;
5010 #endif
5011     }
5012     case 1:
5013     {
5014         /* 32x32 block */
5015         S32 is_percent_intra_area_gt_threshold;
5016 
5017         cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos];
5018 
5019         S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
5020 
5021 #if !ENABLE_4CTB_EVALUATION
5022         S32 best_inter_cost = ps_blk_32x32->best_inter_cost;
5023         S32 best_intra_cost =
5024             ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5025               ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier *
5026                   4) < 0)
5027                 ? MAX_32BIT_VAL
5028                 : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5029                    ps_ctb_cluster_info->i4_frame_qstep *
5030                        ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4);
5031         S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5032         S32 cost_differential = (best_inter_cost - best_cost);
5033 #endif
5034 
5035         area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2;
5036         area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5037         intra_mv_area = ps_blk_32x32->intra_mv_area;
5038         is_percent_intra_area_gt_threshold =
5039             (intra_mv_area > area_limit_for_me_decision_precedence);
5040         ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5041 
5042         intra_eval_enable = ipe_decision_precedence;
5043         inter_eval_enable = !!ps_blk_32x32->num_clusters;
5044         children_nodes_required = 1;
5045 
5046 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5047         if(e_quality_preset >= ME_HIGH_QUALITY)
5048         {
5049             inter_eval_enable = 1;
5050             node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5051 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5052             ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
5053 #endif
5054             break;
5055         }
5056 #endif
5057 
5058 #if ENABLE_4CTB_EVALUATION
5059         node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5060 
5061         break;
5062 #else
5063         {
5064             S32 i;
5065             num_clusters = ps_blk_32x32->num_clusters;
5066 
5067             if(ipe_decision_precedence)
5068             {
5069                 node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
5070                 node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5071             }
5072             else
5073             {
5074                 node_validity =
5075                     ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) &&
5076                     (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
5077                     (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5078 
5079                 for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++)
5080                 {
5081                     node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <=
5082                                                       MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5083                 }
5084 
5085                 if(node_validity)
5086                 {
5087                     node_validity = node_validity &&
5088                                     hme_sdi_based_cluster_spread_eligibility(
5089                                         ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold);
5090                 }
5091             }
5092         }
5093 
5094         break;
5095 #endif
5096     }
5097     case 2:
5098     {
5099         cluster_16x16_blk_t *ps_blk_16x16 =
5100             &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)];
5101 
5102         S32 blk_8x8_mask =
5103             ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5104 
5105         area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N];
5106         area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5107         children_nodes_required = 1;
5108         intra_mv_area = ps_blk_16x16->intra_mv_area;
5109         ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5110         num_clusters = ps_blk_16x16->num_clusters;
5111 
5112         intra_eval_enable = ipe_decision_precedence;
5113         inter_eval_enable = 1;
5114 
5115 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5116         if(e_quality_preset >= ME_HIGH_QUALITY)
5117         {
5118             node_validity =
5119                 !ps_ctb_cluster_info
5120                      ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5121             children_nodes_required = !node_validity;
5122             break;
5123         }
5124 #endif
5125 
5126 #if ENABLE_4CTB_EVALUATION
5127         node_validity = (blk_8x8_mask == 0xf);
5128 
5129 #if ENABLE_CU_TREE_CULLING
5130         {
5131             cur_ctb_cu_tree_t *ps_32x32_root;
5132 
5133             switch(e_parent_blk_pos)
5134             {
5135             case POS_TL:
5136             {
5137                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5138 
5139                 break;
5140             }
5141             case POS_TR:
5142             {
5143                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5144 
5145                 break;
5146             }
5147             case POS_BL:
5148             {
5149                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5150 
5151                 break;
5152             }
5153             case POS_BR:
5154             {
5155                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5156 
5157                 break;
5158             }
5159             }
5160 
5161             if(ps_32x32_root->is_node_valid)
5162             {
5163                 node_validity =
5164                     node_validity &&
5165                     !ps_ctb_cluster_info
5166                          ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5167                 children_nodes_required = !node_validity;
5168             }
5169         }
5170 #endif
5171 
5172         break;
5173 #else
5174 
5175         if(ipe_decision_precedence)
5176         {
5177             S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
5178                                      .as_intra16_analyse[e_cur_blk_pos]
5179                                      .b1_merge_flag);
5180             S32 valid_flag = (blk_8x8_mask == 0xf);
5181 
5182             node_validity = merge_flag_16 && valid_flag;
5183         }
5184         else
5185         {
5186             node_validity = (blk_8x8_mask == 0xf);
5187         }
5188 
5189         break;
5190 #endif
5191     }
5192     case 3:
5193     {
5194         S32 blk_8x8_mask =
5195             ps_ctb_cluster_info
5196                 ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos];
5197         S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
5198                                  .as_intra16_analyse[e_parent_blk_pos]
5199                                  .b1_merge_flag);
5200         S32 merge_flag_32 =
5201             (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag);
5202 
5203         intra_eval_enable = !merge_flag_16 || !merge_flag_32;
5204         inter_eval_enable = 1;
5205         children_nodes_required = 0;
5206 
5207 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5208         if(e_quality_preset >= ME_HIGH_QUALITY)
5209         {
5210             node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5211             break;
5212         }
5213 #endif
5214 
5215 #if ENABLE_4CTB_EVALUATION
5216         node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5217 
5218         break;
5219 #else
5220         {
5221             cur_ctb_cu_tree_t *ps_32x32_root;
5222             cur_ctb_cu_tree_t *ps_16x16_root;
5223             cluster_32x32_blk_t *ps_32x32_blk;
5224 
5225             switch(e_grandparent_blk_pos)
5226             {
5227             case POS_TL:
5228             {
5229                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5230 
5231                 break;
5232             }
5233             case POS_TR:
5234             {
5235                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5236 
5237                 break;
5238             }
5239             case POS_BL:
5240             {
5241                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5242 
5243                 break;
5244             }
5245             case POS_BR:
5246             {
5247                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5248 
5249                 break;
5250             }
5251             }
5252 
5253             switch(e_parent_blk_pos)
5254             {
5255             case POS_TL:
5256             {
5257                 ps_16x16_root = ps_32x32_root->ps_child_node_tl;
5258 
5259                 break;
5260             }
5261             case POS_TR:
5262             {
5263                 ps_16x16_root = ps_32x32_root->ps_child_node_tr;
5264 
5265                 break;
5266             }
5267             case POS_BL:
5268             {
5269                 ps_16x16_root = ps_32x32_root->ps_child_node_bl;
5270 
5271                 break;
5272             }
5273             case POS_BR:
5274             {
5275                 ps_16x16_root = ps_32x32_root->ps_child_node_br;
5276 
5277                 break;
5278             }
5279             }
5280 
5281             ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos];
5282 
5283             node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) &&
5284                             ((!ps_32x32_root->is_node_valid) ||
5285                              (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) ||
5286                              (!ps_16x16_root->is_node_valid));
5287 
5288             break;
5289         }
5290 #endif
5291     }
5292     }
5293 
5294     /* Fill the current cu_tree node */
5295     ps_cu_tree->is_node_valid = node_validity;
5296     ps_cu_tree->u1_intra_eval_enable = intra_eval_enable;
5297     ps_cu_tree->u1_inter_eval_enable = inter_eval_enable;
5298 
5299     if(children_nodes_required)
5300     {
5301         tree_depth++;
5302 
5303         hme_populate_cu_tree(
5304             ps_ctb_cluster_info,
5305             ps_cu_tree->ps_child_node_tl,
5306             tree_depth,
5307             e_quality_preset,
5308             e_parent_blk_pos,
5309             e_cur_blk_pos,
5310             POS_TL);
5311 
5312         hme_populate_cu_tree(
5313             ps_ctb_cluster_info,
5314             ps_cu_tree->ps_child_node_tr,
5315             tree_depth,
5316             e_quality_preset,
5317             e_parent_blk_pos,
5318             e_cur_blk_pos,
5319             POS_TR);
5320 
5321         hme_populate_cu_tree(
5322             ps_ctb_cluster_info,
5323             ps_cu_tree->ps_child_node_bl,
5324             tree_depth,
5325             e_quality_preset,
5326             e_parent_blk_pos,
5327             e_cur_blk_pos,
5328             POS_BL);
5329 
5330         hme_populate_cu_tree(
5331             ps_ctb_cluster_info,
5332             ps_cu_tree->ps_child_node_br,
5333             tree_depth,
5334             e_quality_preset,
5335             e_parent_blk_pos,
5336             e_cur_blk_pos,
5337             POS_BR);
5338     }
5339 }
5340 
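/**
********************************************************************************
*  @fn   void hme_analyse_mv_clustering
*               (
*                   search_results_t *ps_search_results,
*                   inter_cu_results_t *ps_16x16_cu_results,
*                   inter_cu_results_t *ps_8x8_cu_results,
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   S08 *pi1_future_list,
*                   S08 *pi1_past_list,
*                   S32 bidir_enabled,
*                   ME_QUALITY_PRESETS_T e_quality_preset
*               )
*
*  @brief  Implementation for the clustering algorithm
*
*  @param[in]  ps_search_results: structure containing 16x16 block results
*
*  @param[in]  ps_16x16_cu_results: best results of each 16x16 block; used when
*               the 16x16 block is not split
*
*  @param[in]  ps_8x8_cu_results: best results of each 8x8 block; used when the
*               16x16 block is split
*
*  @param[in,out]  ps_ctb_cluster_info : cluster data of all block sizes. Also
*               holds the output container for ipe analyses (ps_cur_ipe_ctb)
*               and the root of the CU tree used in CU recursion
*               (ps_cu_tree_root)
*
*  @param[in]  pi1_future_list : table that maps the L1 ref idx of a PU to the
*               ref idx used for clustering
*
*  @param[in]  pi1_past_list : table that maps the L0 ref idx of a PU to the
*               ref idx used for clustering
*
*  @param[in]  bidir_enabled : passed on to the cluster initialisation functions
*
*  @param[in]  e_quality_preset : ME quality preset
*
*  @return None
********************************************************************************
*/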
hme_analyse_mv_clustering(search_results_t * ps_search_results,inter_cu_results_t * ps_16x16_cu_results,inter_cu_results_t * ps_8x8_cu_results,ctb_cluster_info_t * ps_ctb_cluster_info,S08 * pi1_future_list,S08 * pi1_past_list,S32 bidir_enabled,ME_QUALITY_PRESETS_T e_quality_preset)5361 void hme_analyse_mv_clustering(
5362     search_results_t *ps_search_results,
5363     inter_cu_results_t *ps_16x16_cu_results,
5364     inter_cu_results_t *ps_8x8_cu_results,
5365     ctb_cluster_info_t *ps_ctb_cluster_info,
5366     S08 *pi1_future_list,
5367     S08 *pi1_past_list,
5368     S32 bidir_enabled,
5369     ME_QUALITY_PRESETS_T e_quality_preset)
5370 {
5371     cluster_16x16_blk_t *ps_blk_16x16;
5372     cluster_32x32_blk_t *ps_blk_32x32;
5373     cluster_64x64_blk_t *ps_blk_64x64;
5374 
5375     part_type_results_t *ps_best_result;
5376     pu_result_t *aps_part_result[MAX_NUM_PARTS];
5377     pu_result_t *aps_inferior_parts[MAX_NUM_PARTS];
5378 
5379     PART_ID_T e_part_id;
5380     PART_TYPE_T e_part_type;
5381 
5382     S32 enable_64x64_merge;
5383     S32 i, j, k;
5384     S32 mvx, mvy;
5385     S32 num_parts;
5386     S32 ref_idx;
5387     S32 ai4_pred_mode[MAX_NUM_PARTS];
5388 
5389     S32 num_32x32_merges = 0;
5390 
5391     /*****************************************/
5392     /*****************************************/
5393     /********* Enter ye who is HQ ************/
5394     /*****************************************/
5395     /*****************************************/
5396 
5397     ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
5398 
5399     /* Initialise data in each of the clusters */
5400     for(i = 0; i < 16; i++)
5401     {
5402         ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5403 
5404 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5405         if(e_quality_preset < ME_HIGH_QUALITY)
5406         {
5407             hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5408         }
5409         else
5410         {
5411             ps_blk_16x16->best_inter_cost = 0;
5412             ps_blk_16x16->intra_mv_area = 0;
5413         }
5414 #else
5415         hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5416 #endif
5417     }
5418 
5419     for(i = 0; i < 4; i++)
5420     {
5421         ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5422 
5423 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5424         if(e_quality_preset < ME_HIGH_QUALITY)
5425         {
5426             hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5427         }
5428         else
5429         {
5430             ps_blk_32x32->best_inter_cost = 0;
5431             ps_blk_32x32->intra_mv_area = 0;
5432         }
5433 #else
5434         hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5435 #endif
5436     }
5437 
5438 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5439     if(e_quality_preset < ME_HIGH_QUALITY)
5440     {
5441         hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5442     }
5443     else
5444     {
5445         ps_blk_64x64->best_inter_cost = 0;
5446         ps_blk_64x64->intra_mv_area = 0;
5447     }
5448 #else
5449     hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5450 #endif
5451 
5452     /* Initialise data for all nodes in the CU tree */
5453     hme_build_cu_tree(
5454         ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA);
5455 
5456     if(e_quality_preset >= ME_HIGH_QUALITY)
5457     {
5458         memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08));
5459     }
5460 
5461 #if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8
5462     return;
5463 #endif
5464 
5465     for(i = 0; i < 16; i++)
5466     {
5467         S32 blk_8x8_mask;
5468         S32 is_16x16_blk_valid;
5469         S32 num_clusters_updated;
5470         S32 num_clusters;
5471 
5472         blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i];
5473 
5474         ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5475 
5476         is_16x16_blk_valid = (blk_8x8_mask == 0xf);
5477 
5478         if(is_16x16_blk_valid)
5479         {
5480             /* Use 8x8 data when 16x16 CU is split */
5481             if(ps_search_results[i].u1_split_flag)
5482             {
5483                 S32 blk_8x8_idx = i << 2;
5484 
5485                 num_parts = 4;
5486                 e_part_type = PRT_NxN;
5487 
5488                 for(j = 0; j < num_parts; j++, blk_8x8_idx++)
5489                 {
5490                     /* Only 2Nx2N partition supported for 8x8 block */
5491                     ASSERT(
5492                         ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type ==
5493                         ((PART_TYPE_T)PRT_2Nx2N));
5494 
5495                     aps_part_result[j] =
5496                         &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0];
5497                     aps_inferior_parts[j] =
5498                         &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0];
5499                     ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5500                 }
5501             }
5502             else
5503             {
5504                 ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0];
5505 
5506                 e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type;
5507                 num_parts = gau1_num_parts_in_part_type[e_part_type];
5508 
5509                 for(j = 0; j < num_parts; j++)
5510                 {
5511                     aps_part_result[j] = &ps_best_result->as_pu_results[j];
5512                     aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j];
5513                     ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5514                 }
5515 
5516                 ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0;
5517             }
5518 
5519             for(j = 0; j < num_parts; j++)
5520             {
5521                 pu_result_t *ps_part_result = aps_part_result[j];
5522 
5523                 S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1);
5524 
5525                 e_part_id = ge_part_type_to_part_id[e_part_type][j];
5526 
5527                 /* Skip clustering if best mode is intra */
5528                 if((ps_part_result->pu.b1_intra_flag))
5529                 {
5530                     ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id];
5531                     ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost;
5532                     continue;
5533                 }
5534                 else
5535                 {
5536                     ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost;
5537                 }
5538 
5539 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5540                 if(e_quality_preset >= ME_HIGH_QUALITY)
5541                 {
5542                     continue;
5543                 }
5544 #endif
5545 
5546                 for(k = 0; k < num_mvs; k++)
5547                 {
5548                     mv_t *ps_mv;
5549 
5550                     pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv;
5551 
5552                     S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0);
5553 
5554                     ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv);
5555 
5556                     mvx = ps_mv->i2_mvx;
5557                     mvy = ps_mv->i2_mvy;
5558 
5559                     ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx]
5560                                          : pi1_future_list[ps_pu_mv->i1_l1_ref_idx];
5561 
5562                     num_clusters = ps_blk_16x16->num_clusters;
5563 
5564                     hme_find_and_update_clusters(
5565                         ps_blk_16x16->as_cluster_data,
5566                         &(ps_blk_16x16->num_clusters),
5567                         mvx,
5568                         mvy,
5569                         ref_idx,
5570                         ps_part_result->i4_sdi,
5571                         e_part_id,
5572                         (ai4_pred_mode[j] == 2));
5573 
5574                     num_clusters_updated = (ps_blk_16x16->num_clusters);
5575 
5576                     ps_blk_16x16->au1_num_clusters[ref_idx] +=
5577                         (num_clusters_updated - num_clusters);
5578                 }
5579             }
5580         }
5581     }
5582 
5583     /* Search for 32x32 clusters */
5584     for(i = 0; i < 4; i++)
5585     {
5586         S32 num_clusters_merged;
5587 
5588         S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0;
5589 
5590         if(is_32x32_blk_valid)
5591         {
5592             ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5593             ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2];
5594 
5595 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5596             if(e_quality_preset >= ME_HIGH_QUALITY)
5597             {
5598                 for(j = 0; j < 4; j++, ps_blk_16x16++)
5599                 {
5600                     ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area;
5601 
5602                     ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost;
5603                 }
5604                 continue;
5605             }
5606 #endif
5607 
5608             hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16);
5609 
5610             if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
5611             {
5612                 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5613                     ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters));
5614 
5615                 if(num_clusters_merged)
5616                 {
5617                     ps_blk_32x32->num_clusters -= num_clusters_merged;
5618 
5619                     UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32);
5620                 }
5621             }
5622         }
5623     }
5624 
5625 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5626     /* Eliminate outlier 32x32 clusters */
5627     if(e_quality_preset < ME_HIGH_QUALITY)
5628 #endif
5629     {
5630         hme_boot_out_outlier(ps_ctb_cluster_info, 32);
5631 
5632         /* Find best_uni_ref and best_alt_ref */
5633         hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32);
5634     }
5635 
5636     /* Populate the CU tree for depths 1 and higher */
5637     {
5638         cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;
5639         cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl;
5640         cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr;
5641         cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl;
5642         cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br;
5643 
5644         hme_populate_cu_tree(
5645             ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL);
5646 
5647         num_32x32_merges += (ps_tl->is_node_valid == 1);
5648 
5649         hme_populate_cu_tree(
5650             ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR);
5651 
5652         num_32x32_merges += (ps_tr->is_node_valid == 1);
5653 
5654         hme_populate_cu_tree(
5655             ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL);
5656 
5657         num_32x32_merges += (ps_bl->is_node_valid == 1);
5658 
5659         hme_populate_cu_tree(
5660             ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR);
5661 
5662         num_32x32_merges += (ps_br->is_node_valid == 1);
5663     }
5664 
5665 #if !ENABLE_4CTB_EVALUATION
5666     if(e_quality_preset < ME_HIGH_QUALITY)
5667     {
5668         enable_64x64_merge = (num_32x32_merges >= 3);
5669     }
5670 #else
5671     if(e_quality_preset < ME_HIGH_QUALITY)
5672     {
5673         enable_64x64_merge = 1;
5674     }
5675 #endif
5676 
5677 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5678     if(e_quality_preset >= ME_HIGH_QUALITY)
5679     {
5680         enable_64x64_merge = 1;
5681     }
5682 #else
5683     if(e_quality_preset >= ME_HIGH_QUALITY)
5684     {
5685         enable_64x64_merge = (num_32x32_merges >= 3);
5686     }
5687 #endif
5688 
5689     if(enable_64x64_merge)
5690     {
5691         S32 num_clusters_merged;
5692 
5693         ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0];
5694 
5695 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5696         if(e_quality_preset >= ME_HIGH_QUALITY)
5697         {
5698             for(j = 0; j < 4; j++, ps_blk_32x32++)
5699             {
5700                 ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area;
5701 
5702                 ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost;
5703             }
5704         }
5705         else
5706 #endif
5707         {
5708             hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32);
5709 
5710             if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
5711             {
5712                 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5713                     ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters));
5714 
5715                 if(num_clusters_merged)
5716                 {
5717                     ps_blk_64x64->num_clusters -= num_clusters_merged;
5718 
5719                     UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64);
5720                 }
5721             }
5722         }
5723 
5724 #if !ENABLE_4CTB_EVALUATION
5725         if(e_quality_preset < ME_HIGH_QUALITY)
5726         {
5727             S32 best_inter_cost = ps_blk_64x64->best_inter_cost;
5728             S32 best_intra_cost =
5729                 ((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5730                   ps_ctb_cluster_info->i4_frame_qstep *
5731                       ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0)
5732                     ? MAX_32BIT_VAL
5733                     : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5734                        ps_ctb_cluster_info->i4_frame_qstep *
5735                            ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16);
5736             S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5737             S32 cost_differential = (best_inter_cost - best_cost);
5738 
5739             enable_64x64_merge =
5740                 ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential));
5741         }
5742 #endif
5743     }
5744 
5745     if(enable_64x64_merge)
5746     {
5747 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5748         if(e_quality_preset < ME_HIGH_QUALITY)
5749 #endif
5750         {
5751             hme_boot_out_outlier(ps_ctb_cluster_info, 64);
5752 
5753             hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64);
5754         }
5755 
5756         hme_populate_cu_tree(
5757             ps_ctb_cluster_info,
5758             ps_ctb_cluster_info->ps_cu_tree_root,
5759             0,
5760             e_quality_preset,
5761             POS_NA,
5762             POS_NA,
5763             POS_NA);
5764     }
5765 }
5766 #endif
5767 
/**
********************************************************************************
*  @fn   void hme_merge_prms_init(hme_merge_prms_t *ps_prms, ...)
*
*  @brief  Populates one merge parameter structure. When e_blk_size is
*          BLK_32x32 the children are four consecutive 16x16 search results
*          and the merge target is the 32x32 result indexed by i4_32x32_id;
*          for any other block size the children are the four 32x32 search
*          results and the merge target is the 64x64 result.
*
*  @param[out] ps_prms : merge params structure that is filled in
*
*  @param[in]  ps_curr_layer : layer ctxt pointer, stored as is
*
*  @param[in]  ps_refine_prms : refinement prms; supplies i4_use_rec_in_fpel
*                               and the number of input results
*
*  @param[in]  ps_me_ctxt : frame level ME ctxt holding the search results,
*                           8x8 CU results, ref lists and cluster info
*
*  @param[in]  ps_range_prms_rec : per-ref mv range array, referenced when
*                                  i4_use_rec_in_fpel is non zero
*
*  @param[in]  ps_range_prms_inp : per-ref mv range array, referenced when
*                                  i4_use_rec_in_fpel is zero
*
*  @param[in]  pps_mv_grid : mv grid pointer, stored as is
*
*  @param[in]  ps_inter_ctb_prms : inter ctb prms pointer, stored as is
*
*  @param[in]  i4_num_pred_dir : stored as the number of refs used for merge
*
*  @param[in]  i4_32x32_id : id of the 32x32 blk; read only for BLK_32x32
*
*  @param[in]  e_blk_size : size of the merged blk (see brief)
*
*  @param[in]  e_me_quality_presets : quality preset, stored as is
*
*  @return None
********************************************************************************
*/
static __inline void hme_merge_prms_init(
    hme_merge_prms_t *ps_prms,
    layer_ctxt_t *ps_curr_layer,
    refine_prms_t *ps_refine_prms,
    me_frm_ctxt_t *ps_me_ctxt,
    range_prms_t *ps_range_prms_rec,
    range_prms_t *ps_range_prms_inp,
    mv_grid_t **pps_mv_grid,
    inter_ctb_prms_t *ps_inter_ctb_prms,
    S32 i4_num_pred_dir,
    S32 i4_32x32_id,
    BLK_SIZE_T e_blk_size,
    ME_QUALITY_PRESETS_T e_me_quality_presets)
{
    const S32 i4_rec_in_fpel = ps_refine_prms->i4_use_rec_in_fpel;

    /* MV ranges come from the recon set or the input set, depending on */
    /* whether fpel search uses recon                                    */
    range_prms_t *ps_range_base = i4_rec_in_fpel ? ps_range_prms_rec : ps_range_prms_inp;
    WORD32 ref_id;

    /*********************************************************************/
    /* Fields that do not depend on the size of the merged block         */
    /*********************************************************************/

    /* Segmentation info from previous layers is currently not enabled */
    ps_prms->i4_seg_info_avail = 0;
    ps_prms->i4_part_mask = 0;

    /* Number of reference pics in which merge is done */
    ps_prms->i4_num_ref = i4_num_pred_dir;

    ps_prms->ps_layer_ctxt = ps_curr_layer;
    ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms;
    ps_prms->pps_mv_grid = pps_mv_grid;

    ps_prms->i4_use_rec = i4_rec_in_fpel;
    ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
    ps_prms->e_quality_preset = e_me_quality_presets;

    ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size;
    ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list;
    ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list;
    ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info;

    for(ref_id = 0; ref_id < MAX_NUM_REF; ref_id++)
    {
        ps_prms->aps_mv_range[ref_id] = ps_range_base + ref_id;
    }

    /*********************************************************************/
    /* Child (tl, tr, bl, br) and merged result pointers                 */
    /*********************************************************************/
    if(BLK_32x32 != e_blk_size)
    {
        /* 32x32 -> 64x64 : the four 32x32 results are the children */
        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0];
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1];
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2];
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3];

        /* 16x16 results are handed over as grandchildren */
        ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16;
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0];

        /* Merged result goes to the 64x64 search result */
        ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64;

        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results;
    }
    else
    {
        /* 16x16 -> 32x32 : index of the first of the four 16x16 children */
        const S32 i4_first_16x16 = i4_32x32_id << 2;

        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[i4_first_16x16];
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[i4_first_16x16 + 1];
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[i4_first_16x16 + 2];
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[i4_first_16x16 + 3];

        /* No grandchildren at this level */
        ps_prms->ps_results_grandchild = NULL;
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4];

        /* Merged result goes to the 32x32 search result of this blk */
        ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id];

        /* This could be less than the number of 16x16 results generated; */
        /* for now it is kept the same                                    */
        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results;
    }
}
5860 
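/**
********************************************************************************
*  @fn   void hme_refine(me_ctxt_t *ps_thrd_ctxt,
*                       refine_prms_t *ps_refine_prms,
*                       PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
*                       layer_ctxt_t *ps_coarse_layer,
*                       multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
*                       S32 lyr_job_type,
*                       S32 thrd_id,
*                       S32 me_frm_id,
*                       pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
*
*  @brief  Top level entry point for refinement ME
*
*  @param[in,out]  ps_thrd_ctxt: ME Handle
*
*  @param[in]  ps_refine_prms : refinement layer prms
*
*  @param  pf_ext_update_fxn : external update function pointer
*
*  @param  ps_coarse_layer : context of the coarser layer
*
*  @param  ps_multi_thrd_ctxt : multi thread context
*
*  @param[in]  lyr_job_type : layer job type
*
*  @param[in]  thrd_id : thread id
*
*  @param[in]  me_frm_id : ME frame id; taken modulo MAX_NUM_ME_PARALLEL to
*                          select the frame level ME context
*
*  @param  ps_l0_ipe_input : presumably the L0 IPE input context (TODO confirm)
*
*  @return None
********************************************************************************
*/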
hme_refine(me_ctxt_t * ps_thrd_ctxt,refine_prms_t * ps_refine_prms,PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,layer_ctxt_t * ps_coarse_layer,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,S32 lyr_job_type,S32 thrd_id,S32 me_frm_id,pre_enc_L0_ipe_encloop_ctxt_t * ps_l0_ipe_input)5875 void hme_refine(
5876     me_ctxt_t *ps_thrd_ctxt,
5877     refine_prms_t *ps_refine_prms,
5878     PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
5879     layer_ctxt_t *ps_coarse_layer,
5880     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
5881     S32 lyr_job_type,
5882     S32 thrd_id,
5883     S32 me_frm_id,
5884     pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
5885 {
5886     inter_ctb_prms_t s_common_frm_prms;
5887 
5888     BLK_SIZE_T e_search_blk_size, e_result_blk_size;
5889     WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL;
5890     me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
5891     ME_QUALITY_PRESETS_T e_me_quality_presets =
5892         ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
5893 
5894     WORD32 num_rows_proc = 0;
5895     WORD32 num_act_ref_pics;
5896     WORD16 i2_prev_enc_frm_max_mv_y;
5897     WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p;
5898 
5899     /*************************************************************************/
5900     /* Complexity of search: Low to High                                     */
5901     /*************************************************************************/
5902     SEARCH_COMPLEXITY_T e_search_complexity;
5903 
5904     /*************************************************************************/
5905     /* to store the PU results which are passed to the decide_part_types     */
5906     /* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/
5907     /*************************************************************************/
5908 
5909     pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST];
5910     inter_pu_results_t as_inter_pu_results[4];
5911     inter_pu_results_t *ps_pu_results = as_inter_pu_results;
5912 
5913     /*************************************************************************/
    /* Config parameter structures for various ME submodules                 */
5915     /*************************************************************************/
5916     hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr;
5917     hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br;
5918     hme_merge_prms_t s_merge_prms_64x64;
5919     hme_search_prms_t s_search_prms_blk;
5920     mvbank_update_prms_t s_mv_update_prms;
5921     hme_ctb_prms_t s_ctb_prms;
5922     hme_subpel_prms_t s_subpel_prms;
5923     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt;
5924     ctb_cluster_info_t *ps_ctb_cluster_info;
5925     fpel_srch_cand_init_data_t s_srch_cand_init_data;
5926 
5927     /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */
5928     S32 en_merge_32x32;
5929     /* 5 lsb's specify whether or not merge algorithm is required */
5930     /* to be executed or not. Relevant only in PQ. Ought to be */
5931     /* used in conjunction with en_merge_32x32 and */
5932     /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */
5933     /* required when all children are deemed to be intras */
5934     S32 en_merge_execution;
5935 
5936     /*************************************************************************/
5937     /* All types of search candidates for predictor based search.            */
5938     /*************************************************************************/
5939     S32 num_init_candts = 0;
5940     S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
5941     S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
5942     search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
5943     search_node_t as_top_neighbours[4], as_left_neighbours[3];
5944 
5945     pf_get_wt_inp fp_get_wt_inp;
5946 
5947     search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
5948     U32 au4_unique_node_map[MAP_X_MAX * 2];
5949 
5950     /* Controls the boundary attributes of CTB, whether it has 64x64 or not */
5951     ctb_boundary_attrs_t *ps_ctb_bound_attrs;
5952 
5953     /*************************************************************************/
    /* Points to the search results for the blk level search (8x8/16x16)     */
5955     /*************************************************************************/
5956     search_results_t *ps_search_results;
5957 
5958     /*************************************************************************/
5959     /* Coordinates                                                           */
5960     /*************************************************************************/
5961     S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb;
5962     S32 pos_x, pos_y;
5963     S32 blk_id_in_full_ctb;
5964 
5965     /*************************************************************************/
5966     /* Related to dimensions of block being searched and pic dimensions      */
5967     /*************************************************************************/
5968     S32 blk_4x4_to_16x16;
5969     S32 blk_wd, blk_ht, blk_size_shift;
5970     S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
5971     S32 num_results_prev_layer;
5972 
5973     /*************************************************************************/
5974     /* Size of a basic unit for this layer. For non encode layers, we search */
5975     /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
5976     /* basic unit size is the ctb size.                                      */
5977     /*************************************************************************/
5978     S32 unit_size;
5979 
5980     /*************************************************************************/
5981     /* Local variable storing results of any 4 CU merge to bigger CU         */
5982     /*************************************************************************/
5983     CU_MERGE_RESULT_T e_merge_result;
5984 
5985     /*************************************************************************/
5986     /* This mv grid stores results during and after fpel search, during      */
5987     /* merge, subpel and bidirect refinements stages. 2 instances of this are*/
5988     /* meant for the 2 directions of search (l0 and l1).                     */
5989     /*************************************************************************/
5990     mv_grid_t *aps_mv_grid[2];
5991 
5992     /*************************************************************************/
5993     /* Pointers to context in current and coarser layers                     */
5994     /*************************************************************************/
5995     layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
5996 
5997     /*************************************************************************/
5998     /* to store mv range per blk, and picture limit, allowed search range    */
5999     /* range prms in hpel and qpel units as well                             */
6000     /*************************************************************************/
6001     range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF];
6002     range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
6003     range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF];
6004 
6005     /*************************************************************************/
6006     /* These variables are used to track number of references at different   */
6007     /* stages of ME.                                                         */
6008     /*************************************************************************/
6009     S32 i4_num_pred_dir;
6010     S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
6011     S32 lambda_recon = ps_refine_prms->lambda_recon;
6012 
6013     /* Counts successful merge to 32x32 every CTB (0-4) */
6014     S32 merge_count_32x32;
6015 
6016     S32 ai4_id_coloc[14], ai4_id_Z[2];
6017     U08 au1_search_candidate_list_index[2];
6018     S32 ai4_num_coloc_cands[2];
6019     U08 u1_pred_dir, u1_pred_dir_ctr;
6020 
6021     /*************************************************************************/
6022     /* Input pointer and stride                                              */
6023     /*************************************************************************/
6024     U08 *pu1_inp;
6025     S32 i4_inp_stride;
6026     S32 end_of_frame;
6027     S32 num_sync_units_in_row, num_sync_units_in_tile;
6028 
6029     /*************************************************************************/
6030     /* Indicates whether the all 4 8x8 blks are valid in the 16x16 blk in the*/
6031     /* encode layer. If not 15, then 1 or more 8x8 blks not valid. Means that*/
6032     /* we need to stop merges and force 8x8 CUs for that 16x16 blk           */
6033     /*************************************************************************/
6034     S32 blk_8x8_mask;
6035     S32 ai4_blk_8x8_mask[16];
6036     U08 au1_is_64x64Blk_noisy[1];
6037     U08 au1_is_32x32Blk_noisy[4];
6038     U08 au1_is_16x16Blk_noisy[16];
6039 
6040     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
6041         ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
6042     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
6043         ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
6044 
6045     ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
6046 
6047     /*************************************************************************/
6048     /* Pointers to current and coarse layer are needed for projection */
6049     /* Pointer to prev layer are needed for other candts like coloc   */
6050     /*************************************************************************/
6051     ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
6052 
6053     ps_prev_layer = hme_get_past_layer_ctxt(
6054         ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel);
6055 
6056     num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
6057 
    /* Function pointer is selected based on the C vs X86 macro */
6059 
6060     fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb;
6061 
6062     i4_inp_stride = ps_curr_layer->i4_inp_stride;
6063     i4_pic_wd = ps_curr_layer->i4_wd;
6064     i4_pic_ht = ps_curr_layer->i4_ht;
6065     e_search_complexity = ps_refine_prms->e_search_complexity;
6066     end_of_frame = 0;
6067 
6068     /* This points to all the initial candts */
6069     ps_search_candts = &as_search_candts[0];
6070 
    /* mv grid, being a huge structure, is part of the context */
6072     aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0];
6073     aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1];
6074 
6075     /*************************************************************************/
6076     /* If the current layer is encoded (since it may be multicast or final   */
6077     /* layer (finest)), then we use 16x16 blk size with some selected parts  */
6078     /* If the current layer is not encoded, then we use 8x8 blk size, with   */
6079     /* enable or disable of 4x4 partitions depending on the input prms       */
6080     /*************************************************************************/
6081     e_search_blk_size = BLK_16x16;
6082     blk_wd = blk_ht = 16;
6083     blk_size_shift = 4;
6084     e_result_blk_size = BLK_8x8;
6085     s_mv_update_prms.i4_shift = 1;
6086 
6087     if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4)
6088     {
6089         blk_4x4_to_16x16 = 1;
6090     }
6091     else
6092     {
6093         blk_4x4_to_16x16 = 0;
6094     }
6095 
6096     unit_size = 1 << ps_ctxt->log_ctb_size;
6097     s_search_prms_blk.i4_inp_stride = unit_size;
6098 
6099     /* This is required to properly update the layer mv bank */
6100     s_mv_update_prms.e_search_blk_size = e_search_blk_size;
6101     s_search_prms_blk.e_blk_size = e_search_blk_size;
6102 
6103     /*************************************************************************/
6104     /* If current layer is explicit, then the number of ref frames are to    */
6105     /* be same as previous layer. Else it will be 2                          */
6106     /*************************************************************************/
6107     i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
6108     i4_num_pred_dir =
6109         (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) +
6110         1;
6111 
6112 #if USE_MODIFIED == 1
6113     s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6114 #else
6115     s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6116 #endif
6117 
6118     i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer);
6119     if(i4_num_ref_prev_layer <= 2)
6120     {
6121         i4_num_ref_each_dir = 1;
6122     }
6123     else
6124     {
6125         i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
6126     }
6127 
6128     s_mv_update_prms.i4_num_ref = i4_num_pred_dir;
6129     s_mv_update_prms.i4_num_results_to_store =
6130         MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref
6131                                                 : (i4_num_act_ref_l0 > 1) + 1,
6132             ps_refine_prms->i4_num_results_per_part);
6133 
6134     /*************************************************************************/
6135     /* Initialization of merge params for 16x16 to 32x32 merge.              */
6136     /* There are 4 32x32 units in a CTB, so 4 param structures initialized   */
6137     /*************************************************************************/
6138     {
6139         hme_merge_prms_t *aps_merge_prms[4];
6140         aps_merge_prms[0] = &s_merge_prms_32x32_tl;
6141         aps_merge_prms[1] = &s_merge_prms_32x32_tr;
6142         aps_merge_prms[2] = &s_merge_prms_32x32_bl;
6143         aps_merge_prms[3] = &s_merge_prms_32x32_br;
6144         for(i = 0; i < 4; i++)
6145         {
6146             hme_merge_prms_init(
6147                 aps_merge_prms[i],
6148                 ps_curr_layer,
6149                 ps_refine_prms,
6150                 ps_ctxt,
6151                 as_range_prms_rec,
6152                 as_range_prms_inp,
6153                 &aps_mv_grid[0],
6154                 &s_common_frm_prms,
6155                 i4_num_pred_dir,
6156                 i,
6157                 BLK_32x32,
6158                 e_me_quality_presets);
6159         }
6160     }
6161 
6162     /*************************************************************************/
6163     /* Initialization of merge params for 32x32 to 64x64 merge.              */
6164     /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB    */
6165     /*************************************************************************/
6166     {
6167         hme_merge_prms_init(
6168             &s_merge_prms_64x64,
6169             ps_curr_layer,
6170             ps_refine_prms,
6171             ps_ctxt,
6172             as_range_prms_rec,
6173             as_range_prms_inp,
6174             &aps_mv_grid[0],
6175             &s_common_frm_prms,
6176             i4_num_pred_dir,
6177             0,
6178             BLK_64x64,
6179             e_me_quality_presets);
6180     }
6181 
6182     /* Pointers to cu_results are initialised here */
6183     {
6184         WORD32 i;
6185 
6186         ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results;
6187 
6188         for(i = 0; i < 4; i++)
6189         {
6190             ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i];
6191         }
6192 
6193         for(i = 0; i < 16; i++)
6194         {
6195             ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i];
6196         }
6197     }
6198 
6199     /*************************************************************************/
6200     /* SUBPEL Params initialized here                                        */
6201     /*************************************************************************/
6202     {
6203         s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0];
6204         s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0];
6205         s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64;
6206 
6207         s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results;
6208         s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results;
6209         s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results;
6210 
6211         s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine;
6212         s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine;
6213 
6214         s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel;
6215 
6216         s_subpel_prms.i4_inp_stride = unit_size;
6217 
6218         s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N;
6219         s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN;
6220         s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold;
6221 
6222         s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic;
6223 
6224         {
6225             WORD32 ref_ctr;
6226             for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6227             {
6228                 s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr];
6229                 s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr];
6230             }
6231         }
6232         s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck;
6233 
6234 #if USE_MODIFIED == 0
6235         s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6236 #else
6237         s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6238 #endif
6239         s_subpel_prms.e_me_quality_presets = e_me_quality_presets;
6240 
6241         /* BI Refinement done only if this field is 1 */
6242         s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled;
6243 
6244         s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
6245 
6246         s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6247         s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6248         s_subpel_prms.u1_max_num_subpel_refine_centers =
6249             ps_refine_prms->u1_max_num_subpel_refine_centers;
6250     }
6251 
6252     /* inter_ctb_prms_t struct initialisation */
6253     {
6254         inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms;
6255         hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms;
6256 
6257         ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0;
6258         ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1;
6259         ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc;
6260         ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth;
6261         ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets;
6262         ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled;
6263         ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride;
6264         ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref;
6265         ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd;
6266         ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride;
6267         ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6268         ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6269         ps_inter_ctb_prms->i4_lamda = lambda_recon;
6270         ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift;
6271         ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8;
6272         ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt;
6273         ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list;
6274         ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list;
6275         ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance;
6276         ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands =
6277             ps_refine_prms->u1_max_2nx2n_tu_recur_cands;
6278     }
6279 
6280     for(i = 0; i < MAX_INIT_CANDTS; i++)
6281     {
6282         ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
6283         ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i];
6284 
6285         INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
6286     }
6287     num_act_ref_pics =
6288         ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6289 
6290     if(num_act_ref_pics)
6291     {
6292         hme_search_cand_data_init(
6293             ai4_id_Z,
6294             ai4_id_coloc,
6295             ai4_num_coloc_cands,
6296             au1_search_candidate_list_index,
6297             i4_num_act_ref_l0,
6298             i4_num_act_ref_l1,
6299             ps_ctxt->s_frm_prms.bidir_enabled,
6300             blk_4x4_to_16x16);
6301     }
6302 
6303     if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1))
6304     {
6305         ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6306         ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1];
6307     }
6308     else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1))
6309     {
6310         ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6311     }
6312 
6313     for(i = 0; i < 3; i++)
6314     {
6315         search_node_t *ps_search_node;
6316         ps_search_node = &as_left_neighbours[i];
6317         INIT_SEARCH_NODE(ps_search_node, 0);
6318         ps_search_node = &as_top_neighbours[i];
6319         INIT_SEARCH_NODE(ps_search_node, 0);
6320     }
6321 
6322     INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
6323     as_left_neighbours[2].u1_is_avail = 0;
6324 
6325     /*************************************************************************/
6326     /* Initialize all the search results structure here. We update all the   */
6327     /* search results to default values, and configure things like blk sizes */
6328     /*************************************************************************/
6329     if(num_act_ref_pics)
6330     {
6331         S32 i4_x, i4_y;
6332         /* 16x16 results */
6333         for(i = 0; i < 16; i++)
6334         {
6335             search_results_t *ps_search_results;
6336             S32 pred_lx;
6337             ps_search_results = &ps_ctxt->as_search_results_16x16[i];
6338             i4_x = (S32)gau1_encode_to_raster_x[i];
6339             i4_y = (S32)gau1_encode_to_raster_y[i];
6340             i4_x <<= 4;
6341             i4_y <<= 4;
6342 
6343             hme_init_search_results(
6344                 ps_search_results,
6345                 i4_num_pred_dir,
6346                 ps_refine_prms->i4_num_fpel_results,
6347                 ps_refine_prms->i4_num_results_per_part,
6348                 e_search_blk_size,
6349                 i4_x,
6350                 i4_y,
6351                 &ps_ctxt->au1_is_past[0]);
6352 
6353             for(pred_lx = 0; pred_lx < 2; pred_lx++)
6354             {
6355                 pred_ctxt_t *ps_pred_ctxt;
6356 
6357                 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6358 
6359                 hme_init_pred_ctxt_encode(
6360                     ps_pred_ctxt,
6361                     ps_search_results,
6362                     ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6363                     ps_search_candts[ai4_id_Z[0]].ps_search_node,
6364                     aps_mv_grid[pred_lx],
6365                     pred_lx,
6366                     lambda_recon,
6367                     ps_refine_prms->lambda_q_shift,
6368                     &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6369                     &ps_ctxt->ai2_ref_scf[0]);
6370             }
6371         }
6372 
6373         for(i = 0; i < 4; i++)
6374         {
6375             search_results_t *ps_search_results;
6376             S32 pred_lx;
6377             ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6378 
6379             i4_x = (S32)gau1_encode_to_raster_x[i];
6380             i4_y = (S32)gau1_encode_to_raster_y[i];
6381             i4_x <<= 5;
6382             i4_y <<= 5;
6383 
6384             hme_init_search_results(
6385                 ps_search_results,
6386                 i4_num_pred_dir,
6387                 ps_refine_prms->i4_num_32x32_merge_results,
6388                 ps_refine_prms->i4_num_results_per_part,
6389                 BLK_32x32,
6390                 i4_x,
6391                 i4_y,
6392                 &ps_ctxt->au1_is_past[0]);
6393 
6394             for(pred_lx = 0; pred_lx < 2; pred_lx++)
6395             {
6396                 pred_ctxt_t *ps_pred_ctxt;
6397 
6398                 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6399 
6400                 hme_init_pred_ctxt_encode(
6401                     ps_pred_ctxt,
6402                     ps_search_results,
6403                     ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6404                     ps_search_candts[ai4_id_Z[0]].ps_search_node,
6405                     aps_mv_grid[pred_lx],
6406                     pred_lx,
6407                     lambda_recon,
6408                     ps_refine_prms->lambda_q_shift,
6409                     &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6410                     &ps_ctxt->ai2_ref_scf[0]);
6411             }
6412         }
6413 
6414         {
6415             search_results_t *ps_search_results;
6416             S32 pred_lx;
6417             ps_search_results = &ps_ctxt->s_search_results_64x64;
6418 
6419             hme_init_search_results(
6420                 ps_search_results,
6421                 i4_num_pred_dir,
6422                 ps_refine_prms->i4_num_64x64_merge_results,
6423                 ps_refine_prms->i4_num_results_per_part,
6424                 BLK_64x64,
6425                 0,
6426                 0,
6427                 &ps_ctxt->au1_is_past[0]);
6428 
6429             for(pred_lx = 0; pred_lx < 2; pred_lx++)
6430             {
6431                 pred_ctxt_t *ps_pred_ctxt;
6432 
6433                 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6434 
6435                 hme_init_pred_ctxt_encode(
6436                     ps_pred_ctxt,
6437                     ps_search_results,
6438                     ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6439                     ps_search_candts[ai4_id_Z[0]].ps_search_node,
6440                     aps_mv_grid[pred_lx],
6441                     pred_lx,
6442                     lambda_recon,
6443                     ps_refine_prms->lambda_q_shift,
6444                     &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6445                     &ps_ctxt->ai2_ref_scf[0]);
6446             }
6447         }
6448     }
6449 
6450     /* Initialise the structure used in clustering  */
6451     if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6452     {
6453         ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info;
6454 
6455         ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16;
6456         ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32;
6457         ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64;
6458         ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask;
6459         ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold;
6460         ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep;
6461         ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16;
6462     }
6463 
6464     /*********************************************************************/
6465     /* Initialize the dyn. search range params. for each reference index */
6466     /* in current layer ctxt                                             */
6467     /*********************************************************************/
6468 
6469     /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
6470     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
6471     {
6472         WORD32 ref_ctr;
6473         /* set no. of act ref in L0 for further use at frame level */
6474         ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 =
6475             ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6476 
6477         for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
6478         {
6479             INIT_DYN_SEARCH_PRMS(
6480                 &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr],
6481                 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
6482         }
6483     }
6484     /*************************************************************************/
6485     /* Now that the candidates have been ordered, to choose the right number */
6486     /* of initial candidates.                                                */
6487     /*************************************************************************/
6488     if(blk_4x4_to_16x16)
6489     {
6490         if(i4_num_ref_prev_layer > 2)
6491         {
6492             if(e_search_complexity == SEARCH_CX_LOW)
6493                 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6494             else if(e_search_complexity == SEARCH_CX_MED)
6495                 num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6496             else if(e_search_complexity == SEARCH_CX_HIGH)
6497                 num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6498             else
6499                 ASSERT(0);
6500         }
6501         else if(i4_num_ref_prev_layer == 2)
6502         {
6503             if(e_search_complexity == SEARCH_CX_LOW)
6504                 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6505             else if(e_search_complexity == SEARCH_CX_MED)
6506                 num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6507             else if(e_search_complexity == SEARCH_CX_HIGH)
6508                 num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6509             else
6510                 ASSERT(0);
6511         }
6512         else
6513         {
6514             if(e_search_complexity == SEARCH_CX_LOW)
6515                 num_init_candts = 5;
6516             else if(e_search_complexity == SEARCH_CX_MED)
6517                 num_init_candts = 12;
6518             else if(e_search_complexity == SEARCH_CX_HIGH)
6519                 num_init_candts = 19;
6520             else
6521                 ASSERT(0);
6522         }
6523     }
6524     else
6525     {
6526         if(i4_num_ref_prev_layer > 2)
6527         {
6528             if(e_search_complexity == SEARCH_CX_LOW)
6529                 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6530             else if(e_search_complexity == SEARCH_CX_MED)
6531                 num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6532             else if(e_search_complexity == SEARCH_CX_HIGH)
6533                 num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6534             else
6535                 ASSERT(0);
6536         }
6537         else if(i4_num_ref_prev_layer == 2)
6538         {
6539             if(e_search_complexity == SEARCH_CX_LOW)
6540                 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6541             else if(e_search_complexity == SEARCH_CX_MED)
6542                 num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6543             else if(e_search_complexity == SEARCH_CX_HIGH)
6544                 num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6545             else
6546                 ASSERT(0);
6547         }
6548         else
6549         {
6550             if(e_search_complexity == SEARCH_CX_LOW)
6551                 num_init_candts = 5;
6552             else if(e_search_complexity == SEARCH_CX_MED)
6553                 num_init_candts = 11;
6554             else if(e_search_complexity == SEARCH_CX_HIGH)
6555                 num_init_candts = 16;
6556             else
6557                 ASSERT(0);
6558         }
6559     }
6560 
6561     /*************************************************************************/
6562     /* The following search parameters are fixed throughout the search across*/
6563     /* all blks. So these are configured outside processing loop             */
6564     /*************************************************************************/
6565     s_search_prms_blk.i4_num_init_candts = num_init_candts;
6566     s_search_prms_blk.i4_start_step = 1;
6567     s_search_prms_blk.i4_use_satd = 0;
6568     s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
6569     /* we use recon only for encoded layers, otherwise it is not available */
6570     s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
6571 
6572     s_search_prms_blk.ps_search_candts = ps_search_candts;
6573     if(s_search_prms_blk.i4_use_rec)
6574     {
6575         WORD32 ref_ctr;
6576         for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6577             s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr];
6578     }
6579     else
6580     {
6581         WORD32 ref_ctr;
6582         for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6583             s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr];
6584     }
6585 
6586     /*************************************************************************/
6587     /* Initialize coordinates. Meaning as follows                            */
6588     /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
6589     /* blk_y : same as above, y coord.                                       */
6590     /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
6591     /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
    /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
6593     /* corner of the picture. Always multiple of 64.                         */
6594     /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
6595     /*************************************************************************/
6596     blk_y = 0;
6597     blk_id_in_ctb = 0;
6598     i4_ctb_y = 0;
6599 
6600     /*************************************************************************/
6601     /* Picture limit on all 4 sides. This will be used to set mv limits for  */
    /* every block given its coordinate. Note this assumes that the min amt  */
    /* of padding to right of pic is equal to the blk size. If we go all the */
    /* way up to 64x64, then the min padding on right side of picture should */
6605     /* be 64, and also on bottom side of picture.                            */
6606     /*************************************************************************/
6607     SET_PIC_LIMIT(
6608         s_pic_limit_inp,
6609         ps_curr_layer->i4_pad_x_rec,
6610         ps_curr_layer->i4_pad_y_rec,
6611         ps_curr_layer->i4_wd,
6612         ps_curr_layer->i4_ht,
6613         s_search_prms_blk.i4_num_steps_post_refine);
6614 
6615     SET_PIC_LIMIT(
6616         s_pic_limit_rec,
6617         ps_curr_layer->i4_pad_x_rec,
6618         ps_curr_layer->i4_pad_y_rec,
6619         ps_curr_layer->i4_wd,
6620         ps_curr_layer->i4_ht,
6621         s_search_prms_blk.i4_num_steps_post_refine);
6622 
6623     /*************************************************************************/
6624     /* set the MV limit per ref. pic.                                        */
6625     /*    - P pic. : Based on the config params.                             */
6626     /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
6627     /*************************************************************************/
6628     hme_set_mv_limit_using_dvsr_data(
6629         ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics);
6630     s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands;
6631     s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6632     s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6633     s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer;
6634     s_srch_cand_init_data.ps_curr_layer = ps_curr_layer;
6635     s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts;
6636     s_srch_cand_init_data.ps_search_cands = ps_search_candts;
6637     s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store;
6638     s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0;
6639     s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1;
6640     s_srch_cand_init_data.e_search_blk_size = e_search_blk_size;
6641 
6642     while(0 == end_of_frame)
6643     {
6644         job_queue_t *ps_job;
6645         frm_ctb_ctxt_t *ps_frm_ctb_prms;
6646         ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6647 
6648         WORD32 i4_max_mv_x_in_ctb;
6649         WORD32 i4_max_mv_y_in_ctb;
6650         void *pv_dep_mngr_encloop_dep_me;
6651         WORD32 offset_val, check_dep_pos, set_dep_pos;
6652         WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0;
6653 
6654         pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me;
6655 
6656         ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms;
6657 
6658         /* Get the current row from the job queue */
6659         ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
6660             ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id);
6661 
6662         /* If all rows are done, set the end of process flag to 1, */
6663         /* and the current row to -1 */
6664         if(NULL == ps_job)
6665         {
6666             blk_y = -1;
6667             i4_ctb_y = -1;
6668             tile_col_idx = -1;
6669             end_of_frame = 1;
6670 
6671             continue;
6672         }
6673 
6674         /* set the output dependency after picking up the row */
6675         ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id);
6676 
6677         /* Obtain the current row's details from the job */
6678         {
6679             ihevce_tile_params_t *ps_col_tile_params;
6680 
6681             i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
            /* Obtain the current column tile index from the job */
6683             tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx;
6684 
            /* in encode layer, blocks are 16x16 and CTB is 64x64 */
            /* note: if ctb is 32x32 then this calc needs to be changed */
6687             num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6688                                     ps_ctxt->log_ctb_size;
6689 
            /* The tile parameter for the col. idx. Use only the properties
            which are the same for all the bottom tiles like width, start_x, etc.
            Don't use height, start_y, etc.                                  */
6693             ps_col_tile_params =
6694                 ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx);
            /* in encode layer, blocks are 16x16 and CTB is 64x64 */
            /* note: if ctb is 32x32 then this calc needs to be changed */
6697             num_sync_units_in_tile =
6698                 (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6699                 ps_ctxt->log_ctb_size;
6700 
6701             i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x;
6702             i4_ctb_x = i4_first_ctb_x;
6703 
6704             if(!num_act_ref_pics)
6705             {
6706                 for(i4_ctb_x = i4_first_ctb_x;
6707                     i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile);
6708                     i4_ctb_x++)
6709                 {
6710                     S32 blk_i = 0, blk_j = 0;
6711                     /* set the dependency for the corresponding row in enc loop */
6712                     ihevce_dmgr_set_row_row_sync(
6713                         pv_dep_mngr_encloop_dep_me,
6714                         (i4_ctb_x + 1),
6715                         i4_ctb_y,
6716                         tile_col_idx /* Col Tile No. */);
6717                 }
6718 
6719                 continue;
6720             }
6721 
6722             /* increment the number of rows proc */
6723             num_rows_proc++;
6724 
6725             /* Set Variables for Dep. Checking and Setting */
6726             set_dep_pos = i4_ctb_y + 1;
6727             if(i4_ctb_y > 0)
6728             {
6729                 offset_val = 2;
6730                 check_dep_pos = i4_ctb_y - 1;
6731             }
6732             else
6733             {
6734                 /* First row should run without waiting */
6735                 offset_val = -1;
6736                 check_dep_pos = 0;
6737             }
6738 
6739             /* row ctb out pointer  */
6740             ps_ctxt->ps_ctb_analyse_curr_row =
6741                 ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6742 
6743             /* Row level CU Tree buffer */
6744             ps_ctxt->ps_cu_tree_curr_row =
6745                 ps_ctxt->ps_cu_tree_base +
6746                 i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE;
6747 
6748             ps_ctxt->ps_me_ctb_data_curr_row =
6749                 ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6750         }
6751 
6752         /* This flag says the CTB under processing is at the start of tile in horz dir.*/
6753         left_ctb_in_diff_tile = 1;
6754 
        /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var,                                      */
        /* the shift value will be passed on to the functions wherever inv_wt is used so that inv_wt is appropriately shifted and multiplied */
6757         {
6758             S32 i4_ref_id, i4_bits_req;
6759 
6760             for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
6761                                             ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
6762                 i4_ref_id++)
6763             {
6764                 GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]);
6765 
6766                 if(i4_bits_req > 12)
6767                 {
6768                     ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12);
6769                 }
6770                 else
6771                 {
6772                     ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0;
6773                 }
6774             }
6775 
6776             s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val;
6777         }
6778 
6779         /* if non-encode layer then i4_ctb_x will be same as blk_x */
        /* loop over all the units in a row                        */
6781         for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile);
6782             i4_ctb_x++)
6783         {
6784             ihevce_ctb_noise_params *ps_ctb_noise_params =
6785                 &ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params;
6786 
6787             s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6;
6788             s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6;
6789 
6790             ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6;
6791             ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6;
6792             /* Initialize ptr to current IPE CTB */
6793             ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x +
6794                              i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6795             {
6796                 ps_ctb_bound_attrs =
6797                     get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt);
6798 
6799                 en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag;
6800                 num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb;
6801             }
6802 
6803             /* Block to initialise pointers to part_type_results_t */
6804             /* in each size-specific inter_cu_results_t  */
6805             {
6806                 WORD32 i;
6807 
6808                 for(i = 0; i < 64; i++)
6809                 {
6810                     ps_ctxt->as_cu8x8_results[i].ps_best_results =
6811                         ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6812                             .as_8x8_block_data[i]
6813                             .as_best_results;
6814                     ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0;
6815                 }
6816 
6817                 for(i = 0; i < 16; i++)
6818                 {
6819                     ps_ctxt->as_cu16x16_results[i].ps_best_results =
6820                         ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results;
6821                     ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0;
6822                 }
6823 
6824                 for(i = 0; i < 4; i++)
6825                 {
6826                     ps_ctxt->as_cu32x32_results[i].ps_best_results =
6827                         ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6828                             .as_32x32_block_data[i]
6829                             .as_best_results;
6830                     ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0;
6831                 }
6832 
6833                 ps_ctxt->s_cu64x64_results.ps_best_results =
6834                     ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results;
6835                 ps_ctxt->s_cu64x64_results.u1_num_best_results = 0;
6836             }
6837 
6838             if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6839             {
6840                 ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32;
6841                 ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb;
6842                 ps_ctb_cluster_info->ps_cu_tree_root =
6843                     ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
6844                 ps_ctb_cluster_info->nodes_created_in_cu_tree = 1;
6845             }
6846 
6847             if(ME_PRISTINE_QUALITY != e_me_quality_presets)
6848             {
6849                 S32 i4_nodes_created_in_cu_tree = 1;
6850 
6851                 ihevce_cu_tree_init(
6852                     (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6853                     (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6854                     &i4_nodes_created_in_cu_tree,
6855                     0,
6856                     POS_NA,
6857                     POS_NA,
6858                     POS_NA);
6859             }
6860 
6861             memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32));
6862 
6863             if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
6864             {
6865                 S32 j;
6866 
6867                 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6868 
6869                 ps_cur_ipe_ctb =
6870                     ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row;
6871                 lambda_recon =
6872                     hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb);
6873 
6874                 lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f);
6875 
6876                 for(i = 0; i < 4; i++)
6877                 {
6878                     ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6879 
6880                     for(j = 0; j < 2; j++)
6881                     {
6882                         ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6883                     }
6884                 }
6885                 ps_search_results = &ps_ctxt->s_search_results_64x64;
6886 
6887                 for(j = 0; j < 2; j++)
6888                 {
6889                     ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6890                 }
6891 
6892                 s_common_frm_prms.i4_lamda = lambda_recon;
6893             }
6894             else
6895             {
6896                 lambda_recon = ps_refine_prms->lambda_recon;
6897             }
6898 
6899             /*********************************************************************/
6900             /* replicate the inp buffer at blk or ctb level for each ref id,     */
6901             /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
6902             /* thereby avoiding a bloat up of memory. If we did all references   */
6903             /* weighted pred, we will end up with a duplicate copy of each ref   */
6904             /* at each layer, since we need to preserve the original reference.  */
6905             /* ToDo: Need to observe performance with this mechanism and compare */
6906             /* with case where ref is weighted.                                  */
6907             /*********************************************************************/
6908             fp_get_wt_inp(
6909                 ps_curr_layer,
6910                 &ps_ctxt->s_wt_pred,
6911                 unit_size,
6912                 s_common_frm_prms.i4_ctb_x_off,
6913                 s_common_frm_prms.i4_ctb_y_off,
6914                 unit_size,
6915                 ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
6916                 ps_ctxt->i4_wt_pred_enable_flag);
6917 
6918             if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled)
6919             {
6920 #if TEMPORAL_NOISE_DETECT
6921                 {
6922                     WORD32 had_block_size = 16;
6923                     WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
6924                                            ? 64
6925                                            : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
6926                     WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
6927                                             ? 64
6928                                             : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
6929                     WORD32 num_pred_dir = i4_num_pred_dir;
6930                     WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off;
6931                     WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off;
6932 
6933                     WORD32 i;
6934                     WORD32 noise_detected;
6935                     WORD32 ctb_size;
6936                     WORD32 num_comp_had_blocks;
6937                     WORD32 noisy_block_cnt;
6938                     WORD32 index_8x8_block;
6939                     WORD32 num_8x8_in_ctb_row;
6940 
6941                     WORD32 ht_offset;
6942                     WORD32 wd_offset;
6943                     WORD32 block_ht;
6944                     WORD32 block_wd;
6945 
6946                     WORD32 num_horz_blocks;
6947                     WORD32 num_vert_blocks;
6948 
6949                     WORD32 mean;
6950                     UWORD32 variance_8x8;
6951 
6952                     WORD32 hh_energy_percent;
6953 
6954                     /* variables to hold the constant values. The variable values held are decided by the HAD block size */
6955                     WORD32 min_noisy_block_cnt;
6956                     WORD32 min_coeffs_above_avg;
6957                     WORD32 min_coeff_avg_energy;
6958 
6959                     /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. block */
6960                     WORD32 i4_cu_x_off, i4_cu_y_off;
6961                     WORD32 is_noisy;
6962 
                    /* initialise the variables holding the constants */
6964                     if(had_block_size == 8)
6965                     {
6966                         min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8;  //6;//
6967                         min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8;
6968                         min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8;
6969                     }
6970                     else
6971                     {
6972                         min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16;  //7;//
6973                         min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16;
6974                         min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16;
6975                     }
6976 
6977                     /* initialize the variables */
6978                     noise_detected = 0;
6979                     noisy_block_cnt = 0;
6980                     hh_energy_percent = 0;
6981                     variance_8x8 = 0;
6982                     block_ht = ctb_height;
6983                     block_wd = ctb_width;
6984 
6985                     mean = 0;
6986 
6987                     ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
6988                     num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
6989 
6990                     num_horz_blocks = block_wd / had_block_size;  //ctb_width / had_block_size;
6991                     num_vert_blocks = block_ht / had_block_size;  //ctb_height / had_block_size;
6992 
6993                     ht_offset = -had_block_size;
6994                     wd_offset = -had_block_size;
6995 
6996                     num_8x8_in_ctb_row = block_wd / 8;  // number of 8x8 in this ctb
6997                     for(i = 0; i < num_comp_had_blocks; i++)
6998                     {
6999                         if(i % num_horz_blocks == 0)
7000                         {
7001                             wd_offset = -had_block_size;
7002                             ht_offset += had_block_size;
7003                         }
7004                         wd_offset += had_block_size;
7005 
7006                         /* CU level offsets */
7007                         i4_cu_x_off = i4_x_off + (i % 4) * 16;  //+ (i % 4) * 16
7008                         i4_cu_y_off = i4_y_off + (i / 4) * 16;
7009 
7010                         /* if 50 % or more of the CU is noisy then the return value is 1 */
7011                         is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7012                             ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7013                             (i % 4) * 16,
7014                             (i / 4) * 16,
7015                             16);
7016 
                        /* the temporal noise detect call is made on a CU only if that CU is noisy */
7018                         if(is_noisy)
7019                         {
7020                             index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row +
7021                                               (i % num_horz_blocks) * 2;
7022                             noisy_block_cnt += ihevce_16x16block_temporal_noise_detect(
7023                                 16,
7024                                 ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
7025                                     ? 64
7026                                     : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off,
7027                                 ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
7028                                     ? 64
7029                                     : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off,
7030                                 ps_ctb_noise_params,
7031                                 &s_srch_cand_init_data,
7032                                 &s_search_prms_blk,
7033                                 ps_ctxt,
7034                                 num_pred_dir,
7035                                 i4_num_act_ref_l0,
7036                                 i4_num_act_ref_l1,
7037                                 i4_cu_x_off,
7038                                 i4_cu_y_off,
7039                                 &ps_ctxt->s_wt_pred,
7040                                 unit_size,
7041                                 index_8x8_block,
7042                                 num_horz_blocks,
7043                                 /*num_8x8_in_ctb_row*/ 8,  // this should be a variable extra
7044                                 i);
7045                         } /* if 16x16 is noisy */
7046                     } /* loop over for all 16x16*/
7047 
7048                     if(noisy_block_cnt >= min_noisy_block_cnt)
7049                     {
7050                         noise_detected = 1;
7051                     }
7052 
7053                     /* write back the noise presence detected for the current CTB to the structure */
7054                     ps_ctb_noise_params->i4_noise_present = noise_detected;
7055                 }
7056 #endif
7057 
7058 #if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME
7059                 if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled &&
7060                    ps_ctb_noise_params->i4_noise_present)
7061                 {
7062                     memset(
7063                         ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7064                         1,
7065                         sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7066                 }
7067 #endif
7068 
7069                 for(i = 0; i < 16; i++)
7070                 {
7071                     au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7072                         ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16);
7073                 }
7074 
7075                 for(i = 0; i < 4; i++)
7076                 {
7077                     au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7078                         ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32);
7079                 }
7080 
7081                 for(i = 0; i < 1; i++)
7082                 {
7083                     au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7084                         ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64);
7085                 }
7086 
7087                 if(ps_ctxt->s_frm_prms.bidir_enabled &&
7088                    (ps_ctxt->s_frm_prms.i4_temporal_layer_id <=
7089                     MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION))
7090                 {
7091                     ps_ctb_noise_params->i4_noise_present = 0;
7092                     memset(
7093                         ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7094                         0,
7095                         sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7096                 }
7097 
7098 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7099                 for(i = 0; i < 4; i++)
7100                 {
7101                     S32 j;
7102                     S32 lambda;
7103 
7104                     if(au1_is_32x32Blk_noisy[i])
7105                     {
7106                         lambda = lambda_recon;
7107                         lambda =
7108                             ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7109 
7110                         ps_search_results = &ps_ctxt->as_search_results_32x32[i];
7111 
7112                         for(j = 0; j < 2; j++)
7113                         {
7114                             ps_search_results->as_pred_ctxt[j].lambda = lambda;
7115                         }
7116                     }
7117                 }
7118 
7119                 {
7120                     S32 j;
7121                     S32 lambda;
7122 
7123                     if(au1_is_64x64Blk_noisy[0])
7124                     {
7125                         lambda = lambda_recon;
7126                         lambda =
7127                             ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7128 
7129                         ps_search_results = &ps_ctxt->s_search_results_64x64;
7130 
7131                         for(j = 0; j < 2; j++)
7132                         {
7133                             ps_search_results->as_pred_ctxt[j].lambda = lambda;
7134                         }
7135                     }
7136                 }
7137 #endif
7138                 if(au1_is_64x64Blk_noisy[0])
7139                 {
7140                     U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7141                                                              (s_common_frm_prms.i4_ctb_y_off *
7142                                                               ps_curr_layer->i4_inp_stride));
7143 
7144                     hme_compute_sigmaX_and_sigmaXSquared(
7145                         pu1_inp,
7146                         ps_curr_layer->i4_inp_stride,
7147                         ps_ctxt->au4_4x4_src_sigmaX,
7148                         ps_ctxt->au4_4x4_src_sigmaXSquared,
7149                         4,
7150                         4,
7151                         64,
7152                         64,
7153                         1,
7154                         16);
7155                 }
7156                 else
7157                 {
7158                     for(i = 0; i < 4; i++)
7159                     {
7160                         if(au1_is_32x32Blk_noisy[i])
7161                         {
7162                             U08 *pu1_inp =
7163                                 ps_curr_layer->pu1_inp +
7164                                 (s_common_frm_prms.i4_ctb_x_off +
7165                                  (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride));
7166 
7167                             U08 u1_cu_size = 32;
7168                             WORD32 i4_inp_buf_offset =
7169                                 (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) +
7170                                  ((i % 2) * u1_cu_size));
7171 
7172                             U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128;
7173                             U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8;
7174                             S32 i4_sigma_arr_offset =
7175                                 (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) +
7176                                  ((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb));
7177 
7178                             hme_compute_sigmaX_and_sigmaXSquared(
7179                                 pu1_inp + i4_inp_buf_offset,
7180                                 ps_curr_layer->i4_inp_stride,
7181                                 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset,
7182                                 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset,
7183                                 4,
7184                                 4,
7185                                 32,
7186                                 32,
7187                                 1,
7188                                 16);
7189                         }
7190                         else
7191                         {
7192                             S32 j;
7193 
7194                             U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8;
7195                             U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2;
7196                             S32 i4_16x16_blk_start_index_in_i_th_32x32_blk =
7197                                 (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) +
7198                                  ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb));
7199 
7200                             for(j = 0; j < 4; j++)
7201                             {
7202                                 U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4;
7203                                 U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1;
7204                                 S32 i4_16x16_blk_index_in_ctb =
7205                                     i4_16x16_blk_start_index_in_i_th_32x32_blk +
7206                                     ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) +
7207                                     ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk);
7208 
7209                                 //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4);
7210 
7211                                 if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb])
7212                                 {
7213                                     U08 *pu1_inp =
7214                                         ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7215                                                                   (s_common_frm_prms.i4_ctb_y_off *
7216                                                                    ps_curr_layer->i4_inp_stride));
7217 
7218                                     U08 u1_cu_size = 16;
7219                                     WORD32 i4_inp_buf_offset =
7220                                         (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) +
7221                                          ((i4_16x16_blk_index_in_ctb / 4) *
7222                                           (u1_cu_size * ps_curr_layer->i4_inp_stride)));
7223 
7224                                     U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64;
7225                                     U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4;
7226                                     S32 i4_sigma_arr_offset =
7227                                         (((i4_16x16_blk_index_in_ctb % 4) *
7228                                           u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) +
7229                                          ((i4_16x16_blk_index_in_ctb / 4) *
7230                                           u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk));
7231 
7232                                     hme_compute_sigmaX_and_sigmaXSquared(
7233                                         pu1_inp + i4_inp_buf_offset,
7234                                         ps_curr_layer->i4_inp_stride,
7235                                         (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset),
7236                                         (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset),
7237                                         4,
7238                                         4,
7239                                         16,
7240                                         16,
7241                                         1,
7242                                         16);
7243                                 }
7244                             }
7245                         }
7246                     }
7247                 }
7248             }
7249             else
7250             {
7251                 memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy));
7252 
7253                 memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy));
7254 
7255                 memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy));
7256             }
7257 
7258             for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
7259             {
7260                 S32 ref_ctr;
7261                 U08 au1_pred_dir_searched[2];
7262                 U08 u1_is_cu_noisy;
7263                 ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
7264 
7265                 {
7266                     blk_x = (i4_ctb_x << 2) +
7267                             (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x);
7268                     blk_y = (i4_ctb_y << 2) +
7269                             (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y);
7270 
7271                     blk_id_in_full_ctb =
7272                         ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb;
7273                     blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask;
7274                     ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask;
7275                     s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6);
7276                     s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6);
7277                 }
7278 
7279                 /* get the current input blk point */
7280                 pos_x = blk_x << blk_size_shift;
7281                 pos_y = blk_y << blk_size_shift;
7282                 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
7283 
7284                 /*********************************************************************/
7285                 /* For every blk in the picture, the search range needs to be derived*/
7286                 /* Any blk can have any mv, but practical search constraints are     */
7287                 /* imposed by the picture boundary and amt of padding.               */
7288                 /*********************************************************************/
7289                 /* MV limit is different based on ref. PIC */
7290                 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7291                 {
7292                     if(!s_search_prms_blk.i4_use_rec)
7293                     {
7294                         hme_derive_search_range(
7295                             &as_range_prms_inp[ref_ctr],
7296                             &s_pic_limit_inp,
7297                             &as_mv_limit[ref_ctr],
7298                             pos_x,
7299                             pos_y,
7300                             blk_wd,
7301                             blk_ht);
7302                     }
7303                     else
7304                     {
7305                         hme_derive_search_range(
7306                             &as_range_prms_rec[ref_ctr],
7307                             &s_pic_limit_rec,
7308                             &as_mv_limit[ref_ctr],
7309                             pos_x,
7310                             pos_y,
7311                             blk_wd,
7312                             blk_ht);
7313                     }
7314                 }
7315                 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
7316                 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
7317                 /* Select search results from a suitable search result in the context */
7318                 {
7319                     ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb];
7320 
7321                     if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
7322                     {
7323                         S32 i;
7324 
7325                         for(i = 0; i < 2; i++)
7326                         {
7327                             ps_search_results->as_pred_ctxt[i].lambda = lambda_recon;
7328                         }
7329                     }
7330                 }
7331 
7332                 u1_is_cu_noisy = au1_is_16x16Blk_noisy
7333                     [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)];
7334 
7335                 s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy;
7336 
7337 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7338                 if(u1_is_cu_noisy)
7339                 {
7340                     S32 j;
7341                     S32 lambda;
7342 
7343                     lambda = lambda_recon;
7344                     lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7345 
7346                     for(j = 0; j < 2; j++)
7347                     {
7348                         ps_search_results->as_pred_ctxt[j].lambda = lambda;
7349                     }
7350                 }
7351                 else
7352                 {
7353                     S32 j;
7354                     S32 lambda;
7355 
7356                     lambda = lambda_recon;
7357 
7358                     for(j = 0; j < 2; j++)
7359                     {
7360                         ps_search_results->as_pred_ctxt[j].lambda = lambda;
7361                     }
7362                 }
7363 #endif
7364 
7365                 s_search_prms_blk.ps_search_results = ps_search_results;
7366 
7367                 s_search_prms_blk.i4_part_mask = hme_part_mask_populator(
7368                     pu1_inp,
7369                     i4_inp_stride,
7370                     ps_refine_prms->limit_active_partitions,
7371                     ps_ctxt->ps_hme_frm_prms->bidir_enabled,
7372                     ps_ctxt->u1_is_curFrame_a_refFrame,
7373                     blk_8x8_mask,
7374                     e_me_quality_presets);
7375 
7376                 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
7377                 {
7378                     ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] =
7379                         s_search_prms_blk.i4_part_mask;
7380                 }
7381 
7382                 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
7383                 {
                    /* Temporarily set u1_num_active_ref to 2 when bidir is */
                    /* enabled (1 otherwise) for the sole purpose of the */
                    /* function called below; it is then set to i4_num_pred_dir */
7387                     ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1;
7388 
7389                     hme_reset_search_results(
7390                         ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
7391 
7392                     ps_search_results->u1_num_active_ref = i4_num_pred_dir;
7393                 }
7394 
7395                 if(0 == blk_id_in_ctb)
7396                 {
7397                     UWORD8 u1_ctr;
7398                     for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
7399                                               ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
7400                         u1_ctr++)
7401                     {
7402                         WORD32 i4_max_dep_ctb_y;
7403                         WORD32 i4_max_dep_ctb_x;
7404 
7405                         /* Set max mv in ctb units */
7406                         i4_max_mv_x_in_ctb =
7407                             (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7408                             ps_ctxt->log_ctb_size;
7409 
7410                         i4_max_mv_y_in_ctb =
7411                             (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7412                             ps_ctxt->log_ctb_size;
7413                         /********************************************************************/
7414                         /* Set max ctb_x and ctb_y dependency on reference picture          */
7415                         /* Note +1 is due to delayed deblock, SAO, subpel plan dependency   */
7416                         /********************************************************************/
7417                         i4_max_dep_ctb_x = CLIP3(
7418                             (i4_ctb_x + i4_max_mv_x_in_ctb + 1),
7419                             0,
7420                             ps_frm_ctb_prms->i4_num_ctbs_horz - 1);
7421                         i4_max_dep_ctb_y = CLIP3(
7422                             (i4_ctb_y + i4_max_mv_y_in_ctb + 1),
7423                             0,
7424                             ps_frm_ctb_prms->i4_num_ctbs_vert - 1);
7425 
7426                         ihevce_dmgr_map_chk_sync(
7427                             ps_curr_layer->ppv_dep_mngr_recon[u1_ctr],
7428                             ps_ctxt->thrd_id,
7429                             i4_ctb_x,
7430                             i4_ctb_y,
7431                             i4_max_mv_x_in_ctb,
7432                             i4_max_mv_y_in_ctb);
7433                     }
7434                 }
7435 
7436                 /* Loop across different Ref IDx */
7437                 for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++)
7438                 {
7439                     S32 resultid;
7440                     S08 u1_default_ref_id;
7441                     S32 i4_num_srch_cands = 0;
7442                     S32 i4_num_refinement_iterations;
7443                     S32 i4_refine_iter_ctr;
7444 
7445                     if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) ||
7446                        (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0))
7447                     {
7448                         u1_pred_dir = u1_pred_dir_ctr;
7449                     }
7450                     else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
7451                     {
7452                         u1_pred_dir = 1;
7453                     }
7454 
7455                     u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0]
7456                                                            : ps_ctxt->ai1_future_list[0];
7457                     au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir;
7458 
7459                     i4_num_srch_cands = 0;
7460                     resultid = 0;
7461 
                    /* START OF NEW CTB MEANS FILL UP NEIGHBOURS IN 18x18 GRID */
7463                     if(0 == blk_id_in_ctb)
7464                     {
7465                         /*****************************************************************/
7466                         /* Initialize the mv grid with results of neighbours for the next*/
7467                         /* ctb.                                                          */
7468                         /*****************************************************************/
7469                         hme_fill_ctb_neighbour_mvs(
7470                             ps_curr_layer,
7471                             blk_x,
7472                             blk_y,
7473                             aps_mv_grid[u1_pred_dir],
7474                             u1_pred_dir_ctr,
7475                             u1_default_ref_id,
7476                             ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7477                     }
7478 
7479                     s_search_prms_blk.i1_ref_idx = u1_pred_dir;
7480 
7481                     {
7482                         if((blk_id_in_full_ctb % 4) == 0)
7483                         {
7484                             ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2]
7485                                 .as_pred_ctxt[u1_pred_dir]
7486                                 .proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1;
7487                         }
7488 
7489                         if(blk_id_in_full_ctb == 0)
7490                         {
7491                             ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1;
7492                         }
7493 
7494                         ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used =
7495                             !gau1_encode_to_raster_y[blk_id_in_full_ctb];
7496                     }
7497 
7498                     {
7499                         S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
7500                         S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
7501                         U08 u1_is_blk_at_ctb_boundary = !y;
7502 
7503                         s_srch_cand_init_data.u1_is_left_available =
7504                             !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off);
7505 
7506                         if(u1_is_blk_at_ctb_boundary)
7507                         {
7508                             s_srch_cand_init_data.u1_is_topRight_available = 0;
7509                             s_srch_cand_init_data.u1_is_topLeft_available = 0;
7510                             s_srch_cand_init_data.u1_is_top_available = 0;
7511                         }
7512                         else
7513                         {
7514                             s_srch_cand_init_data.u1_is_topRight_available =
7515                                 gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd);
7516                             s_srch_cand_init_data.u1_is_top_available = 1;
7517                             s_srch_cand_init_data.u1_is_topLeft_available =
7518                                 s_srch_cand_init_data.u1_is_left_available;
7519                         }
7520                     }
7521 
7522                     s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id;
7523                     s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1];
7524                     s_srch_cand_init_data.i4_pos_x = pos_x;
7525                     s_srch_cand_init_data.i4_pos_y = pos_y;
7526                     s_srch_cand_init_data.u1_pred_dir = u1_pred_dir;
7527                     s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr;
7528                     s_srch_cand_init_data.u1_search_candidate_list_index =
7529                         au1_search_candidate_list_index[u1_pred_dir];
7530 
7531                     i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data);
7532 
7533                     /* Note this block also clips the MV range for all candidates */
7534                     {
7535                         S08 i1_check_for_mult_refs;
7536 
7537                         i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1)
7538                                                              : (ps_ctxt->num_ref_past > 1);
7539 
7540                         ps_me_optimised_function_list->pf_mv_clipper(
7541                             &s_search_prms_blk,
7542                             i4_num_srch_cands,
7543                             i1_check_for_mult_refs,
7544                             ps_refine_prms->i4_num_steps_fpel_refine,
7545                             ps_refine_prms->i4_num_steps_hpel_refine,
7546                             ps_refine_prms->i4_num_steps_qpel_refine);
7547                     }
7548 
7549 #if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
7550                     i4_num_refinement_iterations =
7551                         ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1))
7552                             ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0)
7553                             : 1;
7554 #else
7555                     i4_num_refinement_iterations =
7556                         ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1;
7557 #endif
7558 
7559 #if ENABLE_EXPLICIT_SEARCH_IN_PQ
7560                     if(e_me_quality_presets == ME_PRISTINE_QUALITY)
7561                     {
7562                         i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0
7563                                                                           : i4_num_act_ref_l1;
7564                     }
7565 #endif
7566 
7567                     for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations;
7568                         i4_refine_iter_ctr++)
7569                     {
7570                         S32 center_x;
7571                         S32 center_y;
7572                         S32 center_ref_idx;
7573 
7574                         S08 *pi1_pred_dir_to_ref_idx =
7575                             (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list;
7576 
7577                         {
7578                             WORD32 i4_i;
7579 
7580                             for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++)
7581                             {
7582                                 ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7583                                 ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7584                                 ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
7585                                     MAX_SIGNED_16BIT_VAL;
7586                                 ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0;
7587                                 ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0;
7588                                 ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id;
7589 
7590                                 if(ps_refine_prms->i4_num_results_per_part == 2)
7591                                 {
7592                                     ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] =
7593                                         MAX_SIGNED_16BIT_VAL;
7594                                     ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] =
7595                                         MAX_SIGNED_16BIT_VAL;
7596                                     ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
7597                                         MAX_SIGNED_16BIT_VAL;
7598                                     ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0;
7599                                     ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0;
7600                                     ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id;
7601                                 }
7602                             }
7603 
7604                             s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt;
7605                             s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt;
7606                         }
7607 
7608                         {
7609                             search_node_t *ps_coloc_node;
7610 
7611                             S32 i = 0;
7612 
7613                             if(i4_num_refinement_iterations > 1)
7614                             {
7615                                 for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++)
7616                                 {
7617                                     ps_coloc_node =
7618                                         s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]]
7619                                             .ps_search_node;
7620 
7621                                     if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] ==
7622                                        ps_coloc_node->i1_ref_idx)
7623                                     {
7624                                         break;
7625                                     }
7626                                 }
7627 
7628                                 if(i == ai4_num_coloc_cands[u1_pred_dir])
7629                                 {
7630                                     i = 0;
7631                                 }
7632                             }
7633                             else
7634                             {
7635                                 ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]]
7636                                                     .ps_search_node;
7637                             }
7638 
7639                             hme_set_mvp_node(
7640                                 ps_search_results,
7641                                 ps_coloc_node,
7642                                 u1_pred_dir,
7643                                 (i4_num_refinement_iterations > 1)
7644                                     ? pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]
7645                                     : u1_default_ref_id);
7646 
7647                             center_x = ps_coloc_node->ps_mv->i2_mvx;
7648                             center_y = ps_coloc_node->ps_mv->i2_mvy;
7649                             center_ref_idx = ps_coloc_node->i1_ref_idx;
7650                         }
7651 
7652                         /* Full-Pel search */
7653                         {
7654                             S32 num_unique_nodes;
7655 
7656                             memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
7657 
7658                             num_unique_nodes = hme_remove_duplicate_fpel_search_candidates(
7659                                 as_unique_search_nodes,
7660                                 s_search_prms_blk.ps_search_candts,
7661                                 au4_unique_node_map,
7662                                 pi1_pred_dir_to_ref_idx,
7663                                 i4_num_srch_cands,
7664                                 s_search_prms_blk.i4_num_init_candts,
7665                                 i4_refine_iter_ctr,
7666                                 i4_num_refinement_iterations,
7667                                 i4_num_act_ref_l0,
7668                                 center_ref_idx,
7669                                 center_x,
7670                                 center_y,
7671                                 ps_ctxt->s_frm_prms.bidir_enabled,
7672                                 e_me_quality_presets);
7673 
7674                             /*************************************************************************/
7675                             /* This array stores the ids of the partitions whose                     */
7676                             /* SADs are updated. Since the partitions whose SADs are updated may not */
7677                             /* be in contiguous order, we supply another level of indirection.       */
7678                             /*************************************************************************/
7679                             ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
7680                                 s_search_prms_blk.i4_part_mask,
7681                                 &ps_fullpel_refine_ctxt->ai4_part_id[0]);
7682 
7683                             if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy)
7684                             {
7685                                 S32 i;
7686                                 /*i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values*/
7687                                 S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) +
7688                                                             (s_search_prms_blk.i4_cu_y_off * 4);
7689 
7690                                 for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
7691                                 {
7692                                     S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];
7693 
7694                                     hme_compute_final_sigma_of_pu_from_base_blocks(
7695                                         ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
7696                                         ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
7697                                         au8_final_src_sigmaX,
7698                                         au8_final_src_sigmaXSquared,
7699                                         16,
7700                                         4,
7701                                         i4_part_id,
7702                                         16);
7703                                 }
7704 
7705                                 s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7706                                 s_common_frm_prms.pu8_part_src_sigmaXSquared =
7707                                     au8_final_src_sigmaXSquared;
7708 
7709                                 s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7710                                 s_search_prms_blk.pu8_part_src_sigmaXSquared =
7711                                     au8_final_src_sigmaXSquared;
7712                             }
7713 
7714                             if(0 == num_unique_nodes)
7715                             {
7716                                 continue;
7717                             }
7718 
7719                             if(num_unique_nodes >= 2)
7720                             {
7721                                 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7722                                 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
7723                                 if(ps_ctxt->i4_pic_type != IV_P_FRAME)
7724                                 {
7725                                     if(ps_ctxt->i4_temporal_layer == 1)
7726                                     {
7727                                         hme_fullpel_cand_sifter(
7728                                             &s_search_prms_blk,
7729                                             ps_curr_layer,
7730                                             &ps_ctxt->s_wt_pred,
7731                                             ALPHA_FOR_NOISE_TERM_IN_ME,
7732                                             u1_is_cu_noisy,
7733                                             ps_me_optimised_function_list);
7734                                     }
7735                                     else
7736                                     {
7737                                         hme_fullpel_cand_sifter(
7738                                             &s_search_prms_blk,
7739                                             ps_curr_layer,
7740                                             &ps_ctxt->s_wt_pred,
7741                                             ALPHA_FOR_NOISE_TERM_IN_ME,
7742                                             u1_is_cu_noisy,
7743                                             ps_me_optimised_function_list);
7744                                     }
7745                                 }
7746                                 else
7747                                 {
7748                                     hme_fullpel_cand_sifter(
7749                                         &s_search_prms_blk,
7750                                         ps_curr_layer,
7751                                         &ps_ctxt->s_wt_pred,
7752                                         ALPHA_FOR_NOISE_TERM_IN_ME_P,
7753                                         u1_is_cu_noisy,
7754                                         ps_me_optimised_function_list);
7755                                 }
7756                             }
7757 
7758                             s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7759 
7760                             hme_fullpel_refine(
7761                                 ps_refine_prms,
7762                                 &s_search_prms_blk,
7763                                 ps_curr_layer,
7764                                 &ps_ctxt->s_wt_pred,
7765                                 au4_unique_node_map,
7766                                 num_unique_nodes,
7767                                 blk_8x8_mask,
7768                                 center_x,
7769                                 center_y,
7770                                 center_ref_idx,
7771                                 e_me_quality_presets,
7772                                 ps_me_optimised_function_list);
7773                         }
7774 
7775                         /* Sub-Pel search */
7776                         {
7777                             hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7778 
7779                             s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem(
7780                                 &ps_ctxt->s_buf_mgr,
7781                                 INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE);
7782                             /* MV limit is different based on ref. PIC */
7783                             for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7784                             {
7785                                 SCALE_RANGE_PRMS(
7786                                     as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
7787                                 SCALE_RANGE_PRMS(
7788                                     as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
7789                             }
7790                             s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6;
7791                             s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6;
7792 
7793                             hme_subpel_refine_cu_hs(
7794                                 &s_subpel_prms,
7795                                 ps_curr_layer,
7796                                 ps_search_results,
7797                                 u1_pred_dir,
7798                                 &ps_ctxt->s_wt_pred,
7799                                 blk_8x8_mask,
7800                                 ps_ctxt->ps_func_selector,
7801                                 ps_cmn_utils_optimised_function_list,
7802                                 ps_me_optimised_function_list);
7803                         }
7804                     }
7805                 }
7806                 /* Populate the new PU struct with the results post subpel refinement*/
7807                 {
7808                     inter_cu_results_t *ps_cu_results;
7809                     WORD32 best_inter_cost, intra_cost, posx, posy;
7810 
7811                     UWORD8 intra_8x8_enabled = 0;
7812 
7813                     /*  cost of 16x16 cu parent  */
7814                     WORD32 parent_cost = MAX_32BIT_VAL;
7815 
7816                     /*  cost of 8x8 cu children  */
7817                     /*********************************************************************/
7818                     /* Assuming parent is not split, then we signal 1 bit for this parent*/
7819                     /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
7820                     /* So, 4*lambda is extra for children cost.                          */
7821                     /*********************************************************************/
7822                     WORD32 child_cost = 0;
7823 
7824                     ps_cu_results = ps_search_results->ps_cu_results;
7825 
7826                     /* Initialize the pu_results pointers to the first struct in the stack array */
7827                     ps_pu_results = as_inter_pu_results;
7828 
7829                     hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7830 
7831                     hme_populate_pus(
7832                         ps_thrd_ctxt,
7833                         ps_ctxt,
7834                         &s_subpel_prms,
7835                         ps_search_results,
7836                         ps_cu_results,
7837                         ps_pu_results,
7838                         &(as_pu_results[0][0][0]),
7839                         &s_common_frm_prms,
7840                         &ps_ctxt->s_wt_pred,
7841                         ps_curr_layer,
7842                         au1_pred_dir_searched,
7843                         i4_num_pred_dir);
7844 
7845                     ps_cu_results->i4_inp_offset =
7846                         (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
7847 
7848                     hme_decide_part_types(
7849                         ps_cu_results,
7850                         ps_pu_results,
7851                         &s_common_frm_prms,
7852                         ps_ctxt,
7853                         ps_cmn_utils_optimised_function_list,
7854                         ps_me_optimised_function_list
7855 
7856                     );
7857 
7858                     /* Update the MIN and MAX MVs of the Dynamic Search Range for each ref. pic. */
7859                     /* Only for P pics: both flags are 0 for P; for I and B they are mutually exclusive */
7860                     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
7861                     {
7862                         WORD32 res_ctr;
7863 
7864                         for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++)
7865                         {
7866                             WORD32 num_part = 2, part_ctr;
7867                             part_type_results_t *ps_best_results =
7868                                 &ps_cu_results->ps_best_results[res_ctr];
7869 
7870                             if(PRT_2Nx2N == ps_best_results->u1_part_type)
7871                                 num_part = 1;
7872 
7873                             for(part_ctr = 0; part_ctr < num_part; part_ctr++)
7874                             {
7875                                 pu_result_t *ps_pu_results =
7876                                     &ps_best_results->as_pu_results[part_ctr];
7877 
7878                                 ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode);
7879 
7880                                 hme_update_dynamic_search_params(
7881                                     &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p]
7882                                          .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx],
7883                                     ps_pu_results->pu.mv.s_l0_mv.i2_mvy);
7884 
7885                                 /* Sanity Check */
7886                                 ASSERT(
7887                                     ps_pu_results->pu.mv.i1_l0_ref_idx <
7888                                     ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7889 
7890                                 /* No L1 for P Pic. */
7891                                 ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode);
7892                                 /* No BI for P Pic. */
7893                                 ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode);
7894                             }
7895                         }
7896                     }
7897 
7898                     /*****************************************************************/
7899                     /* INSERT INTRA RESULTS AT 16x16 LEVEL.                          */
7900                     /*****************************************************************/
7901 
7902 #if DISABLE_INTRA_IN_BPICS
7903                     if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7904                              (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
7905 #endif
7906                     {
7907                         if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy))
7908                         {
7909                             hme_insert_intra_nodes_post_bipred(
7910                                 ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
7911                         }
7912                     }
7913 
7914 #if DISABLE_INTRA_IN_BPICS
7915                     if((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7916                        (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
7917                     {
7918                         intra_8x8_enabled = 0;
7919                     }
7920                     else
7921 #endif
7922                     {
7923                         /* TRAQO intra flag update */
7924                         if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag)
7925                         {
7926                             best_inter_cost =
7927                                 ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost;
7928                             intra_cost =
7929                                 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7930                             /*@16x16 level*/
7931                             posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x
7932                                     << 2) >>
7933                                    4;
7934                             posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y
7935                                     << 2) >>
7936                                    4;
7937                         }
7938                         else
7939                         {
7940                             best_inter_cost =
7941                                 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7942                             posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x
7943                                     << 2) >>
7944                                    3;
7945                             posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y
7946                                     << 2) >>
7947                                    3;
7948                         }
7949 
7950                         /* Disable intra16/32/64 flags based on split flags recommended by IPE */
7951                         if(ps_cur_ipe_ctb->u1_split_flag)
7952                         {
7953                             /* Id of the 32x32 block, 16x16 block in a CTB */
7954                             WORD32 i4_32x32_id =
7955                                 (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5);
7956                             WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 +
7957                                                  ((ps_cu_results->u1_x_off >> 4) & 0x1);
7958 
7959                             if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
7960                             {
7961                                 if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7962                                        .as_intra16_analyse[i4_16x16_id]
7963                                        .b1_split_flag)
7964                                 {
7965                                     intra_8x8_enabled =
7966                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7967                                             .as_intra16_analyse[i4_16x16_id]
7968                                             .as_intra8_analyse[0]
7969                                             .b1_valid_cu;
7970                                     intra_8x8_enabled &=
7971                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7972                                             .as_intra16_analyse[i4_16x16_id]
7973                                             .as_intra8_analyse[1]
7974                                             .b1_valid_cu;
7975                                     intra_8x8_enabled &=
7976                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7977                                             .as_intra16_analyse[i4_16x16_id]
7978                                             .as_intra8_analyse[2]
7979                                             .b1_valid_cu;
7980                                     intra_8x8_enabled &=
7981                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7982                                             .as_intra16_analyse[i4_16x16_id]
7983                                             .as_intra8_analyse[3]
7984                                             .b1_valid_cu;
7985                                 }
7986                             }
7987                         }
7988                     }
7989 
7990                     if(blk_8x8_mask == 0xf)
7991                     {
7992                         parent_cost =
7993                             ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost;
7994                         ps_search_results->u1_split_flag = 0;
7995                     }
7996                     else
7997                     {
7998                         ps_search_results->u1_split_flag = 1;
7999                     }
8000 
8001                     ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2];
8002 
8003                     if(s_common_frm_prms.u1_is_cu_noisy)
8004                     {
8005                         intra_8x8_enabled = 0;
8006                     }
8007 
8008                     /* Evaluate 8x8 if NxN part id is enabled */
8009                     if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled)
8010                     {
8011                         /* Populates the PU's for the 4 8x8's in one call */
8012                         hme_populate_pus_8x8_cu(
8013                             ps_thrd_ctxt,
8014                             ps_ctxt,
8015                             &s_subpel_prms,
8016                             ps_search_results,
8017                             ps_cu_results,
8018                             ps_pu_results,
8019                             &(as_pu_results[0][0][0]),
8020                             &s_common_frm_prms,
8021                             au1_pred_dir_searched,
8022                             i4_num_pred_dir,
8023                             blk_8x8_mask);
8024 
8025                         /* Re-initialize the pu_results pointers to the first struct in the stack array */
8026                         ps_pu_results = as_inter_pu_results;
8027 
8028                         for(i = 0; i < 4; i++)
8029                         {
8030                             if((blk_8x8_mask & (1 << i)))
8031                             {
8032                                 if(ps_cu_results->i4_part_mask)
8033                                 {
8034                                     hme_decide_part_types(
8035                                         ps_cu_results,
8036                                         ps_pu_results,
8037                                         &s_common_frm_prms,
8038                                         ps_ctxt,
8039                                         ps_cmn_utils_optimised_function_list,
8040                                         ps_me_optimised_function_list
8041 
8042                                     );
8043                                 }
8044                                 /*****************************************************************/
8045                                 /* INSERT INTRA RESULTS AT 8x8 LEVEL.                          */
8046                                 /*****************************************************************/
8047 #if DISABLE_INTRA_IN_BPICS
8048                                 if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
8049                                          (ps_ctxt->s_frm_prms.i4_temporal_layer_id >
8050                                           TEMPORAL_LAYER_DISABLE)))
8051 #endif
8052                                 {
8053                                     if(!(DISABLE_INTRA_WHEN_NOISY &&
8054                                          s_common_frm_prms.u1_is_cu_noisy))
8055                                     {
8056                                         hme_insert_intra_nodes_post_bipred(
8057                                             ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
8058                                     }
8059                                 }
8060 
8061                                 child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost;
8062                             }
8063 
8064                             ps_cu_results++;
8065                             ps_pu_results++;
8066                         }
8067 
8068                         /* Compare 16x16 vs 8x8 cost */
8069                         if(child_cost < parent_cost)
8070                         {
8071                             ps_search_results->best_cu_cost = child_cost;
8072                             ps_search_results->u1_split_flag = 1;
8073                         }
8074                     }
8075                 }
8076 
8077                 hme_update_mv_bank_encode(
8078                     ps_search_results,
8079                     ps_curr_layer->ps_layer_mvbank,
8080                     blk_x,
8081                     blk_y,
8082                     &s_mv_update_prms,
8083                     au1_pred_dir_searched,
8084                     i4_num_act_ref_l0);
8085 
8086                 /*********************************************************************/
8087                 /* Map the best results to an MV Grid. This is a 18x18 grid that is  */
8088                 /* useful for doing things like predictor for cost calculation or    */
8089                 /* also for merge calculations if need be.                           */
8090                 /*********************************************************************/
8091                 hme_map_mvs_to_grid(
8092                     &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir);
8093             }
8094 
            /*
             * Set the CU tree nodes appropriately.
             *
             * Only done for presets other than ME_PRISTINE_QUALITY. For each of
             * the 16 entries of as_search_results_16x16, the node two levels below
             * this CTB's root is located and it and/or its four children are
             * marked valid, driven by ai4_blk_8x8_mask[i] and the entry's
             * u1_split_flag.
             */
            if(e_me_quality_presets != ME_PRISTINE_QUALITY)
            {
                WORD32 i, j;

                for(i = 0; i < 16; i++)
                {
                    /* Begin at the root node belonging to this CTB in the current row */
                    cur_ctb_cu_tree_t *ps_tree_node =
                        ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
                    search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i];

                    /* i / 4 selects the first-level child (tl, tr, bl, br) */
                    switch(i >> 2)
                    {
                    case 0:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_tl;

                        break;
                    }
                    case 1:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_tr;

                        break;
                    }
                    case 2:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_bl;

                        break;
                    }
                    case 3:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_br;

                        break;
                    }
                    }

                    /* i % 4 selects the second-level child beneath the node chosen above */
                    switch(i % 4)
                    {
                    case 0:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_tl;

                        break;
                    }
                    case 1:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_tr;

                        break;
                    }
                    case 2:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_bl;

                        break;
                    }
                    case 3:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_br;

                        break;
                    }
                    }

                    /* Mask value 15 means all four low bits (one per child) are set */
                    if(ai4_blk_8x8_mask[i] == 15)
                    {
                        if(!ps_results->u1_split_flag)
                        {
                            /* No split chosen: keep this node, drop its children */
                            ps_tree_node->is_node_valid = 1;
                            NULLIFY_THE_CHILDREN_NODES(ps_tree_node);
                        }
                        else
                        {
                            /* Split chosen: drop this node, keep its children */
                            ps_tree_node->is_node_valid = 0;
                            ENABLE_THE_CHILDREN_NODES(ps_tree_node);
                        }
                    }
                    else
                    {
                        cur_ctb_cu_tree_t *ps_tree_child;

                        /* Partial mask: this node cannot be used as a whole */
                        ps_tree_node->is_node_valid = 0;

                        /* Visit the children in tl, tr, bl, br order (j = 0..3) */
                        for(j = 0; j < 4; j++)
                        {
                            switch(j)
                            {
                            case 0:
                            {
                                ps_tree_child = ps_tree_node->ps_child_node_tl;

                                break;
                            }
                            case 1:
                            {
                                ps_tree_child = ps_tree_node->ps_child_node_tr;

                                break;
                            }
                            case 2:
                            {
                                ps_tree_child = ps_tree_node->ps_child_node_bl;

                                break;
                            }
                            case 3:
                            {
                                ps_tree_child = ps_tree_node->ps_child_node_br;

                                break;
                            }
                            }

                            /* Child j is valid exactly when bit j of the mask is set */
                            ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j));
                        }
                    }
                }
            }
8216 
            /*
             * Derive the two bit masks that gate the merge attempts below:
             *   en_merge_32x32     - bits 0..3 correspond to the tl, tr, bl, br
             *                        32x32 children
             *   en_merge_execution - bits 0..3 as above, bit 4 for the root node
             */
            if(ME_PRISTINE_QUALITY == e_me_quality_presets)
            {
                cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root;

                hme_analyse_mv_clustering(
                    ps_ctxt->as_search_results_16x16,
                    ps_ctxt->as_cu16x16_results,
                    ps_ctxt->as_cu8x8_results,
                    ps_ctxt->ps_ctb_cluster_info,
                    ps_ctxt->ai1_future_list,
                    ps_ctxt->ai1_past_list,
                    ps_ctxt->s_frm_prms.bidir_enabled,
                    e_me_quality_presets);

#if DISABLE_BLK_MERGE_WHEN_NOISY
                /* Overwrite validity and inter-eval flags of the four children */
                /* and of the root with the negated noise flags                 */
                ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0];
                ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1];
                ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2];
                ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3];
                ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0];
                ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1];
                ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2];
                ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3];
                ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0];
                ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0];
#endif

                /* Pack is_node_valid of the four children into bits 0..3 */
                en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) |
                                 (ps_tree->ps_child_node_tr->is_node_valid << 1) |
                                 (ps_tree->ps_child_node_bl->is_node_valid << 2) |
                                 (ps_tree->ps_child_node_br->is_node_valid << 3);

                /* Pack u1_inter_eval_enable of the children (bits 0..3) and root (bit 4) */
                en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) |
                                     (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) |
                                     (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) |
                                     (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) |
                                     (ps_tree->u1_inter_eval_enable << 4);
            }
            else
            {
                /* All five execution bits are set for the other presets */
                en_merge_execution = 0x1f;

#if DISABLE_BLK_MERGE_WHEN_NOISY
                /* Keep each previously computed en_merge_32x32 bit only when the */
                /* corresponding 32x32 block is not flagged as noisy              */
                en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) |
                                 ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) |
                                 ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) |
                                 ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8));
#endif
            }
8266 
8267             /* Re-initialize the pu_results pointers to the first struct in the stack array */
8268             ps_pu_results = as_inter_pu_results;
8269 
8270             {
8271                 WORD32 ref_ctr;
8272 
                /* CTB index times 64; presumably the CTB origin in pixels - confirm */
                s_ctb_prms.i4_ctb_x = i4_ctb_x << 6;
                s_ctb_prms.i4_ctb_y = i4_ctb_y << 6;

                /* MV limit is different based on ref. PIC */
                /* The same per-reference range is scaled with factors 1 and 2;   */
                /* going by the names these are the half-pel and quarter-pel      */
                /* ranges (assumption, SCALE_RANGE_PRMS is not visible here)      */
                for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
                {
                    SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
                    SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
                }

                /* Start with "split" as the result and no successful 32x32 merges */
                e_merge_result = CU_SPLIT;
                merge_count_32x32 = 0;
8285 
                /* Top-left 32x32 merge attempt: needs bit 0 set in both masks */
                if((en_merge_32x32 & 1) && (en_merge_execution & 1))
                {
                    range_prms_t *ps_pic_limit;
                    /* Picture limits come from the recon or the input set, per i4_use_rec */
                    if(s_merge_prms_32x32_tl.i4_use_rec == 1)
                    {
                        ps_pic_limit = &s_pic_limit_rec;
                    }
                    else
                    {
                        ps_pic_limit = &s_pic_limit_inp;
                    }
                    /* MV limit is different based on ref. PIC */
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
                    {
                        /* Range for the block at the CTB origin; 32, 32 is */
                        /* presumably the block width and height - confirm  */
                        hme_derive_search_range(
                            s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
                            ps_pic_limit,
                            &as_mv_limit[ref_ctr],
                            i4_ctb_x << 6,
                            i4_ctb_y << 6,
                            32,
                            32);

                        /* Scale the derived range in place (source and destination match) */
                        SCALE_RANGE_PRMS_POINTERS(
                            s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
                            s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
                            2);
                    }
                    s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6;
                    s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6;
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0];

                    e_merge_result = hme_try_merge_high_speed(
                        ps_thrd_ctxt,
                        ps_ctxt,
                        ps_cur_ipe_ctb,
                        &s_subpel_prms,
                        &s_merge_prms_32x32_tl,
                        ps_pu_results,
                        &as_pu_results[0][0][0]);

                    if(e_merge_result == CU_MERGED)
                    {
                        inter_cu_results_t *ps_cu_results =
                            s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results;

                        /* Skip the MV grid update when the only best result is intra */
                        if(!((ps_cu_results->u1_num_best_results == 1) &&
                             (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
                        {
                            hme_map_mvs_to_grid(
                                &aps_mv_grid[0],
                                s_merge_prms_32x32_tl.ps_results_merge,
                                s_merge_prms_32x32_tl.au1_pred_dir_searched,
                                s_merge_prms_32x32_tl.i4_num_pred_dir_actual);
                        }

                        /* Non-pristine presets: keep the tl node, drop its children */
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
                        {
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                .ps_child_node_tl->is_node_valid = 1;
                            NULLIFY_THE_CHILDREN_NODES(
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                    .ps_child_node_tl);
                        }

                        /* Count the merge and reset the result for the next attempt */
                        merge_count_32x32++;
                        e_merge_result = CU_SPLIT;
                    }
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
                    {
#if ENABLE_CU_TREE_CULLING
                        cur_ctb_cu_tree_t *ps_tree =
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;

                        /* Merge failed: invalidate the root, clear the root bit (bit 4) */
                        /* and re-enable the children of the tl node and of each of its  */
                        /* four children                                                 */
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
                    }
                }
8370                 else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1)))
8371                 {
8372 #if ENABLE_CU_TREE_CULLING
8373                     cur_ctb_cu_tree_t *ps_tree =
8374                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8375 
8376                     ENABLE_THE_CHILDREN_NODES(ps_tree);
8377                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8378                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8379                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8380                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8381 #endif
8382 
8383                     if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY)
8384                     {
8385                         ps_tree->is_node_valid = 0;
8386                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8387                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8388                     }
8389                 }
8390 
                /* Top-right 32x32 merge attempt: needs bit 1 set in both masks */
                if((en_merge_32x32 & 2) && (en_merge_execution & 2))
                {
                    range_prms_t *ps_pic_limit;
                    /* Picture limits come from the recon or the input set, per i4_use_rec */
                    if(s_merge_prms_32x32_tr.i4_use_rec == 1)
                    {
                        ps_pic_limit = &s_pic_limit_rec;
                    }
                    else
                    {
                        ps_pic_limit = &s_pic_limit_inp;
                    }
                    /* MV limit is different based on ref. PIC */
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
                    {
                        /* Range for the block offset by 32 in x from the CTB origin */
                        hme_derive_search_range(
                            s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
                            ps_pic_limit,
                            &as_mv_limit[ref_ctr],
                            (i4_ctb_x << 6) + 32,
                            i4_ctb_y << 6,
                            32,
                            32);
                        /* Scale the derived range in place (source and destination match) */
                        SCALE_RANGE_PRMS_POINTERS(
                            s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
                            s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
                            2);
                    }
                    /* Offsets are those of the CTB, not of the 32x32 block */
                    s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6;
                    s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6;
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1];

                    e_merge_result = hme_try_merge_high_speed(
                        ps_thrd_ctxt,
                        ps_ctxt,
                        ps_cur_ipe_ctb,
                        &s_subpel_prms,
                        &s_merge_prms_32x32_tr,
                        ps_pu_results,
                        &as_pu_results[0][0][0]);

                    if(e_merge_result == CU_MERGED)
                    {
                        inter_cu_results_t *ps_cu_results =
                            s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results;

                        /* Skip the MV grid update when the only best result is intra */
                        if(!((ps_cu_results->u1_num_best_results == 1) &&
                             (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
                        {
                            hme_map_mvs_to_grid(
                                &aps_mv_grid[0],
                                s_merge_prms_32x32_tr.ps_results_merge,
                                s_merge_prms_32x32_tr.au1_pred_dir_searched,
                                s_merge_prms_32x32_tr.i4_num_pred_dir_actual);
                        }

                        /* Non-pristine presets: keep the tr node, drop its children */
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
                        {
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                .ps_child_node_tr->is_node_valid = 1;
                            NULLIFY_THE_CHILDREN_NODES(
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                    .ps_child_node_tr);
                        }

                        /* Count the merge and reset the result for the next attempt */
                        merge_count_32x32++;
                        e_merge_result = CU_SPLIT;
                    }
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
                    {
#if ENABLE_CU_TREE_CULLING
                        cur_ctb_cu_tree_t *ps_tree =
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;

                        /* Merge failed: invalidate the root, clear the root bit (bit 4) */
                        /* and re-enable the children of the tr node and of each of its  */
                        /* four children                                                 */
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
                    }
                }
8474                 else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2)))
8475                 {
8476 #if ENABLE_CU_TREE_CULLING
8477                     cur_ctb_cu_tree_t *ps_tree =
8478                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8479 
8480                     ENABLE_THE_CHILDREN_NODES(ps_tree);
8481                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8482                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8483                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8484                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8485 #endif
8486 
8487                     if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY)
8488                     {
8489                         ps_tree->is_node_valid = 0;
8490                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8491                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8492                     }
8493                 }
8494 
                /* Bottom-left 32x32 merge attempt: needs bit 2 set in both masks */
                if((en_merge_32x32 & 4) && (en_merge_execution & 4))
                {
                    range_prms_t *ps_pic_limit;
                    /* Picture limits come from the recon or the input set, per i4_use_rec */
                    if(s_merge_prms_32x32_bl.i4_use_rec == 1)
                    {
                        ps_pic_limit = &s_pic_limit_rec;
                    }
                    else
                    {
                        ps_pic_limit = &s_pic_limit_inp;
                    }
                    /* MV limit is different based on ref. PIC */
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
                    {
                        /* Range for the block offset by 32 in y from the CTB origin */
                        hme_derive_search_range(
                            s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
                            ps_pic_limit,
                            &as_mv_limit[ref_ctr],
                            i4_ctb_x << 6,
                            (i4_ctb_y << 6) + 32,
                            32,
                            32);
                        /* Scale the derived range in place (source and destination match) */
                        SCALE_RANGE_PRMS_POINTERS(
                            s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
                            s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
                            2);
                    }
                    /* Offsets are those of the CTB, not of the 32x32 block */
                    s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6;
                    s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6;
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2];

                    e_merge_result = hme_try_merge_high_speed(
                        ps_thrd_ctxt,
                        ps_ctxt,
                        ps_cur_ipe_ctb,
                        &s_subpel_prms,
                        &s_merge_prms_32x32_bl,
                        ps_pu_results,
                        &as_pu_results[0][0][0]);

                    if(e_merge_result == CU_MERGED)
                    {
                        inter_cu_results_t *ps_cu_results =
                            s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results;

                        /* Skip the MV grid update when the only best result is intra */
                        if(!((ps_cu_results->u1_num_best_results == 1) &&
                             (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
                        {
                            hme_map_mvs_to_grid(
                                &aps_mv_grid[0],
                                s_merge_prms_32x32_bl.ps_results_merge,
                                s_merge_prms_32x32_bl.au1_pred_dir_searched,
                                s_merge_prms_32x32_bl.i4_num_pred_dir_actual);
                        }

                        /* Non-pristine presets: keep the bl node, drop its children */
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
                        {
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                .ps_child_node_bl->is_node_valid = 1;
                            NULLIFY_THE_CHILDREN_NODES(
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                    .ps_child_node_bl);
                        }

                        /* Count the merge and reset the result for the next attempt */
                        merge_count_32x32++;
                        e_merge_result = CU_SPLIT;
                    }
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
                    {
#if ENABLE_CU_TREE_CULLING
                        cur_ctb_cu_tree_t *ps_tree =
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;

                        /* Merge failed: invalidate the root, clear the root bit (bit 4) */
                        /* and re-enable the children of the bl node and of each of its  */
                        /* four children                                                 */
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
                    }
                }
8578                 else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4)))
8579                 {
8580 #if ENABLE_CU_TREE_CULLING
8581                     cur_ctb_cu_tree_t *ps_tree =
8582                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8583 
8584                     ENABLE_THE_CHILDREN_NODES(ps_tree);
8585                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8586                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8587                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8588                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8589 #endif
8590 
8591                     if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY)
8592                     {
8593                         ps_tree->is_node_valid = 0;
8594                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8595                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8596                     }
8597                 }
8598 
                /* Bottom-right 32x32 merge attempt: needs bit 3 set in both masks */
                if((en_merge_32x32 & 8) && (en_merge_execution & 8))
                {
                    range_prms_t *ps_pic_limit;
                    /* Picture limits come from the recon or the input set, per i4_use_rec */
                    if(s_merge_prms_32x32_br.i4_use_rec == 1)
                    {
                        ps_pic_limit = &s_pic_limit_rec;
                    }
                    else
                    {
                        ps_pic_limit = &s_pic_limit_inp;
                    }
                    /* MV limit is different based on ref. PIC */
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
                    {
                        /* Range for the block offset by 32 in both x and y from the CTB origin */
                        hme_derive_search_range(
                            s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
                            ps_pic_limit,
                            &as_mv_limit[ref_ctr],
                            (i4_ctb_x << 6) + 32,
                            (i4_ctb_y << 6) + 32,
                            32,
                            32);

                        /* Scale the derived range in place (source and destination match) */
                        SCALE_RANGE_PRMS_POINTERS(
                            s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
                            s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
                            2);
                    }
                    /* Offsets are those of the CTB, not of the 32x32 block */
                    s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6;
                    s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6;
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3];

                    e_merge_result = hme_try_merge_high_speed(
                        ps_thrd_ctxt,
                        ps_ctxt,
                        ps_cur_ipe_ctb,
                        &s_subpel_prms,
                        &s_merge_prms_32x32_br,
                        ps_pu_results,
                        &as_pu_results[0][0][0]);

                    if(e_merge_result == CU_MERGED)
                    {
                        /* NOTE(review): unlike the tl, tr and bl quadrants, the MV grid */
                        /* update is disabled for br; the reason is not visible here,    */
                        /* confirm before re-enabling or deleting the block below        */
                        /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results;

                        if(!((ps_cu_results->u1_num_best_results == 1) &&
                        (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
                        {
                        hme_map_mvs_to_grid
                        (
                        &aps_mv_grid[0],
                        s_merge_prms_32x32_br.ps_results_merge,
                        s_merge_prms_32x32_br.au1_pred_dir_searched,
                        s_merge_prms_32x32_br.i4_num_pred_dir_actual
                        );
                        }*/

                        /* Non-pristine presets: keep the br node, drop its children */
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
                        {
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                .ps_child_node_br->is_node_valid = 1;
                            NULLIFY_THE_CHILDREN_NODES(
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                    .ps_child_node_br);
                        }

                        /* Count the merge and reset the result for the next attempt */
                        merge_count_32x32++;
                        e_merge_result = CU_SPLIT;
                    }
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
                    {
#if ENABLE_CU_TREE_CULLING
                        cur_ctb_cu_tree_t *ps_tree =
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;

                        /* Merge failed: invalidate the root, clear the root bit (bit 4) */
                        /* and re-enable the children of the br node and of each of its  */
                        /* four children                                                 */
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
                    }
                }
8684                 else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8)))
8685                 {
8686 #if ENABLE_CU_TREE_CULLING
8687                     cur_ctb_cu_tree_t *ps_tree =
8688                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8689 
8690                     ENABLE_THE_CHILDREN_NODES(ps_tree);
8691                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8692                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8693                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8694                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8695 #endif
8696 
8697                     if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY)
8698                     {
8699                         ps_tree->is_node_valid = 0;
8700                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8701                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8702                     }
8703                 }
8704 
                /*
                 * Try merging all 32x32 to 64x64 candidates.
                 *
                 * Attempted only when all four en_merge_32x32 bits are set and
                 *   - non-pristine presets: all four 32x32 merges succeeded, or
                 *   - pristine preset: the root bit (bit 4) of en_merge_execution
                 *     is still set.
                 * For ME_XTREME_SPEED_25 the attempt is additionally subject to
                 * DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25 being zero.
                 */
                if(((en_merge_32x32 & 0xf) == 0xf) &&
                   (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) ||
                    ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY))))
                    if((((e_me_quality_presets == ME_XTREME_SPEED_25) &&
                         !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) ||
                        (e_me_quality_presets != ME_XTREME_SPEED_25)))
                    {
                        range_prms_t *ps_pic_limit;
                        /* Picture limits come from the recon or the input set, per i4_use_rec */
                        if(s_merge_prms_64x64.i4_use_rec == 1)
                        {
                            ps_pic_limit = &s_pic_limit_rec;
                        }
                        else
                        {
                            ps_pic_limit = &s_pic_limit_inp;
                        }
                        /* MV limit is different based on ref. PIC */
                        for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
                        {
                            /* Range for the whole CTB starting at its origin */
                            hme_derive_search_range(
                                s_merge_prms_64x64.aps_mv_range[ref_ctr],
                                ps_pic_limit,
                                &as_mv_limit[ref_ctr],
                                i4_ctb_x << 6,
                                i4_ctb_y << 6,
                                64,
                                64);

                            /* Scale the derived range in place (source and destination match) */
                            SCALE_RANGE_PRMS_POINTERS(
                                s_merge_prms_64x64.aps_mv_range[ref_ctr],
                                s_merge_prms_64x64.aps_mv_range[ref_ctr],
                                2);
                        }
                        s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6;
                        s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6;
                        s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0];

                        e_merge_result = hme_try_merge_high_speed(
                            ps_thrd_ctxt,
                            ps_ctxt,
                            ps_cur_ipe_ctb,
                            &s_subpel_prms,
                            &s_merge_prms_64x64,
                            ps_pu_results,
                            &as_pu_results[0][0][0]);

                        /* Non-pristine and merged: keep the root node, drop its children */
                        if((e_merge_result == CU_MERGED) &&
                           (ME_PRISTINE_QUALITY != e_me_quality_presets))
                        {
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                .is_node_valid = 1;
                            NULLIFY_THE_CHILDREN_NODES(
                                ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE));
                        }
                        /* Pristine and split: only the root node is invalidated */
                        else if(
                            (e_merge_result == CU_SPLIT) &&
                            (ME_PRISTINE_QUALITY == e_me_quality_presets))
                        {
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                .is_node_valid = 0;
                        }
                    }
8768 
8769                 /*****************************************************************/
8770                 /* UPDATION OF RESULT TO EXTERNAL STRUCTURES                     */
8771                 /*****************************************************************/
8772                 pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y);
8773 
8774                 {
8775 #ifdef _DEBUG
8776                     S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
8777                                  ? 64
8778                                  : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
8779                     S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
8780                                  ? 64
8781                                  : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
8782                     ASSERT(
8783                         (wd * ht) ==
8784                         ihevce_compute_area_of_valid_cus_in_ctb(
8785                             &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]));
8786 #endif
8787                 }
8788             }
8789 
8790             /* set the dependency for the corresponding row in enc loop */
8791             ihevce_dmgr_set_row_row_sync(
8792                 pv_dep_mngr_encloop_dep_me,
8793                 (i4_ctb_x + 1),
8794                 i4_ctb_y,
8795                 tile_col_idx /* Col Tile No. */);
8796 
8797             left_ctb_in_diff_tile = 0;
8798         }
8799     }
8800 }
8801 
8802 /**
8803 ********************************************************************************
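8804 *  @fn   void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt,
8805 *           refine_prms_t *ps_refine_prms, multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
8806 *           S32 lyr_job_type, WORD32 i4_ping_pong, void **ppv_dep_mngr_hme_sync)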
8807 *  @brief  Top level entry point for refinement ME
8808 *
8809 *  @param[in,out]  ps_ctxt: ME Handle
8810 *
8811 *  @param[in]  ps_refine_prms : refinement layer prms
8812 *
8813 *  @return None
8814 ********************************************************************************
8815 */
8816 void hme_refine_no_encode(
8817     coarse_me_ctxt_t *ps_ctxt,
8818     refine_prms_t *ps_refine_prms,
8819     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
8820     S32 lyr_job_type,
8821     WORD32 i4_ping_pong,
8822     void **ppv_dep_mngr_hme_sync)
8823 {
8824     BLK_SIZE_T e_search_blk_size, e_result_blk_size;
8825     ME_QUALITY_PRESETS_T e_me_quality_presets =
8826         ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
8827 
8828     /*************************************************************************/
8829     /* Complexity of search: Low to High                                     */
8830     /*************************************************************************/
8831     SEARCH_COMPLEXITY_T e_search_complexity;
8832 
8833     /*************************************************************************/
8834     /* Config parameter structures for various ME submodules                 */
8835     /*************************************************************************/
8836     hme_search_prms_t s_search_prms_blk;
8837     mvbank_update_prms_t s_mv_update_prms;
8838 
8839     /*************************************************************************/
8840     /* All types of search candidates for predictor based search.            */
8841     /*************************************************************************/
8842     S32 num_init_candts = 0;
8843     search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
8844     search_node_t as_top_neighbours[4], as_left_neighbours[3];
8845     search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr;
8846     search_node_t *ps_candt_l, *ps_candt_t;
8847     search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2];
8848     search_node_t *ps_candt_prj_bl[2];
8849     search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2];
8850     search_node_t *ps_candt_prj_coloc[2];
8851 
8852     pf_get_wt_inp fp_get_wt_inp;
8853 
8854     search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
8855     U32 au4_unique_node_map[MAP_X_MAX * 2];
8856 
8857     /*EIID */
8858     WORD32 i4_num_inter_wins = 0;  //debug code to find stat of
8859     WORD32 i4_num_comparisions = 0;  //debug code
8860     WORD32 i4_threshold_multiplier;
8861     WORD32 i4_threshold_divider;
8862     WORD32 i4_temporal_layer =
8863         ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id;
8864 
8865     /*************************************************************************/
8866     /* Points to the search results for the blk level search (8x8/16x16)     */
8867     /*************************************************************************/
8868     search_results_t *ps_search_results;
8869 
8870     /*************************************************************************/
8871     /* Coordinates                                                           */
8872     /*************************************************************************/
8873     S32 blk_x, i4_ctb_x, blk_id_in_ctb;
8874     //S32 i4_ctb_y;
8875     S32 pos_x, pos_y;
8876     S32 blk_id_in_full_ctb;
8877     S32 i4_num_srch_cands;
8878 
8879     S32 blk_y;
8880 
8881     /*************************************************************************/
8882     /* Related to dimensions of block being searched and pic dimensions      */
8883     /*************************************************************************/
8884     S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic;
8885     S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
8886     S32 num_results_prev_layer;
8887 
8888     /*************************************************************************/
8889     /* Size of a basic unit for this layer. For non encode layers, we search */
8890     /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
8891     /* basic unit size is the ctb size.                                      */
8892     /*************************************************************************/
8893     S32 unit_size;
8894 
8895     /*************************************************************************/
8896     /* Pointers to context in current and coarser layers                     */
8897     /*************************************************************************/
8898     layer_ctxt_t *ps_curr_layer, *ps_coarse_layer;
8899 
8900     /*************************************************************************/
8901     /* to store mv range per blk, and picture limit, allowed search range    */
8902     /* range prms in hpel and qpel units as well                             */
8903     /*************************************************************************/
8904     range_prms_t s_range_prms_inp, s_range_prms_rec;
8905     range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
8906     /*************************************************************************/
8907     /* These variables are used to track number of references at different   */
8908     /* stages of ME.                                                         */
8909     /*************************************************************************/
8910     S32 i4_num_ref_fpel, i4_num_ref_before_merge;
8911     S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
8912     S32 lambda_inp = ps_refine_prms->lambda_inp;
8913 
8914     /*************************************************************************/
8915     /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */
8916     /* Explicit means it searches on all active ref idx.                     */
8917     /*************************************************************************/
8918     S32 curr_layer_implicit, prev_layer_implicit;
8919 
8920     /*************************************************************************/
8921     /* Variables for loop counts                                             */
8922     /*************************************************************************/
8923     S32 id;
8924     S08 i1_ref_idx;
8925 
8926     /*************************************************************************/
8927     /* Input pointer and stride                                              */
8928     /*************************************************************************/
8929     U08 *pu1_inp;
8930     S32 i4_inp_stride;
8931 
8932     S32 end_of_frame;
8933 
8934     S32 num_sync_units_in_row;
8935 
8936     PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt;
8937     ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
8938 
8939     /*************************************************************************/
8940     /* Pointers to current and coarse layer are needed for projection */
8941     /* Pointer to prev layer are needed for other candts like coloc   */
8942     /*************************************************************************/
8943     ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
8944 
8945     ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1];
8946 
8947     num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
8948 
8949     /* Function pointer is selected based on the C vs X86 macro */
8950 
8951     fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
8952                         ->pf_get_wt_inp_8x8;
8953 
8954     i4_inp_stride = ps_curr_layer->i4_inp_stride;
8955     i4_pic_wd = ps_curr_layer->i4_wd;
8956     i4_pic_ht = ps_curr_layer->i4_ht;
8957     e_search_complexity = ps_refine_prms->e_search_complexity;
8958 
8959     end_of_frame = 0;
8960 
8961     /* If the previous layer is non-encode layer, then use dyadic projection */
8962     if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1])
8963         pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic;
8964     else
8965         pf_hme_project_coloc_candt = hme_project_coloc_candt;
8966 
8967     /* This points to all the initial candts */
8968     ps_search_candts = &as_search_candts[0];
8969 
8970     {
8971         e_search_blk_size = BLK_8x8;
8972         blk_wd = blk_ht = 8;
8973         blk_size_shift = 3;
8974         s_mv_update_prms.i4_shift = 0;
8975         /*********************************************************************/
8976         /* In case we do not encode this layer, we search 8x8 with or without*/
8977         /* enable 4x4 SAD.                                                   */
8978         /*********************************************************************/
8979         {
8980             S32 i4_mask = (ENABLE_2Nx2N);
8981 
8982             e_result_blk_size = BLK_8x8;
8983             if(ps_refine_prms->i4_enable_4x4_part)
8984             {
8985                 i4_mask |= (ENABLE_NxN);
8986                 e_result_blk_size = BLK_4x4;
8987                 s_mv_update_prms.i4_shift = 1;
8988             }
8989 
8990             s_search_prms_blk.i4_part_mask = i4_mask;
8991         }
8992 
8993         unit_size = blk_wd;
8994         s_search_prms_blk.i4_inp_stride = unit_size;
8995     }
8996 
8997     /* This is required to properly update the layer mv bank */
8998     s_mv_update_prms.e_search_blk_size = e_search_blk_size;
8999     s_search_prms_blk.e_blk_size = e_search_blk_size;
9000 
9001     /*************************************************************************/
9002     /* If current layer is explicit, then the number of ref frames are to    */
9003     /* be same as previous layer. Else it will be 2                          */
9004     /*************************************************************************/
9005     i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
9006     if(ps_refine_prms->explicit_ref)
9007     {
9008         curr_layer_implicit = 0;
9009         i4_num_ref_fpel = i4_num_ref_prev_layer;
9010         /* 100578 : Using same mv cost fun. for all presets. */
9011         s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine;
9012     }
9013     else
9014     {
9015         i4_num_ref_fpel = 2;
9016         curr_layer_implicit = 1;
9017         {
9018             if(ME_MEDIUM_SPEED > e_me_quality_presets)
9019             {
9020                 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit;
9021             }
9022             else
9023             {
9024 #if USE_MODIFIED == 1
9025                 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
9026 #else
9027                 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
9028 #endif
9029             }
9030         }
9031     }
9032 
9033     i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
9034     if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type ==
9035            IV_IDR_FRAME ||
9036        ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME)
9037     {
9038         i4_num_ref_fpel = 1;
9039     }
9040     if(i4_num_ref_prev_layer <= 2)
9041     {
9042         prev_layer_implicit = 1;
9043         curr_layer_implicit = 1;
9044         i4_num_ref_each_dir = 1;
9045     }
9046     else
9047     {
9048         /* It is assumed that we have equal number of references in each dir */
9049         //ASSERT(!(i4_num_ref_prev_layer & 1));
9050         prev_layer_implicit = 0;
9051         i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
9052     }
9053     s_mv_update_prms.i4_num_ref = i4_num_ref_fpel;
9054     s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
9055     s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
9056 
9057     /* this can be kept to 1 or 2 */
9058     i4_num_ref_before_merge = 2;
9059     i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel);
9060 
9061     /* Set up place holders to hold the search nodes of each initial candt */
9062     for(i = 0; i < MAX_INIT_CANDTS; i++)
9063     {
9064         ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
9065         INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
9066     }
9067 
9068     /* redundant, but doing it here since it is used in pred ctxt init */
9069     ps_candt_zeromv = ps_search_candts[0].ps_search_node;
9070     for(i = 0; i < 3; i++)
9071     {
9072         search_node_t *ps_search_node;
9073         ps_search_node = &as_left_neighbours[i];
9074         INIT_SEARCH_NODE(ps_search_node, 0);
9075         ps_search_node = &as_top_neighbours[i];
9076         INIT_SEARCH_NODE(ps_search_node, 0);
9077     }
9078 
9079     INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
9080     /* bottom left node always not available for the blk being searched */
9081     as_left_neighbours[2].u1_is_avail = 0;
9082     /*************************************************************************/
9083     /* Initialize all the search results structure here. We update all the   */
9084     /* search results to default values, and configure things like blk sizes */
9085     /*************************************************************************/
9086     if(ps_refine_prms->i4_encode == 0)
9087     {
9088         S32 pred_lx;
9089         search_results_t *ps_search_results;
9090 
9091         ps_search_results = &ps_ctxt->s_search_results_8x8;
9092         hme_init_search_results(
9093             ps_search_results,
9094             i4_num_ref_fpel,
9095             ps_refine_prms->i4_num_fpel_results,
9096             ps_refine_prms->i4_num_results_per_part,
9097             e_search_blk_size,
9098             0,
9099             0,
9100             &ps_ctxt->au1_is_past[0]);
9101         for(pred_lx = 0; pred_lx < 2; pred_lx++)
9102         {
9103             hme_init_pred_ctxt_no_encode(
9104                 &ps_search_results->as_pred_ctxt[pred_lx],
9105                 ps_search_results,
9106                 &as_top_neighbours[0],
9107                 &as_left_neighbours[0],
9108                 &ps_candt_prj_coloc[0],
9109                 ps_candt_zeromv,
9110                 ps_candt_zeromv,
9111                 pred_lx,
9112                 lambda_inp,
9113                 ps_refine_prms->lambda_q_shift,
9114                 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
9115                 &ps_ctxt->ai2_ref_scf[0]);
9116         }
9117     }
9118 
9119     /*********************************************************************/
9120     /* Initialize the dyn. search range params. for each reference index */
9121     /* in current layer ctxt                                             */
9122     /*********************************************************************/
9123     /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
9124     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
9125     {
9126         WORD32 ref_ctr;
9127 
9128         for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++)
9129         {
9130             INIT_DYN_SEARCH_PRMS(
9131                 &ps_ctxt->s_coarse_dyn_range_prms
9132                      .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr],
9133                 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
9134         }
9135     }
9136 
9137     /* Next set up initial candidates according to a given set of rules.   */
9138     /* The number of initial candidates affects the quality of ME in the   */
9139     /* case of motion with multiple degrees of freedom. In case of simple  */
9140     /* translational motion, a current and a few causal and non causal     */
9141     /* candts would suffice. More candidates help to cover more complex    */
9142     /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */
9143     /* where multiple ref helps etc.                                       */
9144     /* The candidate choice also depends on the following parameters.      */
9145     /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH         */
9146     /* Whether we encode or not, and the type of search across reference   */
9147     /* i.e. the previous layer may have been explicit/implicit and curr    */
9148     /* layer may be explicit/implicit                                      */
9149 
9150     /* 0, 0, L, T, projected coloc best always present by default */
9151     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets);
9152     ps_candt_zeromv = ps_search_candts[id].ps_search_node;
9153     ps_search_candts[id].u1_num_steps_refine = 0;
9154     ps_candt_zeromv->s_mv.i2_mvx = 0;
9155     ps_candt_zeromv->s_mv.i2_mvy = 0;
9156 
9157     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets);
9158     ps_candt_l = ps_search_candts[id].ps_search_node;
9159     ps_search_candts[id].u1_num_steps_refine = 0;
9160 
9161     /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */
9162     /* not at the CTB boundary use the causal T and */
9163     /* not the projected T, although the candidate is */
9164     /* still pointed to by ps_candt_prj_t[0] */
9165     if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9166     {
9167         /* Using Projected top to eliminate sync */
9168         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9169             PROJECTED_TOP0, e_me_quality_presets);
9170         ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node;
9171         ps_search_candts[id].u1_num_steps_refine = 1;
9172     }
9173     else
9174     {
9175         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9176             SPATIAL_TOP0, e_me_quality_presets);
9177         ps_candt_t = ps_search_candts[id].ps_search_node;
9178         ps_search_candts[id].u1_num_steps_refine = 0;
9179     }
9180 
9181     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9182         PROJECTED_COLOC0, e_me_quality_presets);
9183     ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node;
9184     ps_search_candts[id].u1_num_steps_refine = 1;
9185 
9186     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9187         PROJECTED_COLOC1, e_me_quality_presets);
9188     ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node;
9189     ps_search_candts[id].u1_num_steps_refine = 1;
9190 
9191     if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9192     {
9193         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9194             PROJECTED_TOP_RIGHT0, e_me_quality_presets);
9195         ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node;
9196         ps_search_candts[id].u1_num_steps_refine = 1;
9197 
9198         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9199             PROJECTED_TOP_LEFT0, e_me_quality_presets);
9200         ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node;
9201         ps_search_candts[id].u1_num_steps_refine = 1;
9202     }
9203     else
9204     {
9205         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9206             SPATIAL_TOP_RIGHT0, e_me_quality_presets);
9207         ps_candt_tr = ps_search_candts[id].ps_search_node;
9208         ps_search_candts[id].u1_num_steps_refine = 0;
9209 
9210         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9211             SPATIAL_TOP_LEFT0, e_me_quality_presets);
9212         ps_candt_tl = ps_search_candts[id].ps_search_node;
9213         ps_search_candts[id].u1_num_steps_refine = 0;
9214     }
9215 
9216     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9217         PROJECTED_RIGHT0, e_me_quality_presets);
9218     ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node;
9219     ps_search_candts[id].u1_num_steps_refine = 1;
9220 
9221     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9222         PROJECTED_BOTTOM0, e_me_quality_presets);
9223     ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node;
9224     ps_search_candts[id].u1_num_steps_refine = 1;
9225 
9226     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9227         PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets);
9228     ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node;
9229     ps_search_candts[id].u1_num_steps_refine = 1;
9230 
9231     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9232         PROJECTED_BOTTOM_LEFT0, e_me_quality_presets);
9233     ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node;
9234     ps_search_candts[id].u1_num_steps_refine = 1;
9235 
9236     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9237         PROJECTED_RIGHT1, e_me_quality_presets);
9238     ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node;
9239     ps_search_candts[id].u1_num_steps_refine = 1;
9240 
9241     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9242         PROJECTED_BOTTOM1, e_me_quality_presets);
9243     ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node;
9244     ps_search_candts[id].u1_num_steps_refine = 1;
9245 
9246     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9247         PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets);
9248     ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node;
9249     ps_search_candts[id].u1_num_steps_refine = 1;
9250 
9251     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9252         PROJECTED_BOTTOM_LEFT1, e_me_quality_presets);
9253     ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node;
9254     ps_search_candts[id].u1_num_steps_refine = 1;
9255 
9256     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets);
9257     ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node;
9258     ps_search_candts[id].u1_num_steps_refine = 1;
9259 
9260     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9261         PROJECTED_TOP_RIGHT1, e_me_quality_presets);
9262     ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node;
9263     ps_search_candts[id].u1_num_steps_refine = 1;
9264 
9265     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9266         PROJECTED_TOP_LEFT1, e_me_quality_presets);
9267     ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node;
9268     ps_search_candts[id].u1_num_steps_refine = 1;
9269 
9270     /*************************************************************************/
9271     /* Now that the candidates have been ordered, to choose the right number */
9272     /* of initial candidates.                                                */
9273     /*************************************************************************/
9274     if(curr_layer_implicit && !prev_layer_implicit)
9275     {
9276         if(e_search_complexity == SEARCH_CX_LOW)
9277             num_init_candts = 7;
9278         else if(e_search_complexity == SEARCH_CX_MED)
9279             num_init_candts = 13;
9280         else if(e_search_complexity == SEARCH_CX_HIGH)
9281             num_init_candts = 18;
9282         else
9283             ASSERT(0);
9284     }
9285     else
9286     {
9287         if(e_search_complexity == SEARCH_CX_LOW)
9288             num_init_candts = 5;
9289         else if(e_search_complexity == SEARCH_CX_MED)
9290             num_init_candts = 11;
9291         else if(e_search_complexity == SEARCH_CX_HIGH)
9292             num_init_candts = 16;
9293         else
9294             ASSERT(0);
9295     }
9296 
9297     if(ME_XTREME_SPEED_25 == e_me_quality_presets)
9298     {
9299         num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25;
9300     }
9301 
9302     /*************************************************************************/
9303     /* The following search parameters are fixed throughout the search across*/
9304     /* all blks. So these are configured outside processing loop             */
9305     /*************************************************************************/
9306     s_search_prms_blk.i4_num_init_candts = num_init_candts;
9307     s_search_prms_blk.i4_start_step = 1;
9308     s_search_prms_blk.i4_use_satd = 0;
9309     s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
9310     /* we use recon only for encoded layers, otherwise it is not available */
9311     s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
9312 
9313     s_search_prms_blk.ps_search_candts = ps_search_candts;
9314     /* We use the same mv_range for all ref. pic. So assign to member 0 */
9315     if(s_search_prms_blk.i4_use_rec)
9316         s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec;
9317     else
9318         s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp;
9319     /*************************************************************************/
9320     /* Initialize coordinates. Meaning as follows                            */
9321     /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
9322     /* blk_y : same as above, y coord.                                       */
9323     /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
9324     /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
9325     /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
9326     /* corner of the picture. Always multiple of 64.                         */
9327     /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
9328     /*************************************************************************/
9329     blk_y = 0;
9330     blk_id_in_ctb = 0;
9331 
9332     GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
9333 
9334     /* Get the number of sync units in a row based on encode/non encode layer */
9335     num_sync_units_in_row = num_blks_in_row;
9336 
9337     /*************************************************************************/
9338     /* Picture limit on all 4 sides. This will be used to set mv limits for  */
9339     /* every block given its coordinate. Note this assumes that the min amt  */
9340     /* of padding to right of pic is equal to the blk size. If we go all the */
9341     /* way up to 64x64, then the min padding on right side of picture should */
9342     /* be 64, and also on bottom side of picture.                            */
9343     /*************************************************************************/
9344     SET_PIC_LIMIT(
9345         s_pic_limit_inp,
9346         ps_curr_layer->i4_pad_x_inp,
9347         ps_curr_layer->i4_pad_y_inp,
9348         ps_curr_layer->i4_wd,
9349         ps_curr_layer->i4_ht,
9350         s_search_prms_blk.i4_num_steps_post_refine);
9351 
9352     SET_PIC_LIMIT(
9353         s_pic_limit_rec,
9354         ps_curr_layer->i4_pad_x_rec,
9355         ps_curr_layer->i4_pad_y_rec,
9356         ps_curr_layer->i4_wd,
9357         ps_curr_layer->i4_ht,
9358         s_search_prms_blk.i4_num_steps_post_refine);
9359 
9360     /*************************************************************************/
9361     /* set the MV limit per ref. pic.                                        */
9362     /*    - P pic. : Based on the config params.                             */
9363     /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
9364     /*************************************************************************/
9365     {
9366         WORD32 ref_ctr;
9367         /* Only for B/b pic. */
9368         if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
9369         {
9370             WORD16 i2_mv_y_per_poc, i2_max_mv_y;
9371             WORD32 cur_poc, ref_poc, abs_poc_diff;
9372 
9373             cur_poc = ps_ctxt->i4_curr_poc;
9374 
9375             /* Get abs MAX for symmetric search */
9376             i2_mv_y_per_poc = MAX(
9377                 ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id],
9378                 (ABS(ps_ctxt->s_coarse_dyn_range_prms
9379                          .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id])));
9380 
9381             for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9382             {
9383                 ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
9384                 abs_poc_diff = ABS((cur_poc - ref_poc));
9385                 /* Get the cur. max MV based on POC distance */
9386                 i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
9387                 i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);
9388 
9389                 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9390                 as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
9391                 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9392                 as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
9393             }
9394         }
9395         else
9396         {
9397             /* Set the Config. File Params for P pic. */
9398             for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9399             {
9400                 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9401                 as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
9402                 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9403                 as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
9404             }
9405         }
9406     }
9407 
9408     /* EIID: Calculate threshold based on quality preset and/or temporal layers */
9409     if(e_me_quality_presets == ME_MEDIUM_SPEED)
9410     {
9411         i4_threshold_multiplier = 1;
9412         i4_threshold_divider = 4;
9413     }
9414     else if(e_me_quality_presets == ME_HIGH_SPEED)
9415     {
9416         i4_threshold_multiplier = 1;
9417         i4_threshold_divider = 2;
9418     }
9419     else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25))
9420     {
9421 #if OLD_XTREME_SPEED
9422         /* Hard coding the temporal ID value to 1, if it is older xtreme speed */
9423         i4_temporal_layer = 1;
9424 #endif
9425         if(i4_temporal_layer == 0)
9426         {
9427             i4_threshold_multiplier = 3;
9428             i4_threshold_divider = 4;
9429         }
9430         else if(i4_temporal_layer == 1)
9431         {
9432             i4_threshold_multiplier = 3;
9433             i4_threshold_divider = 4;
9434         }
9435         else if(i4_temporal_layer == 2)
9436         {
9437             i4_threshold_multiplier = 1;
9438             i4_threshold_divider = 1;
9439         }
9440         else
9441         {
9442             i4_threshold_multiplier = 5;
9443             i4_threshold_divider = 4;
9444         }
9445     }
9446     else if(e_me_quality_presets == ME_HIGH_QUALITY)
9447     {
9448         i4_threshold_multiplier = 1;
9449         i4_threshold_divider = 1;
9450     }
9451 
9452     /*************************************************************************/
9453     /*************************************************************************/
9454     /*************************************************************************/
9455     /* START OF THE CORE LOOP                                                */
9456     /* If Encode is 0, then we just loop over each blk                       */
9457     /*************************************************************************/
9458     /*************************************************************************/
9459     /*************************************************************************/
9460     while(0 == end_of_frame)
9461     {
9462         job_queue_t *ps_job;
9463         ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row;  //EIID
9464         WORD32 i4_ctb_row_ctr;  //counter to calculate CTB row counter. It's (row_ctr /4)
9465         WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4;  //calculations verified for L1 only
9466         //+3 to get ceil values when divided by 4
9467         WORD32 i4_num_4x4_blocks_in_ctb_at_l1 =
9468             8 * 8;  //considering CTB size 32x32 at L1. hardcoded for now
9469         //if there is variable for ctb size use that and this variable can be derived
9470         WORD32 offset_val, check_dep_pos, set_dep_pos;
9471         void *pv_hme_dep_mngr;
9472         ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row;
9473 
9474         /* Get the current layer HME Dep Mngr       */
9475         /* Note : Use layer_id - 1 in HME layers    */
9476 
9477         pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1];
9478 
9479         /* Get the current row from the job queue */
9480         ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
9481             ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong);
9482 
9483         /* If all rows are done, set the end of process flag to 1, */
9484         /* and the current row to -1 */
9485         if(NULL == ps_job)
9486         {
9487             blk_y = -1;
9488             end_of_frame = 1;
9489 
9490             continue;
9491         }
9492 
9493         if(1 == ps_ctxt->s_frm_prms.is_i_pic)
9494         {
9495             /* set the output dependency of current row */
9496             ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
9497             continue;
9498         }
9499 
9500         blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
9501         blk_x = 0;
9502         i4_ctb_x = 0;
9503 
9504         /* wait for Corresponding Pre intra Job to be completed */
9505         if(1 == ps_refine_prms->i4_layer_id)
9506         {
9507             volatile UWORD32 i4_l1_done;
9508             volatile UWORD32 *pi4_l1_done;
9509             pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt
9510                               ->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2];
9511             i4_l1_done = *pi4_l1_done;
9512             while(!i4_l1_done)
9513             {
9514                 i4_l1_done = *pi4_l1_done;
9515             }
9516         }
9517         /* Set Variables for Dep. Checking and Setting */
9518         set_dep_pos = blk_y + 1;
9519         if(blk_y > 0)
9520         {
9521             offset_val = 2;
9522             check_dep_pos = blk_y - 1;
9523         }
9524         else
9525         {
9526             /* First row should run without waiting */
9527             offset_val = -1;
9528             check_dep_pos = 0;
9529         }
9530 
9531         /* EIID: calculate ed_blk_ctxt pointer for current row */
        /* valid only for layer-1; not verified or used for other layers */
9533         i4_ctb_row_ctr = blk_y / 4;
9534         ps_ed_blk_ctxt_curr_row =
9535             ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row *
9536                                   i4_num_4x4_blocks_in_ctb_at_l1);  //valid for L1 only
9537         ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row);
9538 
9539         /* if non-encode layer then i4_ctb_x will be same as blk_x */
        /* loop over all the units in a row                        */
9541         for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++)
9542         {
9543             ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb;  //EIDD
9544             ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr;
9545             WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4;
9546 
9547             /* Wait till top row block is processed   */
9548             /* Currently checking till top right block*/
9549 
9550             /* Disabled since all candidates, except for */
9551             /* L and C, are projected from the coarser layer, */
9552             /* only in ME_HIGH_SPEED mode */
9553             if((ME_MEDIUM_SPEED > e_me_quality_presets))
9554             {
9555                 if(i4_ctb_x < (num_sync_units_in_row - 1))
9556                 {
9557                     ihevce_dmgr_chk_row_row_sync(
9558                         pv_hme_dep_mngr,
9559                         i4_ctb_x,
9560                         offset_val,
9561                         check_dep_pos,
9562                         0, /* Col Tile No. : Not supported in PreEnc*/
9563                         ps_ctxt->thrd_id);
9564                 }
9565             }
9566 
9567             {
9568                 /* for non encoder layer only one block is processed */
9569                 num_blks_in_this_ctb = 1;
9570             }
9571 
9572             /* EIID: derive ed_ctxt ptr for current CTB */
9573             ps_ed_blk_ctxt_curr_ctb =
9574                 ps_ed_blk_ctxt_curr_row +
9575                 (i4_ctb_blk_ctr *
9576                  i4_num_4x4_blocks_in_ctb_at_l1);  //currently valid for l1 layer only
9577             ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr;
9578 
9579             /* loop over all the blocks in CTB will always be 1 */
9580             for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
9581             {
9582                 {
9583                     /* non encode layer */
9584                     blk_x = i4_ctb_x;
9585                     blk_id_in_full_ctb = 0;
9586                     s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0;
9587                 }
9588 
9589                 /* get the current input blk point */
9590                 pos_x = blk_x << blk_size_shift;
9591                 pos_y = blk_y << blk_size_shift;
9592                 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
9593 
9594                 /*********************************************************************/
9595                 /* replicate the inp buffer at blk or ctb level for each ref id,     */
9596                 /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
9597                 /* thereby avoiding a bloat up of memory. If we did all references   */
9598                 /* weighted pred, we will end up with a duplicate copy of each ref   */
9599                 /* at each layer, since we need to preserve the original reference.  */
9600                 /* ToDo: Need to observe performance with this mechanism and compare */
9601                 /* with case where ref is weighted.                                  */
9602                 /*********************************************************************/
9603                 if(blk_id_in_ctb == 0)
9604                 {
9605                     fp_get_wt_inp(
9606                         ps_curr_layer,
9607                         &ps_ctxt->s_wt_pred,
9608                         unit_size,
9609                         pos_x,
9610                         pos_y,
9611                         unit_size,
9612                         ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
9613                         ps_ctxt->i4_wt_pred_enable_flag);
9614                 }
9615 
9616                 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
9617                 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
9618                 /* Select search results from a suitable search result in the context */
9619                 {
9620                     ps_search_results = &ps_ctxt->s_search_results_8x8;
9621                 }
9622 
9623                 s_search_prms_blk.ps_search_results = ps_search_results;
9624 
9625                 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
9626                 hme_reset_search_results(
9627                     ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
9628 
9629                 /* Loop across different Ref IDx */
9630                 for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++)
9631                 {
9632                     S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12;
9633                     S32 prev_blk_offset = 6;
9634                     S32 resultid;
9635 
9636                     /*********************************************************************/
9637                     /* For every blk in the picture, the search range needs to be derived*/
9638                     /* Any blk can have any mv, but practical search constraints are     */
9639                     /* imposed by the picture boundary and amt of padding.               */
9640                     /*********************************************************************/
9641                     /* MV limit is different based on ref. PIC */
9642                     hme_derive_search_range(
9643                         &s_range_prms_inp,
9644                         &s_pic_limit_inp,
9645                         &as_mv_limit[i1_ref_idx],
9646                         pos_x,
9647                         pos_y,
9648                         blk_wd,
9649                         blk_ht);
9650                     hme_derive_search_range(
9651                         &s_range_prms_rec,
9652                         &s_pic_limit_rec,
9653                         &as_mv_limit[i1_ref_idx],
9654                         pos_x,
9655                         pos_y,
9656                         blk_wd,
9657                         blk_ht);
9658 
9659                     s_search_prms_blk.i1_ref_idx = i1_ref_idx;
9660                     ps_candt_zeromv->i1_ref_idx = i1_ref_idx;
9661 
9662                     i4_num_srch_cands = 1;
9663 
9664                     if(1 != ps_refine_prms->i4_layer_id)
9665                     {
9666                         S32 x, y;
9667                         x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9668                         y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9669 
9670                         if(ME_MEDIUM_SPEED > e_me_quality_presets)
9671                         {
9672                             hme_get_spatial_candt(
9673                                 ps_curr_layer,
9674                                 e_search_blk_size,
9675                                 blk_x,
9676                                 blk_y,
9677                                 i1_ref_idx,
9678                                 &as_top_neighbours[0],
9679                                 &as_left_neighbours[0],
9680                                 0,
9681                                 ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9682                                 0,
9683                                 ps_refine_prms->i4_encode);
9684 
9685                             *ps_candt_tr = as_top_neighbours[3];
9686                             *ps_candt_t = as_top_neighbours[1];
9687                             *ps_candt_tl = as_top_neighbours[0];
9688                             i4_num_srch_cands += 3;
9689                         }
9690                         else
9691                         {
9692                             layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9693                             S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9694                             S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9695                             search_node_t *ps_search_node;
9696                             S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9697                             hme_mv_t *ps_mv, *ps_mv_base;
9698                             S08 *pi1_ref_idx, *pi1_ref_idx_base;
9699                             S32 jump = 1, mvs_in_blk, mvs_in_row;
9700                             S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9701 
9702                             if(i4_blk_size1 != i4_blk_size2)
9703                             {
9704                                 blk_x_temp <<= 1;
9705                                 blk_y_temp <<= 1;
9706                                 jump = 2;
9707                                 if((i4_blk_size1 << 2) == i4_blk_size2)
9708                                 {
9709                                     blk_x_temp <<= 1;
9710                                     blk_y_temp <<= 1;
9711                                     jump = 4;
9712                                 }
9713                             }
9714 
9715                             mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9716                             mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9717 
                            /* Adjust the blk coord to point to top left locn */
9719                             blk_x_temp -= 1;
9720                             blk_y_temp -= 1;
9721 
9722                             /* Pick up the mvs from the location */
9723                             i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9724                             i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9725 
9726                             ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9727                             pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9728 
9729                             ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9730                             pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9731 
9732                             ps_mv_base = ps_mv;
9733                             pi1_ref_idx_base = pi1_ref_idx;
9734 
9735                             ps_search_node = &as_left_neighbours[0];
9736                             ps_mv = ps_mv_base + mvs_in_row;
9737                             pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9738                             COPY_MV_TO_SEARCH_NODE(
9739                                 ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
9740 
9741                             i4_num_srch_cands++;
9742                         }
9743                     }
9744                     else
9745                     {
9746                         S32 x, y;
9747                         x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9748                         y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9749 
9750                         if(ME_MEDIUM_SPEED > e_me_quality_presets)
9751                         {
9752                             hme_get_spatial_candt_in_l1_me(
9753                                 ps_curr_layer,
9754                                 e_search_blk_size,
9755                                 blk_x,
9756                                 blk_y,
9757                                 i1_ref_idx,
9758                                 !ps_search_results->pu1_is_past[i1_ref_idx],
9759                                 &as_top_neighbours[0],
9760                                 &as_left_neighbours[0],
9761                                 0,
9762                                 ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9763                                 0,
9764                                 ps_ctxt->s_frm_prms.u1_num_active_ref_l0,
9765                                 ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
9766 
9767                             *ps_candt_tr = as_top_neighbours[3];
9768                             *ps_candt_t = as_top_neighbours[1];
9769                             *ps_candt_tl = as_top_neighbours[0];
9770 
9771                             i4_num_srch_cands += 3;
9772                         }
9773                         else
9774                         {
9775                             layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9776                             S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9777                             S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9778                             S32 i4_mv_pos_in_implicit_array;
9779                             search_node_t *ps_search_node;
9780                             S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9781                             hme_mv_t *ps_mv, *ps_mv_base;
9782                             S08 *pi1_ref_idx, *pi1_ref_idx_base;
9783                             S32 jump = 1, mvs_in_blk, mvs_in_row;
9784                             S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9785                             U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx];
9786                             S32 i4_num_results_in_given_dir =
9787                                 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9788                                                        ps_ctxt->s_frm_prms.u1_num_active_ref_l1)
9789                                                     : (ps_layer_mvbank->i4_num_mvs_per_ref *
9790                                                        ps_ctxt->s_frm_prms.u1_num_active_ref_l0));
9791 
9792                             if(i4_blk_size1 != i4_blk_size2)
9793                             {
9794                                 blk_x_temp <<= 1;
9795                                 blk_y_temp <<= 1;
9796                                 jump = 2;
9797                                 if((i4_blk_size1 << 2) == i4_blk_size2)
9798                                 {
9799                                     blk_x_temp <<= 1;
9800                                     blk_y_temp <<= 1;
9801                                     jump = 4;
9802                                 }
9803                             }
9804 
9805                             mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9806                             mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9807 
                            /* Adjust the blk coord to point to top left locn */
9809                             blk_x_temp -= 1;
9810                             blk_y_temp -= 1;
9811 
9812                             /* Pick up the mvs from the location */
9813                             i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9814                             i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9815 
9816                             i4_offset +=
9817                                 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9818                                                        ps_ctxt->s_frm_prms.u1_num_active_ref_l0)
9819                                                     : 0);
9820 
9821                             ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9822                             pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9823 
9824                             ps_mv_base = ps_mv;
9825                             pi1_ref_idx_base = pi1_ref_idx;
9826 
9827                             {
9828                                 /* ps_mv and pi1_ref_idx now point to the top left locn */
9829                                 ps_search_node = &as_left_neighbours[0];
9830                                 ps_mv = ps_mv_base + mvs_in_row;
9831                                 pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9832 
9833                                 i4_mv_pos_in_implicit_array =
9834                                     hme_find_pos_of_implicitly_stored_ref_id(
9835                                         pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir);
9836 
9837                                 if(-1 != i4_mv_pos_in_implicit_array)
9838                                 {
9839                                     COPY_MV_TO_SEARCH_NODE(
9840                                         ps_search_node,
9841                                         &ps_mv[i4_mv_pos_in_implicit_array],
9842                                         &pi1_ref_idx[i4_mv_pos_in_implicit_array],
9843                                         i1_ref_idx,
9844                                         shift);
9845                                 }
9846                                 else
9847                                 {
9848                                     ps_search_node->u1_is_avail = 0;
9849                                     ps_search_node->s_mv.i2_mvx = 0;
9850                                     ps_search_node->s_mv.i2_mvy = 0;
9851                                     ps_search_node->i1_ref_idx = i1_ref_idx;
9852                                 }
9853 
9854                                 i4_num_srch_cands++;
9855                             }
9856                         }
9857                     }
9858 
9859                     *ps_candt_l = as_left_neighbours[0];
9860 
9861                     /* when 16x16 is searched in an encode layer, and the prev layer */
9862                     /* stores results for 4x4 blks, we project 5 candts corresponding */
                    /* to (2,2), (2,14), (14,2), (14,14) and 2nd best of (2,2) */
9864                     /* However in other cases, only 2,2 best and 2nd best reqd */
9865                     resultid = 0;
9866                     pf_hme_project_coloc_candt(
9867                         ps_candt_prj_coloc[0],
9868                         ps_curr_layer,
9869                         ps_coarse_layer,
9870                         pos_x + 2,
9871                         pos_y + 2,
9872                         i1_ref_idx,
9873                         resultid);
9874 
9875                     i4_num_srch_cands++;
9876 
9877                     resultid = 1;
9878                     if(num_results_prev_layer > 1)
9879                     {
9880                         pf_hme_project_coloc_candt(
9881                             ps_candt_prj_coloc[1],
9882                             ps_curr_layer,
9883                             ps_coarse_layer,
9884                             pos_x + 2,
9885                             pos_y + 2,
9886                             i1_ref_idx,
9887                             resultid);
9888 
9889                         i4_num_srch_cands++;
9890                     }
9891 
9892                     resultid = 0;
9893 
9894                     if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9895                     {
9896                         pf_hme_project_coloc_candt(
9897                             ps_candt_prj_t[0],
9898                             ps_curr_layer,
9899                             ps_coarse_layer,
9900                             pos_x,
9901                             pos_y - prev_blk_offset,
9902                             i1_ref_idx,
9903                             resultid);
9904 
9905                         i4_num_srch_cands++;
9906                     }
9907 
9908                     {
9909                         pf_hme_project_coloc_candt(
9910                             ps_candt_prj_br[0],
9911                             ps_curr_layer,
9912                             ps_coarse_layer,
9913                             pos_x + next_blk_offset,
9914                             pos_y + next_blk_offset,
9915                             i1_ref_idx,
9916                             resultid);
9917                         pf_hme_project_coloc_candt(
9918                             ps_candt_prj_bl[0],
9919                             ps_curr_layer,
9920                             ps_coarse_layer,
9921                             pos_x - prev_blk_offset,
9922                             pos_y + next_blk_offset,
9923                             i1_ref_idx,
9924                             resultid);
9925                         pf_hme_project_coloc_candt(
9926                             ps_candt_prj_r[0],
9927                             ps_curr_layer,
9928                             ps_coarse_layer,
9929                             pos_x + next_blk_offset,
9930                             pos_y,
9931                             i1_ref_idx,
9932                             resultid);
9933                         pf_hme_project_coloc_candt(
9934                             ps_candt_prj_b[0],
9935                             ps_curr_layer,
9936                             ps_coarse_layer,
9937                             pos_x,
9938                             pos_y + next_blk_offset,
9939                             i1_ref_idx,
9940                             resultid);
9941 
9942                         i4_num_srch_cands += 4;
9943 
9944                         if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9945                         {
9946                             pf_hme_project_coloc_candt(
9947                                 ps_candt_prj_tr[0],
9948                                 ps_curr_layer,
9949                                 ps_coarse_layer,
9950                                 pos_x + next_blk_offset,
9951                                 pos_y - prev_blk_offset,
9952                                 i1_ref_idx,
9953                                 resultid);
9954                             pf_hme_project_coloc_candt(
9955                                 ps_candt_prj_tl[0],
9956                                 ps_curr_layer,
9957                                 ps_coarse_layer,
9958                                 pos_x - prev_blk_offset,
9959                                 pos_y - prev_blk_offset,
9960                                 i1_ref_idx,
9961                                 resultid);
9962 
9963                             i4_num_srch_cands += 2;
9964                         }
9965                     }
9966                     if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED))
9967                     {
9968                         resultid = 1;
9969                         pf_hme_project_coloc_candt(
9970                             ps_candt_prj_br[1],
9971                             ps_curr_layer,
9972                             ps_coarse_layer,
9973                             pos_x + next_blk_offset,
9974                             pos_y + next_blk_offset,
9975                             i1_ref_idx,
9976                             resultid);
9977                         pf_hme_project_coloc_candt(
9978                             ps_candt_prj_bl[1],
9979                             ps_curr_layer,
9980                             ps_coarse_layer,
9981                             pos_x - prev_blk_offset,
9982                             pos_y + next_blk_offset,
9983                             i1_ref_idx,
9984                             resultid);
9985                         pf_hme_project_coloc_candt(
9986                             ps_candt_prj_r[1],
9987                             ps_curr_layer,
9988                             ps_coarse_layer,
9989                             pos_x + next_blk_offset,
9990                             pos_y,
9991                             i1_ref_idx,
9992                             resultid);
9993                         pf_hme_project_coloc_candt(
9994                             ps_candt_prj_b[1],
9995                             ps_curr_layer,
9996                             ps_coarse_layer,
9997                             pos_x,
9998                             pos_y + next_blk_offset,
9999                             i1_ref_idx,
10000                             resultid);
10001 
10002                         i4_num_srch_cands += 4;
10003 
10004                         pf_hme_project_coloc_candt(
10005                             ps_candt_prj_tr[1],
10006                             ps_curr_layer,
10007                             ps_coarse_layer,
10008                             pos_x + next_blk_offset,
10009                             pos_y - prev_blk_offset,
10010                             i1_ref_idx,
10011                             resultid);
10012                         pf_hme_project_coloc_candt(
10013                             ps_candt_prj_tl[1],
10014                             ps_curr_layer,
10015                             ps_coarse_layer,
10016                             pos_x - prev_blk_offset,
10017                             pos_y - prev_blk_offset,
10018                             i1_ref_idx,
10019                             resultid);
10020                         pf_hme_project_coloc_candt(
10021                             ps_candt_prj_t[1],
10022                             ps_curr_layer,
10023                             ps_coarse_layer,
10024                             pos_x,
10025                             pos_y - prev_blk_offset,
10026                             i1_ref_idx,
10027                             resultid);
10028 
10029                         i4_num_srch_cands += 3;
10030                     }
10031 
10032                     /* Note this block also clips the MV range for all candidates */
10033 #ifdef _DEBUG
10034                     {
10035                         S32 candt;
10036                         range_prms_t *ps_range_prms;
10037 
10038                         S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
10039                         for(candt = 0; candt < i4_num_srch_cands; candt++)
10040                         {
10041                             search_node_t *ps_search_node;
10042 
10043                             ps_search_node =
10044                                 s_search_prms_blk.ps_search_candts[candt].ps_search_node;
10045 
10046                             ps_range_prms = s_search_prms_blk.aps_mv_range[0];
10047 
10048                             if((ps_search_node->i1_ref_idx >= num_ref_valid) ||
10049                                (ps_search_node->i1_ref_idx < 0))
10050                             {
10051                                 ASSERT(0);
10052                             }
10053                         }
10054                     }
10055 #endif
10056 
10057                     {
10058                         S32 srch_cand;
10059                         S32 num_unique_nodes = 0;
10060                         S32 num_nodes_searched = 0;
10061                         S32 num_best_cand = 0;
10062                         S08 i1_grid_enable = 0;
10063                         search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2];
10064                         /* has list of valid partition to search terminated by -1 */
10065                         S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
10066                         S32 center_x;
10067                         S32 center_y;
10068 
10069                         /* indicates if the centre point of grid needs to be explicitly added for search */
10070                         S32 add_centre = 0;
10071 
10072                         memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
10073                         center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx;
10074                         center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy;
10075 
10076                         for(srch_cand = 0;
10077                             (srch_cand < i4_num_srch_cands) &&
10078                             (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts);
10079                             srch_cand++)
10080                         {
10081                             search_node_t s_search_node_temp =
10082                                 s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0];
10083 
10084                             s_search_node_temp.i1_ref_idx = i1_ref_idx;  //TEMP FIX;
10085 
10086                             /* Clip the motion vectors as well here since after clipping
10087                             two candidates can become same and they will be removed during deduplication */
10088                             CLIP_MV_WITHIN_RANGE(
10089                                 s_search_node_temp.s_mv.i2_mvx,
10090                                 s_search_node_temp.s_mv.i2_mvy,
10091                                 s_search_prms_blk.aps_mv_range[0],
10092                                 ps_refine_prms->i4_num_steps_fpel_refine,
10093                                 ps_refine_prms->i4_num_steps_hpel_refine,
10094                                 ps_refine_prms->i4_num_steps_qpel_refine);
10095 
10096                             /* PT_C */
10097                             INSERT_NEW_NODE(
10098                                 as_unique_search_nodes,
10099                                 num_unique_nodes,
10100                                 s_search_node_temp,
10101                                 0,
10102                                 au4_unique_node_map,
10103                                 center_x,
10104                                 center_y,
10105                                 1);
10106 
10107                             num_nodes_searched += 1;
10108                         }
10109                         num_unique_nodes =
10110                             MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts);
10111 
                        /* If the number of unique candidates to be refined is 2 or more,
                        then filter them out and choose the best two here */
10114                         if(num_unique_nodes >= 2)
10115                         {
10116                             S32 num_results;
10117                             S32 cnt;
10118                             S32 *pi4_valid_part_ids;
10119                             s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10120                             s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10121                             pi4_valid_part_ids = &ai4_valid_part_ids[0];
10122 
10123                             /* pi4_valid_part_ids is updated inside */
10124                             hme_pred_search_no_encode(
10125                                 &s_search_prms_blk,
10126                                 ps_curr_layer,
10127                                 &ps_ctxt->s_wt_pred,
10128                                 pi4_valid_part_ids,
10129                                 1,
10130                                 e_me_quality_presets,
10131                                 i1_grid_enable,
10132                                 (ihevce_me_optimised_function_list_t *)
10133                                     ps_ctxt->pv_me_optimised_function_list
10134 
10135                             );
10136 
10137                             num_best_cand = 0;
10138                             cnt = 0;
10139                             num_results = ps_search_results->u1_num_results_per_part;
10140 
10141                             while((id = pi4_valid_part_ids[cnt++]) >= 0)
10142                             {
10143                                 num_results =
10144                                     MIN(ps_refine_prms->pu1_num_best_results[id], num_results);
10145 
10146                                 for(i = 0; i < num_results; i++)
10147                                 {
10148                                     search_node_t s_search_node_temp;
10149                                     s_search_node_temp =
10150                                         *(ps_search_results->aps_part_results[i1_ref_idx][id] + i);
10151                                     if(s_search_node_temp.i1_ref_idx >= 0)
10152                                     {
10153                                         INSERT_NEW_NODE_NOMAP(
10154                                             as_best_two_proj_node,
10155                                             num_best_cand,
10156                                             s_search_node_temp,
10157                                             0);
10158                                     }
10159                                 }
10160                             }
10161                         }
10162                         else
10163                         {
10164                             add_centre = 1;
10165                             num_best_cand = num_unique_nodes;
10166                             as_best_two_proj_node[0] = as_unique_search_nodes[0];
10167                         }
10168 
10169                         num_unique_nodes = 0;
10170                         num_nodes_searched = 0;
10171 
10172                         if(1 == num_best_cand)
10173                         {
10174                             search_node_t s_search_node_temp = as_best_two_proj_node[0];
10175                             S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx;
10176                             S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy;
10177                             S08 i1_ref_idx = s_search_node_temp.i1_ref_idx;
10178 
10179                             i1_grid_enable = 1;
10180 
10181                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10182                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10183                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10184 
10185                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10186                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10187                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10188 
10189                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10190                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10191                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10192 
10193                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10194                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10195                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10196 
10197                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10198                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10199                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10200 
10201                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10202                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10203                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10204 
10205                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10206                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10207                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10208 
10209                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10210                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10211                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10212 
10213                             if(add_centre)
10214                             {
10215                                 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10216                                 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10217                                 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10218                             }
10219                         }
10220                         else
10221                         {
10222                             /* For the candidates where refinement was required, choose the best two */
10223                             for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++)
10224                             {
10225                                 search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand];
10226                                 WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx;
10227                                 WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy;
10228 
10229                                 /* Because there may not be two best unique candidates (because of clipping),
10230                                 second best candidate can be uninitialized, ignore that */
10231                                 if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV ||
10232                                    s_search_node_temp.i1_ref_idx < 0)
10233                                 {
10234                                     num_nodes_searched++;
10235                                     continue;
10236                                 }
10237 
10238                                 /* PT_C */
                                /* Since the center point has already been evaluated and the best results
                                are persistent, it will not be evaluated again */
10241                                 if(add_centre) /* centre point added explicitly again if search results is not updated */
10242                                 {
10243                                     INSERT_NEW_NODE(
10244                                         as_unique_search_nodes,
10245                                         num_unique_nodes,
10246                                         s_search_node_temp,
10247                                         0,
10248                                         au4_unique_node_map,
10249                                         center_x,
10250                                         center_y,
10251                                         1);
10252                                 }
10253 
10254                                 /* PT_L */
10255                                 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10256                                 s_search_node_temp.s_mv.i2_mvy = mv_y;
10257                                 INSERT_NEW_NODE(
10258                                     as_unique_search_nodes,
10259                                     num_unique_nodes,
10260                                     s_search_node_temp,
10261                                     0,
10262                                     au4_unique_node_map,
10263                                     center_x,
10264                                     center_y,
10265                                     1);
10266 
10267                                 /* PT_T */
10268                                 s_search_node_temp.s_mv.i2_mvx = mv_x;
10269                                 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10270                                 INSERT_NEW_NODE(
10271                                     as_unique_search_nodes,
10272                                     num_unique_nodes,
10273                                     s_search_node_temp,
10274                                     0,
10275                                     au4_unique_node_map,
10276                                     center_x,
10277                                     center_y,
10278                                     1);
10279 
10280                                 /* PT_R */
10281                                 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10282                                 s_search_node_temp.s_mv.i2_mvy = mv_y;
10283                                 INSERT_NEW_NODE(
10284                                     as_unique_search_nodes,
10285                                     num_unique_nodes,
10286                                     s_search_node_temp,
10287                                     0,
10288                                     au4_unique_node_map,
10289                                     center_x,
10290                                     center_y,
10291                                     1);
10292 
10293                                 /* PT_B */
10294                                 s_search_node_temp.s_mv.i2_mvx = mv_x;
10295                                 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10296                                 INSERT_NEW_NODE(
10297                                     as_unique_search_nodes,
10298                                     num_unique_nodes,
10299                                     s_search_node_temp,
10300                                     0,
10301                                     au4_unique_node_map,
10302                                     center_x,
10303                                     center_y,
10304                                     1);
10305 
10306                                 /* PT_TL */
10307                                 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10308                                 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10309                                 INSERT_NEW_NODE(
10310                                     as_unique_search_nodes,
10311                                     num_unique_nodes,
10312                                     s_search_node_temp,
10313                                     0,
10314                                     au4_unique_node_map,
10315                                     center_x,
10316                                     center_y,
10317                                     1);
10318 
10319                                 /* PT_TR */
10320                                 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10321                                 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10322                                 INSERT_NEW_NODE(
10323                                     as_unique_search_nodes,
10324                                     num_unique_nodes,
10325                                     s_search_node_temp,
10326                                     0,
10327                                     au4_unique_node_map,
10328                                     center_x,
10329                                     center_y,
10330                                     1);
10331 
10332                                 /* PT_BL */
10333                                 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10334                                 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10335                                 INSERT_NEW_NODE(
10336                                     as_unique_search_nodes,
10337                                     num_unique_nodes,
10338                                     s_search_node_temp,
10339                                     0,
10340                                     au4_unique_node_map,
10341                                     center_x,
10342                                     center_y,
10343                                     1);
10344 
10345                                 /* PT_BR */
10346                                 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10347                                 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10348                                 INSERT_NEW_NODE(
10349                                     as_unique_search_nodes,
10350                                     num_unique_nodes,
10351                                     s_search_node_temp,
10352                                     0,
10353                                     au4_unique_node_map,
10354                                     center_x,
10355                                     center_y,
10356                                     1);
10357                             }
10358                         }
10359 
10360                         s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10361                         s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10362 
10363                         /*****************************************************************/
10364                         /* Call the search algorithm, this includes:                     */
10365                         /* Pre-Search-Refinement (for coarse candts)                     */
10366                         /* Search on each candidate                                      */
10367                         /* Post Search Refinement on winners/other new candidates        */
10368                         /*****************************************************************/
10369 
10370                         hme_pred_search_no_encode(
10371                             &s_search_prms_blk,
10372                             ps_curr_layer,
10373                             &ps_ctxt->s_wt_pred,
10374                             ai4_valid_part_ids,
10375                             0,
10376                             e_me_quality_presets,
10377                             i1_grid_enable,
10378                             (ihevce_me_optimised_function_list_t *)
10379                                 ps_ctxt->pv_me_optimised_function_list);
10380 
10381                         i1_grid_enable = 0;
10382                     }
10383                 }
10384 
10385                 /* for non encode layer update MV and end processing for block */
10386                 {
10387                     WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0;
10388                     search_node_t *ps_search_node;
10389                     /* now update the reqd results back to the layer mv bank. */
10390                     if(1 == ps_refine_prms->i4_layer_id)
10391                     {
10392                         hme_update_mv_bank_in_l1_me(
10393                             ps_search_results,
10394                             ps_curr_layer->ps_layer_mvbank,
10395                             blk_x,
10396                             blk_y,
10397                             &s_mv_update_prms);
10398                     }
10399                     else
10400                     {
10401                         hme_update_mv_bank_noencode(
10402                             ps_search_results,
10403                             ps_curr_layer->ps_layer_mvbank,
10404                             blk_x,
10405                             blk_y,
10406                             &s_mv_update_prms);
10407                     }
10408 
                    /* Update the min and max MVs of the dynamic search range for each ref. pic. */
                    /* Done only for P pics: for P, is_i_pic and bidir_enabled are both 0, */
                    /* whereas for I and B pics the two flags are mutually exclusive */
10411                     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10412                     {
10413                         WORD32 i4_j;
10414                         layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank;
10415 
10416                         //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size)
10417                         /* Not considering this for Dyn. Search Update */
10418                         {
10419                             for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10420                                 i4_ref_id++)
10421                             {
10422                                 ps_search_node =
10423                                     ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10424 
10425                                 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
10426                                 {
10427                                     hme_update_dynamic_search_params(
10428                                         &ps_ctxt->s_coarse_dyn_range_prms
10429                                              .as_dyn_range_prms[ps_refine_prms->i4_layer_id]
10430                                                                [i4_ref_id],
10431                                         ps_search_node->s_mv.i2_mvy);
10432 
10433                                     ps_search_node++;
10434                                 }
10435                             }
10436                         }
10437                     }
10438 
10439                     if(1 == ps_refine_prms->i4_layer_id)
10440                     {
10441                         WORD32 wt_pred_val, log_wt_pred_val;
10442                         WORD32 ref_id_of_nearest_poc = 0;
10443                         WORD32 max_val = 0x7fffffff;
10444                         WORD32 max_l0_val = 0x7fffffff;
10445                         WORD32 max_l1_val = 0x7fffffff;
10446                         WORD32 cur_val;
10447                         WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred;
10448 
10449                         WORD32 bestl0_sad = 0x7fffffff;
10450                         WORD32 bestl1_sad = 0x7fffffff;
10451                         search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL;
10452 
10453                         for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10454                             i4_ref_id++)
10455                         {
10456                             wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id];
10457                             log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc;
10458 
10459                             ps_search_node =
10460                                 ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10461 
10462                             i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) +
10463                                                      ((1 << log_wt_pred_val) >> 1)) >>
10464                                                     log_wt_pred_val;
10465 
10466                             i4_local_cost_weighted_pred =
10467                                 i4_local_weighted_sad +
10468                                 (ps_search_node->i4_tot_cost - ps_search_node->i4_sad);
10469                             //the loop is redundant as the results are already sorted based on total cost
10470                             //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++)
10471                             {
10472                                 if(i4_local_cost_weighted_pred < min_cost)
10473                                 {
10474                                     min_cost = i4_local_cost_weighted_pred;
10475                                     min_sad = i4_local_weighted_sad;
10476                                 }
10477                             }
10478 
10479                             /* For P frame, calculate the nearest poc which is either P or I frame*/
10480                             if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10481                             {
10482                                 if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id])
10483                                 {
10484                                     cur_val =
10485                                         ABS(ps_ctxt->i4_curr_poc -
10486                                             ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]);
10487                                     if(cur_val < max_val)
10488                                     {
10489                                         max_val = cur_val;
10490                                         ref_id_of_nearest_poc = i4_ref_id;
10491                                     }
10492                                 }
10493                             }
10494                         }
10495                         /*Store me cost wrt. to past frame only for P frame  */
10496                         if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10497                         {
10498                             if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10499                             {
10500                                 WORD16 i2_mvx, i2_mvy;
10501 
10502                                 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10503                                 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10504                                 WORD32 z_scan_idx =
10505                                     gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10506                                 WORD32 wt, log_wt;
10507 
10508                                 /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10509                                 <= (1 + ps_ctxt->num_b_frms));*/
10510 
10511                                 /*obtain mvx and mvy */
10512                                 i2_mvx =
10513                                     ps_search_results
10514                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10515                                         ->s_mv.i2_mvx;
10516                                 i2_mvy =
10517                                     ps_search_results
10518                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10519                                         ->s_mv.i2_mvy;
10520 
                                /* weight and log weight-denominator used to scale the SAD of the nearest-POC ref */
10522                                 wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc];
10523                                 log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc;
10524 
10525                                 /*register the min cost for l1 me in blk context */
10526                                 ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] =
10527                                     ((ps_search_results
10528                                           ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10529                                           ->i4_sad *
10530                                       wt) +
10531                                      ((1 << log_wt) >> 1)) >>
10532                                     log_wt;
10533                                 ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] =
10534                                     ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] +
10535                                     (ps_search_results
10536                                          ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10537                                          ->i4_tot_cost -
10538                                      ps_search_results
10539                                          ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10540                                          ->i4_sad);
10541                                 /*for complexity change detection*/
10542                                 ps_ctxt->i4_num_blks++;
10543                                 if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] >
10544                                    (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms)))
10545                                 {
10546                                     ps_ctxt->i4_num_blks_high_sad++;
10547                                 }
10548                             }
10549                         }
10550                     }
10551 
10552                     /* EIID: Early inter intra decisions */
10553                     /* tap L1 level SAD for inter intra decisions */
10554                     if((e_me_quality_presets >= ME_MEDIUM_SPEED) &&
10555                        (!ps_ctxt->s_frm_prms
10556                              .is_i_pic))  //for high-quality preset->disable early decisions
10557                     {
10558                         if(1 == ps_refine_prms->i4_layer_id)
10559                         {
10560                             WORD32 i4_min_sad_cost_8x8_block = min_cost;
10561                             ihevce_ed_blk_t *ps_curr_ed_blk_ctxt;
10562                             WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10563                             WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10564                             WORD32 z_scan_idx =
10565                                 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10566                             ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx;
10567 
10568                             /*register the min cost for l1 me in blk context */
10569                             ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10570                                 i4_min_sad_cost_8x8_block;
10571                             i4_num_comparisions++;
10572 
10573                             /* take early inter-intra decision here */
10574                             ps_curr_ed_blk_ctxt->intra_or_inter = 3; /*init saying eval both */
10575 #if DISABLE_INTRA_IN_BPICS
10576                             if((e_me_quality_presets == ME_XTREME_SPEED_25) &&
10577                                (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
10578                             {
10579                                 ps_curr_ed_blk_ctxt->intra_or_inter =
10580                                     2; /*eval only inter if inter cost is less */
10581                                 i4_num_inter_wins++;
10582                             }
10583                             else
10584 #endif
10585                             {
10586                                 if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] <
10587                                    ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] *
10588                                      i4_threshold_multiplier) /
10589                                     i4_threshold_divider))
10590                                 {
10591                                     ps_curr_ed_blk_ctxt->intra_or_inter =
10592                                         2; /*eval only inter if inter cost is less */
10593                                     i4_num_inter_wins++;
10594                                 }
10595                             }
10596 
10597                             //{
10598                             //  DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n",
10599                             //      blk_x,blk_y,
10600                             //      i4_ctb_blk_ctr, i4_ctb_row_ctr,
10601                             //      ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe,
10602                             //      i4_min_sad_cost_8x8_block
10603                             //      );
10604                             //}
10605 
10606                         }  //end of layer-1
10607                     }  //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED)
10608                     else
10609                     {
10610                         if(1 == ps_refine_prms->i4_layer_id)
10611                         {
10612                             WORD32 i4_min_sad_cost_8x8_block = min_cost;
10613                             WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10614                             WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10615                             WORD32 z_scan_idx =
10616                                 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10617 
10618                             /*register the min cost for l1 me in blk context */
10619                             ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10620                                 i4_min_sad_cost_8x8_block;
10621                         }
10622                     }
10623                     if(1 == ps_refine_prms->i4_layer_id)
10624                     {
10625                         WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10626                         WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10627                         WORD32 z_scan_idx =
10628                             gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10629 
10630                         ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] =
10631                             min_sad;
10632 
10633                         if(min_cost <
10634                            ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2])
10635                         {
10636                             ps_ctxt->i4_L1_hme_best_cost += min_cost;
10637                             ps_ctxt->i4_L1_hme_sad += min_sad;
10638                             ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad;
10639                         }
10640                         else
10641                         {
10642                             ps_ctxt->i4_L1_hme_best_cost +=
10643                                 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2];
10644                             ps_ctxt->i4_L1_hme_sad +=
10645                                 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10646                             ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] =
10647                                 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10648                         }
10649                     }
10650                 }
10651             }
10652 
10653             /* Update the number of blocks processed in the current row */
10654             if((ME_MEDIUM_SPEED > e_me_quality_presets))
10655             {
10656                 ihevce_dmgr_set_row_row_sync(
10657                     pv_hme_dep_mngr,
10658                     (i4_ctb_x + 1),
10659                     blk_y,
10660                     0 /* Col Tile No. : Not supported in PreEnc*/);
10661             }
10662         }
10663 
10664         /* set the output dependency after completion of row */
10665         ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
10666     }
10667 }
10668