/****************************************************************************** * * Copyright (C) 2018 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************************** * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore */ /** ****************************************************************************** * @file hme_refine.c * * @brief * Contains the implementation of the refinement layer searches and related * functionality like CU merge. 
* * @author * Ittiam * * * List of Functions * * ****************************************************************************** */ /*****************************************************************************/ /* File Includes */ /*****************************************************************************/ /* System include files */ #include #include #include #include #include #include #include /* User include files */ #include "ihevc_typedefs.h" #include "itt_video_api.h" #include "ihevce_api.h" #include "rc_cntrl_param.h" #include "rc_frame_info_collector.h" #include "rc_look_ahead_params.h" #include "ihevc_defs.h" #include "ihevc_structs.h" #include "ihevc_platform_macros.h" #include "ihevc_deblk.h" #include "ihevc_itrans_recon.h" #include "ihevc_chroma_itrans_recon.h" #include "ihevc_chroma_intra_pred.h" #include "ihevc_intra_pred.h" #include "ihevc_inter_pred.h" #include "ihevc_mem_fns.h" #include "ihevc_padding.h" #include "ihevc_weighted_pred.h" #include "ihevc_sao.h" #include "ihevc_resi_trans.h" #include "ihevc_quant_iquant_ssd.h" #include "ihevc_cabac_tables.h" #include "ihevce_defs.h" #include "ihevce_lap_enc_structs.h" #include "ihevce_multi_thrd_structs.h" #include "ihevce_multi_thrd_funcs.h" #include "ihevce_me_common_defs.h" #include "ihevce_had_satd.h" #include "ihevce_error_codes.h" #include "ihevce_bitstream.h" #include "ihevce_cabac.h" #include "ihevce_rdoq_macros.h" #include "ihevce_function_selector.h" #include "ihevce_enc_structs.h" #include "ihevce_entropy_structs.h" #include "ihevce_cmn_utils_instr_set_router.h" #include "ihevce_enc_loop_structs.h" #include "ihevce_bs_compute_ctb.h" #include "ihevce_global_tables.h" #include "ihevce_dep_mngr_interface.h" #include "hme_datatype.h" #include "hme_interface.h" #include "hme_common_defs.h" #include "hme_defs.h" #include "ihevce_me_instr_set_router.h" #include "hme_globals.h" #include "hme_utils.h" #include "hme_coarse.h" #include "hme_fullpel.h" #include "hme_subpel.h" #include "hme_refine.h" 
#include "hme_err_compute.h"
#include "hme_common_utils.h"
#include "hme_search_algo.h"
#include "ihevce_stasino_helpers.h"
#include "ihevce_common_utils.h"

/*****************************************************************************/
/* Globals                                                                   */
/*****************************************************************************/

/* brief: mapping buffer to convert raster scan indices into z-scan order in a ctb */
UWORD8 gau1_raster_scan_to_ctb[4][4] = {
    { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 }
};

/*****************************************************************************/
/* Extern Function declaration                                               */
/*****************************************************************************/
extern ctb_boundary_attrs_t *
    get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);

typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S08 i1_ref_id,
    S32 i4_result_id);

typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S32 i4_num_act_ref_l0,
    U08 u1_pred_dir,
    U08 u1_default_ref_id,
    S32 i4_result_id);

/*****************************************************************************/
/* Function Definitions                                                      */
/*****************************************************************************/

/**
********************************************************************************
*  @fn     ihevce_no_wt_copy
*
*  @brief  Copies a temp_stride x temp_stride block of the (unweighted) input
*          plane of the reference selected by the PU into pu1_temp_pred. The
*          source position is the 8x8 block (blk_x, blk_y) displaced by the
*          PU's motion vector for the active list.
*
*  @param[in]  ps_ctxt        : Not used by this function
*  @param[in]  ps_curr_layer  : Supplies ppu1_list_inp[] and i4_inp_stride
*  @param[in]  ps_pu          : PU whose pred mode, ref idx and MV are used.
*                               Any mode other than PRED_L0 is treated as L1
*                               (guarded only by the ASSERT below).
*  @param[out] pu1_temp_pred  : Destination buffer
*  @param[in]  temp_stride    : Destination stride; also used as the number of
*                               rows and columns copied
*  @param[in]  blk_x, blk_y   : Block position in units of 8 pixels
*
*  @return none
********************************************************************************
*/
void ihevce_no_wt_copy(
    coarse_me_ctxt_t *ps_ctxt,
    layer_ctxt_t *ps_curr_layer,
    pu_t *ps_pu,
    UWORD8 *pu1_temp_pred,
    WORD32 temp_stride,
    WORD32 blk_x,
    WORD32 blk_y)
{
    UWORD8 *pu1_ref;
    WORD32 ref_stride, ref_offset;
    WORD32 row, col, i4_tmp;
    WORD32 i4_mvx, i4_mvy;
    WORD8 i1_ref_idx;

    ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1));

    /* Pick the reference index and MV of the list the PU predicts from */
    if(ps_pu->b2_pred_mode == PRED_L0)
    {
        i1_ref_idx = ps_pu->mv.i1_l0_ref_idx;
        i4_mvx = ps_pu->mv.s_l0_mv.i2_mvx;
        i4_mvy = ps_pu->mv.s_l0_mv.i2_mvy;
    }
    else
    {
        i1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
        i4_mvx = ps_pu->mv.s_l1_mv.i2_mvx;
        i4_mvy = ps_pu->mv.s_l1_mv.i2_mvy;
    }

    pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
    ref_stride = ps_curr_layer->i4_inp_stride;

    /* Top-left of the displaced block inside the reference plane */
    ref_offset = ((blk_y << 3) + i4_mvy) * ref_stride;
    ref_offset += (blk_x << 3) + i4_mvx;
    pu1_ref += ref_offset;

    for(row = 0; row < temp_stride; row++)
    {
        for(col = 0; col < temp_stride; col++)
        {
            i4_tmp = pu1_ref[col];
            pu1_temp_pred[col] = CLIP_U8(i4_tmp);
        }

        pu1_ref += ref_stride;
        pu1_temp_pred += temp_stride;
    }
}

/**
********************************************************************************
*  @fn     hme_add_clustered_mvs_as_merge_cands
*
*  @brief  Appends the MVs stored in the given clusters to the merge candidate
*          list. Only clusters whose ref_id maps to the requested prediction
*          direction are used. Each MV is clipped to the MV range of its
*          reference before being stored; an MV is skipped when an identical
*          (mvx, mvy, ref_id) entry is already present.
*
*  @param[in]  ps_cluster_base : Array of i4_num_clusters clusters
*  @param[out] ps_merge_cand   : Candidate list, filled from index 0. Treated
*                                as holding MAX_MERGE_CANDTS entries.
*  @param[in]  pps_range_prms  : MV range per reference id
*  @param[in]  pu1_refid_to_pred_dir_list : Per ref id flag; a cluster is used
*                                when u1_pred_dir equals the logical negation
*                                of this flag
*  @param[in]  i4_num_clusters : Number of clusters in ps_cluster_base
*  @param[in]  u1_pred_dir     : Prediction direction being populated
*
*  @return Number of candidates written to ps_merge_cand
********************************************************************************
*/
static WORD32 hme_add_clustered_mvs_as_merge_cands(
    cluster_data_t *ps_cluster_base,
    search_node_t *ps_merge_cand,
    range_prms_t **pps_range_prms,
    U08 *pu1_refid_to_pred_dir_list,
    WORD32 i4_num_clusters,
    U08 u1_pred_dir)
{
    WORD32 i, j, k;
    WORD32 i4_num_cands_added = 0;
    WORD32 i4_num_mvs_in_cluster;

    for(i = 0; i < i4_num_clusters; i++)
    {
        cluster_data_t *ps_data = &ps_cluster_base[i];

        if(u1_pred_dir == !pu1_refid_to_pred_dir_list[ps_data->ref_id])
        {
            i4_num_mvs_in_cluster = ps_data->num_mvs;

            for(j = 0; j < i4_num_mvs_in_cluster; j++)
            {
                /* The slot at i4_num_cands_added is used as scratch below, */
                /* so stop before it runs off the end of the caller's list  */
                if(i4_num_cands_added >= MAX_MERGE_CANDTS)
                {
                    return i4_num_cands_added;
                }

                ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_data->as_mv[j].mvx;
                ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_data->as_mv[j].mvy;
                ps_merge_cand[i4_num_cands_added].i1_ref_idx = ps_data->ref_id;

                CLIP_MV_WITHIN_RANGE(
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
                    pps_range_prms[ps_data->ref_id],
                    0,
                    0,
                    0);

                /* NOTE(review): the duplicate check compares against the   */
                /* unclipped cluster MV, not the clipped value just stored; */
                /* kept as is to preserve the candidate set - confirm.      */
                for(k = 0; k < i4_num_cands_added; k++)
                {
                    if((ps_merge_cand[k].s_mv.i2_mvx == ps_data->as_mv[j].mvx) &&
                       (ps_merge_cand[k].s_mv.i2_mvy == ps_data->as_mv[j].mvy) &&
                       (ps_merge_cand[k].i1_ref_idx == ps_data->ref_id))
                    {
                        break;
                    }
                }

                /* Commit the scratch slot only when no match was found */
                if(k == i4_num_cands_added)
                {
                    i4_num_cands_added++;
                }
            }
        }
    }

    return i4_num_cands_added;
}

/**
********************************************************************************
*  @fn     hme_add_me_best_as_merge_cands
*
*  @brief  Appends the MVs of the best ME results of the 4 child CUs (or, for
*          a child that is split, of its 4 grandchild CUs) to the merge
*          candidate list. Results are visited rank by rank (result id 0..3)
*          across all children. Intra results, PUs that do not predict in
*          u1_pred_dir and PUs whose mapped reference index is -1 are skipped.
*          The function returns as soon as the preset dependent cap on the
*          number of candidates is reached.
*
*  @param[in]  pps_child_data_array : Search results of the 4 child CUs
*  @param[in]  ps_8x8cu_results     : CU results of the grandchildren; entries
*                                     [4 * i .. 4 * i + 3] belong to child i
*  @param[in,out] ps_merge_cand     : Candidate list; entries
*                                     [0 .. i4_num_cands_added - 1] are
*                                     already valid on entry
*  @param[in]  pps_range_prms       : MV range per reference id
*  @param[in]  pu1_refid_to_pred_dir_list : Not used by this function
*  @param[in]  pi1_past_list        : Maps an L0 ref idx of a PU to the ref id
*                                     stored in the candidate (-1 = invalid)
*  @param[in]  pi1_future_list      : Same as above for L1
*  @param[in]  e_blk_size           : Size of the merged block (only used in
*                                     the sanity ASSERTs)
*  @param[in]  e_quality_preset     : Selects the cap on the candidate count
*  @param[in]  i4_num_cands_added   : Number of candidates already in the list
*  @param[in]  u1_pred_dir          : 0 selects L0 MVs, non-zero selects L1
*
*  @return Total number of candidates in ps_merge_cand
********************************************************************************
*/
static WORD32 hme_add_me_best_as_merge_cands(
    search_results_t **pps_child_data_array,
    inter_cu_results_t *ps_8x8cu_results,
    search_node_t *ps_merge_cand,
    range_prms_t **pps_range_prms,
    U08 *pu1_refid_to_pred_dir_list,
    S08 *pi1_past_list,
    S08 *pi1_future_list,
    BLK_SIZE_T e_blk_size,
    ME_QUALITY_PRESETS_T e_quality_preset,
    S32 i4_num_cands_added,
    U08 u1_pred_dir)
{
    WORD32 i, j, k;
    WORD32 i4_max_cands_to_add;
    WORD32 i4_result_id = 0;

    /* Children of a 64x64 merge (32x32 CUs) are never expected to be split */
    ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size));
    ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size));
    ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size));
    ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size));

    switch(e_quality_preset)
    {
    case ME_PRISTINE_QUALITY:
    {
        i4_max_cands_to_add = MAX_MERGE_CANDTS;

        break;
    }
    case ME_HIGH_QUALITY:
    {
        /* All 4 children are split and each grandchild contributes an MV */
        /* and 2 best results per grandchild */
        i4_max_cands_to_add = 4 * 4 * 2;

        break;
    }
    case ME_MEDIUM_SPEED:
    {
        i4_max_cands_to_add = 4 * 2 * 2;

        break;
    }
    case ME_HIGH_SPEED:
    case ME_XTREME_SPEED:
    case ME_XTREME_SPEED_25:
    default:
    {
        /* Presets not listed above fall back to the smallest cap so that */
        /* the limit is never left uninitialised                          */
        i4_max_cands_to_add = 4 * 2 * 1;

        break;
    }
    }

    /* The slot at i4_num_cands_added is used as scratch below; do not */
    /* touch it when the list is already at (or beyond) the cap        */
    if(i4_num_cands_added >= i4_max_cands_to_add)
    {
        return i4_num_cands_added;
    }

    while(i4_result_id < 4)
    {
        for(i = 0; i < 4; i++)
        {
            inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results;
            inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2];

            if(!pps_child_data_array[i]->u1_split_flag)
            {
                part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id];

                if(ps_child_data->u1_num_best_results <= i4_result_id)
                {
                    continue;
                }

                if(ps_data->as_pu_results->pu.b1_intra_flag)
                {
                    continue;
                }

                /* One PU for 2Nx2N, two PUs for every other part type */
                for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++)
                {
                    mv_t *ps_mv;

                    S08 i1_ref_idx;

                    pu_t *ps_pu = &ps_data->as_pu_results[j].pu;

                    /* Pred mode 2 (presumably bi-pred - confirm) matches */
                    /* either direction                                   */
                    if(u1_pred_dir !=
                       ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
                    {
                        continue;
                    }

                    if(u1_pred_dir)
                    {
                        ps_mv = &ps_pu->mv.s_l1_mv;
                        i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
                    }
                    else
                    {
                        ps_mv = &ps_pu->mv.s_l0_mv;
                        i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
                    }

                    if(-1 == i1_ref_idx)
                    {
                        continue;
                    }

                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
                    ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;

                    CLIP_MV_WITHIN_RANGE(
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
                        pps_range_prms[i1_ref_idx],
                        0,
                        0,
                        0);

                    for(k = 0; k < i4_num_cands_added; k++)
                    {
                        if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
                           (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
                           (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
                        {
                            break;
                        }
                    }

                    if(k == i4_num_cands_added)
                    {
                        i4_num_cands_added++;

                        if(i4_max_cands_to_add <= i4_num_cands_added)
                        {
                            return i4_num_cands_added;
                        }
                    }
                }
            }
            else
            {
                for(j = 0; j < 4; j++)
                {
                    mv_t *ps_mv;

                    S08 i1_ref_idx;

                    /* NOTE(review): always reads best result 0 of the   */
                    /* grandchild, irrespective of i4_result_id - confirm */
                    part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results;
                    pu_t *ps_pu = &ps_data->as_pu_results[0].pu;

                    ASSERT(ps_data->u1_part_type == PRT_2Nx2N);

                    if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id)
                    {
                        continue;
                    }

                    if(ps_data->as_pu_results->pu.b1_intra_flag)
                    {
                        continue;
                    }

                    if(u1_pred_dir !=
                       ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
                    {
                        continue;
                    }

                    if(u1_pred_dir)
                    {
                        ps_mv = &ps_pu->mv.s_l1_mv;
                        i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
                    }
                    else
                    {
                        ps_mv = &ps_pu->mv.s_l0_mv;
                        i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
                    }

                    /* Same guard as the non-split path: an unmapped ref */
                    /* must not be used to index pps_range_prms[]        */
                    if(-1 == i1_ref_idx)
                    {
                        continue;
                    }

                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
                    ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;

                    CLIP_MV_WITHIN_RANGE(
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
                        pps_range_prms[i1_ref_idx],
                        0,
                        0,
                        0);

                    for(k = 0; k < i4_num_cands_added; k++)
                    {
                        if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
                           (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
                           (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
                        {
                            break;
                        }
                    }

                    if(k == i4_num_cands_added)
                    {
                        i4_num_cands_added++;

                        if(i4_max_cands_to_add <= i4_num_cands_added)
                        {
                            return i4_num_cands_added;
                        }
                    }
                }
            }
        }

        i4_result_id++;
    }

    return i4_num_cands_added;
}

/**
********************************************************************************
*  @fn     hme_add_cands_for_merge_eval
*
*  @brief  Builds the list of merge candidates for one prediction direction.
*          For ME_PRISTINE_QUALITY the clustered MVs of the block are added
*          first (32x32 cluster data selected by u1_blk_id, else the 64x64
*          cluster data); for every preset the best ME results of the child
*          CUs are then appended.
*
*  @param[in]  ps_cluster_info      : Cluster data of the CTB
*  @param[in]  pps_child_data_array : Search results of the 4 child CUs
*  @param[in]  ps_8x8cu_results     : CU results of the grandchildren
*  @param[in]  pps_range_prms       : MV range per reference id
*  @param[out] ps_merge_cand        : Candidate list to fill
*  @param[in]  pu1_refid_to_pred_dir_list : Per ref id direction flag used to
*                                     filter clusters
*  @param[in]  pi1_past_list        : L0 ref idx to ref id map
*  @param[in]  pi1_future_list      : L1 ref idx to ref id map
*  @param[in]  e_quality_preset     : ME quality preset
*  @param[in]  e_blk_size           : Size of the merged block
*  @param[in]  u1_pred_dir          : Prediction direction being populated
*  @param[in]  u1_blk_id            : Index of the 32x32 block in the CTB; only
*                                     read when e_blk_size is BLK_32x32
*
*  @return Number of candidates in ps_merge_cand
********************************************************************************
*/
WORD32 hme_add_cands_for_merge_eval(
    ctb_cluster_info_t *ps_cluster_info,
    search_results_t **pps_child_data_array,
    inter_cu_results_t *ps_8x8cu_results,
    range_prms_t **pps_range_prms,
    search_node_t *ps_merge_cand,
    U08 *pu1_refid_to_pred_dir_list,
    S08 *pi1_past_list,
    S08 *pi1_future_list,
    ME_QUALITY_PRESETS_T e_quality_preset,
    BLK_SIZE_T e_blk_size,
    U08 u1_pred_dir,
    U08 u1_blk_id)
{
    WORD32 i4_num_cands_added = 0;

    if(ME_PRISTINE_QUALITY == e_quality_preset)
    {
        cluster_data_t *ps_cluster_primo;

        WORD32 i4_num_clusters;

        if(BLK_32x32 == e_blk_size)
        {
            ps_cluster_primo = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data;
            i4_num_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters;
        }
        else
        {
            ps_cluster_primo = ps_cluster_info->ps_64x64_blk->as_cluster_data;
            i4_num_clusters = ps_cluster_info->ps_64x64_blk->num_clusters;
        }

        i4_num_cands_added = hme_add_clustered_mvs_as_merge_cands(
            ps_cluster_primo,
            ps_merge_cand,
            pps_range_prms,
            pu1_refid_to_pred_dir_list,
            i4_num_clusters,
            u1_pred_dir);
    }

    /* Appends after whatever the cluster pass produced (0 otherwise) */
    i4_num_cands_added = hme_add_me_best_as_merge_cands(
        pps_child_data_array,
        ps_8x8cu_results,
        ps_merge_cand,
        pps_range_prms,
        pu1_refid_to_pred_dir_list,
        pi1_past_list,
        pi1_future_list,
        e_blk_size,
        e_quality_preset,
        i4_num_cands_added,
        u1_pred_dir);

    return i4_num_cands_added;
}

/**
********************************************************************************
*  @fn   WORD32 hme_pick_eval_merge_candts(hme_merge_prms_t *ps_merge_prms,
*                                          hme_subpel_prms_t *ps_subpel_prms,
*                                          S32 i4_search_idx,
*                                          S32 i4_best_part_type,
*                                          S32 i4_is_vert,
*                                          ...)
*
*  @brief  Given a target partition orientation in the merged CU, and the
*          partition type of most likely partition this fxn picks up
*          candidates from the 4 constituent CUs and does refinement search
*          to identify best results for the merge CU across active partitions
*
*  @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of
*                  these params, the search result structure is also derived and
*                 updated during the search
*
*  @param[in] i4_search_idx : ID of the buffer within the search results to
*               update. Will be 0 if all refidx collapsed to one buf, else
*               it'll be 0/1
*
*  @param[in] i4_best_part_type : partition type of potential partition in the
*               merged CU, -1 if the merge process has not yet been able to
*               determine this.
*
*  @param[in] i4_is_vert : Whether target partition of merged CU is vertical
*                          orientation or horizontal orientation.
 *
 * @return Number of merge candidates
 ********************************************************************************
 */
/* Summary of what the body below does:                                      */
/*  1. Collects unique candidate MVs for list/buffer i4_search_idx: through  */
/*     hme_add_cands_for_merge_eval() for ME_PRISTINE_QUALITY, otherwise     */
/*     from the best partition results of the 4 children (or of the 4        */
/*     grandchildren of a child that reports no best result).                */
/*  2. Returns 0 when no candidate was found.                                */
/*  3. For every candidate: qpel interpolation, SAD/SATD evaluation,         */
/*     optional stim-injected distortion for noisy CUs, result update.       */
/*  4. Adds the MV cost to the stored results of every enabled partition.    */
/* i4_best_part_type, i4_is_vert and i4_frm_qstep are not read in the body.  */
WORD32 hme_pick_eval_merge_candts(
    hme_merge_prms_t *ps_merge_prms,
    hme_subpel_prms_t *ps_subpel_prms,
    S32 i4_search_idx,
    S32 i4_best_part_type,
    S32 i4_is_vert,
    wgt_pred_ctxt_t *ps_wt_inp_prms,
    S32 i4_frm_qstep,
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
{
    S32 x_off, y_off;
    search_node_t *ps_search_node;
    S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
    S32 i4_num_valid_parts;
    pred_ctxt_t *ps_pred_ctxt;

    /* Scratch list of unique candidate MVs gathered from the children */
    search_node_t as_merge_unique_node[MAX_MERGE_CANDTS];
    S32 num_unique_nodes_cu_merge = 0;

    search_results_t *ps_search_results = ps_merge_prms->ps_results_merge;
    CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
    S32 i4_part_mask = ps_search_results->i4_part_mask;

    search_results_t *aps_child_results[4];
    layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt;

    S32 i4_ref_stride, i, j;
    result_upd_prms_t s_result_prms;

    BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
    S32 i4_offset;

    /*************************************************************************/
    /* Function pointer for SAD/SATD, array and prms structure to pass to    */
    /* This function                                                         */
    /*************************************************************************/
    PF_SAD_FXN_T pf_err_compute;
    S32 ai4_sad_grid[9][17];
    err_prms_t s_err_prms;

    /*************************************************************************/
    /* Allowed MV RANGE                                                      */
    /*************************************************************************/
    range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range;
    PF_INTERP_FXN_T pf_qpel_interp;
    PF_MV_COST_FXN pf_mv_cost_compute;
    WORD32 pred_lx;
    U08 *apu1_hpel_ref[4];

    interp_prms_t s_interp_prms;
    S32 i4_interp_buf_id;

    S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
    S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;

    /* Sanity checks */
    ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32));

    s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;

    /* Initialize all the ptrs to child CUs for merge decision */
    aps_child_results[0] = ps_merge_prms->ps_results_tl;
    aps_child_results[1] = ps_merge_prms->ps_results_tr;
    aps_child_results[2] = ps_merge_prms->ps_results_bl;
    aps_child_results[3] = ps_merge_prms->ps_results_br;

    num_unique_nodes_cu_merge = 0;

    pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;

    if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
    {
        /* i4_search_idx is passed as the prediction direction. The last   */
        /* argument is the 32x32 block id derived from the merged CU's     */
        /* offsets (0..3 assuming offsets are 0 or 32 - TODO confirm).     */
        num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval(
            ps_merge_prms->ps_cluster_info,
            aps_child_results,
            ps_merge_prms->ps_8x8_cu_results,
            pps_range_prms,
            as_merge_unique_node,
            ps_search_results->pu1_is_past,
            ps_merge_prms->pi1_past_list,
            ps_merge_prms->pi1_future_list,
            ps_merge_prms->e_quality_preset,
            e_blk_size,
            i4_search_idx,
            (ps_merge_prms->ps_results_merge->u1_x_off >> 5) +
                (ps_merge_prms->ps_results_merge->u1_y_off >> 4));
    }
    else
    {
        /*************************************************************************/
        /* Populate the list of unique search nodes in the child CUs for merge   */
        /* evaluation                                                            */
        /*************************************************************************/
        for(i = 0; i < 4; i++)
        {
            search_node_t s_search_node;

            PART_TYPE_T e_part_type;
            PART_ID_T e_part_id;

            WORD32 part_num;

            search_results_t *ps_child = aps_child_results[i];

            if(ps_child->ps_cu_results->u1_num_best_results)
            {
                /* Skip a child whose only best result is intra */
                if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
                     (1 == ps_child->ps_cu_results->u1_num_best_results)))
                {
                    e_part_type =
                        (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type;

                    ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);

                    /* Insert the mv of each partition of the child's best part type */
                    for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
                        part_num++)
                    {
                        e_part_id = ge_part_type_to_part_id[e_part_type][part_num];

                        if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1)
                        {
                            /* Work on a copy so that clipping does not alter the child */
                            s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id];
                            if(s_search_node.s_mv.i2_mvx != INTRA_MV)
                            {
                                CLIP_MV_WITHIN_RANGE(
                                    s_search_node.s_mv.i2_mvx,
                                    s_search_node.s_mv.i2_mvy,
                                    pps_range_prms[s_search_node.i1_ref_idx],
                                    0,
                                    0,
                                    0);

                                INSERT_NEW_NODE_NOMAP(
                                    as_merge_unique_node,
                                    num_unique_nodes_cu_merge,
                                    s_search_node,
                                    1);
                            }
                        }
                    }
                }
            }
            /* Child has no best result: fall back to its 4 grandchildren,  */
            /* unless the first grandchild's only best result is intra      */
            else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)]
                           .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
                      (1 == ps_merge_prms->ps_results_grandchild[(i << 2)]
                                .ps_cu_results->u1_num_best_results)))
            {
                search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)];

                for(j = 0; j < 4; j++)
                {
                    e_part_type = (PART_TYPE_T)ps_results_root[j]
                                      .ps_cu_results->ps_best_results[0]
                                      .u1_part_type;

                    ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);

                    /* Insert the mv of each partition of the grandchild's best part type */
                    for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
                        part_num++)
                    {
                        e_part_id = ge_part_type_to_part_id[e_part_type][part_num];

                        /* NOTE(review): the intra check below reads the child's */
                        /* best result, not grandchild j's - confirm intent.     */
                        if((ps_results_root[j]
                                .aps_part_results[i4_search_idx][e_part_id]
                                ->i1_ref_idx != -1) &&
                           (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu
                                 .b1_intra_flag))
                        {
                            s_search_node =
                                *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id];
                            if(s_search_node.s_mv.i2_mvx != INTRA_MV)
                            {
                                CLIP_MV_WITHIN_RANGE(
                                    s_search_node.s_mv.i2_mvx,
                                    s_search_node.s_mv.i2_mvy,
                                    pps_range_prms[s_search_node.i1_ref_idx],
                                    0,
                                    0,
                                    0);

                                INSERT_NEW_NODE_NOMAP(
                                    as_merge_unique_node,
                                    num_unique_nodes_cu_merge,
                                    s_search_node,
                                    1);
                            }
                        }
                    }
                }
            }
        }
    }

    /* Nothing to evaluate for this search index */
    if(0 == num_unique_nodes_cu_merge)
    {
        return 0;
    }

    /*************************************************************************/
    /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
    /* fixed through this subpel refinement for this partition.              */
    /* Note, we do not enable grid sads since one pt is evaluated per node   */
    /* Hence, part mask is also nearly dont care and we use 2Nx2N enabled.   */
    /*************************************************************************/
    i4_part_mask = ps_search_results->i4_part_mask;

    /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */
    if(ps_subpel_prms->i4_use_satd)
    {
        if(BLK_32x32 == e_blk_size)
        {
            pf_err_compute = hme_evalsatd_pt_pu_32x32;
        }
        else
        {
            pf_err_compute = hme_evalsatd_pt_pu_64x64;
        }
    }
    else
    {
        pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM;
    }

    i4_ref_stride = ps_curr_layer->i4_rec_stride;

    /* Position of the merged block: offset of the top-left child within the */
    /* CTB plus the CTB offset, expressed as a byte offset in the recon plane */
    x_off = ps_merge_prms->ps_results_tl->u1_x_off;
    y_off = ps_merge_prms->ps_results_tl->u1_y_off;
    i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride);

    /*************************************************************************/
    /* This array stores the ids of the partitions whose                     */
    /* SADs are updated. Since the partitions whose SADs are updated may not */
    /* be in contiguous order, we supply another level of indirection.       */
    /*************************************************************************/
    i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);

    /* Initialize result params used for partition update */
    s_result_prms.pf_mv_cost_compute = NULL;
    s_result_prms.ps_search_results = ps_search_results;
    s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
    s_result_prms.i1_ref_idx = i4_search_idx;
    s_result_prms.i4_part_mask = i4_part_mask;
    s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
    s_result_prms.i4_grid_mask = 1;

    /* One time Initialization of error params used for SAD/SATD compute */
    s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride;
    s_err_prms.i4_ref_stride = i4_ref_stride;
    s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
    s_err_prms.i4_grid_mask = 1;
    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
    s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
    s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
    s_err_prms.i4_step = 1;

    /*************************************************************************/
    /* One time preparation of non changing interpolation params.            */
    /*************************************************************************/
    s_interp_prms.i4_ref_stride = i4_ref_stride;
    s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
    s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
    s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem;
    s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size];
    i4_interp_buf_id = 0;

    pf_qpel_interp = ps_subpel_prms->pf_qpel_interp;

    /***************************************************************************/
    /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */
    /* results                                                                 */
    /***************************************************************************/
    for(i = 0; i < num_unique_nodes_cu_merge; i++)
    {
        WORD8 i1_ref_idx;
        ps_search_node = &as_merge_unique_node[i];

        /*********************************************************************/
        /* Compute the base pointer for input, interpolated buffers          */
        /* The base pointers are stored in this order:                       */
        /* [0] fxfy : 0, 0   [1] hxfy : 0.5, 0                               */
        /* [2] fxhy : 0, 0.5 [3] hxhy : 0.5, 0.5                             */
        /* To these, we need to add the offset of the current node           */
        /*********************************************************************/
        i1_ref_idx = ps_search_node->i1_ref_idx;
        apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
        apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
        apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
        apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;

        s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];

        /* Interpolate the prediction for this candidate MV; the result is */
        /* read back through pu1_final_out / i4_final_out_stride below     */
        pf_qpel_interp(
            &s_interp_prms,
            ps_search_node->s_mv.i2_mvx,
            ps_search_node->s_mv.i2_mvy,
            i4_interp_buf_id);

        pred_lx = i4_search_idx;
        ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];

        s_result_prms.u1_pred_lx = pred_lx;
        s_result_prms.ps_search_node_base = ps_search_node;
        /* Source is the weighted input plane of this candidate's reference */
        s_err_prms.pu1_inp =
            ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride;
        s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
        s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;

        /* Carry out the SAD/SATD. This call also does the TU RECURSION.
        Here the tu recursion logic is restricted with the size of the PU*/
        pf_err_compute(&s_err_prms);

        /* For noisy CUs with a non-zero stim multiplier, the distortions in */
        /* the SAD grid are post-processed by the stim injection routine     */
        if(ps_subpel_prms->u1_is_cu_noisy &&
           ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
        {
            ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts(
                s_err_prms.pu1_ref,
                s_err_prms.i4_ref_stride,
                ai4_valid_part_ids,
                ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX,
                ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
                s_err_prms.pi4_sad_grid,
                ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier,
                ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx],
                ps_wt_inp_prms->ai4_shift_val[i1_ref_idx],
                i4_num_valid_parts,
                ps_wt_inp_prms->wpred_log_wdc,
                (BLK_32x32 == e_blk_size) ? 32 : 64);
        }

        /* Update the mv's */
        s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx;
        s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy;

        /* Update best results */
        hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms);
    }

    /************************************************************************/
    /* Update mv cost and total cost for each valid partition in the CU     */
    /************************************************************************/
    for(i = 0; i < TOT_NUM_PARTS; i++)
    {
        if(i4_part_mask & (1 << i))
        {
            WORD32 j;
            WORD32 i4_mv_cost;

            ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i];

            for(j = 0;
                j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge);
                j++)
            {
                /* Note: the node pointer only advances past valid results; */
                /* an invalid (ref idx -1) entry is re-tested on each pass   */
                if(ps_search_node->i1_ref_idx != -1)
                {
                    pred_lx = i4_search_idx;
                    ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];

                    /* Prediction context should now deal with qpel units */
                    HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);

                    ps_search_node->u1_subpel_done = 1;
                    ps_search_node->u1_is_avail = 1;

                    i4_mv_cost =
                        pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL);

                    ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad;
                    ps_search_node->i4_mv_cost = i4_mv_cost;

                    ps_search_node++;
                }
            }
        }
    }

    return num_unique_nodes_cu_merge;
}

/* Intra-part count above which hme_try_merge_high_speed considers an early */
/* exit (each part is 1/16th of the CU under consideration for merge)       */
#define CU_MERGE_MAX_INTRA_PARTS 4

/**
********************************************************************************
*  @fn     hme_try_merge_high_speed
*
*  @brief  Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
*          entity or with partitions for high speed preset
*
*  @param[in,out]  hme_merge_prms_t: Params for CU merge
*
*  @return MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT)
********************************************************************************
*/
CU_MERGE_RESULT_T hme_try_merge_high_speed(
    me_ctxt_t *ps_thrd_ctxt,
    me_frm_ctxt_t *ps_ctxt,
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
    hme_subpel_prms_t *ps_subpel_prms,
    hme_merge_prms_t *ps_merge_prms,
    inter_pu_results_t *ps_pu_results,
    pu_result_t *ps_pu_result)
{
    search_results_t *ps_results_tl, *ps_results_tr;
    search_results_t *ps_results_bl, *ps_results_br;

    S32 i;
    S32 i4_search_idx;
    S32 i4_cost_parent;
    S32 intra_cu_size;
    ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];

    search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
    wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;

    S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
    S32 is_vert = 0, i4_best_part_type = -1;
    S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
    S32 i4_cost_children = 0;
    S32 i4_frm_qstep = ps_ctxt->frm_qstep;
    S32 i4_num_merge_cands_evaluated = 0;
    U08 u1_x_off = ps_results_merge->u1_x_off;
    U08 u1_y_off = ps_results_merge->u1_y_off;
    S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);

    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
        ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
        ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);

    ps_results_tl =
ps_merge_prms->ps_results_tl; ps_results_tr = ps_merge_prms->ps_results_tr; ps_results_bl = ps_merge_prms->ps_results_bl; ps_results_br = ps_merge_prms->ps_results_br; if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED) { i4_part_mask &= ~ENABLE_AMP; } if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25) { i4_part_mask &= ~ENABLE_AMP; i4_part_mask &= ~ENABLE_SMP; } ps_merge_prms->i4_num_pred_dir_actual = 0; /*************************************************************************/ /* The logic for High speed CU merge goes as follows: */ /* */ /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */ /* exceed 7 */ /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */ /* are identical */ /* 3. Find the all unique mvs of best partitions of children CUs and */ /* evaluate partial SATDs (all 17 partitions) for each unique mv. If */ /* best parent cost is lower than sum of the best children costs */ /* return CU_MERGE after seeding the best results else return CU_SPLIT*/ /* */ /*************************************************************************/ /* Count the number of best partitions in child CUs, early exit if > 7 */ if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) || (CU_32x32 == ps_results_merge->e_cu_size)) { S32 num_parts_in_32x32 = 0; WORD32 i4_part_type; if(ps_results_tl->u1_split_flag) { num_parts_in_32x32 += 4; #define COST_INTERCHANGE 0 i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost + ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost + ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost + ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost; } else { i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type; num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type]; i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost; } if(ps_results_tr->u1_split_flag) { 
num_parts_in_32x32 += 4; i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost + ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost + ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost + ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost; } else { i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type; num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type]; i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost; } if(ps_results_bl->u1_split_flag) { num_parts_in_32x32 += 4; i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost + ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost + ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost + ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost; } else { i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type; num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type]; i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost; } if(ps_results_br->u1_split_flag) { num_parts_in_32x32 += 4; i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost + ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost + ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost + ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost; } else { i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type; num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type]; i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost; } if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY)) { return CU_SPLIT; } if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) && (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)) { return CU_SPLIT; } } /* 
Accumulate intra percentage before merge for early CU_SPLIT decision */ /* Note : Each intra part represent a NxN unit of the children CUs */ /* This is essentially 1/16th of the CUsize under consideration for merge */ if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset) { if(CU_64x64 == ps_results_merge->e_cu_size) { i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable) ? 16 : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable; } else { switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4))) { case 0: { i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl ->u1_inter_eval_enable) ? 16 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root ->ps_child_node_tl->u1_intra_eval_enable); break; } case 1: { i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr ->u1_inter_eval_enable) ? 16 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root ->ps_child_node_tr->u1_intra_eval_enable); break; } case 2: { i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl ->u1_inter_eval_enable) ? 16 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root ->ps_child_node_bl->u1_intra_eval_enable); break; } case 3: { i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br ->u1_inter_eval_enable) ? 16 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root ->ps_child_node_br->u1_intra_eval_enable); break; } } } } else { for(i = 0; i < 4; i++) { search_results_t *ps_results = (i == 0) ? ps_results_tl : ((i == 1) ? ps_results_tr : ((i == 2) ? 
ps_results_bl : ps_results_br)); part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0]; if(ps_results->u1_split_flag) { U08 u1_x_off = ps_results->u1_x_off; U08 u1_y_off = ps_results->u1_y_off; U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >> 2; /* Special case to handle 8x8 CUs when 16x16 is split */ ASSERT(ps_results->e_cu_size == CU_16x16); ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0]; if(ps_best_res->as_pu_results[0].pu.b1_intra_flag) i4_intra_parts += 1; ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0]; if(ps_best_res->as_pu_results[0].pu.b1_intra_flag) i4_intra_parts += 1; ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0]; if(ps_best_res->as_pu_results[0].pu.b1_intra_flag) i4_intra_parts += 1; ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0]; if(ps_best_res->as_pu_results[0].pu.b1_intra_flag) i4_intra_parts += 1; } else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag) { i4_intra_parts += 4; } } } /* Determine the max intra CU size indicated by IPE */ intra_cu_size = CU_64x64; if(ps_cur_ipe_ctb->u1_split_flag) { intra_cu_size = CU_32x32; if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag) { intra_cu_size = CU_16x16; } } if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) && (intra_cu_size < ps_results_merge->e_cu_size) && (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) || (i4_intra_parts == 16)) { S32 i4_merge_outcome; i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size) ? 
(!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag && ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu) : (!ps_cur_ipe_ctb->u1_split_flag); i4_merge_outcome = i4_merge_outcome || (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset); i4_merge_outcome = i4_merge_outcome && !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY); if(i4_merge_outcome) { inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results; part_type_results_t *ps_best_result = ps_cu_results->ps_best_results; pu_t *ps_pu = &ps_best_result->as_pu_results->pu; ps_cu_results->u1_num_best_results = 1; ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size; ps_cu_results->u1_x_off = u1_x_off; ps_cu_results->u1_y_off = u1_y_off; ps_best_result->u1_part_type = PRT_2Nx2N; ps_best_result->ai4_tu_split_flag[0] = 0; ps_best_result->ai4_tu_split_flag[1] = 0; ps_best_result->ai4_tu_split_flag[2] = 0; ps_best_result->ai4_tu_split_flag[3] = 0; ps_best_result->i4_tot_cost = (CU_64x64 == ps_results_merge->e_cu_size) ? ps_cur_ipe_ctb->i4_best64x64_intra_cost : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id]; ps_pu->b1_intra_flag = 1; ps_pu->b4_pos_x = u1_x_off >> 2; ps_pu->b4_pos_y = u1_y_off >> 2; ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1; ps_pu->b4_ht = ps_pu->b4_wd; ps_pu->mv.i1_l0_ref_idx = -1; ps_pu->mv.i1_l1_ref_idx = -1; ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV; ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV; ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV; ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV; return CU_MERGED; } else { return CU_SPLIT; } } if(i4_intra_parts) { i4_part_mask = ENABLE_2Nx2N; } ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 
2 : 1; hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL); ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref; ps_merge_prms->i4_num_pred_dir_actual = 0; if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier) { S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1]; S32 i4_num_valid_parts; S32 i4_sigma_array_offset; i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids); /*********************************************************************************************************************************************/ /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values */ /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */ /* increment as there will be 256 4x4 blocks in a CTB */ /*********************************************************************************************************************************************/ i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) + (ps_merge_prms->ps_results_merge->u1_y_off * 4); for(i = 0; i < i4_num_valid_parts; i++) { S32 i4_part_id = ai4_valid_part_ids[i]; hme_compute_final_sigma_of_pu_from_base_blocks( ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset, ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset, au8_final_src_sigmaX, au8_final_src_sigmaXSquared, (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64, 4, i4_part_id, 16); } ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX; ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared; } /*************************************************************************/ /* Loop through all ref idx and pick the merge candts and refine based */ /* on the active partitions. 
At this stage num ref will be 1 or 2 */ /*************************************************************************/ for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++) { S32 i4_cands; U08 u1_pred_dir = 0; if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled)) { u1_pred_dir = i4_search_idx; } else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0) { u1_pred_dir = 1; } else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0) { u1_pred_dir = 0; } else { ASSERT(0); } /* call the function to pick and evaluate the merge candts, given */ /* a ref id and a part mask. */ i4_cands = hme_pick_eval_merge_candts( ps_merge_prms, ps_subpel_prms, u1_pred_dir, i4_best_part_type, is_vert, ps_wt_inp_prms, i4_frm_qstep, ps_cmn_utils_optimised_function_list, ps_me_optimised_function_list); if(i4_cands) { ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] = u1_pred_dir; ps_merge_prms->i4_num_pred_dir_actual++; } i4_num_merge_cands_evaluated += i4_cands; } /* Call the decide_part_types function here */ /* Populate the new PU struct with the results post subpel refinement*/ if(i4_num_merge_cands_evaluated) { inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results; hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr); ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off; ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off; hme_populate_pus( ps_thrd_ctxt, ps_ctxt, ps_subpel_prms, ps_results_merge, ps_cu_results, ps_pu_results, ps_pu_result, ps_merge_prms->ps_inter_ctb_prms, &ps_ctxt->s_wt_pred, ps_merge_prms->ps_layer_ctxt, ps_merge_prms->au1_pred_dir_searched, ps_merge_prms->i4_num_pred_dir_actual); ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64); hme_decide_part_types( ps_cu_results, ps_pu_results, ps_merge_prms->ps_inter_ctb_prms, ps_ctxt, ps_cmn_utils_optimised_function_list, ps_me_optimised_function_list ); 
/*****************************************************************/ /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL. */ /*****************************************************************/ #if DISABLE_INTRA_IN_BPICS if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) && (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))) #endif { if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy)) { hme_insert_intra_nodes_post_bipred( ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep); } } } else { return CU_SPLIT; } /* We check the best result of ref idx 0 and compare for parent vs child */ if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) || (CU_32x32 == ps_results_merge->e_cu_size)) { i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost; /*********************************************************************/ /* Add the cost of signaling the CU tree bits. */ /* Assuming parent is not split, then we signal 1 bit for this parent*/ /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */ /* So, 4*lambda is extra for children cost. 
:Lokesh */ /*********************************************************************/ { pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0]; i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift)); } if(i4_cost_parent < i4_cost_children) { return CU_MERGED; } return CU_SPLIT; } else { return CU_MERGED; } } #define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift) \ { \ (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift); \ (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift); \ *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx; \ } /** ******************************************************************************** * @fn hme_update_mv_bank_noencode(search_results_t *ps_search_results, * layer_mv_t *ps_layer_mv, * S32 i4_search_blk_x, * S32 i4_search_blk_y, * mvbank_update_prms_t *ps_prms) * * @brief Updates the mv bank in case there is no further encodign to be done * * @param[in] ps_search_results: contains results for the block just searched * * @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things * * @param[in] i4_search_blk_x : col num of blk being searched * * @param[in] i4_search_blk_y : row num of blk being searched * * @param[in] ps_prms : contains certain parameters which govern how updatedone * * @return None ******************************************************************************** */ void hme_update_mv_bank_noencode( search_results_t *ps_search_results, layer_mv_t *ps_layer_mv, S32 i4_search_blk_x, S32 i4_search_blk_y, mvbank_update_prms_t *ps_prms) { hme_mv_t *ps_mv; hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4; S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4; S32 i4_blk_x, i4_blk_y, i4_offset; S32 i4_j, i4_ref_id; search_node_t *ps_search_node; search_node_t *ps_search_node_8x8, *ps_search_node_4x4_1; search_node_t *ps_search_node_4x4_2, *ps_search_node_4x4_3; search_node_t *ps_search_node_4x4_4; i4_blk_x = i4_search_blk_x << 
ps_prms->i4_shift; i4_blk_y = i4_search_blk_y << ps_prms->i4_shift; i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row; i4_offset *= ps_layer_mv->i4_num_mvs_per_blk; /* Identify the correct offset in the mvbank and the reference id buf */ ps_mv = ps_layer_mv->ps_mv + i4_offset; pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset; /*************************************************************************/ /* Supposing we store the mvs in the same blk size as we searched (e.g. */ /* we searched 8x8 blks and store results for 8x8 blks), then we can */ /* do a straightforward single update of results. This will have a 1-1 */ /* correspondence. */ /*************************************************************************/ if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size) { for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++) { ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++) { COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, 0); ps_mv++; pi1_ref_idx++; ps_search_node++; } } return; } /*************************************************************************/ /* Case where search blk size is 8x8, but we update 4x4 results. In this */ /* case, we need to have NxN partitions enabled in search. */ /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */ /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/ /*************************************************************************/ ASSERT(ps_layer_mv->e_blk_size == BLK_4x4); ASSERT(ps_prms->e_search_blk_size == BLK_8x8); ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN)); /*************************************************************************/ /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */ /* hence the below check. 
*/ /*************************************************************************/ ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1); ps_mv1 = ps_mv; ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk; ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row); ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk); pi1_ref_idx1 = pi1_ref_idx; pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk; pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row); pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk); for(i4_ref_id = 0; i4_ref_id < (S32)ps_search_results->u1_num_active_ref; i4_ref_id++) { ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; ps_search_node_4x4_1 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL]; ps_search_node_4x4_2 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TR]; ps_search_node_4x4_3 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BL]; ps_search_node_4x4_4 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BR]; COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0); ps_mv1++; pi1_ref_idx1++; ps_search_node_4x4_1++; COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0); ps_mv2++; pi1_ref_idx2++; ps_search_node_4x4_2++; COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0); ps_mv3++; pi1_ref_idx3++; ps_search_node_4x4_3++; COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0); ps_mv4++; pi1_ref_idx4++; ps_search_node_4x4_4++; if(ps_layer_mv->i4_num_mvs_per_ref > 1) { COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_8x8, 0); ps_mv1++; pi1_ref_idx1++; COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_8x8, 0); ps_mv2++; pi1_ref_idx2++; COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_8x8, 0); ps_mv3++; pi1_ref_idx3++; COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_8x8, 0); ps_mv4++; pi1_ref_idx4++; } for(i4_j = 2; i4_j < 
ps_layer_mv->i4_num_mvs_per_ref; i4_j++) { COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0); ps_mv1++; pi1_ref_idx1++; ps_search_node_4x4_1++; COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0); ps_mv2++; pi1_ref_idx2++; ps_search_node_4x4_2++; COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0); ps_mv3++; pi1_ref_idx3++; ps_search_node_4x4_3++; COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0); ps_mv4++; pi1_ref_idx4++; ps_search_node_4x4_4++; } } } void hme_update_mv_bank_encode( search_results_t *ps_search_results, layer_mv_t *ps_layer_mv, S32 i4_search_blk_x, S32 i4_search_blk_y, mvbank_update_prms_t *ps_prms, U08 *pu1_pred_dir_searched, S32 i4_num_act_ref_l0) { hme_mv_t *ps_mv; hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4; S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4; S32 i4_blk_x, i4_blk_y, i4_offset; S32 j, i, num_parts; search_node_t *ps_search_node_tl, *ps_search_node_tr; search_node_t *ps_search_node_bl, *ps_search_node_br; search_node_t s_zero_mv; WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type; i4_blk_x = i4_search_blk_x << ps_prms->i4_shift; i4_blk_y = i4_search_blk_y << ps_prms->i4_shift; i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row; i4_offset *= ps_layer_mv->i4_num_mvs_per_blk; /* Identify the correct offset in the mvbank and the reference id buf */ ps_mv = ps_layer_mv->ps_mv + i4_offset; pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset; ASSERT(ps_layer_mv->e_blk_size == BLK_8x8); ASSERT(ps_prms->e_search_blk_size == BLK_16x16); /*************************************************************************/ /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */ /* hence the below check. 
*/ /*************************************************************************/ ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results); ps_mv1 = ps_mv; ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk; ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row); ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk); pi1_ref_idx1 = pi1_ref_idx; pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk; pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row); pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk); /* Initialize zero mv: default mv used for intra mvs */ s_zero_mv.s_mv.i2_mvx = 0; s_zero_mv.s_mv.i2_mvy = 0; s_zero_mv.i1_ref_idx = 0; if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) && (ps_search_results->i4_part_mask & ENABLE_NxN)) { i4_part_type = PRT_NxN; } for(i = 0; i < ps_prms->i4_num_ref; i++) { for(j = 0; j < ps_layer_mv->i4_num_mvs_per_ref; j++) { WORD32 i4_part_id = ge_part_type_to_part_id[i4_part_type][0]; num_parts = gau1_num_parts_in_part_type[i4_part_type]; ps_search_node_tl = ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id]; if(num_parts == 1) { ps_search_node_tr = ps_search_node_tl; ps_search_node_bl = ps_search_node_tl; ps_search_node_br = ps_search_node_tl; } else if(num_parts == 2) { /* For vertically oriented partitions, tl, bl pt to same result */ /* For horizontally oriented partition, tl, tr pt to same result */ /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */ /* result, e.g. for 4x16L. 
Here left 2 8x8 have the 4x16L partition */ /* and right 2 8x8 have 12x16R partition */ if(gau1_is_vert_part[i4_part_type]) { ps_search_node_tr = ps_search_results ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1]; ps_search_node_bl = ps_search_node_tl; } else { ps_search_node_tr = ps_search_node_tl; ps_search_node_bl = ps_search_results ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1]; } ps_search_node_br = ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1]; } else { /* 4 unique results */ ps_search_node_tr = ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1]; ps_search_node_bl = ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2]; ps_search_node_br = ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3]; } if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV) ps_search_node_tl++; if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV) ps_search_node_tr++; if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV) ps_search_node_bl++; if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV) ps_search_node_br++; COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0); ps_mv1++; pi1_ref_idx1++; COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0); ps_mv2++; pi1_ref_idx2++; COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0); ps_mv3++; pi1_ref_idx3++; COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0); ps_mv4++; pi1_ref_idx4++; if(ps_prms->i4_num_results_to_store > 1) { ps_search_node_tl = &ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id][1]; if(num_parts == 1) { ps_search_node_tr = ps_search_node_tl; ps_search_node_bl = ps_search_node_tl; ps_search_node_br = ps_search_node_tl; } else if(num_parts == 2) { /* For vertically oriented partitions, tl, bl pt to same result */ /* For horizontally oriented partition, tl, tr pt to same result */ /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */ /* 
result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */ /* and right 2 8x8 have 12x16R partition */ if(gau1_is_vert_part[i4_part_type]) { ps_search_node_tr = &ps_search_results ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1]; ps_search_node_bl = ps_search_node_tl; } else { ps_search_node_tr = ps_search_node_tl; ps_search_node_bl = &ps_search_results ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1]; } ps_search_node_br = &ps_search_results ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1]; } else { /* 4 unique results */ ps_search_node_tr = &ps_search_results ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1]; ps_search_node_bl = &ps_search_results ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2][1]; ps_search_node_br = &ps_search_results ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3][1]; } if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV) ps_search_node_tl++; if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV) ps_search_node_tr++; if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV) ps_search_node_bl++; if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV) ps_search_node_br++; COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0); ps_mv1++; pi1_ref_idx1++; COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0); ps_mv2++; pi1_ref_idx2++; COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0); ps_mv3++; pi1_ref_idx3++; COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0); ps_mv4++; pi1_ref_idx4++; } } } } /** ******************************************************************************** * @fn hme_update_mv_bank_noencode(search_results_t *ps_search_results, * layer_mv_t *ps_layer_mv, * S32 i4_search_blk_x, * S32 i4_search_blk_y, * mvbank_update_prms_t *ps_prms) * * @brief Updates the mv bank in case there is no further encodign to be done * * @param[in] ps_search_results: contains results for the block just searched * * @param[in,out] ps_layer_mv : Has 
pointer to mv bank amongst other things * * @param[in] i4_search_blk_x : col num of blk being searched * * @param[in] i4_search_blk_y : row num of blk being searched * * @param[in] ps_prms : contains certain parameters which govern how updatedone * * @return None ******************************************************************************** */ void hme_update_mv_bank_in_l1_me( search_results_t *ps_search_results, layer_mv_t *ps_layer_mv, S32 i4_search_blk_x, S32 i4_search_blk_y, mvbank_update_prms_t *ps_prms) { hme_mv_t *ps_mv; hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4; S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4; S32 i4_blk_x, i4_blk_y, i4_offset; S32 i4_j, i4_ref_id; search_node_t *ps_search_node; search_node_t *ps_search_node_8x8, *ps_search_node_4x4; i4_blk_x = i4_search_blk_x << ps_prms->i4_shift; i4_blk_y = i4_search_blk_y << ps_prms->i4_shift; i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row; i4_offset *= ps_layer_mv->i4_num_mvs_per_blk; /* Identify the correct offset in the mvbank and the reference id buf */ ps_mv = ps_layer_mv->ps_mv + i4_offset; pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset; /*************************************************************************/ /* Supposing we store the mvs in the same blk size as we searched (e.g. */ /* we searched 8x8 blks and store results for 8x8 blks), then we can */ /* do a straightforward single update of results. This will have a 1-1 */ /* correspondence. 
*/ /*************************************************************************/ if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size) { search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2]; hme_mv_t *ps_mv_l0_root = ps_mv; hme_mv_t *ps_mv_l1_root = ps_mv + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); U32 u4_num_l0_results_updated = 0; U32 u4_num_l1_results_updated = 0; S08 *pi1_ref_idx_l0_root = pi1_ref_idx; S08 *pi1_ref_idx_l1_root = pi1_ref_idx_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++) { U32 *pu4_num_results_updated; search_node_t **pps_result_nodes; U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id]; if(u1_pred_dir_of_cur_ref) { pu4_num_results_updated = &u4_num_l1_results_updated; pps_result_nodes = &aps_result_nodes_sorted[1][0]; } else { pu4_num_results_updated = &u4_num_l0_results_updated; pps_result_nodes = &aps_result_nodes_sorted[0][0]; } ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++) { hme_add_new_node_to_a_sorted_array( &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0); ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id); (*pu4_num_results_updated)++; } } for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++) { COPY_SEARCH_RESULT( &ps_mv_l0_root[i4_j], &pi1_ref_idx_l0_root[i4_j], aps_result_nodes_sorted[0][i4_j], 0); } for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++) { COPY_SEARCH_RESULT( &ps_mv_l1_root[i4_j], &pi1_ref_idx_l1_root[i4_j], aps_result_nodes_sorted[1][i4_j], 0); } return; } /*************************************************************************/ /* Case where search blk size is 8x8, but we update 4x4 results. In this */ /* case, we need to have NxN partitions enabled in search. 
*/ /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */ /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/ /*************************************************************************/ ASSERT(ps_layer_mv->e_blk_size == BLK_4x4); ASSERT(ps_prms->e_search_blk_size == BLK_8x8); ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN)); /*************************************************************************/ /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */ /* hence the below check. */ /*************************************************************************/ ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1); ps_mv1 = ps_mv; ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk; ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row); ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk); pi1_ref_idx1 = pi1_ref_idx; pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk; pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row); pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk); { search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 4]; U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * 4]; S32 i; hme_mv_t *ps_mv1_l0_root = ps_mv1; hme_mv_t *ps_mv1_l1_root = ps_mv1 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); hme_mv_t *ps_mv2_l0_root = ps_mv2; hme_mv_t *ps_mv2_l1_root = ps_mv2 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); hme_mv_t *ps_mv3_l0_root = ps_mv3; hme_mv_t *ps_mv3_l1_root = ps_mv3 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); hme_mv_t *ps_mv4_l0_root = ps_mv4; hme_mv_t *ps_mv4_l1_root = ps_mv4 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); U32 u4_num_l0_results_updated = 0; U32 u4_num_l1_results_updated = 0; S08 *pi1_ref_idx1_l0_root = pi1_ref_idx1; S08 *pi1_ref_idx1_l1_root = pi1_ref_idx1_l0_root + 
(ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); S08 *pi1_ref_idx2_l0_root = pi1_ref_idx2; S08 *pi1_ref_idx2_l1_root = pi1_ref_idx2_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); S08 *pi1_ref_idx3_l0_root = pi1_ref_idx3; S08 *pi1_ref_idx3_l1_root = pi1_ref_idx3_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); S08 *pi1_ref_idx4_l0_root = pi1_ref_idx4; S08 *pi1_ref_idx4_l1_root = pi1_ref_idx4_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); for(i = 0; i < 4; i++) { hme_mv_t *ps_mv_l0_root; hme_mv_t *ps_mv_l1_root; S08 *pi1_ref_idx_l0_root; S08 *pi1_ref_idx_l1_root; for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++) { U32 *pu4_num_results_updated; search_node_t **pps_result_nodes; U08 *pu1_cost_shifts_for_sorted_node; U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id]; if(u1_pred_dir_of_cur_ref) { pu4_num_results_updated = &u4_num_l1_results_updated; pps_result_nodes = &aps_result_nodes_sorted[1][0]; pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0]; } else { pu4_num_results_updated = &u4_num_l0_results_updated; pps_result_nodes = &aps_result_nodes_sorted[0][0]; pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0]; } ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; ps_search_node_4x4 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i]; for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++) { hme_add_new_node_to_a_sorted_array( &ps_search_node_4x4[i4_j], pps_result_nodes, pu1_cost_shifts_for_sorted_node, *pu4_num_results_updated, 0); (*pu4_num_results_updated)++; hme_add_new_node_to_a_sorted_array( &ps_search_node_8x8[i4_j], pps_result_nodes, pu1_cost_shifts_for_sorted_node, *pu4_num_results_updated, 2); (*pu4_num_results_updated)++; } } switch(i) { case 0: { ps_mv_l0_root = ps_mv1_l0_root; ps_mv_l1_root = 
ps_mv1_l1_root; pi1_ref_idx_l0_root = pi1_ref_idx1_l0_root; pi1_ref_idx_l1_root = pi1_ref_idx1_l1_root; break; } case 1: { ps_mv_l0_root = ps_mv2_l0_root; ps_mv_l1_root = ps_mv2_l1_root; pi1_ref_idx_l0_root = pi1_ref_idx2_l0_root; pi1_ref_idx_l1_root = pi1_ref_idx2_l1_root; break; } case 2: { ps_mv_l0_root = ps_mv3_l0_root; ps_mv_l1_root = ps_mv3_l1_root; pi1_ref_idx_l0_root = pi1_ref_idx3_l0_root; pi1_ref_idx_l1_root = pi1_ref_idx3_l1_root; break; } case 3: { ps_mv_l0_root = ps_mv4_l0_root; ps_mv_l1_root = ps_mv4_l1_root; pi1_ref_idx_l0_root = pi1_ref_idx4_l0_root; pi1_ref_idx_l1_root = pi1_ref_idx4_l1_root; break; } } u4_num_l0_results_updated = MIN((S32)u4_num_l0_results_updated, ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); u4_num_l1_results_updated = MIN((S32)u4_num_l1_results_updated, ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref); for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++) { COPY_SEARCH_RESULT( &ps_mv_l0_root[i4_j], &pi1_ref_idx_l0_root[i4_j], aps_result_nodes_sorted[0][i4_j], 0); } for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++) { COPY_SEARCH_RESULT( &ps_mv_l1_root[i4_j], &pi1_ref_idx_l1_root[i4_j], aps_result_nodes_sorted[1][i4_j], 0); } } } } /** ****************************************************************************** * @brief Scales motion vector component projecte from a diff layer in same * picture (so no ref id related delta poc scaling required) ****************************************************************************** */ #define SCALE_MV_COMP_RES(mvcomp_p, dim_c, dim_p) \ ((((mvcomp_p) * (dim_c)) + ((SIGN((mvcomp_p)) * (dim_p)) >> 1)) / (dim_p)) /** ******************************************************************************** * @fn hme_project_coloc_candt(search_node_t *ps_search_node, * layer_ctxt_t *ps_curr_layer, * layer_ctxt_t *ps_coarse_layer, * S32 i4_pos_x, * S32 i4_pos_y, * S08 i1_ref_id, * S08 i1_result_id) * * @brief From a coarser layer, projects a 
candidated situated at "colocated" * position in the picture (e.g. given x, y it will be x/2, y/2 dyadic * * @param[out] ps_search_node : contains the projected result * * @param[in] ps_curr_layer : current layer context * * @param[in] ps_coarse_layer : coarser layer context * * @param[in] i4_pos_x : x Position where mv is required (w.r.t. curr layer) * * @param[in] i4_pos_y : y Position where mv is required (w.r.t. curr layer) * * @param[in] i1_ref_id : reference id for which the candidate required * * @param[in] i4_result_id : result id for which the candidate required * (0 : best result, 1 : next best) * * @return None ******************************************************************************** */ void hme_project_coloc_candt( search_node_t *ps_search_node, layer_ctxt_t *ps_curr_layer, layer_ctxt_t *ps_coarse_layer, S32 i4_pos_x, S32 i4_pos_y, S08 i1_ref_id, S32 i4_result_id) { S32 wd_c, ht_c, wd_p, ht_p; S32 blksize_p, blk_x, blk_y, i4_offset; layer_mv_t *ps_layer_mvbank; hme_mv_t *ps_mv; S08 *pi1_ref_idx; /* Width and ht of current and prev layers */ wd_c = ps_curr_layer->i4_wd; ht_c = ps_curr_layer->i4_ht; wd_p = ps_coarse_layer->i4_wd; ht_p = ps_coarse_layer->i4_ht; ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank; blksize_p = (S32)gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size]; /* Safety check to avoid uninitialized access across temporal layers */ i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p)); i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p)); /* Project the positions to prev layer */ /* TODO: convert these to scale factors at pic level */ blk_x = (i4_pos_x * wd_p) / (wd_c * blksize_p); blk_y = (i4_pos_y * ht_p) / (ht_c * blksize_p); /* Pick up the mvs from the location */ i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk); i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y); ps_mv = ps_layer_mvbank->ps_mv + i4_offset; pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; ps_mv += (i1_ref_id * 
ps_layer_mvbank->i4_num_mvs_per_ref); pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref); ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_x, wd_c, wd_p); ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_y, ht_c, ht_p); ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id]; ps_search_node->u1_subpel_done = 0; if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV)) { ps_search_node->i1_ref_idx = i1_ref_id; ps_search_node->s_mv.i2_mvx = 0; ps_search_node->s_mv.i2_mvy = 0; } } /** ******************************************************************************** * @fn hme_project_coloc_candt_dyadic(search_node_t *ps_search_node, * layer_ctxt_t *ps_curr_layer, * layer_ctxt_t *ps_coarse_layer, * S32 i4_pos_x, * S32 i4_pos_y, * S08 i1_ref_id, * S08 i1_result_id) * * @brief From a coarser layer, projects a candidated situated at "colocated" * position in the picture when the ratios are dyadic * * @param[out] ps_search_node : contains the projected result * * @param[in] ps_curr_layer : current layer context * * @param[in] ps_coarse_layer : coarser layer context * * @param[in] i4_pos_x : x Position where mv is required (w.r.t. curr layer) * * @param[in] i4_pos_y : y Position where mv is required (w.r.t. 
curr layer) * * @param[in] i1_ref_id : reference id for which the candidate required * * @param[in] i4_result_id : result id for which the candidate required * (0 : best result, 1 : next best) * * @return None ******************************************************************************** */ void hme_project_coloc_candt_dyadic( search_node_t *ps_search_node, layer_ctxt_t *ps_curr_layer, layer_ctxt_t *ps_coarse_layer, S32 i4_pos_x, S32 i4_pos_y, S08 i1_ref_id, S32 i4_result_id) { S32 wd_c, ht_c, wd_p, ht_p; S32 blksize_p, blk_x, blk_y, i4_offset; layer_mv_t *ps_layer_mvbank; hme_mv_t *ps_mv; S08 *pi1_ref_idx; /* Width and ht of current and prev layers */ wd_c = ps_curr_layer->i4_wd; ht_c = ps_curr_layer->i4_ht; wd_p = ps_coarse_layer->i4_wd; ht_p = ps_coarse_layer->i4_ht; ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank; /* blksize_p = log2(wd) + 1 */ blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size]; /* ASSERT for valid sizes */ ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5)); /* Safety check to avoid uninitialized access across temporal layers */ i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p)); i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p)); /* Project the positions to prev layer */ /* TODO: convert these to scale factors at pic level */ blk_x = i4_pos_x >> blksize_p; // (2 * blksize_p); blk_y = i4_pos_y >> blksize_p; // (2 * blksize_p); /* Pick up the mvs from the location */ i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk); i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y); ps_mv = ps_layer_mvbank->ps_mv + i4_offset; pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref); pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref); ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1; ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1; ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id]; 
if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV)) { ps_search_node->i1_ref_idx = i1_ref_id; ps_search_node->s_mv.i2_mvx = 0; ps_search_node->s_mv.i2_mvy = 0; } } void hme_project_coloc_candt_dyadic_implicit( search_node_t *ps_search_node, layer_ctxt_t *ps_curr_layer, layer_ctxt_t *ps_coarse_layer, S32 i4_pos_x, S32 i4_pos_y, S32 i4_num_act_ref_l0, U08 u1_pred_dir, U08 u1_default_ref_id, S32 i4_result_id) { S32 wd_c, ht_c, wd_p, ht_p; S32 blksize_p, blk_x, blk_y, i4_offset; layer_mv_t *ps_layer_mvbank; hme_mv_t *ps_mv; S08 *pi1_ref_idx; /* Width and ht of current and prev layers */ wd_c = ps_curr_layer->i4_wd; ht_c = ps_curr_layer->i4_ht; wd_p = ps_coarse_layer->i4_wd; ht_p = ps_coarse_layer->i4_ht; ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank; blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size]; /* ASSERT for valid sizes */ ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5)); /* Safety check to avoid uninitialized access across temporal layers */ i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p)); i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p)); /* Project the positions to prev layer */ /* TODO: convert these to scale factors at pic level */ blk_x = i4_pos_x >> blksize_p; // (2 * blksize_p); blk_y = i4_pos_y >> blksize_p; // (2 * blksize_p); /* Pick up the mvs from the location */ i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk); i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y); ps_mv = ps_layer_mvbank->ps_mv + i4_offset; pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; if(u1_pred_dir == 1) { ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref); pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref); } ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1; ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1; ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id]; if((ps_search_node->i1_ref_idx < 0) || 
(ps_search_node->s_mv.i2_mvx == INTRA_MV)) { ps_search_node->i1_ref_idx = u1_default_ref_id; ps_search_node->s_mv.i2_mvx = 0; ps_search_node->s_mv.i2_mvy = 0; } } #define SCALE_RANGE_PRMS(prm1, prm2, shift) \ { \ prm1.i2_min_x = prm2.i2_min_x << shift; \ prm1.i2_max_x = prm2.i2_max_x << shift; \ prm1.i2_min_y = prm2.i2_min_y << shift; \ prm1.i2_max_y = prm2.i2_max_y << shift; \ } #define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift) \ { \ prm1->i2_min_x = prm2->i2_min_x << shift; \ prm1->i2_max_x = prm2->i2_max_x << shift; \ prm1->i2_min_y = prm2->i2_min_y << shift; \ prm1->i2_max_y = prm2->i2_max_y << shift; \ } /** ******************************************************************************** * @fn void hme_refine_frm_init(me_ctxt_t *ps_ctxt, * refine_layer_prms_t *ps_refine_prms) * * @brief Frame init of refinemnet layers in ME * * @param[in,out] ps_ctxt: ME Handle * * @param[in] ps_refine_prms : refinement layer prms * * @return None ******************************************************************************** */ void hme_refine_frm_init( layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer) { /* local variables */ BLK_SIZE_T e_result_blk_size = BLK_8x8; S32 i4_num_ref_fpel, i4_num_ref_prev_layer; i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref; if(ps_refine_prms->explicit_ref) { i4_num_ref_fpel = i4_num_ref_prev_layer; } else { i4_num_ref_fpel = 2; } if(ps_refine_prms->i4_enable_4x4_part) { e_result_blk_size = BLK_4x4; } i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer); hme_init_mv_bank( ps_curr_layer, e_result_blk_size, i4_num_ref_fpel, ps_refine_prms->i4_num_mvbank_results, ps_refine_prms->i4_layer_id > 0 ? 
0 : 1); } #if 1 //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION /** ******************************************************************************** * @fn void hme_init_clusters_16x16 * ( * cluster_16x16_blk_t *ps_cluster_blk_16x16 * ) * * @brief Intialisations for the structs used in clustering algorithm * * @param[in/out] ps_cluster_blk_16x16: pointer to structure containing clusters * of 16x16 block * * @return None ******************************************************************************** */ static __inline void hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled) { S32 i; ps_cluster_blk_16x16->num_clusters = 0; ps_cluster_blk_16x16->intra_mv_area = 0; ps_cluster_blk_16x16->best_inter_cost = 0; for(i = 0; i < MAX_NUM_CLUSTERS_16x16; i++) { ps_cluster_blk_16x16->as_cluster_data[i].max_dist_from_centroid = bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16; ps_cluster_blk_16x16->as_cluster_data[i].is_valid_cluster = 0; ps_cluster_blk_16x16->as_cluster_data[i].bi_mv_pixel_area = 0; ps_cluster_blk_16x16->as_cluster_data[i].uni_mv_pixel_area = 0; } for(i = 0; i < MAX_NUM_REF; i++) { ps_cluster_blk_16x16->au1_num_clusters[i] = 0; } } /** ******************************************************************************** * @fn void hme_init_clusters_32x32 * ( * cluster_32x32_blk_t *ps_cluster_blk_32x32 * ) * * @brief Intialisations for the structs used in clustering algorithm * * @param[in/out] ps_cluster_blk_32x32: pointer to structure containing clusters * of 32x32 block * * @return None ******************************************************************************** */ static __inline void hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled) { S32 i; ps_cluster_blk_32x32->num_clusters = 0; ps_cluster_blk_32x32->intra_mv_area = 0; ps_cluster_blk_32x32->best_alt_ref = -1; ps_cluster_blk_32x32->best_uni_ref = -1; ps_cluster_blk_32x32->best_inter_cost = 0; 
ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0; for(i = 0; i < MAX_NUM_CLUSTERS_32x32; i++) { ps_cluster_blk_32x32->as_cluster_data[i].max_dist_from_centroid = bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32; ps_cluster_blk_32x32->as_cluster_data[i].is_valid_cluster = 0; ps_cluster_blk_32x32->as_cluster_data[i].bi_mv_pixel_area = 0; ps_cluster_blk_32x32->as_cluster_data[i].uni_mv_pixel_area = 0; } for(i = 0; i < MAX_NUM_REF; i++) { ps_cluster_blk_32x32->au1_num_clusters[i] = 0; } } /** ******************************************************************************** * @fn void hme_init_clusters_64x64 * ( * cluster_64x64_blk_t *ps_cluster_blk_64x64 * ) * * @brief Intialisations for the structs used in clustering algorithm * * @param[in/out] ps_cluster_blk_64x64: pointer to structure containing clusters * of 64x64 block * * @return None ******************************************************************************** */ static __inline void hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled) { S32 i; ps_cluster_blk_64x64->num_clusters = 0; ps_cluster_blk_64x64->intra_mv_area = 0; ps_cluster_blk_64x64->best_alt_ref = -1; ps_cluster_blk_64x64->best_uni_ref = -1; ps_cluster_blk_64x64->best_inter_cost = 0; for(i = 0; i < MAX_NUM_CLUSTERS_64x64; i++) { ps_cluster_blk_64x64->as_cluster_data[i].max_dist_from_centroid = bidir_enabled ? 
MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64; ps_cluster_blk_64x64->as_cluster_data[i].is_valid_cluster = 0; ps_cluster_blk_64x64->as_cluster_data[i].bi_mv_pixel_area = 0; ps_cluster_blk_64x64->as_cluster_data[i].uni_mv_pixel_area = 0; } for(i = 0; i < MAX_NUM_REF; i++) { ps_cluster_blk_64x64->au1_num_clusters[i] = 0; } } /** ******************************************************************************** * @fn void hme_sort_and_assign_top_ref_ids_areawise * ( * ctb_cluster_info_t *ps_ctb_cluster_info * ) * * @brief Finds best_uni_ref and best_alt_ref * * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data * * @param[in] bidir_enabled: flag that indicates whether or not bi-pred is * enabled * * @param[in] block_width: width of the block in pels * * @param[in] e_cu_pos: position of the block within the CTB * * @return None ******************************************************************************** */ void hme_sort_and_assign_top_ref_ids_areawise( ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos) { cluster_32x32_blk_t *ps_32x32 = NULL; cluster_64x64_blk_t *ps_64x64 = NULL; cluster_data_t *ps_data; S32 j, k; S32 ai4_uni_area[MAX_NUM_REF]; S32 ai4_bi_area[MAX_NUM_REF]; S32 ai4_ref_id_found[MAX_NUM_REF]; S32 ai4_ref_id[MAX_NUM_REF]; S32 best_uni_ref = -1, best_alt_ref = -1; S32 num_clusters; S32 num_ref = 0; S32 num_clusters_evaluated = 0; S32 is_cur_blk_valid; if(32 == block_width) { is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) || 0; ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos]; num_clusters = ps_32x32->num_clusters; ps_data = &ps_32x32->as_cluster_data[0]; } else { is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf); ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk; num_clusters = ps_64x64->num_clusters; ps_data = &ps_64x64->as_cluster_data[0]; } #if !ENABLE_4CTB_EVALUATION if((num_clusters > 
MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)) { return; } #endif if(num_clusters == 0) { return; } else if(!is_cur_blk_valid) { return; } memset(ai4_uni_area, 0, sizeof(S32) * MAX_NUM_REF); memset(ai4_bi_area, 0, sizeof(S32) * MAX_NUM_REF); memset(ai4_ref_id_found, 0, sizeof(S32) * MAX_NUM_REF); memset(ai4_ref_id, -1, sizeof(S32) * MAX_NUM_REF); for(j = 0; num_clusters_evaluated < num_clusters; j++, ps_data++) { S32 ref_id; if(!ps_data->is_valid_cluster) { continue; } ref_id = ps_data->ref_id; num_clusters_evaluated++; ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area; ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area; if(!ai4_ref_id_found[ref_id]) { ai4_ref_id[ref_id] = ref_id; ai4_ref_id_found[ref_id] = 1; num_ref++; } } { S32 ai4_ref_id_temp[MAX_NUM_REF]; memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(S32) * MAX_NUM_REF); for(k = 1; k < MAX_NUM_REF; k++) { if(ai4_uni_area[k] > ai4_uni_area[0]) { SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32); SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32); } } best_uni_ref = ai4_ref_id_temp[0]; } if(bidir_enabled) { for(k = 1; k < MAX_NUM_REF; k++) { if(ai4_bi_area[k] > ai4_bi_area[0]) { SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32); SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32); } } if(!ai4_bi_area[0]) { best_alt_ref = -1; if(32 == block_width) { SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref); } else { SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref); } return; } if(best_uni_ref == ai4_ref_id[0]) { for(k = 2; k < MAX_NUM_REF; k++) { if(ai4_bi_area[k] > ai4_bi_area[1]) { SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32); SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32); } } best_alt_ref = ai4_ref_id[1]; } else { best_alt_ref = ai4_ref_id[0]; } } if(32 == block_width) { SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref); } else { SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref); } } /** 
******************************************************************************** * @fn void hme_find_top_ref_ids * ( * ctb_cluster_info_t *ps_ctb_cluster_info * ) * * @brief Finds best_uni_ref and best_alt_ref * * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data * * @return None ******************************************************************************** */ void hme_find_top_ref_ids( ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width) { S32 i; if(32 == block_width) { for(i = 0; i < 4; i++) { hme_sort_and_assign_top_ref_ids_areawise( ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)i); } } else if(64 == block_width) { hme_sort_and_assign_top_ref_ids_areawise( ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA); } } /** ******************************************************************************** * @fn void hme_boot_out_outlier * ( * ctb_cluster_info_t *ps_ctb_cluster_info * ) * * @brief Removes outlier clusters before CU tree population * * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data * * @return None ******************************************************************************** */ void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width) { cluster_32x32_blk_t *ps_32x32; S32 i; cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0]; S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold; if(32 == blk_width) { /* 32x32 clusters */ for(i = 0; i < 4; i++) { ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i]; if(ps_32x32->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX) { BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold); } } } else if(64 == blk_width) { /* 64x64 clusters */ if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX) { BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold); } } } /** ******************************************************************************** * @fn void hme_update_cluster_attributes * ( * cluster_data_t 
*ps_cluster_data,
*                   S32 mvx,
*                   S32 mvy,
*                   S32 mvdx,
*                   S32 mvdy,
*                   S32 ref_id,
*                   S32 sdi,
*                   U08 is_part_of_bi,
*                   PART_ID_T e_part_id
*               )
*
*  @brief  Adds one motion vector to an existing cluster: widens the
*          cluster's min/max extent, appends the mv to as_mv[], recomputes
*          the Q8 centroid as the running mean and accumulates pixel areas
*
*  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
*
*  @param[in]  mvx : x co-ordinate of the motion vector
*
*  @param[in]  mvy : y co-ordinate of the motion vector
*
*  @param[in]  mvdx : signed x distance; a positive value may lower min_x,
*                     a negative value may raise max_x
*
*  @param[in]  mvdy : signed y distance; a positive value may lower min_y,
*                     a negative value may raise max_y
*
*  @param[in]  ref_id : ref_id of the motion vector (not referenced in the
*                       body of this function)
*
*  @param[in]  sdi : stored alongside the mv in as_mv[]
*
*  @param[in]  is_part_of_bi : non-zero adds the partition area to
*                       bi_mv_pixel_area, zero to uni_mv_pixel_area
*
*  @param[in]  e_part_id : partition id of the motion vector; indexes
*                       gai4_partition_area[]
*
*  @return None
********************************************************************************
*/
static __inline void hme_update_cluster_attributes(
    cluster_data_t *ps_cluster_data,
    S32 mvx,
    S32 mvy,
    S32 mvdx,
    S32 mvdy,
    S32 ref_id,
    S32 sdi,
    U08 is_part_of_bi,
    PART_ID_T e_part_id)
{
    LWORD64 i8_mvx_sum_q8;
    LWORD64 i8_mvy_sum_q8;

    /* Centroid before this mv is added */
    S32 centroid_posx_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
    S32 centroid_posy_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;

    /* Widen the x extent; the sign of mvdx selects which bound is tested */
    if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
    {
        ps_cluster_data->min_x = mvx;
    }
    else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
    {
        ps_cluster_data->max_x = mvx;
    }

    /* Widen the y extent; the sign of mvdy selects which bound is tested */
    if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
    {
        ps_cluster_data->min_y = mvy;
    }
    else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
    {
        ps_cluster_data->max_y = mvy;
    }

    /* Append the mv at index num_mvs (num_mvs is incremented further down) */
    /* NOTE(review): no capacity check on as_mv[] is visible here - confirm  */
    /* the array is sized for the worst case                                 */
    {
        S32 num_mvs = ps_cluster_data->num_mvs;

        ps_cluster_data->as_mv[num_mvs].pixel_count = gai4_partition_area[e_part_id];
        ps_cluster_data->as_mv[num_mvs].mvx = mvx;
        ps_cluster_data->as_mv[num_mvs].mvy = mvy;

        /***************************/
        ps_cluster_data->as_mv[num_mvs].is_uni = !is_part_of_bi;
        ps_cluster_data->as_mv[num_mvs].sdi = sdi;
        /**************************/
    }

    /* Updation of centroid */
    /* new centroid = (old centroid * old count + mv in Q8) / new count */
    {
        i8_mvx_sum_q8 = (LWORD64)centroid_posx_q8 * ps_cluster_data->num_mvs + (mvx << 8);
        i8_mvy_sum_q8 = (LWORD64)centroid_posy_q8 * ps_cluster_data->num_mvs + (mvy << 8);

        ps_cluster_data->num_mvs++;

        ps_cluster_data->s_centroid.i4_pos_x_q8 =
            (WORD32)((i8_mvx_sum_q8) / ps_cluster_data->num_mvs);
        ps_cluster_data->s_centroid.i4_pos_y_q8 =
            (WORD32)((i8_mvy_sum_q8) / ps_cluster_data->num_mvs);
    }

    /* Pixel area covered by the cluster, in total and split by uni / bi */
    ps_cluster_data->area_in_pixels += gai4_partition_area[e_part_id];

    if(is_part_of_bi)
    {
        ps_cluster_data->bi_mv_pixel_area += gai4_partition_area[e_part_id];
    }
    else
    {
        ps_cluster_data->uni_mv_pixel_area += gai4_partition_area[e_part_id];
    }
}

/**
********************************************************************************
*  @fn   void hme_try_cluster_merge
*               (
*                   cluster_data_t *ps_cluster_data,
*                   U08 *pu1_num_clusters,
*                   S32 idx_of_updated_cluster
*               )
*
*  @brief  Tries to merge the most recently updated cluster with another
*          valid cluster of the same ref_id whose centroid lies within half
*          of the updated cluster's max_dist_from_centroid (sum of rounded
*          absolute x and y centroid differences). On a merge the other
*          cluster is invalidated, its mvs and areas are folded into the
*          updated cluster, and the function calls itself again to look
*          for further merges.
*
*  @param[in/out]  ps_cluster_data: pointer to the first cluster_data_t of
*                                   the cluster array
*
*  @param[in/out]  pu1_num_clusters : pointer to number of valid clusters;
*                                     decremented on every merge
*
*  @param[in]  idx_of_updated_cluster : index of the cluster most recently
*                                       updated
*
*  @return Nothing
********************************************************************************
*/
void hme_try_cluster_merge(
    cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
{
    centroid_t *ps_centroid;

    S32 cur_pos_x_q8;
    S32 cur_pos_y_q8;
    S32 i;
    S32 max_dist_from_centroid;
    S32 mvd;
    S32 mvdx_q8;
    S32 mvdx;
    S32 mvdy_q8;
    S32 mvdy;
    S32 num_clusters, num_clusters_evaluated;
    S32 other_pos_x_q8;
    S32 other_pos_y_q8;

    /* ps_root keeps the array base; ps_cluster_data itself walks the array */
    cluster_data_t *ps_root = ps_cluster_data;
    cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
    centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;

    /* Merge is superfluous if num_clusters is 1 */
    if(*pu1_num_clusters == 1)
    {
        return;
    }

    cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
    cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;

    max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;

    num_clusters = *pu1_num_clusters;
    num_clusters_evaluated = 0;

    /* Invalid slots are stepped over without being counted; the walk ends */
    /* once num_clusters valid clusters have been visited                  */
    for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
    {
        if(!ps_cluster_data->is_valid_cluster)
        {
            continue;
        }

        /* Only other clusters of the same ref_id are merge candidates */
        if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
        {
            num_clusters_evaluated++;
            continue;
        }

        ps_centroid = &ps_cluster_data->s_centroid;

        other_pos_x_q8 = ps_centroid->i4_pos_x_q8;
        other_pos_y_q8 = ps_centroid->i4_pos_y_q8;

        /* Centroid difference, rounded from Q8 to integer units */
        mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8);
        mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8);
        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
        mvdy = (mvdy_q8 + (1 << 7)) >> 8;

        mvd = ABS(mvdx) + ABS(mvdy);

        if(mvd <= (max_dist_from_centroid >> 1))
        {
            /* 0 => no updates */
            /* 1 => min updated */
            /* 2 => max updated */
            S32 minmax_x_update_id;
            S32 minmax_y_update_id;

            /* Centroid * count gives the Q8 sum of mvs of each cluster */
            LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs;
            LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs;
            LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs;
            LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs;

            /* The other ("cousin") cluster is retired */
            (*pu1_num_clusters)--;

            ps_cluster_data->is_valid_cluster = 0;

            /* Its mvs are appended to the updated cluster */
            /* NOTE(review): no capacity check on as_mv[] is visible here - */
            /* confirm the array is sized for the merged count              */
            memcpy(
                &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs],
                ps_cluster_data->as_mv,
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);

            ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs;
            ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels;
            ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
            ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

            /* Merged centroid = combined Q8 sum / combined count */
            i8_mv_x_sum_self += i8_mv_x_sum_cousin;
            i8_mv_y_sum_self += i8_mv_y_sum_cousin;

            ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs);
            ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs);

            /* Id is 1 whenever the updated cluster's min is not below the */
            /* cousin's min (the max is not examined in that case), 2 when */
            /* only its max is not above the cousin's max, 0 otherwise     */
            minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x)
                                     ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2)
                                     : 1;
            minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y)
                                     ? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2)
                                     : 1;

            /* Updation of centroid spread */
            /* Key = x id + 4 * y id. In every case below the distance is    */
            /* measured from ps_centroid (the cousin's centroid) and a       */
            /* larger value is stored in the cousin, which was invalidated   */
            /* above.                                                        */
            /* NOTE(review): ps_cur_centroid / ps_cur_cluster may have been  */
            /* intended; behaviour is kept exactly as written - confirm      */
            switch(minmax_x_update_id + (minmax_y_update_id << 2))
            {
            case 1:
            {
                /* min_x taken from the cousin */
                S32 mvd, mvd_q8;

                ps_cur_cluster->min_x = ps_cluster_data->min_x;

                mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (max_dist_from_centroid))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 2:
            {
                /* max_x taken from the cousin */
                S32 mvd, mvd_q8;

                ps_cur_cluster->max_x = ps_cluster_data->max_x;

                mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (max_dist_from_centroid))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 4:
            {
                /* min_y taken from the cousin */
                S32 mvd, mvd_q8;

                ps_cur_cluster->min_y = ps_cluster_data->min_y;

                mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (max_dist_from_centroid))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 5:
            {
                /* min_x and min_y taken from the cousin. Unlike cases 1, 2, */
                /* 4 and 8, the distances here are computed before the       */
                /* bounds are copied                                         */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                ps_cur_cluster->min_x = ps_cluster_data->min_x;
                ps_cur_cluster->min_y = ps_cluster_data->min_y;

                if(mvd > max_dist_from_centroid)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 6:
            {
                /* max_x and min_y taken from the cousin */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                ps_cur_cluster->max_x = ps_cluster_data->max_x;
                ps_cur_cluster->min_y = ps_cluster_data->min_y;

                if(mvd > max_dist_from_centroid)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 8:
            {
                /* max_y taken from the cousin */
                S32 mvd, mvd_q8;

                ps_cur_cluster->max_y = ps_cluster_data->max_y;

                mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (max_dist_from_centroid))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 9:
            {
                /* min_x and max_y taken from the cousin */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                ps_cur_cluster->min_x = ps_cluster_data->min_x;
                ps_cur_cluster->max_y = ps_cluster_data->max_y;

                if(mvd > max_dist_from_centroid)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 10:
            {
                /* max_x and max_y taken from the cousin. This is the only */
                /* case that compares against the cousin's own             */
                /* max_dist_from_centroid instead of the local copy        */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                ps_cur_cluster->max_x = ps_cluster_data->max_x;
                ps_cur_cluster->max_y = ps_cluster_data->max_y;

                if(mvd > ps_cluster_data->max_dist_from_centroid)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            default:
            {
                break;
            }
            }

            /* The merged cluster changed: restart the search from the */
            /* array base to look for further merges                   */
            hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster);

            return;
        }

        num_clusters_evaluated++;
    }
}

/**
********************************************************************************
*  @fn   void hme_find_and_update_clusters
*               (
*                   cluster_data_t *ps_cluster_data,
*                   U08 *pu1_num_clusters,
*                   S16 i2_mv_x,
*                   S16 i2_mv_y,
*                   U08 i1_ref_idx,
*                   S32 i4_sdi,
*                   PART_ID_T e_part_id,
*                   U08 is_part_of_bi
*               )
*
*  @brief  Implementation of the clustering algorithm: the mv is added to the
*          nearest valid cluster of the same ref id when that cluster lies
*          within its max_dist_from_centroid; otherwise a new cluster is
*          started for it
*
*  @param[in/out]  ps_cluster_data: pointer to the first cluster_data_t of
*                                   the cluster array
*
*  @param[in/out]  pu1_num_clusters : pointer to number of valid clusters
*
*  @param[in]  i2_mv_x : x co-ordinate of the motion vector
*
*  @param[in]  i2_mv_y : y co-ordinate of the motion vector
*
*  @param[in]  i1_ref_idx : ref_id of the motion vector
*
*  @param[in]  i4_sdi : stored alongside the mv in as_mv[]
*
*  @param[in]  e_part_id : partition id of the motion vector
*
*  @param[in]  is_part_of_bi : non-zero books the partition area as bi area,
*                              zero as uni area
*
*  @return None
********************************************************************************
*/
void hme_find_and_update_clusters(
    cluster_data_t *ps_cluster_data,
    U08 *pu1_num_clusters,
    S16 i2_mv_x,
    S16 i2_mv_y,
    U08 i1_ref_idx,
    S32 i4_sdi,
    PART_ID_T e_part_id,
    U08 is_part_of_bi)
{
    S32 i;
    S32 min_mvd_cluster_id = -1;
    S32 mvd, mvd_limit, mvdx, mvdy;
    S32 min_mvdx, min_mvdy;

    S32 min_mvd = MAX_32BIT_VAL;
    S32 num_clusters = *pu1_num_clusters;

    S32 mvx = i2_mv_x;
    S32 mvy = i2_mv_y;
    S32 ref_idx = i1_ref_idx;
    S32 sdi = i4_sdi;
    /* MAX_NUM_CLUSTERS_16x16 doubles as the "no free slot seen" marker */
    S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16;

    if(num_clusters == 0)
    {
        /* First mv: it becomes cluster 0. max_dist_from_centroid is not   */
        /* written here and the uni / bi areas are accumulated, so both    */
        /* rely on values already present in the slot                      */
        cluster_data_t *ps_data = &ps_cluster_data[num_clusters];

        ps_data->num_mvs = 1;
        ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
        ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
        ps_data->ref_id = ref_idx;
        ps_data->area_in_pixels = gai4_partition_area[e_part_id];
        ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
        ps_data->as_mv[0].mvx = mvx;
        ps_data->as_mv[0].mvy = mvy;

        /***************************/
        ps_data->as_mv[0].is_uni = !is_part_of_bi;
        ps_data->as_mv[0].sdi = sdi;
        if(is_part_of_bi)
        {
            ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
        }
        else
        {
            ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
        }
        /**************************/
        ps_data->max_x = mvx;
        ps_data->min_x = mvx;
        ps_data->max_y = mvy;
        ps_data->min_y = mvy;

        ps_data->is_valid_cluster = 1;

        *pu1_num_clusters = 1;
    }
    else
    {
        S32 num_clusters_evaluated = 0;

        /* Find the nearest valid cluster of the same ref id */
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
        {
            cluster_data_t *ps_data = &ps_cluster_data[i];

            centroid_t *ps_centroid;

            S32 mvx_q8;
            S32 mvy_q8;
            S32 posx_q8;
            S32 posy_q8;
            S32 mvdx_q8;
            S32 mvdy_q8;

            /* In anticipation of a possible merging of clusters */
            /* An invalid slot met during the walk is remembered */
            /* for reuse by a new cluster                        */
            if(ps_data->is_valid_cluster == 0)
            {
                new_cluster_idx = i;
                continue;
            }

            if(ref_idx != ps_data->ref_id)
            {
                num_clusters_evaluated++;
                continue;
            }

            ps_centroid = &ps_data->s_centroid;
            posx_q8 = ps_centroid->i4_pos_x_q8;
            posy_q8 = ps_centroid->i4_pos_y_q8;

            mvx_q8 = mvx << 8;
            mvy_q8 = mvy << 8;

            /* Centroid minus mv, rounded from Q8 to integer units */
            mvdx_q8 = posx_q8 - mvx_q8;
            mvdy_q8 = posy_q8 - mvy_q8;

            mvdx = (((mvdx_q8 + (1 << 7)) >> 8));
            mvdy = (((mvdy_q8 + (1 << 7)) >> 8));

            mvd = ABS(mvdx) + ABS(mvdy);

            if(mvd < min_mvd)
            {
                min_mvd = mvd;
                min_mvdx = mvdx;
                min_mvdy = mvdy;
                min_mvd_cluster_id = i;
            }

            num_clusters_evaluated++;
        }

        /* With no same-ref cluster found, min_mvd is still MAX_32BIT_VAL; */
        /* cluster 0 only supplies a limit for the comparison below        */
        mvd_limit = (min_mvd_cluster_id == -1)
                        ? ps_cluster_data[0].max_dist_from_centroid
                        : ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid;

        /* This condition implies that min_mvd has been updated */
        if(min_mvd <= mvd_limit)
        {
            hme_update_cluster_attributes(
                &ps_cluster_data[min_mvd_cluster_id],
                mvx,
                mvy,
                min_mvdx,
                min_mvdy,
                ref_idx,
                sdi,
                is_part_of_bi,
                e_part_id);

            /* A merge is attempted only for partitions of type PRT_NxN */
            if(PRT_NxN == ge_part_id_to_part_type[e_part_id])
            {
                hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id);
            }
        }
        else
        {
            /* Start a new cluster, in a remembered invalid slot when one  */
            /* was seen, else at index num_clusters. As in the first-mv    */
            /* path, max_dist_from_centroid is not written and the uni /   */
            /* bi areas are accumulated onto the slot's existing values    */
            cluster_data_t *ps_data = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16)
                                          ? &ps_cluster_data[num_clusters]
                                          : &ps_cluster_data[new_cluster_idx];

            ps_data->num_mvs = 1;
            ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
            ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
            ps_data->ref_id = ref_idx;
            ps_data->area_in_pixels = gai4_partition_area[e_part_id];
            ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
            ps_data->as_mv[0].mvx = mvx;
            ps_data->as_mv[0].mvy = mvy;

            /***************************/
            ps_data->as_mv[0].is_uni = !is_part_of_bi;
            ps_data->as_mv[0].sdi = sdi;
            if(is_part_of_bi)
            {
                ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
            }
            else
            {
                ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
            }
            /**************************/
            ps_data->max_x = mvx;
            ps_data->min_x = mvx;
            ps_data->max_y = mvy;
            ps_data->min_y = mvy;

            ps_data->is_valid_cluster = 1;

            num_clusters++;
            *pu1_num_clusters = num_clusters;
        }
    }
}

/**
********************************************************************************
*  @fn   void hme_update_32x32_cluster_attributes
*               (
*                   cluster_32x32_blk_t *ps_blk_32x32,
*                   cluster_data_t *ps_cluster_data
*               )
*
*  @brief  Updates attributes for 32x32 clusters based on the attributes of
*          the constituent 16x16 clusters
*
*  @param[out]  ps_blk_32x32: structure containing 32x32 block results
*
*  @param[in]  ps_cluster_data : structure containing 16x16 block results
*
*  @return None
********************************************************************************
*/
void hme_update_32x32_cluster_attributes(
    cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data)
{
    cluster_data_t *ps_cur_cluster_32;

    S32 i;
    S32 mvd_limit;

    S32 num_clusters = ps_blk_32x32->num_clusters;

    if(0 == num_clusters)
    {
        ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];

        ps_blk_32x32->num_clusters++;
        ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;

        ps_cur_cluster_32->is_valid_cluster = 1;

        ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
        ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area; memcpy( ps_cur_cluster_32->as_mv, ps_cluster_data->as_mv, sizeof(mv_data_t) * ps_cluster_data->num_mvs); ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs; ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id; ps_cur_cluster_32->max_x = ps_cluster_data->max_x; ps_cur_cluster_32->max_y = ps_cluster_data->max_y; ps_cur_cluster_32->min_x = ps_cluster_data->min_x; ps_cur_cluster_32->min_y = ps_cluster_data->min_y; ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid; } else { centroid_t *ps_centroid; S32 cur_posx_q8, cur_posy_q8; S32 min_mvd_cluster_id = -1; S32 mvd; S32 mvdx; S32 mvdy; S32 mvdx_min; S32 mvdy_min; S32 mvdx_q8; S32 mvdy_q8; S32 num_clusters_evaluated = 0; S32 mvd_min = MAX_32BIT_VAL; S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8; S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8; for(i = 0; num_clusters_evaluated < num_clusters; i++) { ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i]; if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id) { num_clusters_evaluated++; continue; } if(!ps_cluster_data->is_valid_cluster) { continue; } num_clusters_evaluated++; ps_centroid = &ps_cur_cluster_32->s_centroid; cur_posx_q8 = ps_centroid->i4_pos_x_q8; cur_posy_q8 = ps_centroid->i4_pos_y_q8; mvdx_q8 = cur_posx_q8 - mvx_inp_q8; mvdy_q8 = cur_posy_q8 - mvy_inp_q8; mvdx = (mvdx_q8 + (1 << 7)) >> 8; mvdy = (mvdy_q8 + (1 << 7)) >> 8; mvd = ABS(mvdx) + ABS(mvdy); if(mvd < mvd_min) { mvd_min = mvd; mvdx_min = mvdx; mvdy_min = mvdy; min_mvd_cluster_id = i; } } ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0]; mvd_limit = (min_mvd_cluster_id == -1) ? 
ps_cur_cluster_32[0].max_dist_from_centroid : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid; if(mvd_min <= mvd_limit) { LWORD64 i8_updated_posx; LWORD64 i8_updated_posy; WORD32 minmax_updated_x = 0; WORD32 minmax_updated_y = 0; ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id]; ps_centroid = &ps_cur_cluster_32->s_centroid; ps_cur_cluster_32->is_valid_cluster = 1; ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels; ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area; ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area; memcpy( &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs], ps_cluster_data->as_mv, sizeof(mv_data_t) * ps_cluster_data->num_mvs); if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8)) { ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8; minmax_updated_x = 1; } else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8)) { ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8; minmax_updated_x = 2; } if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8)) { ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8; minmax_updated_y = 1; } else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8)) { ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8; minmax_updated_y = 2; } switch((minmax_updated_y << 2) + minmax_updated_x) { case 1: { S32 mvd, mvd_q8; mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8); mvd = (mvd_q8 + (1 << 7)) >> 8; if(mvd > (mvd_limit)) { ps_cur_cluster_32->max_dist_from_centroid = mvd; } break; } case 2: { S32 mvd, mvd_q8; mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8; mvd = (mvd_q8 + (1 << 7)) >> 8; if(mvd > (mvd_limit)) { ps_cur_cluster_32->max_dist_from_centroid = mvd; } break; } case 4: { S32 mvd, mvd_q8; mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8); mvd = (mvd_q8 + (1 << 7)) >> 8; if(mvd > 
(mvd_limit)) { ps_cur_cluster_32->max_dist_from_centroid = mvd; } break; } case 5: { S32 mvd; S32 mvdx, mvdx_q8; S32 mvdy, mvdy_q8; mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8); mvdy = (mvdy_q8 + (1 << 7)) >> 8; mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8); mvdx = (mvdx_q8 + (1 << 7)) >> 8; mvd = (mvdx > mvdy) ? mvdx : mvdy; if(mvd > mvd_limit) { ps_cur_cluster_32->max_dist_from_centroid = mvd; } break; } case 6: { S32 mvd; S32 mvdx, mvdx_q8; S32 mvdy, mvdy_q8; mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8); mvdy = (mvdy_q8 + (1 << 7)) >> 8; mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8; mvdx = (mvdx_q8 + (1 << 7)) >> 8; mvd = (mvdx > mvdy) ? mvdx : mvdy; if(mvd > mvd_limit) { ps_cur_cluster_32->max_dist_from_centroid = mvd; } break; } case 8: { S32 mvd, mvd_q8; mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8; mvd = (mvd_q8 + (1 << 7)) >> 8; if(mvd > (mvd_limit)) { ps_cur_cluster_32->max_dist_from_centroid = mvd; } break; } case 9: { S32 mvd; S32 mvdx, mvdx_q8; S32 mvdy, mvdy_q8; mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8); mvdx = (mvdx_q8 + (1 << 7)) >> 8; mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8; mvdy = (mvdy_q8 + (1 << 7)) >> 8; mvd = (mvdx > mvdy) ? mvdx : mvdy; if(mvd > mvd_limit) { ps_cur_cluster_32->max_dist_from_centroid = mvd; } break; } case 10: { S32 mvd; S32 mvdx, mvdx_q8; S32 mvdy, mvdy_q8; mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8; mvdx = (mvdx_q8 + (1 << 7)) >> 8; mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8; mvdy = (mvdy_q8 + (1 << 7)) >> 8; mvd = (mvdx > mvdy) ? 
mvdx : mvdy; if(mvd > ps_cur_cluster_32->max_dist_from_centroid) { ps_cur_cluster_32->max_dist_from_centroid = mvd; } break; } default: { break; } } i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) + ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs); i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) + ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs); ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs; ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs); ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs); } else if(num_clusters < MAX_NUM_CLUSTERS_32x32) { ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters]; ps_blk_32x32->num_clusters++; ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++; ps_cur_cluster_32->is_valid_cluster = 1; ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels; ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area; ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area; memcpy( ps_cur_cluster_32->as_mv, ps_cluster_data->as_mv, sizeof(mv_data_t) * ps_cluster_data->num_mvs); ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs; ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id; ps_cur_cluster_32->max_x = ps_cluster_data->max_x; ps_cur_cluster_32->max_y = ps_cluster_data->max_y; ps_cur_cluster_32->min_x = ps_cluster_data->min_x; ps_cur_cluster_32->min_y = ps_cluster_data->min_y; ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid; } } } /** ******************************************************************************** * @fn void hme_update_64x64_cluster_attributes * ( * cluster_64x64_blk_t *ps_blk_32x32, * cluster_data_t *ps_cluster_data * ) * * @brief Updates attributes for 64x64 clusters based on the attributes of * the constituent 16x16 clusters * * @param[out] ps_blk_64x64: structure containing 64x64 block results * * @param[in] 
ps_cluster_data : structure containing 32x32 block results * * @return None ******************************************************************************** */ void hme_update_64x64_cluster_attributes( cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data) { cluster_data_t *ps_cur_cluster_64; S32 i; S32 mvd_limit; S32 num_clusters = ps_blk_64x64->num_clusters; if(0 == num_clusters) { ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[0]; ps_blk_64x64->num_clusters++; ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++; ps_cur_cluster_64->is_valid_cluster = 1; ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels; ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area; ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area; memcpy( ps_cur_cluster_64->as_mv, ps_cluster_data->as_mv, sizeof(mv_data_t) * ps_cluster_data->num_mvs); ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs; ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id; ps_cur_cluster_64->max_x = ps_cluster_data->max_x; ps_cur_cluster_64->max_y = ps_cluster_data->max_y; ps_cur_cluster_64->min_x = ps_cluster_data->min_x; ps_cur_cluster_64->min_y = ps_cluster_data->min_y; ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid; } else { centroid_t *ps_centroid; S32 cur_posx_q8, cur_posy_q8; S32 min_mvd_cluster_id = -1; S32 mvd; S32 mvdx; S32 mvdy; S32 mvdx_min; S32 mvdy_min; S32 mvdx_q8; S32 mvdy_q8; S32 num_clusters_evaluated = 0; S32 mvd_min = MAX_32BIT_VAL; S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8; S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8; for(i = 0; num_clusters_evaluated < num_clusters; i++) { ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i]; if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id) { num_clusters_evaluated++; continue; } if(!ps_cur_cluster_64->is_valid_cluster) { continue; } num_clusters_evaluated++; ps_centroid = &ps_cur_cluster_64->s_centroid; cur_posx_q8 = 
ps_centroid->i4_pos_x_q8; cur_posy_q8 = ps_centroid->i4_pos_y_q8; mvdx_q8 = cur_posx_q8 - mvx_inp_q8; mvdy_q8 = cur_posy_q8 - mvy_inp_q8; mvdx = (mvdx_q8 + (1 << 7)) >> 8; mvdy = (mvdy_q8 + (1 << 7)) >> 8; mvd = ABS(mvdx) + ABS(mvdy); if(mvd < mvd_min) { mvd_min = mvd; mvdx_min = mvdx; mvdy_min = mvdy; min_mvd_cluster_id = i; } } ps_cur_cluster_64 = ps_blk_64x64->as_cluster_data; mvd_limit = (min_mvd_cluster_id == -1) ? ps_cur_cluster_64[0].max_dist_from_centroid : ps_cur_cluster_64[min_mvd_cluster_id].max_dist_from_centroid; if(mvd_min <= mvd_limit) { LWORD64 i8_updated_posx; LWORD64 i8_updated_posy; WORD32 minmax_updated_x = 0; WORD32 minmax_updated_y = 0; ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id]; ps_centroid = &ps_cur_cluster_64->s_centroid; ps_cur_cluster_64->is_valid_cluster = 1; ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels; ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area; ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area; memcpy( &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs], ps_cluster_data->as_mv, sizeof(mv_data_t) * ps_cluster_data->num_mvs); if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8)) { ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8; minmax_updated_x = 1; } else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8)) { ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8; minmax_updated_x = 2; } if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8)) { ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8; minmax_updated_y = 1; } else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8)) { ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8; minmax_updated_y = 2; } switch((minmax_updated_y << 2) + minmax_updated_x) { case 1: { S32 mvd, mvd_q8; mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8); mvd = (mvd_q8 + (1 << 7)) >> 8; if(mvd > 
(mvd_limit)) { ps_cur_cluster_64->max_dist_from_centroid = mvd; } break; } case 2: { S32 mvd, mvd_q8; mvd_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8; mvd = (mvd_q8 + (1 << 7)) >> 8; if(mvd > (mvd_limit)) { ps_cur_cluster_64->max_dist_from_centroid = mvd; } break; } case 4: { S32 mvd, mvd_q8; mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8); mvd = (mvd_q8 + (1 << 7)) >> 8; if(mvd > (mvd_limit)) { ps_cur_cluster_64->max_dist_from_centroid = mvd; } break; } case 5: { S32 mvd; S32 mvdx, mvdx_q8; S32 mvdy, mvdy_q8; mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8); mvdy = (mvdy_q8 + (1 << 7)) >> 8; mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8); mvdx = (mvdx_q8 + (1 << 7)) >> 8; mvd = (mvdx > mvdy) ? mvdx : mvdy; if(mvd > mvd_limit) { ps_cur_cluster_64->max_dist_from_centroid = mvd; } break; } case 6: { S32 mvd; S32 mvdx, mvdx_q8; S32 mvdy, mvdy_q8; mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8); mvdy = (mvdy_q8 + (1 << 7)) >> 8; mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8; mvdx = (mvdx_q8 + (1 << 7)) >> 8; mvd = (mvdx > mvdy) ? mvdx : mvdy; if(mvd > mvd_limit) { ps_cur_cluster_64->max_dist_from_centroid = mvd; } break; } case 8: { S32 mvd, mvd_q8; mvd_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8; mvd = (mvd_q8 + (1 << 7)) >> 8; if(mvd > (mvd_limit)) { ps_cur_cluster_64->max_dist_from_centroid = mvd; } break; } case 9: { S32 mvd; S32 mvdx, mvdx_q8; S32 mvdy, mvdy_q8; mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8); mvdx = (mvdx_q8 + (1 << 7)) >> 8; mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8; mvdy = (mvdy_q8 + (1 << 7)) >> 8; mvd = (mvdx > mvdy) ? 
mvdx : mvdy; if(mvd > mvd_limit) { ps_cur_cluster_64->max_dist_from_centroid = mvd; } break; } case 10: { S32 mvd; S32 mvdx, mvdx_q8; S32 mvdy, mvdy_q8; mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8; mvdx = (mvdx_q8 + (1 << 7)) >> 8; mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8; mvdy = (mvdy_q8 + (1 << 7)) >> 8; mvd = (mvdx > mvdy) ? mvdx : mvdy; if(mvd > ps_cur_cluster_64->max_dist_from_centroid) { ps_cur_cluster_64->max_dist_from_centroid = mvd; } break; } default: { break; } } i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) + ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs); i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) + ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs); ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs; ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs); ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs); } else if(num_clusters < MAX_NUM_CLUSTERS_64x64) { ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters]; ps_blk_64x64->num_clusters++; ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++; ps_cur_cluster_64->is_valid_cluster = 1; ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels; ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area; ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area; memcpy( &ps_cur_cluster_64->as_mv[0], ps_cluster_data->as_mv, sizeof(mv_data_t) * ps_cluster_data->num_mvs); ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs; ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id; ps_cur_cluster_64->max_x = ps_cluster_data->max_x; ps_cur_cluster_64->max_y = ps_cluster_data->max_y; ps_cur_cluster_64->min_x = ps_cluster_data->min_x; ps_cur_cluster_64->min_y = ps_cluster_data->min_y; ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid; } } } /** 
********************************************************************************
*  @fn   void hme_update_32x32_clusters
*               (
*                   cluster_32x32_blk_t *ps_blk_32x32,
*                   cluster_16x16_blk_t *ps_blk_16x16
*               )
*
*  @brief  Builds the 32x32 block's cluster data from its four 16x16 blocks.
*          For each 16x16 block, intra_mv_area and best_inter_cost are added
*          to the 32x32 totals and every valid cluster is passed to
*          hme_update_32x32_cluster_attributes.
*
*  @param[out]  ps_blk_32x32: structure containing 32x32 block results
*
*  @param[in]  ps_blk_16x16 : array of the four constituent 16x16 block results
*
*  @return None
********************************************************************************
*/
static __inline void
    hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
{
    cluster_16x16_blk_t *ps_blk_16x16_cur;
    cluster_data_t *ps_cur_cluster;

    S32 i, j;
    S32 num_clusters_cur_16x16_blk;

    for(i = 0; i < 4; i++)
    {
        S32 num_clusters_evaluated = 0;

        ps_blk_16x16_cur = &ps_blk_16x16[i];

        num_clusters_cur_16x16_blk = ps_blk_16x16_cur->num_clusters;

        ps_blk_32x32->intra_mv_area += ps_blk_16x16_cur->intra_mv_area;

        ps_blk_32x32->best_inter_cost += ps_blk_16x16_cur->best_inter_cost;

        /* Terminates on the count of valid clusters seen, not on j.         */
        /* NOTE(review): assumes num_clusters never exceeds the number of    */
        /* valid entries in as_cluster_data - confirm                        */
        for(j = 0; num_clusters_evaluated < num_clusters_cur_16x16_blk; j++)
        {
            ps_cur_cluster = &ps_blk_16x16_cur->as_cluster_data[j];

            if(!ps_cur_cluster->is_valid_cluster)
            {
                continue;
            }

            hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cur_cluster);

            num_clusters_evaluated++;
        }
    }
}

/**
********************************************************************************
*  @fn   void hme_update_64x64_clusters
*               (
*                   cluster_64x64_blk_t *ps_blk_64x64,
*                   cluster_32x32_blk_t *ps_blk_32x32
*               )
*
*  @brief  Builds the 64x64 block's cluster data from its four 32x32 blocks.
*          For each 32x32 block, intra_mv_area and best_inter_cost are added
*          to the 64x64 totals and every valid cluster is passed to
*          hme_update_64x64_cluster_attributes.
*
*  @param[out]  ps_blk_64x64: structure containing 64x64 block results
*
*  @param[in]  ps_blk_32x32 : array of the four constituent 32x32 block results
*
*  @return None
********************************************************************************
*/
static __inline void
    hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
{
    cluster_32x32_blk_t *ps_blk_32x32_cur;
    cluster_data_t *ps_cur_cluster;

    S32 i, j;
    S32 num_clusters_cur_32x32_blk;

    for(i = 0; i < 4; i++)
    {
        S32 num_clusters_evaluated = 0;

        ps_blk_32x32_cur = &ps_blk_32x32[i];

        num_clusters_cur_32x32_blk = ps_blk_32x32_cur->num_clusters;

        ps_blk_64x64->intra_mv_area += ps_blk_32x32_cur->intra_mv_area;
        ps_blk_64x64->best_inter_cost += ps_blk_32x32_cur->best_inter_cost;

        /* Terminates on the count of valid clusters seen, not on j.         */
        /* NOTE(review): assumes num_clusters never exceeds the number of    */
        /* valid entries in as_cluster_data - confirm                        */
        for(j = 0; num_clusters_evaluated < num_clusters_cur_32x32_blk; j++)
        {
            ps_cur_cluster = &ps_blk_32x32_cur->as_cluster_data[j];

            if(!ps_cur_cluster->is_valid_cluster)
            {
                continue;
            }

            hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cur_cluster);

            num_clusters_evaluated++;
        }
    }
}

/**
********************************************************************************
*  @fn   S32 hme_try_merge_clusters_blksize_gt_16
*               (
*                   cluster_data_t *ps_cluster_data,
*                   S32 num_clusters
*               )
*
*  @brief  Merging clusters from blocks of size 32x32 and greater.
*          ps_cluster_data[0] acts as the anchor: every later valid cluster
*          with the same ref_id whose rounded centroid lies within half of
*          the anchor's max_dist_from_centroid (L1 distance) is absorbed into
*          the anchor and marked invalid. The function then recurses with
*          the next cluster as anchor.
*
*  @param[in/out]  ps_cluster_data: array of clusters; element 0 is the anchor
*
*  @param[in]  num_clusters : number of clusters to consider, anchor included
*                             (passed by value)
*
*  @return Number of merges performed, including those done by the recursion
********************************************************************************
*/
S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
{
    centroid_t *ps_cur_centroid;
    cluster_data_t *ps_cur_cluster;

    S32 i, mvd;
    S32 mvdx, mvdy, mvdx_q8, mvdy_q8;

    centroid_t *ps_centroid = &ps_cluster_data->s_centroid;

    /* Anchor limit and position are sampled once here and are not */
    /* refreshed after a merge moves the anchor's centroid         */
    S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
    S32 ref_id = ps_cluster_data->ref_id;

    S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
    S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
    /* Starts at 1: the anchor itself counts as evaluated */
    S32 num_clusters_evaluated = 1;
    S32 ret_value = 0;

    if(1 >= num_clusters)
    {
        return ret_value;
    }

    for(i = 1; num_clusters_evaluated < num_clusters; i++)
    {
        S32 cur_posx_q8;
        S32 cur_posy_q8;

        ps_cur_cluster = &ps_cluster_data[i];

        /* Clusters of another reference are counted but never merged */
        if((ref_id != ps_cur_cluster->ref_id))
        {
            num_clusters_evaluated++;
            continue;
        }

        /* Invalid (already merged) clusters are skipped without counting */
        if((!ps_cur_cluster->is_valid_cluster))
        {
            continue;
        }

        num_clusters_evaluated++;

        ps_cur_centroid = &ps_cur_cluster->s_centroid;

        cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
        cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;

        mvdx_q8 = cur_posx_q8 - node0_posx_q8;
        mvdy_q8 = cur_posy_q8 - node0_posy_q8;

        /* Q8 difference rounded to integer units */
        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
        mvdy = (mvdy_q8 + (1 << 7)) >> 8;

        mvd = ABS(mvdx) + ABS(mvdy);

        if(mvd <= (mvd_limit >> 1))
        {
            LWORD64 i8_updated_posx;
            LWORD64 i8_updated_posy;

            WORD32 minmax_updated_x = 0;
            WORD32 minmax_updated_y = 0;

            ps_cur_cluster->is_valid_cluster = 0;

            ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
            ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
            ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;

            /* Appended at the anchor's old num_mvs; count is bumped below */
            memcpy(
                &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
                ps_cur_cluster->as_mv,
                sizeof(mv_data_t) * ps_cur_cluster->num_mvs);

            /* Unlike hme_update_32x32/64x64_cluster_attributes, the bound  */
            /* is overwritten without first checking that it moves outward, */
            /* and one flag per axis is always set (1: min, 2: max)         */
            if(mvdx > 0)
            {
                ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
                minmax_updated_x = 1;
            }
            else
            {
                ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
                minmax_updated_x = 2;
            }

            if(mvdy > 0)
            {
                ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
                minmax_updated_y = 1;
            }
            else
            {
                ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
                minmax_updated_y = 2;
            }

            /* Both flags are non-zero here, so only cases 5, 6, 9 and 10   */
            /* can be selected.                                             */
            /* NOTE(review): distances are taken from ps_cur_centroid (the  */
            /* absorbed cluster), whereas the 32x32/64x64 attribute update  */
            /* functions measure from the receiving cluster's centroid -    */
            /* confirm this is intended                                     */
            switch((minmax_updated_y << 2) + minmax_updated_x)
            {
            case 1:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 2:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 4:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 5:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 6:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 8:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 9:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 10:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                /* This case alone compares against the live field rather */
                /* than the mvd_limit snapshot taken at function entry    */
                if(mvd > ps_cluster_data->max_dist_from_centroid)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            default:
            {
                break;
            }
            }

            /* New anchor centroid = average weighted by num_mvs */
            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
                              ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
                              ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);

            ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;

            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);

            if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
            {
                /* Restart the scan: i = 0 becomes 1 through the loop's i++ */
                num_clusters--;
                num_clusters_evaluated = 1;
                i = 0;
                ret_value++;
            }
            else
            {
                /* Above the threshold only a single merge is done per call */
                ret_value++;

                return ret_value;
            }
        }
    }

    if(ret_value)
    {
        /* num_clusters + ret_value equals the count passed in, since each */
        /* merge on this path decremented one and incremented the other    */
        for(i = 1; i < (num_clusters + ret_value); i++)
        {
            if(ps_cluster_data[i].is_valid_cluster)
            {
                break;
            }
        }
        if(i == (num_clusters + ret_value))
        {
            return ret_value;
        }
    }
    else
    {
        i = 1;
    }

    /* Next valid cluster becomes the anchor of the recursive pass */
    return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
           ret_value;
}

/**
********************************************************************************
*  @fn   S32 hme_determine_validity_32x32
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   S32 *pi4_children_nodes_required,
*                   S32 blk_validity_wrt_pic_bndry,
*                   S32 parent_blk_validity_wrt_pic_bndry
*               )
*
*  @brief  Determines whether current 32x32 block needs to be evaluated in
*          enc_loop while recursing through the CU tree or not
*
*  @param[in]  ps_ctb_cluster_info: provides ps_32x32_blk and ps_64x64_blk,
*              whose num_clusters drive the decision
*
*  @param[out]  pi4_children_nodes_required: set to 1 or 0 on every path
*
*  @param[in]  blk_validity_wrt_pic_bndry: if 0, returns 0 with children
*              required
*
*  @param[in]  parent_blk_validity_wrt_pic_bndry: if 0, returns 1 with
*              children required
*
*  @return 1 if the block is considered valid, else 0
********************************************************************************
*/
__inline S32 hme_determine_validity_32x32(
    ctb_cluster_info_t *ps_ctb_cluster_info,
    S32 *pi4_children_nodes_required,
    S32 blk_validity_wrt_pic_bndry,
    S32 parent_blk_validity_wrt_pic_bndry)
{
    cluster_data_t *ps_data;

    cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
    cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;

    S32 num_clusters = ps_32x32_blk->num_clusters;
    S32 num_clusters_parent = ps_64x64_blk->num_clusters;

    if(!blk_validity_wrt_pic_bndry)
    {
        *pi4_children_nodes_required = 1;
        return 0;
    }

    if(!parent_blk_validity_wrt_pic_bndry)
    {
        *pi4_children_nodes_required = 1;
        return 1;
    }

    if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
    {
        *pi4_children_nodes_required = 1;
        return 0;
    }

    if(num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
    {
        *pi4_children_nodes_required = 1;

        return 1;
    }
    else if(num_clusters_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
    {
        *pi4_children_nodes_required = 0;

        return 1;
    }
    else
    {
        if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
        {
            *pi4_children_nodes_required = 0;
            return 1;
        }
        else
        {
            /* Both counts sit exactly on their thresholds: decide on the */
            /* area of the smallest valid cluster                         */
            S32 i;

            S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 4;
            S32 min_area = MAX_32BIT_VAL;
            S32 num_clusters_evaluated = 0;

            for(i = 0; num_clusters_evaluated < num_clusters; i++)
            {
                ps_data = &ps_32x32_blk->as_cluster_data[i];

                if(!ps_data->is_valid_cluster)
                {
                    continue;
                }

                num_clusters_evaluated++;

                if(ps_data->area_in_pixels < min_area)
                {
                    min_area = ps_data->area_in_pixels;
                }
            }

            /* i.e. smallest cluster covers less than 1/16th of area_of_parent */
            if((min_area << 4) < area_of_parent)
            {
                *pi4_children_nodes_required = 1;
                return 0;
            }
            else
            {
                *pi4_children_nodes_required = 0;
                return 1;
            }
        }
    }
}

/**
********************************************************************************
*  @fn   S32 hme_determine_validity_16x16
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   S32 *pi4_children_nodes_required,
*                   S32 blk_validity_wrt_pic_bndry,
*                   S32 parent_blk_validity_wrt_pic_bndry
*               )
*
*  @brief  Determines whether current 16x16 block needs to be evaluated in
*          enc_loop while recursing through the CU tree or not
*
*  @param[in]  ps_ctb_cluster_info: provides ps_16x16_blk, ps_32x32_blk and
*              ps_64x64_blk, whose num_clusters drive the decision
*
*  @param[out]  pi4_children_nodes_required: set to 1 or 0 on every path
*
*  @param[in]  blk_validity_wrt_pic_bndry: if 0, returns 0 with children
*              required
*
*  @param[in]  parent_blk_validity_wrt_pic_bndry: if 0, returns 1 with
*              children required
*
*  @return 1 if the block is considered valid, else 0
********************************************************************************
*/
__inline S32 hme_determine_validity_16x16(
    ctb_cluster_info_t *ps_ctb_cluster_info,
    S32 *pi4_children_nodes_required,
    S32 blk_validity_wrt_pic_bndry,
    S32 parent_blk_validity_wrt_pic_bndry)
{
    cluster_data_t *ps_data;

    cluster_16x16_blk_t *ps_16x16_blk = ps_ctb_cluster_info->ps_16x16_blk;
    cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
    cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;

    S32 num_clusters = ps_16x16_blk->num_clusters;
    S32 num_clusters_parent = ps_32x32_blk->num_clusters;
    S32 num_clusters_grandparent = ps_64x64_blk->num_clusters;

    if(!blk_validity_wrt_pic_bndry)
    {
        *pi4_children_nodes_required = 1;
        return 0;
    }

    if(!parent_blk_validity_wrt_pic_bndry)
    {
        *pi4_children_nodes_required = 1;
        return 1;
    }

    if((num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
       (num_clusters_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
    {
        *pi4_children_nodes_required = 1;
        return 1;
    }

    /* Implies nc_64 <= 3 when num_clusters_parent > 3 & */
    /* implies nc_64 > 3 when num_clusters_parent < 3 & */
    if(num_clusters_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
    {
        if(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
        {
            *pi4_children_nodes_required = 0;

            return 1;
        }
        else
        {
            *pi4_children_nodes_required = 1;

            return 0;
        }
    }
    /* Implies nc_64 >= 3 */
    else
    {
        if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
        {
            *pi4_children_nodes_required = 0;
            return 1;
        }
        else if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
        {
            *pi4_children_nodes_required = 1;
            return 0;
        }
        else
        {
            /* Cluster count sits exactly on the threshold: decide on the */
            /* area of the smallest valid cluster                         */
            S32 i;

            S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 2;
            S32 min_area = MAX_32BIT_VAL;
            S32 num_clusters_evaluated = 0;

            for(i = 0; num_clusters_evaluated < num_clusters; i++)
            {
                ps_data = &ps_16x16_blk->as_cluster_data[i];

                if(!ps_data->is_valid_cluster)
                {
                    continue;
                }

                num_clusters_evaluated++;

                if(ps_data->area_in_pixels < min_area)
                {
                    min_area = ps_data->area_in_pixels;
                }
            }

            /* i.e. smallest cluster covers less than 1/16th of area_of_parent */
            if((min_area << 4) < area_of_parent)
            {
                *pi4_children_nodes_required = 1;
                return 0;
            }
            else
            {
                *pi4_children_nodes_required = 0;
                return 1;
            }
        }
    }
}

/**
********************************************************************************
*  @fn   void hme_build_cu_tree
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   cur_ctb_cu_tree_t *ps_cu_tree,
*                   S32 tree_depth,
*                   CU_POS_T e_grand_parent_blk_pos,
*                   CU_POS_T
e_parent_blk_pos, * CU_POS_T e_cur_blk_pos * ) * * @brief Recursive function for CU tree initialisation * * @param[in] ps_ctb_cluster_info: structure containing pointers to clusters * corresponding to all block sizes from 64x64 * to 16x16 * * @param[in] e_parent_blk_pos: position of parent block wrt its parent, if * applicable * * @param[in] e_cur_blk_pos: position of current block wrt parent * * @param[out] ps_cu_tree : represents CU tree used in CU recursion * * @param[in] tree_depth : specifies depth of the CU tree * * @return Nothing ******************************************************************************** */ void hme_build_cu_tree( ctb_cluster_info_t *ps_ctb_cluster_info, cur_ctb_cu_tree_t *ps_cu_tree, S32 tree_depth, CU_POS_T e_grandparent_blk_pos, CU_POS_T e_parent_blk_pos, CU_POS_T e_cur_blk_pos) { ihevce_cu_tree_init( ps_cu_tree, ps_ctb_cluster_info->ps_cu_tree_root, &ps_ctb_cluster_info->nodes_created_in_cu_tree, tree_depth, e_grandparent_blk_pos, e_parent_blk_pos, e_cur_blk_pos); } /** ******************************************************************************** * @fn S32 hme_sdi_based_cluster_spread_eligibility * ( * cluster_32x32_blk_t *ps_blk_32x32 * ) * * @brief Determines whether the spread of high SDI MV's around each cluster * center is below a pre-determined threshold * * @param[in] ps_blk_32x32: structure containing pointers to clusters * corresponding to all block sizes from 64x64 * to 16x16 * * @return 1 if the spread is constrained, else 0 ******************************************************************************** */ __inline S32 hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold) { S32 cumulative_mv_distance; S32 i, j; S32 num_high_sdi_mvs; S32 num_clusters = ps_blk_32x32->num_clusters; for(i = 0; i < num_clusters; i++) { cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i]; num_high_sdi_mvs = 0; cumulative_mv_distance = 0; for(j = 0; j < ps_data->num_mvs; j++) { mv_data_t 
*ps_mv = &ps_data->as_mv[j]; if(ps_mv->sdi >= sdi_threshold) { num_high_sdi_mvs++; COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance); } } if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs)) { return 0; } } return 1; } /** ******************************************************************************** * @fn S32 hme_populate_cu_tree * ( * ctb_cluster_info_t *ps_ctb_cluster_info, * ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb, * cur_ctb_cu_tree_t *ps_cu_tree, * S32 tree_depth, * CU_POS_T e_parent_blk_pos, * CU_POS_T e_cur_blk_pos * ) * * @brief Recursive function for CU tree population based on output of * clustering algorithm * * @param[in] ps_ctb_cluster_info: structure containing pointers to clusters * corresponding to all block sizes from 64x64 * to 16x16 * * @param[in] e_parent_blk_pos: position of parent block wrt its parent, if applicable * * @param[in] e_cur_blk_pos: position of current block wrt parent * * @param[in] ps_cur_ipe_ctb : output container for ipe analyses * * @param[out] ps_cu_tree : represents CU tree used in CU recursion * * @param[in] tree_depth : specifies depth of the CU tree * * @param[in] ipe_decision_precedence : specifies whether precedence should * be given to decisions made either by IPE(1) or clustering algos. 
* * @return 1 if re-evaluation of parent node's validity is not required, else 0 ******************************************************************************** */ void hme_populate_cu_tree( ctb_cluster_info_t *ps_ctb_cluster_info, cur_ctb_cu_tree_t *ps_cu_tree, S32 tree_depth, ME_QUALITY_PRESETS_T e_quality_preset, CU_POS_T e_grandparent_blk_pos, CU_POS_T e_parent_blk_pos, CU_POS_T e_cur_blk_pos) { S32 area_of_cur_blk; S32 area_limit_for_me_decision_precedence; S32 children_nodes_required; S32 intra_mv_area; S32 intra_eval_enable; S32 inter_eval_enable; S32 ipe_decision_precedence; S32 node_validity; S32 num_clusters; ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb; if(NULL == ps_cu_tree) { return; } switch(tree_depth) { case 0: { /* 64x64 block */ S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask; cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk; area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4; area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100; children_nodes_required = 0; intra_mv_area = ps_blk_64x64->intra_mv_area; ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence); intra_eval_enable = ipe_decision_precedence; inter_eval_enable = !!ps_blk_64x64->num_clusters; #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS if(e_quality_preset >= ME_HIGH_QUALITY) { inter_eval_enable = 1; node_validity = (blk_32x32_mask == 0xf); #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk); #endif break; } #endif #if ENABLE_4CTB_EVALUATION node_validity = (blk_32x32_mask == 0xf); break; #else { S32 i; num_clusters = ps_blk_64x64->num_clusters; node_validity = (ipe_decision_precedence) ? 
(!ps_cur_ipe_ctb->u1_split_flag) : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK); for(i = 0; i < MAX_NUM_REF; i++) { node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <= MAX_NUM_CLUSTERS_IN_ONE_REF_IDX); } node_validity = node_validity && (blk_32x32_mask == 0xf); } break; #endif } case 1: { /* 32x32 block */ S32 is_percent_intra_area_gt_threshold; cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos]; S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask; #if !ENABLE_4CTB_EVALUATION S32 best_inter_cost = ps_blk_32x32->best_inter_cost; S32 best_intra_cost = ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] + ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4) < 0) ? MAX_32BIT_VAL : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] + ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4); S32 best_cost = (best_inter_cost > best_intra_cost) ? 
best_intra_cost : best_inter_cost; S32 cost_differential = (best_inter_cost - best_cost); #endif area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2; area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100; intra_mv_area = ps_blk_32x32->intra_mv_area; is_percent_intra_area_gt_threshold = (intra_mv_area > area_limit_for_me_decision_precedence); ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence); intra_eval_enable = ipe_decision_precedence; inter_eval_enable = !!ps_blk_32x32->num_clusters; children_nodes_required = 1; #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS if(e_quality_preset >= ME_HIGH_QUALITY) { inter_eval_enable = 1; node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0); #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk); #endif break; } #endif #if ENABLE_4CTB_EVALUATION node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0); break; #else { S32 i; num_clusters = ps_blk_32x32->num_clusters; if(ipe_decision_precedence) { node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag); node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0); } else { node_validity = ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) && (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0); for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++) { node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <= MAX_NUM_CLUSTERS_IN_ONE_REF_IDX); } if(node_validity) { node_validity = node_validity && hme_sdi_based_cluster_spread_eligibility( ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold); } } } break; #endif } case 2: { cluster_16x16_blk_t *ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)]; S32 blk_8x8_mask = 
ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos]; area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N]; area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100; children_nodes_required = 1; intra_mv_area = ps_blk_16x16->intra_mv_area; ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence); num_clusters = ps_blk_16x16->num_clusters; intra_eval_enable = ipe_decision_precedence; inter_eval_enable = 1; #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS if(e_quality_preset >= ME_HIGH_QUALITY) { node_validity = !ps_ctb_cluster_info ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos]; children_nodes_required = !node_validity; break; } #endif #if ENABLE_4CTB_EVALUATION node_validity = (blk_8x8_mask == 0xf); #if ENABLE_CU_TREE_CULLING { cur_ctb_cu_tree_t *ps_32x32_root; switch(e_parent_blk_pos) { case POS_TL: { ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl; break; } case POS_TR: { ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr; break; } case POS_BL: { ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl; break; } case POS_BR: { ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br; break; } } if(ps_32x32_root->is_node_valid) { node_validity = node_validity && !ps_ctb_cluster_info ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos]; children_nodes_required = !node_validity; } } #endif break; #else if(ipe_decision_precedence) { S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos] .as_intra16_analyse[e_cur_blk_pos] .b1_merge_flag); S32 valid_flag = (blk_8x8_mask == 0xf); node_validity = merge_flag_16 && valid_flag; } else { node_validity = (blk_8x8_mask == 0xf); } break; #endif } case 3: { S32 blk_8x8_mask = ps_ctb_cluster_info ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos]; S32 merge_flag_16 = 
(ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos] .as_intra16_analyse[e_parent_blk_pos] .b1_merge_flag); S32 merge_flag_32 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag); intra_eval_enable = !merge_flag_16 || !merge_flag_32; inter_eval_enable = 1; children_nodes_required = 0; #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS if(e_quality_preset >= ME_HIGH_QUALITY) { node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0); break; } #endif #if ENABLE_4CTB_EVALUATION node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0); break; #else { cur_ctb_cu_tree_t *ps_32x32_root; cur_ctb_cu_tree_t *ps_16x16_root; cluster_32x32_blk_t *ps_32x32_blk; switch(e_grandparent_blk_pos) { case POS_TL: { ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl; break; } case POS_TR: { ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr; break; } case POS_BL: { ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl; break; } case POS_BR: { ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br; break; } } switch(e_parent_blk_pos) { case POS_TL: { ps_16x16_root = ps_32x32_root->ps_child_node_tl; break; } case POS_TR: { ps_16x16_root = ps_32x32_root->ps_child_node_tr; break; } case POS_BL: { ps_16x16_root = ps_32x32_root->ps_child_node_bl; break; } case POS_BR: { ps_16x16_root = ps_32x32_root->ps_child_node_br; break; } } ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos]; node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) && ((!ps_32x32_root->is_node_valid) || (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) || (!ps_16x16_root->is_node_valid)); break; } #endif } } /* Fill the current cu_tree node */ ps_cu_tree->is_node_valid = node_validity; ps_cu_tree->u1_intra_eval_enable = intra_eval_enable; ps_cu_tree->u1_inter_eval_enable = inter_eval_enable; if(children_nodes_required) { tree_depth++; hme_populate_cu_tree( ps_ctb_cluster_info, 
ps_cu_tree->ps_child_node_tl, tree_depth, e_quality_preset, e_parent_blk_pos, e_cur_blk_pos, POS_TL); hme_populate_cu_tree( ps_ctb_cluster_info, ps_cu_tree->ps_child_node_tr, tree_depth, e_quality_preset, e_parent_blk_pos, e_cur_blk_pos, POS_TR); hme_populate_cu_tree( ps_ctb_cluster_info, ps_cu_tree->ps_child_node_bl, tree_depth, e_quality_preset, e_parent_blk_pos, e_cur_blk_pos, POS_BL); hme_populate_cu_tree( ps_ctb_cluster_info, ps_cu_tree->ps_child_node_br, tree_depth, e_quality_preset, e_parent_blk_pos, e_cur_blk_pos, POS_BR); } } /** ******************************************************************************** * @fn void hme_analyse_mv_clustering * ( * search_results_t *ps_search_results, * ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb, * cur_ctb_cu_tree_t *ps_cu_tree * ) * * @brief Implementation for the clustering algorithm * * @param[in] ps_search_results: structure containing 16x16 block results * * @param[in] ps_cur_ipe_ctb : output container for ipe analyses * * @param[out] ps_cu_tree : represents CU tree used in CU recursion * * @return None ******************************************************************************** */ void hme_analyse_mv_clustering( search_results_t *ps_search_results, inter_cu_results_t *ps_16x16_cu_results, inter_cu_results_t *ps_8x8_cu_results, ctb_cluster_info_t *ps_ctb_cluster_info, S08 *pi1_future_list, S08 *pi1_past_list, S32 bidir_enabled, ME_QUALITY_PRESETS_T e_quality_preset) { cluster_16x16_blk_t *ps_blk_16x16; cluster_32x32_blk_t *ps_blk_32x32; cluster_64x64_blk_t *ps_blk_64x64; part_type_results_t *ps_best_result; pu_result_t *aps_part_result[MAX_NUM_PARTS]; pu_result_t *aps_inferior_parts[MAX_NUM_PARTS]; PART_ID_T e_part_id; PART_TYPE_T e_part_type; S32 enable_64x64_merge; S32 i, j, k; S32 mvx, mvy; S32 num_parts; S32 ref_idx; S32 ai4_pred_mode[MAX_NUM_PARTS]; S32 num_32x32_merges = 0; /*****************************************/ /*****************************************/ /********* Enter ye who is HQ ************/ 
/*****************************************/ /*****************************************/ ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk; /* Initialise data in each of the clusters */ for(i = 0; i < 16; i++) { ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i]; #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS if(e_quality_preset < ME_HIGH_QUALITY) { hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled); } else { ps_blk_16x16->best_inter_cost = 0; ps_blk_16x16->intra_mv_area = 0; } #else hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled); #endif } for(i = 0; i < 4; i++) { ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i]; #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS if(e_quality_preset < ME_HIGH_QUALITY) { hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled); } else { ps_blk_32x32->best_inter_cost = 0; ps_blk_32x32->intra_mv_area = 0; } #else hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled); #endif } #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS if(e_quality_preset < ME_HIGH_QUALITY) { hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled); } else { ps_blk_64x64->best_inter_cost = 0; ps_blk_64x64->intra_mv_area = 0; } #else hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled); #endif /* Initialise data for all nodes in the CU tree */ hme_build_cu_tree( ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA); if(e_quality_preset >= ME_HIGH_QUALITY) { memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08)); } #if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8 return; #endif for(i = 0; i < 16; i++) { S32 blk_8x8_mask; S32 is_16x16_blk_valid; S32 num_clusters_updated; S32 num_clusters; blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i]; ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i]; is_16x16_blk_valid = (blk_8x8_mask == 0xf); if(is_16x16_blk_valid) { /* Use 8x8 data when 16x16 CU is split */ if(ps_search_results[i].u1_split_flag) { S32 blk_8x8_idx = i << 2; num_parts = 4; e_part_type = PRT_NxN; for(j = 0; 
j < num_parts; j++, blk_8x8_idx++) { /* Only 2Nx2N partition supported for 8x8 block */ ASSERT( ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type == ((PART_TYPE_T)PRT_2Nx2N)); aps_part_result[j] = &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0]; aps_inferior_parts[j] = &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0]; ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode); } } else { ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0]; e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type; num_parts = gau1_num_parts_in_part_type[e_part_type]; for(j = 0; j < num_parts; j++) { aps_part_result[j] = &ps_best_result->as_pu_results[j]; aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j]; ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode); } ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0; } for(j = 0; j < num_parts; j++) { pu_result_t *ps_part_result = aps_part_result[j]; S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1); e_part_id = ge_part_type_to_part_id[e_part_type][j]; /* Skip clustering if best mode is intra */ if((ps_part_result->pu.b1_intra_flag)) { ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id]; ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost; continue; } else { ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost; } #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS if(e_quality_preset >= ME_HIGH_QUALITY) { continue; } #endif for(k = 0; k < num_mvs; k++) { mv_t *ps_mv; pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv; S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0); ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv); mvx = ps_mv->i2_mvx; mvy = ps_mv->i2_mvy; ref_idx = (is_l0_mv) ? 
pi1_past_list[ps_pu_mv->i1_l0_ref_idx] : pi1_future_list[ps_pu_mv->i1_l1_ref_idx]; num_clusters = ps_blk_16x16->num_clusters; hme_find_and_update_clusters( ps_blk_16x16->as_cluster_data, &(ps_blk_16x16->num_clusters), mvx, mvy, ref_idx, ps_part_result->i4_sdi, e_part_id, (ai4_pred_mode[j] == 2)); num_clusters_updated = (ps_blk_16x16->num_clusters); ps_blk_16x16->au1_num_clusters[ref_idx] += (num_clusters_updated - num_clusters); } } } } /* Search for 32x32 clusters */ for(i = 0; i < 4; i++) { S32 num_clusters_merged; S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0; if(is_32x32_blk_valid) { ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i]; ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2]; #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS if(e_quality_preset >= ME_HIGH_QUALITY) { for(j = 0; j < 4; j++, ps_blk_16x16++) { ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area; ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost; } continue; } #endif hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16); if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)) { num_clusters_merged = hme_try_merge_clusters_blksize_gt_16( ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters)); if(num_clusters_merged) { ps_blk_32x32->num_clusters -= num_clusters_merged; UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32); } } } } #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS /* Eliminate outlier 32x32 clusters */ if(e_quality_preset < ME_HIGH_QUALITY) #endif { hme_boot_out_outlier(ps_ctb_cluster_info, 32); /* Find best_uni_ref and best_alt_ref */ hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32); } /* Populate the CU tree for depths 1 and higher */ { cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root; cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl; cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr; cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl; cur_ctb_cu_tree_t 
*ps_br = ps_tree_root->ps_child_node_br; hme_populate_cu_tree( ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL); num_32x32_merges += (ps_tl->is_node_valid == 1); hme_populate_cu_tree( ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR); num_32x32_merges += (ps_tr->is_node_valid == 1); hme_populate_cu_tree( ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL); num_32x32_merges += (ps_bl->is_node_valid == 1); hme_populate_cu_tree( ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR); num_32x32_merges += (ps_br->is_node_valid == 1); } #if !ENABLE_4CTB_EVALUATION if(e_quality_preset < ME_HIGH_QUALITY) { enable_64x64_merge = (num_32x32_merges >= 3); } #else if(e_quality_preset < ME_HIGH_QUALITY) { enable_64x64_merge = 1; } #endif #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS if(e_quality_preset >= ME_HIGH_QUALITY) { enable_64x64_merge = 1; } #else if(e_quality_preset >= ME_HIGH_QUALITY) { enable_64x64_merge = (num_32x32_merges >= 3); } #endif if(enable_64x64_merge) { S32 num_clusters_merged; ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0]; #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS if(e_quality_preset >= ME_HIGH_QUALITY) { for(j = 0; j < 4; j++, ps_blk_32x32++) { ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area; ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost; } } else #endif { hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32); if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)) { num_clusters_merged = hme_try_merge_clusters_blksize_gt_16( ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters)); if(num_clusters_merged) { ps_blk_64x64->num_clusters -= num_clusters_merged; UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64); } } } #if !ENABLE_4CTB_EVALUATION if(e_quality_preset < ME_HIGH_QUALITY) { S32 best_inter_cost = ps_blk_64x64->best_inter_cost; S32 best_intra_cost = 
((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost + ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0) ? MAX_32BIT_VAL : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost + ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16); S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost; S32 cost_differential = (best_inter_cost - best_cost); enable_64x64_merge = ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)); } #endif } if(enable_64x64_merge) { #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS if(e_quality_preset < ME_HIGH_QUALITY) #endif { hme_boot_out_outlier(ps_ctb_cluster_info, 64); hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64); } hme_populate_cu_tree( ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, e_quality_preset, POS_NA, POS_NA, POS_NA); } } #endif static __inline void hme_merge_prms_init( hme_merge_prms_t *ps_prms, layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, me_frm_ctxt_t *ps_me_ctxt, range_prms_t *ps_range_prms_rec, range_prms_t *ps_range_prms_inp, mv_grid_t **pps_mv_grid, inter_ctb_prms_t *ps_inter_ctb_prms, S32 i4_num_pred_dir, S32 i4_32x32_id, BLK_SIZE_T e_blk_size, ME_QUALITY_PRESETS_T e_me_quality_presets) { S32 i4_use_rec = ps_refine_prms->i4_use_rec_in_fpel; S32 i4_cu_16x16 = (BLK_32x32 == e_blk_size) ? 
(i4_32x32_id << 2) : 0; /* Currently not enabling segmentation info from prev layers */ ps_prms->i4_seg_info_avail = 0; ps_prms->i4_part_mask = 0; /* Number of reference pics in which to do merge */ ps_prms->i4_num_ref = i4_num_pred_dir; /* Layer ctxt info */ ps_prms->ps_layer_ctxt = ps_curr_layer; ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms; /* Top left, top right, bottom left and bottom right 16x16 units */ if(BLK_32x32 == e_blk_size) { ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16]; ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 1]; ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 2]; ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 3]; /* Merge results stored here */ ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id]; /* This could be lesser than the number of 16x16results generated*/ /* For now, keeping it to be same */ ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results; ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4]; ps_prms->ps_results_grandchild = NULL; } else { ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0]; ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1]; ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2]; ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3]; /* Merge results stored here */ ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64; ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results; ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0]; ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16; } if(i4_use_rec) { WORD32 ref_ctr; for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++) { ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_rec[ref_ctr]; } } else { WORD32 ref_ctr; for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++) { ps_prms->aps_mv_range[ref_ctr] 
= &ps_range_prms_inp[ref_ctr]; } } ps_prms->i4_use_rec = i4_use_rec; ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed; ps_prms->pps_mv_grid = pps_mv_grid; ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size; ps_prms->e_quality_preset = e_me_quality_presets; ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list; ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list; ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info; } /** ******************************************************************************** * @fn void hme_refine(me_ctxt_t *ps_ctxt, * refine_layer_prms_t *ps_refine_prms) * * @brief Top level entry point for refinement ME * * @param[in,out] ps_ctxt: ME Handle * * @param[in] ps_refine_prms : refinement layer prms * * @return None ******************************************************************************** */ void hme_refine( me_ctxt_t *ps_thrd_ctxt, refine_prms_t *ps_refine_prms, PF_EXT_UPDATE_FXN_T pf_ext_update_fxn, layer_ctxt_t *ps_coarse_layer, multi_thrd_ctxt_t *ps_multi_thrd_ctxt, S32 lyr_job_type, S32 thrd_id, S32 me_frm_id, pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input) { inter_ctb_prms_t s_common_frm_prms; BLK_SIZE_T e_search_blk_size, e_result_blk_size; WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL; me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id]; ME_QUALITY_PRESETS_T e_me_quality_presets = ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets; WORD32 num_rows_proc = 0; WORD32 num_act_ref_pics; WORD16 i2_prev_enc_frm_max_mv_y; WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p; /*************************************************************************/ /* Complexity of search: Low to High */ /*************************************************************************/ SEARCH_COMPLEXITY_T e_search_complexity; /*************************************************************************/ /* to store the PU results which are passed to the decide_part_types */ /* as input prms. 
Multiplied by 4 as the max number of Ref in a List is 4*/ /*************************************************************************/ pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST]; inter_pu_results_t as_inter_pu_results[4]; inter_pu_results_t *ps_pu_results = as_inter_pu_results; /*************************************************************************/ /* Config parameter structures for varius ME submodules */ /*************************************************************************/ hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr; hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br; hme_merge_prms_t s_merge_prms_64x64; hme_search_prms_t s_search_prms_blk; mvbank_update_prms_t s_mv_update_prms; hme_ctb_prms_t s_ctb_prms; hme_subpel_prms_t s_subpel_prms; fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt; ctb_cluster_info_t *ps_ctb_cluster_info; fpel_srch_cand_init_data_t s_srch_cand_init_data; /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */ S32 en_merge_32x32; /* 5 lsb's specify whether or not merge algorithm is required */ /* to be executed or not. Relevant only in PQ. Ought to be */ /* used in conjunction with en_merge_32x32 and */ /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */ /* required when all children are deemed to be intras */ S32 en_merge_execution; /*************************************************************************/ /* All types of search candidates for predictor based search. 
*/ /*************************************************************************/ S32 num_init_candts = 0; S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1; search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS]; search_node_t as_top_neighbours[4], as_left_neighbours[3]; pf_get_wt_inp fp_get_wt_inp; search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9]; U32 au4_unique_node_map[MAP_X_MAX * 2]; /* Controls the boundary attributes of CTB, whether it has 64x64 or not */ ctb_boundary_attrs_t *ps_ctb_bound_attrs; /*************************************************************************/ /* points ot the search results for the blk level search (8x8/16x16) */ /*************************************************************************/ search_results_t *ps_search_results; /*************************************************************************/ /* Coordinates */ /*************************************************************************/ S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb; S32 pos_x, pos_y; S32 blk_id_in_full_ctb; /*************************************************************************/ /* Related to dimensions of block being searched and pic dimensions */ /*************************************************************************/ S32 blk_4x4_to_16x16; S32 blk_wd, blk_ht, blk_size_shift; S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb; S32 num_results_prev_layer; /*************************************************************************/ /* Size of a basic unit for this layer. For non encode layers, we search */ /* in block sizes of 8x8. For encode layers, though we search 16x16s the */ /* basic unit size is the ctb size. 
*/ /*************************************************************************/ S32 unit_size; /*************************************************************************/ /* Local variable storing results of any 4 CU merge to bigger CU */ /*************************************************************************/ CU_MERGE_RESULT_T e_merge_result; /*************************************************************************/ /* This mv grid stores results during and after fpel search, during */ /* merge, subpel and bidirect refinements stages. 2 instances of this are*/ /* meant for the 2 directions of search (l0 and l1). */ /*************************************************************************/ mv_grid_t *aps_mv_grid[2]; /*************************************************************************/ /* Pointers to context in current and coarser layers */ /*************************************************************************/ layer_ctxt_t *ps_curr_layer, *ps_prev_layer; /*************************************************************************/ /* to store mv range per blk, and picture limit, allowed search range */ /* range prms in hpel and qpel units as well */ /*************************************************************************/ range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF]; range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF]; range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF]; /*************************************************************************/ /* These variables are used to track number of references at different */ /* stages of ME. 
*/ /*************************************************************************/ S32 i4_num_pred_dir; S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer; S32 lambda_recon = ps_refine_prms->lambda_recon; /* Counts successful merge to 32x32 every CTB (0-4) */ S32 merge_count_32x32; S32 ai4_id_coloc[14], ai4_id_Z[2]; U08 au1_search_candidate_list_index[2]; S32 ai4_num_coloc_cands[2]; U08 u1_pred_dir, u1_pred_dir_ctr; /*************************************************************************/ /* Input pointer and stride */ /*************************************************************************/ U08 *pu1_inp; S32 i4_inp_stride; S32 end_of_frame; S32 num_sync_units_in_row, num_sync_units_in_tile; /*************************************************************************/ /* Indicates whether the all 4 8x8 blks are valid in the 16x16 blk in the*/ /* encode layer. If not 15, then 1 or more 8x8 blks not valid. Means that*/ /* we need to stop merges and force 8x8 CUs for that 16x16 blk */ /*************************************************************************/ S32 blk_8x8_mask; S32 ai4_blk_8x8_mask[16]; U08 au1_is_64x64Blk_noisy[1]; U08 au1_is_32x32Blk_noisy[4]; U08 au1_is_16x16Blk_noisy[16]; ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list = ps_thrd_ctxt->ps_cmn_utils_optimised_function_list; ihevce_me_optimised_function_list_t *ps_me_optimised_function_list = ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list); ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1); /*************************************************************************/ /* Pointers to current and coarse layer are needed for projection */ /* Pointer to prev layer are needed for other candts like coloc */ /*************************************************************************/ ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id]; ps_prev_layer = hme_get_past_layer_ctxt( ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, 
ps_multi_thrd_ctxt->i4_num_me_frm_pllel); num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref; /* Function pointer is selected based on the C vc X86 macro */ fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb; i4_inp_stride = ps_curr_layer->i4_inp_stride; i4_pic_wd = ps_curr_layer->i4_wd; i4_pic_ht = ps_curr_layer->i4_ht; e_search_complexity = ps_refine_prms->e_search_complexity; end_of_frame = 0; /* This points to all the initial candts */ ps_search_candts = &as_search_candts[0]; /* mv grid being huge strucutre is part of context */ aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0]; aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1]; /*************************************************************************/ /* If the current layer is encoded (since it may be multicast or final */ /* layer (finest)), then we use 16x16 blk size with some selected parts */ /* If the current layer is not encoded, then we use 8x8 blk size, with */ /* enable or disable of 4x4 partitions depending on the input prms */ /*************************************************************************/ e_search_blk_size = BLK_16x16; blk_wd = blk_ht = 16; blk_size_shift = 4; e_result_blk_size = BLK_8x8; s_mv_update_prms.i4_shift = 1; if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4) { blk_4x4_to_16x16 = 1; } else { blk_4x4_to_16x16 = 0; } unit_size = 1 << ps_ctxt->log_ctb_size; s_search_prms_blk.i4_inp_stride = unit_size; /* This is required to properly update the layer mv bank */ s_mv_update_prms.e_search_blk_size = e_search_blk_size; s_search_prms_blk.e_blk_size = e_search_blk_size; /*************************************************************************/ /* If current layer is explicit, then the number of ref frames are to */ /* be same as previous layer. 
Else it will be 2 */ /*************************************************************************/ i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref; i4_num_pred_dir = (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) + 1; #if USE_MODIFIED == 1 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified; #else s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed; #endif i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer); if(i4_num_ref_prev_layer <= 2) { i4_num_ref_each_dir = 1; } else { i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1; } s_mv_update_prms.i4_num_ref = i4_num_pred_dir; s_mv_update_prms.i4_num_results_to_store = MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref : (i4_num_act_ref_l0 > 1) + 1, ps_refine_prms->i4_num_results_per_part); /*************************************************************************/ /* Initialization of merge params for 16x16 to 32x32 merge. */ /* There are 4 32x32 units in a CTB, so 4 param structures initialized */ /*************************************************************************/ { hme_merge_prms_t *aps_merge_prms[4]; aps_merge_prms[0] = &s_merge_prms_32x32_tl; aps_merge_prms[1] = &s_merge_prms_32x32_tr; aps_merge_prms[2] = &s_merge_prms_32x32_bl; aps_merge_prms[3] = &s_merge_prms_32x32_br; for(i = 0; i < 4; i++) { hme_merge_prms_init( aps_merge_prms[i], ps_curr_layer, ps_refine_prms, ps_ctxt, as_range_prms_rec, as_range_prms_inp, &aps_mv_grid[0], &s_common_frm_prms, i4_num_pred_dir, i, BLK_32x32, e_me_quality_presets); } } /*************************************************************************/ /* Initialization of merge params for 32x32 to 64x64 merge. 
*/ /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB */ /*************************************************************************/ { hme_merge_prms_init( &s_merge_prms_64x64, ps_curr_layer, ps_refine_prms, ps_ctxt, as_range_prms_rec, as_range_prms_inp, &aps_mv_grid[0], &s_common_frm_prms, i4_num_pred_dir, 0, BLK_64x64, e_me_quality_presets); } /* Pointers to cu_results are initialised here */ { WORD32 i; ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results; for(i = 0; i < 4; i++) { ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i]; } for(i = 0; i < 16; i++) { ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i]; } } /*************************************************************************/ /* SUBPEL Params initialized here */ /*************************************************************************/ { s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0]; s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0]; s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64; s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results; s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results; s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results; s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine; s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine; s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel; s_subpel_prms.i4_inp_stride = unit_size; s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N; s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN; s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold; s_subpel_prms.pf_qpel_interp = 
ps_me_optimised_function_list->pf_qpel_interp_avg_generic; { WORD32 ref_ctr; for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++) { s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr]; s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr]; } } s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck; #if USE_MODIFIED == 0 s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed; #else s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified; #endif s_subpel_prms.e_me_quality_presets = e_me_quality_presets; /* BI Refinement done only if this field is 1 */ s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled; s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past; s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1; s_subpel_prms.u1_max_num_subpel_refine_centers = ps_refine_prms->u1_max_num_subpel_refine_centers; } /* inter_ctb_prms_t struct initialisation */ { inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms; hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms; ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0; ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1; ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc; ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth; ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets; ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled; ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride; ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref; ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd; ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride; ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ps_inter_ctb_prms->u1_num_active_ref_l1 = 
ps_ctxt->s_frm_prms.u1_num_active_ref_l1; ps_inter_ctb_prms->i4_lamda = lambda_recon; ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift; ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8; ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt; ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list; ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list; ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance; ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands = ps_refine_prms->u1_max_2nx2n_tu_recur_cands; } for(i = 0; i < MAX_INIT_CANDTS; i++) { ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i]; ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i]; INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0); } num_act_ref_pics = ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1; if(num_act_ref_pics) { hme_search_cand_data_init( ai4_id_Z, ai4_id_coloc, ai4_num_coloc_cands, au1_search_candidate_list_index, i4_num_act_ref_l0, i4_num_act_ref_l1, ps_ctxt->s_frm_prms.bidir_enabled, blk_4x4_to_16x16); } if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1)) { ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0]; ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1]; } else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1)) { ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0]; } for(i = 0; i < 3; i++) { search_node_t *ps_search_node; ps_search_node = &as_left_neighbours[i]; INIT_SEARCH_NODE(ps_search_node, 0); ps_search_node = &as_top_neighbours[i]; INIT_SEARCH_NODE(ps_search_node, 0); } INIT_SEARCH_NODE(&as_top_neighbours[3], 0); as_left_neighbours[2].u1_is_avail = 0; /*************************************************************************/ /* Initialize all the search results structure here. 
We update all the */ /* search results to default values, and configure things like blk sizes */ /*************************************************************************/ if(num_act_ref_pics) { S32 i4_x, i4_y; /* 16x16 results */ for(i = 0; i < 16; i++) { search_results_t *ps_search_results; S32 pred_lx; ps_search_results = &ps_ctxt->as_search_results_16x16[i]; i4_x = (S32)gau1_encode_to_raster_x[i]; i4_y = (S32)gau1_encode_to_raster_y[i]; i4_x <<= 4; i4_y <<= 4; hme_init_search_results( ps_search_results, i4_num_pred_dir, ps_refine_prms->i4_num_fpel_results, ps_refine_prms->i4_num_results_per_part, e_search_blk_size, i4_x, i4_y, &ps_ctxt->au1_is_past[0]); for(pred_lx = 0; pred_lx < 2; pred_lx++) { pred_ctxt_t *ps_pred_ctxt; ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; hme_init_pred_ctxt_encode( ps_pred_ctxt, ps_search_results, ps_search_candts[ai4_id_coloc[0]].ps_search_node, ps_search_candts[ai4_id_Z[0]].ps_search_node, aps_mv_grid[pred_lx], pred_lx, lambda_recon, ps_refine_prms->lambda_q_shift, &ps_ctxt->apu1_ref_bits_tlu_lc[0], &ps_ctxt->ai2_ref_scf[0]); } } for(i = 0; i < 4; i++) { search_results_t *ps_search_results; S32 pred_lx; ps_search_results = &ps_ctxt->as_search_results_32x32[i]; i4_x = (S32)gau1_encode_to_raster_x[i]; i4_y = (S32)gau1_encode_to_raster_y[i]; i4_x <<= 5; i4_y <<= 5; hme_init_search_results( ps_search_results, i4_num_pred_dir, ps_refine_prms->i4_num_32x32_merge_results, ps_refine_prms->i4_num_results_per_part, BLK_32x32, i4_x, i4_y, &ps_ctxt->au1_is_past[0]); for(pred_lx = 0; pred_lx < 2; pred_lx++) { pred_ctxt_t *ps_pred_ctxt; ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; hme_init_pred_ctxt_encode( ps_pred_ctxt, ps_search_results, ps_search_candts[ai4_id_coloc[0]].ps_search_node, ps_search_candts[ai4_id_Z[0]].ps_search_node, aps_mv_grid[pred_lx], pred_lx, lambda_recon, ps_refine_prms->lambda_q_shift, &ps_ctxt->apu1_ref_bits_tlu_lc[0], &ps_ctxt->ai2_ref_scf[0]); } } { search_results_t *ps_search_results; S32 
pred_lx; ps_search_results = &ps_ctxt->s_search_results_64x64; hme_init_search_results( ps_search_results, i4_num_pred_dir, ps_refine_prms->i4_num_64x64_merge_results, ps_refine_prms->i4_num_results_per_part, BLK_64x64, 0, 0, &ps_ctxt->au1_is_past[0]); for(pred_lx = 0; pred_lx < 2; pred_lx++) { pred_ctxt_t *ps_pred_ctxt; ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; hme_init_pred_ctxt_encode( ps_pred_ctxt, ps_search_results, ps_search_candts[ai4_id_coloc[0]].ps_search_node, ps_search_candts[ai4_id_Z[0]].ps_search_node, aps_mv_grid[pred_lx], pred_lx, lambda_recon, ps_refine_prms->lambda_q_shift, &ps_ctxt->apu1_ref_bits_tlu_lc[0], &ps_ctxt->ai2_ref_scf[0]); } } } /* Initialise the structure used in clustering */ if(ME_PRISTINE_QUALITY == e_me_quality_presets) { ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info; ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16; ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32; ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64; ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask; ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold; ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep; ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16; } /*********************************************************************/ /* Initialize the dyn. search range params. for each reference index */ /* in current layer ctxt */ /*********************************************************************/ /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) { WORD32 ref_ctr; /* set no. 
of act ref in L0 for further use at frame level */ ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++) { INIT_DYN_SEARCH_PRMS( &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr], ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]); } } /*************************************************************************/ /* Now that the candidates have been ordered, to choose the right number */ /* of initial candidates. */ /*************************************************************************/ if(blk_4x4_to_16x16) { if(i4_num_ref_prev_layer > 2) { if(e_search_complexity == SEARCH_CX_LOW) num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); else if(e_search_complexity == SEARCH_CX_MED) num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); else if(e_search_complexity == SEARCH_CX_HIGH) num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); else ASSERT(0); } else if(i4_num_ref_prev_layer == 2) { if(e_search_complexity == SEARCH_CX_LOW) num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); else if(e_search_complexity == SEARCH_CX_MED) num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); else if(e_search_complexity == SEARCH_CX_HIGH) num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); else ASSERT(0); } else { if(e_search_complexity == SEARCH_CX_LOW) num_init_candts = 5; else if(e_search_complexity == SEARCH_CX_MED) num_init_candts = 12; else if(e_search_complexity == SEARCH_CX_HIGH) num_init_candts = 19; else ASSERT(0); } } else { if(i4_num_ref_prev_layer > 2) { if(e_search_complexity == SEARCH_CX_LOW) num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); else if(e_search_complexity == SEARCH_CX_MED) num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); else if(e_search_complexity == SEARCH_CX_HIGH) num_init_candts = 18 * 
(!ps_ctxt->s_frm_prms.bidir_enabled + 1); else ASSERT(0); } else if(i4_num_ref_prev_layer == 2) { if(e_search_complexity == SEARCH_CX_LOW) num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); else if(e_search_complexity == SEARCH_CX_MED) num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); else if(e_search_complexity == SEARCH_CX_HIGH) num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); else ASSERT(0); } else { if(e_search_complexity == SEARCH_CX_LOW) num_init_candts = 5; else if(e_search_complexity == SEARCH_CX_MED) num_init_candts = 11; else if(e_search_complexity == SEARCH_CX_HIGH) num_init_candts = 16; else ASSERT(0); } } /*************************************************************************/ /* The following search parameters are fixed throughout the search across*/ /* all blks. So these are configured outside processing loop */ /*************************************************************************/ s_search_prms_blk.i4_num_init_candts = num_init_candts; s_search_prms_blk.i4_start_step = 1; s_search_prms_blk.i4_use_satd = 0; s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel; /* we use recon only for encoded layers, otherwise it is not available */ s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel; s_search_prms_blk.ps_search_candts = ps_search_candts; if(s_search_prms_blk.i4_use_rec) { WORD32 ref_ctr; for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++) s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr]; } else { WORD32 ref_ctr; for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++) s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr]; } /*************************************************************************/ /* Initialize coordinates. Meaning as follows */ /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */ /* blk_y : same as above, y coord. 
*/ /* num_blks_in_this_ctb : number of blks in this given ctb that starts */ /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */ /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left */ /* corner of the picture. Always multiple of 64. */ /* blk_id_in_ctb : encode order id of the blk in the ctb. */ /*************************************************************************/ blk_y = 0; blk_id_in_ctb = 0; i4_ctb_y = 0; /*************************************************************************/ /* Picture limit on all 4 sides. This will be used to set mv limits for */ /* every block given its coordinate. Note this assumes that the min amt */ /* of padding to right of pic is equal to the blk size. If we go all the */ /* way up to 64x64, then the min padding on right side of picture should */ /* be 64, and also on bottom side of picture. */ /*************************************************************************/ SET_PIC_LIMIT( s_pic_limit_inp, ps_curr_layer->i4_pad_x_rec, ps_curr_layer->i4_pad_y_rec, ps_curr_layer->i4_wd, ps_curr_layer->i4_ht, s_search_prms_blk.i4_num_steps_post_refine); SET_PIC_LIMIT( s_pic_limit_rec, ps_curr_layer->i4_pad_x_rec, ps_curr_layer->i4_pad_y_rec, ps_curr_layer->i4_wd, ps_curr_layer->i4_ht, s_search_prms_blk.i4_num_steps_post_refine); /*************************************************************************/ /* set the MV limit per ref. pic. */ /* - P pic. : Based on the config params. */ /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. 
*/ /*************************************************************************/ hme_set_mv_limit_using_dvsr_data( ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics); s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands; s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1; s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer; s_srch_cand_init_data.ps_curr_layer = ps_curr_layer; s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts; s_srch_cand_init_data.ps_search_cands = ps_search_candts; s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store; s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0; s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1; s_srch_cand_init_data.e_search_blk_size = e_search_blk_size; while(0 == end_of_frame) { job_queue_t *ps_job; frm_ctb_ctxt_t *ps_frm_ctb_prms; ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb; WORD32 i4_max_mv_x_in_ctb; WORD32 i4_max_mv_y_in_ctb; void *pv_dep_mngr_encloop_dep_me; WORD32 offset_val, check_dep_pos, set_dep_pos; WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0; pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me; ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms; /* Get the current row from the job queue */ ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job( ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id); /* If all rows are done, set the end of process flag to 1, */ /* and the current row to -1 */ if(NULL == ps_job) { blk_y = -1; i4_ctb_y = -1; tile_col_idx = -1; end_of_frame = 1; continue; } /* set the output dependency after picking up the row */ ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id); /* Obtain the current row's details from the job */ { ihevce_tile_params_t *ps_col_tile_params; 
i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no; /* Obtain the current colum tile index from the job */ tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx; /* in encode layer block are 16x16 and CTB is 64 x 64 */ /* note if ctb is 32x32 the this calc needs to be changed */ num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >> ps_ctxt->log_ctb_size; /* The tile parameter for the col. idx. Use only the properties which is same for all the bottom tiles like width, start_x, etc. Don't use height, start_y, etc. */ ps_col_tile_params = ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx); /* in encode layer block are 16x16 and CTB is 64 x 64 */ /* note if ctb is 32x32 the this calc needs to be changed */ num_sync_units_in_tile = (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >> ps_ctxt->log_ctb_size; i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x; i4_ctb_x = i4_first_ctb_x; if(!num_act_ref_pics) { for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile); i4_ctb_x++) { S32 blk_i = 0, blk_j = 0; /* set the dependency for the corresponding row in enc loop */ ihevce_dmgr_set_row_row_sync( pv_dep_mngr_encloop_dep_me, (i4_ctb_x + 1), i4_ctb_y, tile_col_idx /* Col Tile No. */); } continue; } /* increment the number of rows proc */ num_rows_proc++; /* Set Variables for Dep. 
Checking and Setting */ set_dep_pos = i4_ctb_y + 1; if(i4_ctb_y > 0) { offset_val = 2; check_dep_pos = i4_ctb_y - 1; } else { /* First row should run without waiting */ offset_val = -1; check_dep_pos = 0; } /* row ctb out pointer */ ps_ctxt->ps_ctb_analyse_curr_row = ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz; /* Row level CU Tree buffer */ ps_ctxt->ps_cu_tree_curr_row = ps_ctxt->ps_cu_tree_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE; ps_ctxt->ps_me_ctb_data_curr_row = ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz; } /* This flag says the CTB under processing is at the start of tile in horz dir.*/ left_ctb_in_diff_tile = 1; /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var, */ /* the shift value will be passed onto the functions wherever inv_wt isused so that inv_wt is appropriately shift and multiplied */ { S32 i4_ref_id, i4_bits_req; for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1); i4_ref_id++) { GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]); if(i4_bits_req > 12) { ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12); } else { ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0; } } s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val; } /* if non-encode layer then i4_ctb_x will be same as blk_x */ /* loop over all the units is a row */ for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile); i4_ctb_x++) { ihevce_ctb_noise_params *ps_ctb_noise_params = &ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params; s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6; s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6; ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6; ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6; /* Initialize ptr to current IPE CTB */ ps_cur_ipe_ctb = 
ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz; { ps_ctb_bound_attrs = get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt); en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag; num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb; } /* Block to initialise pointers to part_type_results_t */ /* in each size-specific inter_cu_results_t */ { WORD32 i; for(i = 0; i < 64; i++) { ps_ctxt->as_cu8x8_results[i].ps_best_results = ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x] .as_8x8_block_data[i] .as_best_results; ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0; } for(i = 0; i < 16; i++) { ps_ctxt->as_cu16x16_results[i].ps_best_results = ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results; ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0; } for(i = 0; i < 4; i++) { ps_ctxt->as_cu32x32_results[i].ps_best_results = ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x] .as_32x32_block_data[i] .as_best_results; ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0; } ps_ctxt->s_cu64x64_results.ps_best_results = ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results; ps_ctxt->s_cu64x64_results.u1_num_best_results = 0; } if(ME_PRISTINE_QUALITY == e_me_quality_presets) { ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32; ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb; ps_ctb_cluster_info->ps_cu_tree_root = ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE); ps_ctb_cluster_info->nodes_created_in_cu_tree = 1; } if(ME_PRISTINE_QUALITY != e_me_quality_presets) { S32 i4_nodes_created_in_cu_tree = 1; ihevce_cu_tree_init( (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)), (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)), &i4_nodes_created_in_cu_tree, 0, POS_NA, POS_NA, POS_NA); } memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32)); if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb) { S32 j; 
ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb; ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row; lambda_recon = hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb); lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f); for(i = 0; i < 4; i++) { ps_search_results = &ps_ctxt->as_search_results_32x32[i]; for(j = 0; j < 2; j++) { ps_search_results->as_pred_ctxt[j].lambda = lambda_recon; } } ps_search_results = &ps_ctxt->s_search_results_64x64; for(j = 0; j < 2; j++) { ps_search_results->as_pred_ctxt[j].lambda = lambda_recon; } s_common_frm_prms.i4_lamda = lambda_recon; } else { lambda_recon = ps_refine_prms->lambda_recon; } /*********************************************************************/ /* replicate the inp buffer at blk or ctb level for each ref id, */ /* Instead of searching with wk * ref(k), we search with Ik = I / wk */ /* thereby avoiding a bloat up of memory. If we did all references */ /* weighted pred, we will end up with a duplicate copy of each ref */ /* at each layer, since we need to preserve the original reference. */ /* ToDo: Need to observe performance with this mechanism and compare */ /* with case where ref is weighted. */ /*********************************************************************/ fp_get_wt_inp( ps_curr_layer, &ps_ctxt->s_wt_pred, unit_size, s_common_frm_prms.i4_ctb_x_off, s_common_frm_prms.i4_ctb_y_off, unit_size, ps_ctxt->num_ref_future + ps_ctxt->num_ref_past, ps_ctxt->i4_wt_pred_enable_flag); if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled) { #if TEMPORAL_NOISE_DETECT { WORD32 had_block_size = 16; WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64) ? 64 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off; WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64) ? 
64 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off; WORD32 num_pred_dir = i4_num_pred_dir; WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off; WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off; WORD32 i; WORD32 noise_detected; WORD32 ctb_size; WORD32 num_comp_had_blocks; WORD32 noisy_block_cnt; WORD32 index_8x8_block; WORD32 num_8x8_in_ctb_row; WORD32 ht_offset; WORD32 wd_offset; WORD32 block_ht; WORD32 block_wd; WORD32 num_horz_blocks; WORD32 num_vert_blocks; WORD32 mean; UWORD32 variance_8x8; WORD32 hh_energy_percent; /* variables to hold the constant values. The variable values held are decided by the HAD block size */ WORD32 min_noisy_block_cnt; WORD32 min_coeffs_above_avg; WORD32 min_coeff_avg_energy; /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. block */ WORD32 i4_cu_x_off, i4_cu_y_off; WORD32 is_noisy; /* intialise the variables holding the constants */ if(had_block_size == 8) { min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8; //6;// min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8; min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8; } else { min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16; //7;// min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16; min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16; } /* initialize the variables */ noise_detected = 0; noisy_block_cnt = 0; hh_energy_percent = 0; variance_8x8 = 0; block_ht = ctb_height; block_wd = ctb_width; mean = 0; ctb_size = block_ht * block_wd; //ctb_width * ctb_height; num_comp_had_blocks = ctb_size / (had_block_size * had_block_size); num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size; num_vert_blocks = block_ht / had_block_size; //ctb_height / had_block_size; ht_offset = -had_block_size; wd_offset = -had_block_size; num_8x8_in_ctb_row = block_wd / 8; // number of 8x8 in this ctb for(i = 0; i < num_comp_had_blocks; i++) { if(i % num_horz_blocks == 0) { wd_offset = -had_block_size; ht_offset += had_block_size; } wd_offset 
+= had_block_size; /* CU level offsets */ i4_cu_x_off = i4_x_off + (i % 4) * 16; //+ (i % 4) * 16 i4_cu_y_off = i4_y_off + (i / 4) * 16; /* if 50 % or more of the CU is noisy then the return value is 1 */ is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data( ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16); /* only if the CU is noisy then check the temporal noise detect call is made on the CU */ if(is_noisy) { index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row + (i % num_horz_blocks) * 2; noisy_block_cnt += ihevce_16x16block_temporal_noise_detect( 16, ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64) ? 64 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off, ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64) ? 64 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off, ps_ctb_noise_params, &s_srch_cand_init_data, &s_search_prms_blk, ps_ctxt, num_pred_dir, i4_num_act_ref_l0, i4_num_act_ref_l1, i4_cu_x_off, i4_cu_y_off, &ps_ctxt->s_wt_pred, unit_size, index_8x8_block, num_horz_blocks, /*num_8x8_in_ctb_row*/ 8, // this should be a variable extra i); } /* if 16x16 is noisy */ } /* loop over for all 16x16*/ if(noisy_block_cnt >= min_noisy_block_cnt) { noise_detected = 1; } /* write back the noise presence detected for the current CTB to the structure */ ps_ctb_noise_params->i4_noise_present = noise_detected; } #endif #if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled && ps_ctb_noise_params->i4_noise_present) { memset( ps_ctb_noise_params->au1_is_8x8Blk_noisy, 1, sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy)); } #endif for(i = 0; i < 16; i++) { au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data( ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16); } for(i = 0; i < 4; i++) { au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data( ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32); } for(i = 0; i < 1; i++) { 
au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data( ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64); } if(ps_ctxt->s_frm_prms.bidir_enabled && (ps_ctxt->s_frm_prms.i4_temporal_layer_id <= MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION)) { ps_ctb_noise_params->i4_noise_present = 0; memset( ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy)); } #if ME_LAMBDA_DISCOUNT_WHEN_NOISY for(i = 0; i < 4; i++) { S32 j; S32 lambda; if(au1_is_32x32Blk_noisy[i]) { lambda = lambda_recon; lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); ps_search_results = &ps_ctxt->as_search_results_32x32[i]; for(j = 0; j < 2; j++) { ps_search_results->as_pred_ctxt[j].lambda = lambda; } } } { S32 j; S32 lambda; if(au1_is_64x64Blk_noisy[0]) { lambda = lambda_recon; lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); ps_search_results = &ps_ctxt->s_search_results_64x64; for(j = 0; j < 2; j++) { ps_search_results->as_pred_ctxt[j].lambda = lambda; } } } #endif if(au1_is_64x64Blk_noisy[0]) { U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off + (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride)); hme_compute_sigmaX_and_sigmaXSquared( pu1_inp, ps_curr_layer->i4_inp_stride, ps_ctxt->au4_4x4_src_sigmaX, ps_ctxt->au4_4x4_src_sigmaXSquared, 4, 4, 64, 64, 1, 16); } else { for(i = 0; i < 4; i++) { if(au1_is_32x32Blk_noisy[i]) { U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off + (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride)); U08 u1_cu_size = 32; WORD32 i4_inp_buf_offset = (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) + ((i % 2) * u1_cu_size)); U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128; U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8; S32 i4_sigma_arr_offset = (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) + ((i % 2) * 
u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb)); hme_compute_sigmaX_and_sigmaXSquared( pu1_inp + i4_inp_buf_offset, ps_curr_layer->i4_inp_stride, ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset, ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset, 4, 4, 32, 32, 1, 16); } else { S32 j; U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8; U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2; S32 i4_16x16_blk_start_index_in_i_th_32x32_blk = (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) + ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb)); for(j = 0; j < 4; j++) { U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4; U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1; S32 i4_16x16_blk_index_in_ctb = i4_16x16_blk_start_index_in_i_th_32x32_blk + ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) + ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk); //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4); if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb]) { U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off + (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride)); U08 u1_cu_size = 16; WORD32 i4_inp_buf_offset = (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) + ((i4_16x16_blk_index_in_ctb / 4) * (u1_cu_size * ps_curr_layer->i4_inp_stride))); U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64; U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4; S32 i4_sigma_arr_offset = (((i4_16x16_blk_index_in_ctb % 4) * u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) + ((i4_16x16_blk_index_in_ctb / 4) * u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk)); hme_compute_sigmaX_and_sigmaXSquared( pu1_inp + i4_inp_buf_offset, ps_curr_layer->i4_inp_stride, (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset), (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset), 4, 4, 16, 16, 1, 16); } } } } } } else { memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy)); 
memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy)); memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy)); } for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++) { S32 ref_ctr; U08 au1_pred_dir_searched[2]; U08 u1_is_cu_noisy; ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17]; { blk_x = (i4_ctb_x << 2) + (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x); blk_y = (i4_ctb_y << 2) + (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y); blk_id_in_full_ctb = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb; blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask; ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask; s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6); s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6); } /* get the current input blk point */ pos_x = blk_x << blk_size_shift; pos_y = blk_y << blk_size_shift; pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride); /*********************************************************************/ /* For every blk in the picture, the search range needs to be derived*/ /* Any blk can have any mv, but practical search constraints are */ /* imposed by the picture boundary and amt of padding. */ /*********************************************************************/ /* MV limit is different based on ref. 
PIC */ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) { if(!s_search_prms_blk.i4_use_rec) { hme_derive_search_range( &as_range_prms_inp[ref_ctr], &s_pic_limit_inp, &as_mv_limit[ref_ctr], pos_x, pos_y, blk_wd, blk_ht); } else { hme_derive_search_range( &as_range_prms_rec[ref_ctr], &s_pic_limit_rec, &as_mv_limit[ref_ctr], pos_x, pos_y, blk_wd, blk_ht); } } s_search_prms_blk.i4_x_off = blk_x << blk_size_shift; s_search_prms_blk.i4_y_off = blk_y << blk_size_shift; /* Select search results from a suitable search result in the context */ { ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb]; if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb) { S32 i; for(i = 0; i < 2; i++) { ps_search_results->as_pred_ctxt[i].lambda = lambda_recon; } } } u1_is_cu_noisy = au1_is_16x16Blk_noisy [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)]; s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy; #if ME_LAMBDA_DISCOUNT_WHEN_NOISY if(u1_is_cu_noisy) { S32 j; S32 lambda; lambda = lambda_recon; lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); for(j = 0; j < 2; j++) { ps_search_results->as_pred_ctxt[j].lambda = lambda; } } else { S32 j; S32 lambda; lambda = lambda_recon; for(j = 0; j < 2; j++) { ps_search_results->as_pred_ctxt[j].lambda = lambda; } } #endif s_search_prms_blk.ps_search_results = ps_search_results; s_search_prms_blk.i4_part_mask = hme_part_mask_populator( pu1_inp, i4_inp_stride, ps_refine_prms->limit_active_partitions, ps_ctxt->ps_hme_frm_prms->bidir_enabled, ps_ctxt->u1_is_curFrame_a_refFrame, blk_8x8_mask, e_me_quality_presets); if(ME_PRISTINE_QUALITY == e_me_quality_presets) { ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] = s_search_prms_blk.i4_part_mask; } /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */ { /* Setting u1_num_active_refs to 2 */ /* for the sole purpose of the */ /* function called below */ ps_search_results->u1_num_active_ref = 
(ps_refine_prms->bidir_enabled) ? 2 : 1; hme_reset_search_results( ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL); ps_search_results->u1_num_active_ref = i4_num_pred_dir; } if(0 == blk_id_in_ctb) { UWORD8 u1_ctr; for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1); u1_ctr++) { WORD32 i4_max_dep_ctb_y; WORD32 i4_max_dep_ctb_x; /* Set max mv in ctb units */ i4_max_mv_x_in_ctb = (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >> ps_ctxt->log_ctb_size; i4_max_mv_y_in_ctb = (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >> ps_ctxt->log_ctb_size; /********************************************************************/ /* Set max ctb_x and ctb_y dependency on reference picture */ /* Note +1 is due to delayed deblock, SAO, subpel plan dependency */ /********************************************************************/ i4_max_dep_ctb_x = CLIP3( (i4_ctb_x + i4_max_mv_x_in_ctb + 1), 0, ps_frm_ctb_prms->i4_num_ctbs_horz - 1); i4_max_dep_ctb_y = CLIP3( (i4_ctb_y + i4_max_mv_y_in_ctb + 1), 0, ps_frm_ctb_prms->i4_num_ctbs_vert - 1); ihevce_dmgr_map_chk_sync( ps_curr_layer->ppv_dep_mngr_recon[u1_ctr], ps_ctxt->thrd_id, i4_ctb_x, i4_ctb_y, i4_max_mv_x_in_ctb, i4_max_mv_y_in_ctb); } } /* Loop across different Ref IDx */ for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++) { S32 resultid; S08 u1_default_ref_id; S32 i4_num_srch_cands = 0; S32 i4_num_refinement_iterations; S32 i4_refine_iter_ctr; if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) || (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)) { u1_pred_dir = u1_pred_dir_ctr; } else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0) { u1_pred_dir = 1; } u1_default_ref_id = (u1_pred_dir == 0) ? 
ps_ctxt->ai1_past_list[0] : ps_ctxt->ai1_future_list[0]; au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir; i4_num_srch_cands = 0; resultid = 0; /* START OF NEW CTB MEANS FILL UP NEOGHBOURS IN 18x18 GRID */ if(0 == blk_id_in_ctb) { /*****************************************************************/ /* Initialize the mv grid with results of neighbours for the next*/ /* ctb. */ /*****************************************************************/ hme_fill_ctb_neighbour_mvs( ps_curr_layer, blk_x, blk_y, aps_mv_grid[u1_pred_dir], u1_pred_dir_ctr, u1_default_ref_id, ps_ctxt->s_frm_prms.u1_num_active_ref_l0); } s_search_prms_blk.i1_ref_idx = u1_pred_dir; { if((blk_id_in_full_ctb % 4) == 0) { ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2] .as_pred_ctxt[u1_pred_dir] .proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1; } if(blk_id_in_full_ctb == 0) { ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1; } ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used = !gau1_encode_to_raster_y[blk_id_in_full_ctb]; } { S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb]; S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb]; U08 u1_is_blk_at_ctb_boundary = !y; s_srch_cand_init_data.u1_is_left_available = !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off); if(u1_is_blk_at_ctb_boundary) { s_srch_cand_init_data.u1_is_topRight_available = 0; s_srch_cand_init_data.u1_is_topLeft_available = 0; s_srch_cand_init_data.u1_is_top_available = 0; } else { s_srch_cand_init_data.u1_is_topRight_available = gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd); s_srch_cand_init_data.u1_is_top_available = 1; s_srch_cand_init_data.u1_is_topLeft_available = s_srch_cand_init_data.u1_is_left_available; } } s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id; s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1]; s_srch_cand_init_data.i4_pos_x = pos_x; s_srch_cand_init_data.i4_pos_y = pos_y; s_srch_cand_init_data.u1_pred_dir = u1_pred_dir; 
s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr; s_srch_cand_init_data.u1_search_candidate_list_index = au1_search_candidate_list_index[u1_pred_dir]; i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data); /* Note this block also clips the MV range for all candidates */ { S08 i1_check_for_mult_refs; i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1) : (ps_ctxt->num_ref_past > 1); ps_me_optimised_function_list->pf_mv_clipper( &s_search_prms_blk, i4_num_srch_cands, i1_check_for_mult_refs, ps_refine_prms->i4_num_steps_fpel_refine, ps_refine_prms->i4_num_steps_hpel_refine, ps_refine_prms->i4_num_steps_qpel_refine); } #if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0 i4_num_refinement_iterations = ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0) : 1; #else i4_num_refinement_iterations = ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1; #endif #if ENABLE_EXPLICIT_SEARCH_IN_PQ if(e_me_quality_presets == ME_PRISTINE_QUALITY) { i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0 : i4_num_act_ref_l1; } #endif for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations; i4_refine_iter_ctr++) { S32 center_x; S32 center_y; S32 center_ref_idx; S08 *pi1_pred_dir_to_ref_idx = (u1_pred_dir == 0) ? 
ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list; { WORD32 i4_i; for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++) { ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL; ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL; ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL; ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0; ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0; ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id; if(ps_refine_prms->i4_num_results_per_part == 2) { ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] = MAX_SIGNED_16BIT_VAL; ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] = MAX_SIGNED_16BIT_VAL; ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] = MAX_SIGNED_16BIT_VAL; ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0; ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0; ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id; } } s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt; s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt; } { search_node_t *ps_coloc_node; S32 i = 0; if(i4_num_refinement_iterations > 1) { for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++) { ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]] .ps_search_node; if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] == ps_coloc_node->i1_ref_idx) { break; } } if(i == ai4_num_coloc_cands[u1_pred_dir]) { i = 0; } } else { ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]] .ps_search_node; } hme_set_mvp_node( ps_search_results, ps_coloc_node, u1_pred_dir, (i4_num_refinement_iterations > 1) ? 
pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] : u1_default_ref_id); center_x = ps_coloc_node->ps_mv->i2_mvx; center_y = ps_coloc_node->ps_mv->i2_mvy; center_ref_idx = ps_coloc_node->i1_ref_idx; } /* Full-Pel search */ { S32 num_unique_nodes; memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map)); num_unique_nodes = hme_remove_duplicate_fpel_search_candidates( as_unique_search_nodes, s_search_prms_blk.ps_search_candts, au4_unique_node_map, pi1_pred_dir_to_ref_idx, i4_num_srch_cands, s_search_prms_blk.i4_num_init_candts, i4_refine_iter_ctr, i4_num_refinement_iterations, i4_num_act_ref_l0, center_ref_idx, center_x, center_y, ps_ctxt->s_frm_prms.bidir_enabled, e_me_quality_presets); /*************************************************************************/ /* This array stores the ids of the partitions whose */ /* SADs are updated. Since the partitions whose SADs are updated may not */ /* be in contiguous order, we supply another level of indirection. */ /*************************************************************************/ ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids( s_search_prms_blk.i4_part_mask, &ps_fullpel_refine_ctxt->ai4_part_id[0]); if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy) { S32 i; /*i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values*/ S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) + (s_search_prms_blk.i4_cu_y_off * 4); for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++) { S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i]; hme_compute_final_sigma_of_pu_from_base_blocks( ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset, ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset, au8_final_src_sigmaX, au8_final_src_sigmaXSquared, 16, 4, i4_part_id, 16); } s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX; s_common_frm_prms.pu8_part_src_sigmaXSquared = 
au8_final_src_sigmaXSquared; s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX; s_search_prms_blk.pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared; } if(0 == num_unique_nodes) { continue; } if(num_unique_nodes >= 2) { s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0]; s_search_prms_blk.i4_num_search_nodes = num_unique_nodes; if(ps_ctxt->i4_pic_type != IV_P_FRAME) { if(ps_ctxt->i4_temporal_layer == 1) { hme_fullpel_cand_sifter( &s_search_prms_blk, ps_curr_layer, &ps_ctxt->s_wt_pred, ALPHA_FOR_NOISE_TERM_IN_ME, u1_is_cu_noisy, ps_me_optimised_function_list); } else { hme_fullpel_cand_sifter( &s_search_prms_blk, ps_curr_layer, &ps_ctxt->s_wt_pred, ALPHA_FOR_NOISE_TERM_IN_ME, u1_is_cu_noisy, ps_me_optimised_function_list); } } else { hme_fullpel_cand_sifter( &s_search_prms_blk, ps_curr_layer, &ps_ctxt->s_wt_pred, ALPHA_FOR_NOISE_TERM_IN_ME_P, u1_is_cu_noisy, ps_me_optimised_function_list); } } s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0]; hme_fullpel_refine( ps_refine_prms, &s_search_prms_blk, ps_curr_layer, &ps_ctxt->s_wt_pred, au4_unique_node_map, num_unique_nodes, blk_8x8_mask, center_x, center_y, center_ref_idx, e_me_quality_presets, ps_me_optimised_function_list); } /* Sub-Pel search */ { hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr); s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem( &ps_ctxt->s_buf_mgr, INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE); /* MV limit is different based on ref. 
PIC */ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) { SCALE_RANGE_PRMS( as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1); SCALE_RANGE_PRMS( as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2); } s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6; s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6; hme_subpel_refine_cu_hs( &s_subpel_prms, ps_curr_layer, ps_search_results, u1_pred_dir, &ps_ctxt->s_wt_pred, blk_8x8_mask, ps_ctxt->ps_func_selector, ps_cmn_utils_optimised_function_list, ps_me_optimised_function_list); } } } /* Populate the new PU struct with the results post subpel refinement*/ { inter_cu_results_t *ps_cu_results; WORD32 best_inter_cost, intra_cost, posx, posy; UWORD8 intra_8x8_enabled = 0; /* cost of 16x16 cu parent */ WORD32 parent_cost = MAX_32BIT_VAL; /* cost of 8x8 cu children */ /*********************************************************************/ /* Assuming parent is not split, then we signal 1 bit for this parent*/ /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */ /* So, 4*lambda is extra for children cost. */ /*********************************************************************/ WORD32 child_cost = 0; ps_cu_results = ps_search_results->ps_cu_results; /* Initialize the pu_results pointers to the first struct in the stack array */ ps_pu_results = as_inter_pu_results; hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr); hme_populate_pus( ps_thrd_ctxt, ps_ctxt, &s_subpel_prms, ps_search_results, ps_cu_results, ps_pu_results, &(as_pu_results[0][0][0]), &s_common_frm_prms, &ps_ctxt->s_wt_pred, ps_curr_layer, au1_pred_dir_searched, i4_num_pred_dir); ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64); hme_decide_part_types( ps_cu_results, ps_pu_results, &s_common_frm_prms, ps_ctxt, ps_cmn_utils_optimised_function_list, ps_me_optimised_function_list ); /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */ /* Only for P pic. For P, both are 0, I&B has them mut. 
exclusive */ if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) { WORD32 res_ctr; for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++) { WORD32 num_part = 2, part_ctr; part_type_results_t *ps_best_results = &ps_cu_results->ps_best_results[res_ctr]; if(PRT_2Nx2N == ps_best_results->u1_part_type) num_part = 1; for(part_ctr = 0; part_ctr < num_part; part_ctr++) { pu_result_t *ps_pu_results = &ps_best_results->as_pu_results[part_ctr]; ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode); hme_update_dynamic_search_params( &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p] .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx], ps_pu_results->pu.mv.s_l0_mv.i2_mvy); /* Sanity Check */ ASSERT( ps_pu_results->pu.mv.i1_l0_ref_idx < ps_ctxt->s_frm_prms.u1_num_active_ref_l0); /* No L1 for P Pic. */ ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode); /* No BI for P Pic. */ ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode); } } } /*****************************************************************/ /* INSERT INTRA RESULTS AT 16x16 LEVEL. 
*/ /*****************************************************************/ #if DISABLE_INTRA_IN_BPICS if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) && (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))) #endif { if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy)) { hme_insert_intra_nodes_post_bipred( ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep); } } #if DISABLE_INTRA_IN_BPICS if((ME_XTREME_SPEED_25 == e_me_quality_presets) && (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)) { intra_8x8_enabled = 0; } else #endif { /*TRAQO intra flag updation*/ if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag) { best_inter_cost = ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost; intra_cost = ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost; /*@16x16 level*/ posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x << 2) >> 4; posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y << 2) >> 4; } else { best_inter_cost = ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost; posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x << 2) >> 3; posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y << 2) >> 3; } /* Disable intra16/32/64 flags based on split flags recommended by IPE */ if(ps_cur_ipe_ctb->u1_split_flag) { /* Id of the 32x32 block, 16x16 block in a CTB */ WORD32 i4_32x32_id = (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5); WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 + ((ps_cu_results->u1_x_off >> 4) & 0x1); if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag) { if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id] .as_intra16_analyse[i4_16x16_id] .b1_split_flag) { intra_8x8_enabled = ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id] .as_intra16_analyse[i4_16x16_id] .as_intra8_analyse[0] .b1_valid_cu; intra_8x8_enabled &= 
ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id] .as_intra16_analyse[i4_16x16_id] .as_intra8_analyse[1] .b1_valid_cu; intra_8x8_enabled &= ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id] .as_intra16_analyse[i4_16x16_id] .as_intra8_analyse[2] .b1_valid_cu; intra_8x8_enabled &= ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id] .as_intra16_analyse[i4_16x16_id] .as_intra8_analyse[3] .b1_valid_cu; } } } } if(blk_8x8_mask == 0xf) { parent_cost = ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost; ps_search_results->u1_split_flag = 0; } else { ps_search_results->u1_split_flag = 1; } ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2]; if(s_common_frm_prms.u1_is_cu_noisy) { intra_8x8_enabled = 0; } /* Evalaute 8x8 if NxN part id is enabled */ if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled) { /* Populates the PU's for the 4 8x8's in one call */ hme_populate_pus_8x8_cu( ps_thrd_ctxt, ps_ctxt, &s_subpel_prms, ps_search_results, ps_cu_results, ps_pu_results, &(as_pu_results[0][0][0]), &s_common_frm_prms, au1_pred_dir_searched, i4_num_pred_dir, blk_8x8_mask); /* Re-initialize the pu_results pointers to the first struct in the stack array */ ps_pu_results = as_inter_pu_results; for(i = 0; i < 4; i++) { if((blk_8x8_mask & (1 << i))) { if(ps_cu_results->i4_part_mask) { hme_decide_part_types( ps_cu_results, ps_pu_results, &s_common_frm_prms, ps_ctxt, ps_cmn_utils_optimised_function_list, ps_me_optimised_function_list ); } /*****************************************************************/ /* INSERT INTRA RESULTS AT 8x8 LEVEL. 
*/ /*****************************************************************/ #if DISABLE_INTRA_IN_BPICS if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) && (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))) #endif { if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy)) { hme_insert_intra_nodes_post_bipred( ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep); } } child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost; } ps_cu_results++; ps_pu_results++; } /* Compare 16x16 vs 8x8 cost */ if(child_cost < parent_cost) { ps_search_results->best_cu_cost = child_cost; ps_search_results->u1_split_flag = 1; } } } hme_update_mv_bank_encode( ps_search_results, ps_curr_layer->ps_layer_mvbank, blk_x, blk_y, &s_mv_update_prms, au1_pred_dir_searched, i4_num_act_ref_l0); /*********************************************************************/ /* Map the best results to an MV Grid. This is a 18x18 grid that is */ /* useful for doing things like predictor for cost calculation or */ /* also for merge calculations if need be. 
*/ /*********************************************************************/ hme_map_mvs_to_grid( &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir); } /* Set the CU tree nodes appropriately */ if(e_me_quality_presets != ME_PRISTINE_QUALITY) { WORD32 i, j; for(i = 0; i < 16; i++) { cur_ctb_cu_tree_t *ps_tree_node = ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE); search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i]; switch(i >> 2) { case 0: { ps_tree_node = ps_tree_node->ps_child_node_tl; break; } case 1: { ps_tree_node = ps_tree_node->ps_child_node_tr; break; } case 2: { ps_tree_node = ps_tree_node->ps_child_node_bl; break; } case 3: { ps_tree_node = ps_tree_node->ps_child_node_br; break; } } switch(i % 4) { case 0: { ps_tree_node = ps_tree_node->ps_child_node_tl; break; } case 1: { ps_tree_node = ps_tree_node->ps_child_node_tr; break; } case 2: { ps_tree_node = ps_tree_node->ps_child_node_bl; break; } case 3: { ps_tree_node = ps_tree_node->ps_child_node_br; break; } } if(ai4_blk_8x8_mask[i] == 15) { if(!ps_results->u1_split_flag) { ps_tree_node->is_node_valid = 1; NULLIFY_THE_CHILDREN_NODES(ps_tree_node); } else { ps_tree_node->is_node_valid = 0; ENABLE_THE_CHILDREN_NODES(ps_tree_node); } } else { cur_ctb_cu_tree_t *ps_tree_child; ps_tree_node->is_node_valid = 0; for(j = 0; j < 4; j++) { switch(j) { case 0: { ps_tree_child = ps_tree_node->ps_child_node_tl; break; } case 1: { ps_tree_child = ps_tree_node->ps_child_node_tr; break; } case 2: { ps_tree_child = ps_tree_node->ps_child_node_bl; break; } case 3: { ps_tree_child = ps_tree_node->ps_child_node_br; break; } } ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j)); } } } } if(ME_PRISTINE_QUALITY == e_me_quality_presets) { cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root; hme_analyse_mv_clustering( ps_ctxt->as_search_results_16x16, ps_ctxt->as_cu16x16_results, ps_ctxt->as_cu8x8_results, ps_ctxt->ps_ctb_cluster_info, 
ps_ctxt->ai1_future_list, ps_ctxt->ai1_past_list, ps_ctxt->s_frm_prms.bidir_enabled, e_me_quality_presets); #if DISABLE_BLK_MERGE_WHEN_NOISY ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0]; ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1]; ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2]; ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3]; ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0]; ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1]; ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2]; ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3]; ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0]; ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0]; #endif en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) | (ps_tree->ps_child_node_tr->is_node_valid << 1) | (ps_tree->ps_child_node_bl->is_node_valid << 2) | (ps_tree->ps_child_node_br->is_node_valid << 3); en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) | (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) | (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) | (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) | (ps_tree->u1_inter_eval_enable << 4); } else { en_merge_execution = 0x1f; #if DISABLE_BLK_MERGE_WHEN_NOISY en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) | ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) | ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) | ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8)); #endif } /* Re-initialize the pu_results pointers to the first struct in the stack array */ ps_pu_results = as_inter_pu_results; { WORD32 ref_ctr; s_ctb_prms.i4_ctb_x = i4_ctb_x << 6; s_ctb_prms.i4_ctb_y = i4_ctb_y << 6; /* MV limit is different based on ref. 
PIC */ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) { SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1); SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2); } e_merge_result = CU_SPLIT; merge_count_32x32 = 0; if((en_merge_32x32 & 1) && (en_merge_execution & 1)) { range_prms_t *ps_pic_limit; if(s_merge_prms_32x32_tl.i4_use_rec == 1) { ps_pic_limit = &s_pic_limit_rec; } else { ps_pic_limit = &s_pic_limit_inp; } /* MV limit is different based on ref. PIC */ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) { hme_derive_search_range( s_merge_prms_32x32_tl.aps_mv_range[ref_ctr], ps_pic_limit, &as_mv_limit[ref_ctr], i4_ctb_x << 6, i4_ctb_y << 6, 32, 32); SCALE_RANGE_PRMS_POINTERS( s_merge_prms_32x32_tl.aps_mv_range[ref_ctr], s_merge_prms_32x32_tl.aps_mv_range[ref_ctr], 2); } s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6; s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6; s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0]; e_merge_result = hme_try_merge_high_speed( ps_thrd_ctxt, ps_ctxt, ps_cur_ipe_ctb, &s_subpel_prms, &s_merge_prms_32x32_tl, ps_pu_results, &as_pu_results[0][0][0]); if(e_merge_result == CU_MERGED) { inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results; if(!((ps_cu_results->u1_num_best_results == 1) && (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag))) { hme_map_mvs_to_grid( &aps_mv_grid[0], s_merge_prms_32x32_tl.ps_results_merge, s_merge_prms_32x32_tl.au1_pred_dir_searched, s_merge_prms_32x32_tl.i4_num_pred_dir_actual); } if(ME_PRISTINE_QUALITY != e_me_quality_presets) { ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] .ps_child_node_tl->is_node_valid = 1; NULLIFY_THE_CHILDREN_NODES( ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] .ps_child_node_tl); } merge_count_32x32++; e_merge_result = CU_SPLIT; } else if(ME_PRISTINE_QUALITY == e_me_quality_presets) { #if ENABLE_CU_TREE_CULLING 
cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl; ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; en_merge_execution = (en_merge_execution & (~(1 << 4))); ENABLE_THE_CHILDREN_NODES(ps_tree); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); #endif } } else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1))) { #if ENABLE_CU_TREE_CULLING cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl; ENABLE_THE_CHILDREN_NODES(ps_tree); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); #endif if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY) { ps_tree->is_node_valid = 0; ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; en_merge_execution = (en_merge_execution & (~(1 << 4))); } } if((en_merge_32x32 & 2) && (en_merge_execution & 2)) { range_prms_t *ps_pic_limit; if(s_merge_prms_32x32_tr.i4_use_rec == 1) { ps_pic_limit = &s_pic_limit_rec; } else { ps_pic_limit = &s_pic_limit_inp; } /* MV limit is different based on ref. 
PIC */ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) { hme_derive_search_range( s_merge_prms_32x32_tr.aps_mv_range[ref_ctr], ps_pic_limit, &as_mv_limit[ref_ctr], (i4_ctb_x << 6) + 32, i4_ctb_y << 6, 32, 32); SCALE_RANGE_PRMS_POINTERS( s_merge_prms_32x32_tr.aps_mv_range[ref_ctr], s_merge_prms_32x32_tr.aps_mv_range[ref_ctr], 2); } s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6; s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6; s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1]; e_merge_result = hme_try_merge_high_speed( ps_thrd_ctxt, ps_ctxt, ps_cur_ipe_ctb, &s_subpel_prms, &s_merge_prms_32x32_tr, ps_pu_results, &as_pu_results[0][0][0]); if(e_merge_result == CU_MERGED) { inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results; if(!((ps_cu_results->u1_num_best_results == 1) && (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag))) { hme_map_mvs_to_grid( &aps_mv_grid[0], s_merge_prms_32x32_tr.ps_results_merge, s_merge_prms_32x32_tr.au1_pred_dir_searched, s_merge_prms_32x32_tr.i4_num_pred_dir_actual); } if(ME_PRISTINE_QUALITY != e_me_quality_presets) { ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] .ps_child_node_tr->is_node_valid = 1; NULLIFY_THE_CHILDREN_NODES( ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] .ps_child_node_tr); } merge_count_32x32++; e_merge_result = CU_SPLIT; } else if(ME_PRISTINE_QUALITY == e_me_quality_presets) { #if ENABLE_CU_TREE_CULLING cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr; ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; en_merge_execution = (en_merge_execution & (~(1 << 4))); ENABLE_THE_CHILDREN_NODES(ps_tree); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); #endif } } else if((en_merge_32x32 & 2) && (!(en_merge_execution & 
2))) { #if ENABLE_CU_TREE_CULLING cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr; ENABLE_THE_CHILDREN_NODES(ps_tree); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); #endif if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY) { ps_tree->is_node_valid = 0; ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; en_merge_execution = (en_merge_execution & (~(1 << 4))); } } if((en_merge_32x32 & 4) && (en_merge_execution & 4)) { range_prms_t *ps_pic_limit; if(s_merge_prms_32x32_bl.i4_use_rec == 1) { ps_pic_limit = &s_pic_limit_rec; } else { ps_pic_limit = &s_pic_limit_inp; } /* MV limit is different based on ref. PIC */ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) { hme_derive_search_range( s_merge_prms_32x32_bl.aps_mv_range[ref_ctr], ps_pic_limit, &as_mv_limit[ref_ctr], i4_ctb_x << 6, (i4_ctb_y << 6) + 32, 32, 32); SCALE_RANGE_PRMS_POINTERS( s_merge_prms_32x32_bl.aps_mv_range[ref_ctr], s_merge_prms_32x32_bl.aps_mv_range[ref_ctr], 2); } s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6; s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6; s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2]; e_merge_result = hme_try_merge_high_speed( ps_thrd_ctxt, ps_ctxt, ps_cur_ipe_ctb, &s_subpel_prms, &s_merge_prms_32x32_bl, ps_pu_results, &as_pu_results[0][0][0]); if(e_merge_result == CU_MERGED) { inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results; if(!((ps_cu_results->u1_num_best_results == 1) && (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag))) { hme_map_mvs_to_grid( &aps_mv_grid[0], s_merge_prms_32x32_bl.ps_results_merge, s_merge_prms_32x32_bl.au1_pred_dir_searched, s_merge_prms_32x32_bl.i4_num_pred_dir_actual); } if(ME_PRISTINE_QUALITY != e_me_quality_presets) { ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * 
MAX_NUM_NODES_CU_TREE)] .ps_child_node_bl->is_node_valid = 1; NULLIFY_THE_CHILDREN_NODES( ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] .ps_child_node_bl); } merge_count_32x32++; e_merge_result = CU_SPLIT; } else if(ME_PRISTINE_QUALITY == e_me_quality_presets) { #if ENABLE_CU_TREE_CULLING cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl; ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; en_merge_execution = (en_merge_execution & (~(1 << 4))); ENABLE_THE_CHILDREN_NODES(ps_tree); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); #endif } } else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4))) { #if ENABLE_CU_TREE_CULLING cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl; ENABLE_THE_CHILDREN_NODES(ps_tree); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); #endif if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY) { ps_tree->is_node_valid = 0; ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; en_merge_execution = (en_merge_execution & (~(1 << 4))); } } if((en_merge_32x32 & 8) && (en_merge_execution & 8)) { range_prms_t *ps_pic_limit; if(s_merge_prms_32x32_br.i4_use_rec == 1) { ps_pic_limit = &s_pic_limit_rec; } else { ps_pic_limit = &s_pic_limit_inp; } /* MV limit is different based on ref. 
PIC */ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) { hme_derive_search_range( s_merge_prms_32x32_br.aps_mv_range[ref_ctr], ps_pic_limit, &as_mv_limit[ref_ctr], (i4_ctb_x << 6) + 32, (i4_ctb_y << 6) + 32, 32, 32); SCALE_RANGE_PRMS_POINTERS( s_merge_prms_32x32_br.aps_mv_range[ref_ctr], s_merge_prms_32x32_br.aps_mv_range[ref_ctr], 2); } s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6; s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6; s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3]; e_merge_result = hme_try_merge_high_speed( ps_thrd_ctxt, ps_ctxt, ps_cur_ipe_ctb, &s_subpel_prms, &s_merge_prms_32x32_br, ps_pu_results, &as_pu_results[0][0][0]); if(e_merge_result == CU_MERGED) { /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results; if(!((ps_cu_results->u1_num_best_results == 1) && (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag))) { hme_map_mvs_to_grid ( &aps_mv_grid[0], s_merge_prms_32x32_br.ps_results_merge, s_merge_prms_32x32_br.au1_pred_dir_searched, s_merge_prms_32x32_br.i4_num_pred_dir_actual ); }*/ if(ME_PRISTINE_QUALITY != e_me_quality_presets) { ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] .ps_child_node_br->is_node_valid = 1; NULLIFY_THE_CHILDREN_NODES( ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] .ps_child_node_br); } merge_count_32x32++; e_merge_result = CU_SPLIT; } else if(ME_PRISTINE_QUALITY == e_me_quality_presets) { #if ENABLE_CU_TREE_CULLING cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br; ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; en_merge_execution = (en_merge_execution & (~(1 << 4))); ENABLE_THE_CHILDREN_NODES(ps_tree); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); #endif } } else if((en_merge_32x32 & 8) && 
(!(en_merge_execution & 8))) { #if ENABLE_CU_TREE_CULLING cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br; ENABLE_THE_CHILDREN_NODES(ps_tree); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); #endif if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY) { ps_tree->is_node_valid = 0; ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; en_merge_execution = (en_merge_execution & (~(1 << 4))); } } /* Try merging all 32x32 to 64x64 candts */ if(((en_merge_32x32 & 0xf) == 0xf) && (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) || ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY)))) if((((e_me_quality_presets == ME_XTREME_SPEED_25) && !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) || (e_me_quality_presets != ME_XTREME_SPEED_25))) { range_prms_t *ps_pic_limit; if(s_merge_prms_64x64.i4_use_rec == 1) { ps_pic_limit = &s_pic_limit_rec; } else { ps_pic_limit = &s_pic_limit_inp; } /* MV limit is different based on ref. 
PIC */ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) { hme_derive_search_range( s_merge_prms_64x64.aps_mv_range[ref_ctr], ps_pic_limit, &as_mv_limit[ref_ctr], i4_ctb_x << 6, i4_ctb_y << 6, 64, 64); SCALE_RANGE_PRMS_POINTERS( s_merge_prms_64x64.aps_mv_range[ref_ctr], s_merge_prms_64x64.aps_mv_range[ref_ctr], 2); } s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6; s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6; s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0]; e_merge_result = hme_try_merge_high_speed( ps_thrd_ctxt, ps_ctxt, ps_cur_ipe_ctb, &s_subpel_prms, &s_merge_prms_64x64, ps_pu_results, &as_pu_results[0][0][0]); if((e_merge_result == CU_MERGED) && (ME_PRISTINE_QUALITY != e_me_quality_presets)) { ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] .is_node_valid = 1; NULLIFY_THE_CHILDREN_NODES( ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)); } else if( (e_merge_result == CU_SPLIT) && (ME_PRISTINE_QUALITY == e_me_quality_presets)) { ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] .is_node_valid = 0; } } /*****************************************************************/ /* UPDATION OF RESULT TO EXTERNAL STRUCTURES */ /*****************************************************************/ pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y); { #ifdef _DEBUG S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64) ? 64 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off; S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64) ? 64 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off; ASSERT( (wd * ht) == ihevce_compute_area_of_valid_cus_in_ctb( &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)])); #endif } } /* set the dependency for the corresponding row in enc loop */ ihevce_dmgr_set_row_row_sync( pv_dep_mngr_encloop_dep_me, (i4_ctb_x + 1), i4_ctb_y, tile_col_idx /* Col Tile No. 
*/); left_ctb_in_diff_tile = 0; } } } /** ******************************************************************************** * @fn void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt, * refine_prms_t *ps_refine_prms, * multi_thrd_ctxt_t *ps_multi_thrd_ctxt, * S32 lyr_job_type, * WORD32 i4_ping_pong, * void **ppv_dep_mngr_hme_sync) * * @brief Top level entry point for refinement ME of non-encode layers * * @param[in,out] ps_ctxt: ME Handle * * @param[in] ps_refine_prms : refinement layer prms * * @param ps_multi_thrd_ctxt : multi thread context; used to fetch the row jobs and * to read the pre-enc input (s_lap_out) of the current picture * * @param[in] lyr_job_type : job type passed to the pre-enc job queue when fetching * the next row job * * @param[in] i4_ping_pong : ping pong index used to select the pre-enc input and * passed to the pre-enc job queue functions * * @param ppv_dep_mngr_hme_sync : array of HME dependency manager handles; the handle * at index (i4_layer_id - 1) is used for this layer * * @return None ******************************************************************************** */ void hme_refine_no_encode( coarse_me_ctxt_t *ps_ctxt, refine_prms_t *ps_refine_prms, multi_thrd_ctxt_t *ps_multi_thrd_ctxt, S32 lyr_job_type, WORD32 i4_ping_pong, void **ppv_dep_mngr_hme_sync) { BLK_SIZE_T e_search_blk_size, e_result_blk_size; ME_QUALITY_PRESETS_T e_me_quality_presets = ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets; /*************************************************************************/ /* Complexity of search: Low to High */ /*************************************************************************/ SEARCH_COMPLEXITY_T e_search_complexity; /*************************************************************************/ /* Config parameter structures for varius ME submodules */ /*************************************************************************/ hme_search_prms_t s_search_prms_blk; mvbank_update_prms_t s_mv_update_prms; /*************************************************************************/ /* All types of search candidates for predictor based search. 
*/ /*************************************************************************/ S32 num_init_candts = 0; search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS]; search_node_t as_top_neighbours[4], as_left_neighbours[3]; search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr; search_node_t *ps_candt_l, *ps_candt_t; search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2]; search_node_t *ps_candt_prj_bl[2]; search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2]; search_node_t *ps_candt_prj_coloc[2]; pf_get_wt_inp fp_get_wt_inp; search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9]; U32 au4_unique_node_map[MAP_X_MAX * 2]; /*EIID */ WORD32 i4_num_inter_wins = 0; //debug code to find stat of WORD32 i4_num_comparisions = 0; //debug code WORD32 i4_threshold_multiplier; WORD32 i4_threshold_divider; WORD32 i4_temporal_layer = ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id; /*************************************************************************/ /* points ot the search results for the blk level search (8x8/16x16) */ /*************************************************************************/ search_results_t *ps_search_results; /*************************************************************************/ /* Coordinates */ /*************************************************************************/ S32 blk_x, i4_ctb_x, blk_id_in_ctb; //S32 i4_ctb_y; S32 pos_x, pos_y; S32 blk_id_in_full_ctb; S32 i4_num_srch_cands; S32 blk_y; /*************************************************************************/ /* Related to dimensions of block being searched and pic dimensions */ /*************************************************************************/ S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic; S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb; S32 num_results_prev_layer; /*************************************************************************/ /* Size of a basic unit 
for this layer. For non encode layers, we search */ /* in block sizes of 8x8. For encode layers, though we search 16x16s the */ /* basic unit size is the ctb size. */ /*************************************************************************/ S32 unit_size; /*************************************************************************/ /* Pointers to context in current and coarser layers */ /*************************************************************************/ layer_ctxt_t *ps_curr_layer, *ps_coarse_layer; /*************************************************************************/ /* to store mv range per blk, and picture limit, allowed search range */ /* range prms in hpel and qpel units as well */ /*************************************************************************/ range_prms_t s_range_prms_inp, s_range_prms_rec; range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF]; /*************************************************************************/ /* These variables are used to track number of references at different */ /* stages of ME. */ /*************************************************************************/ S32 i4_num_ref_fpel, i4_num_ref_before_merge; S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer; S32 lambda_inp = ps_refine_prms->lambda_inp; /*************************************************************************/ /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */ /* Explicit means it searches on all active ref idx. 
*/ /*************************************************************************/ S32 curr_layer_implicit, prev_layer_implicit; /*************************************************************************/ /* Variables for loop counts */ /*************************************************************************/ S32 id; S08 i1_ref_idx; /*************************************************************************/ /* Input pointer and stride */ /*************************************************************************/ U08 *pu1_inp; S32 i4_inp_stride; S32 end_of_frame; S32 num_sync_units_in_row; PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt; ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1); /*************************************************************************/ /* Pointers to current and coarse layer are needed for projection */ /* Pointer to prev layer are needed for other candts like coloc */ /*************************************************************************/ ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id]; ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1]; num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref; /* Function pointer is selected based on the C vc X86 macro */ fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list) ->pf_get_wt_inp_8x8; i4_inp_stride = ps_curr_layer->i4_inp_stride; i4_pic_wd = ps_curr_layer->i4_wd; i4_pic_ht = ps_curr_layer->i4_ht; e_search_complexity = ps_refine_prms->e_search_complexity; end_of_frame = 0; /* If the previous layer is non-encode layer, then use dyadic projection */ if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1]) pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic; else pf_hme_project_coloc_candt = hme_project_coloc_candt; /* This points to all the initial candts */ ps_search_candts = &as_search_candts[0]; { e_search_blk_size = BLK_8x8; 
blk_wd = blk_ht = 8; blk_size_shift = 3; s_mv_update_prms.i4_shift = 0; /*********************************************************************/ /* In case we do not encode this layer, we search 8x8 with or without*/ /* enable 4x4 SAD. */ /*********************************************************************/ { S32 i4_mask = (ENABLE_2Nx2N); e_result_blk_size = BLK_8x8; if(ps_refine_prms->i4_enable_4x4_part) { i4_mask |= (ENABLE_NxN); e_result_blk_size = BLK_4x4; s_mv_update_prms.i4_shift = 1; } s_search_prms_blk.i4_part_mask = i4_mask; } unit_size = blk_wd; s_search_prms_blk.i4_inp_stride = unit_size; } /* This is required to properly update the layer mv bank */ s_mv_update_prms.e_search_blk_size = e_search_blk_size; s_search_prms_blk.e_blk_size = e_search_blk_size; /*************************************************************************/ /* If current layer is explicit, then the number of ref frames are to */ /* be same as previous layer. Else it will be 2 */ /*************************************************************************/ i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref; if(ps_refine_prms->explicit_ref) { curr_layer_implicit = 0; i4_num_ref_fpel = i4_num_ref_prev_layer; /* 100578 : Using same mv cost fun. for all presets. 
*/ s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine; } else { i4_num_ref_fpel = 2; curr_layer_implicit = 1; { if(ME_MEDIUM_SPEED > e_me_quality_presets) { s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit; } else { #if USE_MODIFIED == 1 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified; #else s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed; #endif } } } i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer); if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_IDR_FRAME || ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME) { i4_num_ref_fpel = 1; } if(i4_num_ref_prev_layer <= 2) { prev_layer_implicit = 1; curr_layer_implicit = 1; i4_num_ref_each_dir = 1; } else { /* It is assumed that we have equal number of references in each dir */ //ASSERT(!(i4_num_ref_prev_layer & 1)); prev_layer_implicit = 0; i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1; } s_mv_update_prms.i4_num_ref = i4_num_ref_fpel; s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1; /* this can be kept to 1 or 2 */ i4_num_ref_before_merge = 2; i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel); /* Set up place holders to hold the search nodes of each initial candt */ for(i = 0; i < MAX_INIT_CANDTS; i++) { ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i]; INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0); } /* redundant, but doing it here since it is used in pred ctxt init */ ps_candt_zeromv = ps_search_candts[0].ps_search_node; for(i = 0; i < 3; i++) { search_node_t *ps_search_node; ps_search_node = &as_left_neighbours[i]; INIT_SEARCH_NODE(ps_search_node, 0); ps_search_node = &as_top_neighbours[i]; INIT_SEARCH_NODE(ps_search_node, 0); } INIT_SEARCH_NODE(&as_top_neighbours[3], 0); 
/* bottom left node always not available for the blk being searched */ as_left_neighbours[2].u1_is_avail = 0; /*************************************************************************/ /* Initialize all the search results structure here. We update all the */ /* search results to default values, and configure things like blk sizes */ /*************************************************************************/ if(ps_refine_prms->i4_encode == 0) { S32 pred_lx; search_results_t *ps_search_results; ps_search_results = &ps_ctxt->s_search_results_8x8; hme_init_search_results( ps_search_results, i4_num_ref_fpel, ps_refine_prms->i4_num_fpel_results, ps_refine_prms->i4_num_results_per_part, e_search_blk_size, 0, 0, &ps_ctxt->au1_is_past[0]); for(pred_lx = 0; pred_lx < 2; pred_lx++) { hme_init_pred_ctxt_no_encode( &ps_search_results->as_pred_ctxt[pred_lx], ps_search_results, &as_top_neighbours[0], &as_left_neighbours[0], &ps_candt_prj_coloc[0], ps_candt_zeromv, ps_candt_zeromv, pred_lx, lambda_inp, ps_refine_prms->lambda_q_shift, &ps_ctxt->apu1_ref_bits_tlu_lc[0], &ps_ctxt->ai2_ref_scf[0]); } } /*********************************************************************/ /* Initialize the dyn. search range params. for each reference index */ /* in current layer ctxt */ /*********************************************************************/ /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) { WORD32 ref_ctr; for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++) { INIT_DYN_SEARCH_PRMS( &ps_ctxt->s_coarse_dyn_range_prms .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr], ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]); } } /* Next set up initial candidates according to a given set of rules. */ /* The number of initial candidates affects the quality of ME in the */ /* case of motion with multiple degrees of freedom. 
In case of simple */ /* translational motion, a current and a few causal and non causal */ /* candts would suffice. More candidates help to cover more complex */ /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */ /* where multiple ref helps etc. */ /* The candidate choice also depends on the following parameters. */ /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH */ /* Whether we encode or not, and the type of search across reference */ /* i.e. the previous layer may have been explicit/implicit and curr */ /* layer may be explicit/implicit */ /* 0, 0, L, T, projected coloc best always presnt by default */ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets); ps_candt_zeromv = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 0; ps_candt_zeromv->s_mv.i2_mvx = 0; ps_candt_zeromv->s_mv.i2_mvy = 0; id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets); ps_candt_l = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 0; /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */ /* not at the CTB boundary use the causal T and */ /* not the projected T, although the candidate is */ /* still pointed to by ps_candt_prj_t[0] */ if(ME_MEDIUM_SPEED <= e_me_quality_presets) { /* Using Projected top to eliminate sync */ id = hme_decide_search_candidate_priority_in_l1_and_l2_me( PROJECTED_TOP0, e_me_quality_presets); ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 1; } else { id = hme_decide_search_candidate_priority_in_l1_and_l2_me( SPATIAL_TOP0, e_me_quality_presets); ps_candt_t = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 0; } id = hme_decide_search_candidate_priority_in_l1_and_l2_me( PROJECTED_COLOC0, e_me_quality_presets); ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine 
= 1; id = hme_decide_search_candidate_priority_in_l1_and_l2_me( PROJECTED_COLOC1, e_me_quality_presets); ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 1; if(ME_MEDIUM_SPEED <= e_me_quality_presets) { id = hme_decide_search_candidate_priority_in_l1_and_l2_me( PROJECTED_TOP_RIGHT0, e_me_quality_presets); ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 1; id = hme_decide_search_candidate_priority_in_l1_and_l2_me( PROJECTED_TOP_LEFT0, e_me_quality_presets); ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 1; } else { id = hme_decide_search_candidate_priority_in_l1_and_l2_me( SPATIAL_TOP_RIGHT0, e_me_quality_presets); ps_candt_tr = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 0; id = hme_decide_search_candidate_priority_in_l1_and_l2_me( SPATIAL_TOP_LEFT0, e_me_quality_presets); ps_candt_tl = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 0; } id = hme_decide_search_candidate_priority_in_l1_and_l2_me( PROJECTED_RIGHT0, e_me_quality_presets); ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 1; id = hme_decide_search_candidate_priority_in_l1_and_l2_me( PROJECTED_BOTTOM0, e_me_quality_presets); ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 1; id = hme_decide_search_candidate_priority_in_l1_and_l2_me( PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets); ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 1; id = hme_decide_search_candidate_priority_in_l1_and_l2_me( PROJECTED_BOTTOM_LEFT0, e_me_quality_presets); ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 1; id = hme_decide_search_candidate_priority_in_l1_and_l2_me( PROJECTED_RIGHT1, 
e_me_quality_presets); ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 1; id = hme_decide_search_candidate_priority_in_l1_and_l2_me( PROJECTED_BOTTOM1, e_me_quality_presets); ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 1; id = hme_decide_search_candidate_priority_in_l1_and_l2_me( PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets); ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 1; id = hme_decide_search_candidate_priority_in_l1_and_l2_me( PROJECTED_BOTTOM_LEFT1, e_me_quality_presets); ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 1; id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets); ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 1; id = hme_decide_search_candidate_priority_in_l1_and_l2_me( PROJECTED_TOP_RIGHT1, e_me_quality_presets); ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 1; id = hme_decide_search_candidate_priority_in_l1_and_l2_me( PROJECTED_TOP_LEFT1, e_me_quality_presets); ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node; ps_search_candts[id].u1_num_steps_refine = 1; /*************************************************************************/ /* Now that the candidates have been ordered, to choose the right number */ /* of initial candidates. 
*/ /*************************************************************************/ if(curr_layer_implicit && !prev_layer_implicit) { if(e_search_complexity == SEARCH_CX_LOW) num_init_candts = 7; else if(e_search_complexity == SEARCH_CX_MED) num_init_candts = 13; else if(e_search_complexity == SEARCH_CX_HIGH) num_init_candts = 18; else ASSERT(0); } else { if(e_search_complexity == SEARCH_CX_LOW) num_init_candts = 5; else if(e_search_complexity == SEARCH_CX_MED) num_init_candts = 11; else if(e_search_complexity == SEARCH_CX_HIGH) num_init_candts = 16; else ASSERT(0); } if(ME_XTREME_SPEED_25 == e_me_quality_presets) { num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25; } /*************************************************************************/ /* The following search parameters are fixed throughout the search across*/ /* all blks. So these are configured outside processing loop */ /*************************************************************************/ s_search_prms_blk.i4_num_init_candts = num_init_candts; s_search_prms_blk.i4_start_step = 1; s_search_prms_blk.i4_use_satd = 0; s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel; /* we use recon only for encoded layers, otherwise it is not available */ s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel; s_search_prms_blk.ps_search_candts = ps_search_candts; /* We use the same mv_range for all ref. pic. So assign to member 0 */ if(s_search_prms_blk.i4_use_rec) s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec; else s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp; /*************************************************************************/ /* Initialize coordinates. Meaning as follows */ /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */ /* blk_y : same as above, y coord. */ /* num_blks_in_this_ctb : number of blks in this given ctb that starts */ /* at i4_ctb_x, i4_ctb_y. 
This may not be 16 at picture boundaries. */ /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb realtive to top left */ /* corner of the picture. Always multiple of 64. */ /* blk_id_in_ctb : encode order id of the blk in the ctb. */ /*************************************************************************/ blk_y = 0; blk_id_in_ctb = 0; GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic); /* Get the number of sync units in a row based on encode/non enocde layer */ num_sync_units_in_row = num_blks_in_row; /*************************************************************************/ /* Picture limit on all 4 sides. This will be used to set mv limits for */ /* every block given its coordinate. Note thsi assumes that the min amt */ /* of padding to right of pic is equal to the blk size. If we go all the */ /* way upto 64x64, then the min padding on right size of picture should */ /* be 64, and also on bottom side of picture. */ /*************************************************************************/ SET_PIC_LIMIT( s_pic_limit_inp, ps_curr_layer->i4_pad_x_inp, ps_curr_layer->i4_pad_y_inp, ps_curr_layer->i4_wd, ps_curr_layer->i4_ht, s_search_prms_blk.i4_num_steps_post_refine); SET_PIC_LIMIT( s_pic_limit_rec, ps_curr_layer->i4_pad_x_rec, ps_curr_layer->i4_pad_y_rec, ps_curr_layer->i4_wd, ps_curr_layer->i4_ht, s_search_prms_blk.i4_num_steps_post_refine); /*************************************************************************/ /* set the MV limit per ref. pic. */ /* - P pic. : Based on the config params. */ /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */ /*************************************************************************/ { WORD32 ref_ctr; /* Only for B/b pic. 
*/ if(1 == ps_ctxt->s_frm_prms.bidir_enabled) { WORD16 i2_mv_y_per_poc, i2_max_mv_y; WORD32 cur_poc, ref_poc, abs_poc_diff; cur_poc = ps_ctxt->i4_curr_poc; /* Get abs MAX for symmetric search */ i2_mv_y_per_poc = MAX( ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id], (ABS(ps_ctxt->s_coarse_dyn_range_prms .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id]))); for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++) { ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]; abs_poc_diff = ABS((cur_poc - ref_poc)); /* Get the cur. max MV based on POC distance */ i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff; i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y); as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x; as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y; as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x; as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y; } } else { /* Set the Config. File Params for P pic. */ for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++) { as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x; as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y; as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x; as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y; } } } /* EIID: Calculate threshold based on quality preset and/or temporal layers */ if(e_me_quality_presets == ME_MEDIUM_SPEED) { i4_threshold_multiplier = 1; i4_threshold_divider = 4; } else if(e_me_quality_presets == ME_HIGH_SPEED) { i4_threshold_multiplier = 1; i4_threshold_divider = 2; } else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25)) { #if OLD_XTREME_SPEED /* Hard coding the temporal ID value to 1, if it is older xtreme speed */ i4_temporal_layer = 1; #endif if(i4_temporal_layer == 0) { i4_threshold_multiplier = 3; i4_threshold_divider = 4; } else if(i4_temporal_layer == 1) { i4_threshold_multiplier = 3; i4_threshold_divider = 4; } else if(i4_temporal_layer == 2) { 
i4_threshold_multiplier = 1; i4_threshold_divider = 1; } else { i4_threshold_multiplier = 5; i4_threshold_divider = 4; } } else if(e_me_quality_presets == ME_HIGH_QUALITY) { i4_threshold_multiplier = 1; i4_threshold_divider = 1; } /*************************************************************************/ /*************************************************************************/ /*************************************************************************/ /* START OF THE CORE LOOP */ /* If Encode is 0, then we just loop over each blk */ /*************************************************************************/ /*************************************************************************/ /*************************************************************************/ while(0 == end_of_frame) { job_queue_t *ps_job; ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row; //EIID WORD32 i4_ctb_row_ctr; //counter to calculate CTB row counter. It's (row_ctr /4) WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4; //calculations verified for L1 only //+3 to get ceil values when divided by 4 WORD32 i4_num_4x4_blocks_in_ctb_at_l1 = 8 * 8; //considering CTB size 32x32 at L1. 
hardcoded for now //if there is variable for ctb size use that and this variable can be derived WORD32 offset_val, check_dep_pos, set_dep_pos; void *pv_hme_dep_mngr; ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row; /* Get the current layer HME Dep Mngr */ /* Note : Use layer_id - 1 in HME layers */ pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1]; /* Get the current row from the job queue */ ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job( ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong); /* If all rows are done, set the end of process flag to 1, */ /* and the current row to -1 */ if(NULL == ps_job) { blk_y = -1; end_of_frame = 1; continue; } if(1 == ps_ctxt->s_frm_prms.is_i_pic) { /* set the output dependency of current row */ ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong); continue; } blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no; blk_x = 0; i4_ctb_x = 0; /* wait for Corresponding Pre intra Job to be completed */ if(1 == ps_refine_prms->i4_layer_id) { volatile UWORD32 i4_l1_done; volatile UWORD32 *pi4_l1_done; pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt ->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2]; i4_l1_done = *pi4_l1_done; while(!i4_l1_done) { i4_l1_done = *pi4_l1_done; } } /* Set Variables for Dep. Checking and Setting */ set_dep_pos = blk_y + 1; if(blk_y > 0) { offset_val = 2; check_dep_pos = blk_y - 1; } else { /* First row should run without waiting */ offset_val = -1; check_dep_pos = 0; } /* EIID: calculate ed_blk_ctxt pointer for current row */ /* valid for only layer-1. 
not varified and used for other layers */ i4_ctb_row_ctr = blk_y / 4; ps_ed_blk_ctxt_curr_row = ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row * i4_num_4x4_blocks_in_ctb_at_l1); //valid for L1 only ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row); /* if non-encode layer then i4_ctb_x will be same as blk_x */ /* loop over all the units is a row */ for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++) { ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb; //EIDD ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr; WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4; /* Wait till top row block is processed */ /* Currently checking till top right block*/ /* Disabled since all candidates, except for */ /* L and C, are projected from the coarser layer, */ /* only in ME_HIGH_SPEED mode */ if((ME_MEDIUM_SPEED > e_me_quality_presets)) { if(i4_ctb_x < (num_sync_units_in_row - 1)) { ihevce_dmgr_chk_row_row_sync( pv_hme_dep_mngr, i4_ctb_x, offset_val, check_dep_pos, 0, /* Col Tile No. : Not supported in PreEnc*/ ps_ctxt->thrd_id); } } { /* for non encoder layer only one block is processed */ num_blks_in_this_ctb = 1; } /* EIID: derive ed_ctxt ptr for current CTB */ ps_ed_blk_ctxt_curr_ctb = ps_ed_blk_ctxt_curr_row + (i4_ctb_blk_ctr * i4_num_4x4_blocks_in_ctb_at_l1); //currently valid for l1 layer only ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr; /* loop over all the blocks in CTB will always be 1 */ for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++) { { /* non encode layer */ blk_x = i4_ctb_x; blk_id_in_full_ctb = 0; s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0; } /* get the current input blk point */ pos_x = blk_x << blk_size_shift; pos_y = blk_y << blk_size_shift; pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride); /*********************************************************************/ /* replicate the inp buffer at blk or ctb level for each ref id, */ /* Instead of searching with wk * ref(k), we 
search with Ik = I / wk */ /* thereby avoiding a bloat up of memory. If we did all references */ /* weighted pred, we will end up with a duplicate copy of each ref */ /* at each layer, since we need to preserve the original reference. */ /* ToDo: Need to observe performance with this mechanism and compare */ /* with case where ref is weighted. */ /*********************************************************************/ if(blk_id_in_ctb == 0) { fp_get_wt_inp( ps_curr_layer, &ps_ctxt->s_wt_pred, unit_size, pos_x, pos_y, unit_size, ps_ctxt->num_ref_future + ps_ctxt->num_ref_past, ps_ctxt->i4_wt_pred_enable_flag); } s_search_prms_blk.i4_x_off = blk_x << blk_size_shift; s_search_prms_blk.i4_y_off = blk_y << blk_size_shift; /* Select search results from a suitable search result in the context */ { ps_search_results = &ps_ctxt->s_search_results_8x8; } s_search_prms_blk.ps_search_results = ps_search_results; /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */ hme_reset_search_results( ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL); /* Loop across different Ref IDx */ for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++) { S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12; S32 prev_blk_offset = 6; S32 resultid; /*********************************************************************/ /* For every blk in the picture, the search range needs to be derived*/ /* Any blk can have any mv, but practical search constraints are */ /* imposed by the picture boundary and amt of padding. */ /*********************************************************************/ /* MV limit is different based on ref. 
PIC */ hme_derive_search_range( &s_range_prms_inp, &s_pic_limit_inp, &as_mv_limit[i1_ref_idx], pos_x, pos_y, blk_wd, blk_ht); hme_derive_search_range( &s_range_prms_rec, &s_pic_limit_rec, &as_mv_limit[i1_ref_idx], pos_x, pos_y, blk_wd, blk_ht); s_search_prms_blk.i1_ref_idx = i1_ref_idx; ps_candt_zeromv->i1_ref_idx = i1_ref_idx; i4_num_srch_cands = 1; if(1 != ps_refine_prms->i4_layer_id) { S32 x, y; x = gau1_encode_to_raster_x[blk_id_in_full_ctb]; y = gau1_encode_to_raster_y[blk_id_in_full_ctb]; if(ME_MEDIUM_SPEED > e_me_quality_presets) { hme_get_spatial_candt( ps_curr_layer, e_search_blk_size, blk_x, blk_y, i1_ref_idx, &as_top_neighbours[0], &as_left_neighbours[0], 0, ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1), 0, ps_refine_prms->i4_encode); *ps_candt_tr = as_top_neighbours[3]; *ps_candt_t = as_top_neighbours[1]; *ps_candt_tl = as_top_neighbours[0]; i4_num_srch_cands += 3; } else { layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank; S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size]; S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size]; search_node_t *ps_search_node; S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y; hme_mv_t *ps_mv, *ps_mv_base; S08 *pi1_ref_idx, *pi1_ref_idx_base; S32 jump = 1, mvs_in_blk, mvs_in_row; S32 shift = (ps_refine_prms->i4_encode ? 
2 : 0); if(i4_blk_size1 != i4_blk_size2) { blk_x_temp <<= 1; blk_y_temp <<= 1; jump = 2; if((i4_blk_size1 << 2) == i4_blk_size2) { blk_x_temp <<= 1; blk_y_temp <<= 1; jump = 4; } } mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk; mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row; /* Adjust teh blk coord to point to top left locn */ blk_x_temp -= 1; blk_y_temp -= 1; /* Pick up the mvs from the location */ i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk); i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp); ps_mv = ps_layer_mvbank->ps_mv + i4_offset; pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref); pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref); ps_mv_base = ps_mv; pi1_ref_idx_base = pi1_ref_idx; ps_search_node = &as_left_neighbours[0]; ps_mv = ps_mv_base + mvs_in_row; pi1_ref_idx = pi1_ref_idx_base + mvs_in_row; COPY_MV_TO_SEARCH_NODE( ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift); i4_num_srch_cands++; } } else { S32 x, y; x = gau1_encode_to_raster_x[blk_id_in_full_ctb]; y = gau1_encode_to_raster_y[blk_id_in_full_ctb]; if(ME_MEDIUM_SPEED > e_me_quality_presets) { hme_get_spatial_candt_in_l1_me( ps_curr_layer, e_search_blk_size, blk_x, blk_y, i1_ref_idx, !ps_search_results->pu1_is_past[i1_ref_idx], &as_top_neighbours[0], &as_left_neighbours[0], 0, ((ps_refine_prms->i4_encode) ? 
gau1_cu_tr_valid[y][x] : 1), 0, ps_ctxt->s_frm_prms.u1_num_active_ref_l0, ps_ctxt->s_frm_prms.u1_num_active_ref_l1); *ps_candt_tr = as_top_neighbours[3]; *ps_candt_t = as_top_neighbours[1]; *ps_candt_tl = as_top_neighbours[0]; i4_num_srch_cands += 3; } else { layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank; S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size]; S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size]; S32 i4_mv_pos_in_implicit_array; search_node_t *ps_search_node; S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y; hme_mv_t *ps_mv, *ps_mv_base; S08 *pi1_ref_idx, *pi1_ref_idx_base; S32 jump = 1, mvs_in_blk, mvs_in_row; S32 shift = (ps_refine_prms->i4_encode ? 2 : 0); U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx]; S32 i4_num_results_in_given_dir = ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * ps_ctxt->s_frm_prms.u1_num_active_ref_l1) : (ps_layer_mvbank->i4_num_mvs_per_ref * ps_ctxt->s_frm_prms.u1_num_active_ref_l0)); if(i4_blk_size1 != i4_blk_size2) { blk_x_temp <<= 1; blk_y_temp <<= 1; jump = 2; if((i4_blk_size1 << 2) == i4_blk_size2) { blk_x_temp <<= 1; blk_y_temp <<= 1; jump = 4; } } mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk; mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row; /* Adjust teh blk coord to point to top left locn */ blk_x_temp -= 1; blk_y_temp -= 1; /* Pick up the mvs from the location */ i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk); i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp); i4_offset += ((u1_pred_dir == 1) ? 
(ps_layer_mvbank->i4_num_mvs_per_ref * ps_ctxt->s_frm_prms.u1_num_active_ref_l0) : 0); ps_mv = ps_layer_mvbank->ps_mv + i4_offset; pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; ps_mv_base = ps_mv; pi1_ref_idx_base = pi1_ref_idx; { /* ps_mv and pi1_ref_idx now point to the top left locn */ ps_search_node = &as_left_neighbours[0]; ps_mv = ps_mv_base + mvs_in_row; pi1_ref_idx = pi1_ref_idx_base + mvs_in_row; i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id( pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir); if(-1 != i4_mv_pos_in_implicit_array) { COPY_MV_TO_SEARCH_NODE( ps_search_node, &ps_mv[i4_mv_pos_in_implicit_array], &pi1_ref_idx[i4_mv_pos_in_implicit_array], i1_ref_idx, shift); } else { ps_search_node->u1_is_avail = 0; ps_search_node->s_mv.i2_mvx = 0; ps_search_node->s_mv.i2_mvy = 0; ps_search_node->i1_ref_idx = i1_ref_idx; } i4_num_srch_cands++; } } } *ps_candt_l = as_left_neighbours[0]; /* when 16x16 is searched in an encode layer, and the prev layer */ /* stores results for 4x4 blks, we project 5 candts corresponding */ /* to (2,2), (2,14), (14,2), 14,14) and 2nd best of (2,2) */ /* However in other cases, only 2,2 best and 2nd best reqd */ resultid = 0; pf_hme_project_coloc_candt( ps_candt_prj_coloc[0], ps_curr_layer, ps_coarse_layer, pos_x + 2, pos_y + 2, i1_ref_idx, resultid); i4_num_srch_cands++; resultid = 1; if(num_results_prev_layer > 1) { pf_hme_project_coloc_candt( ps_candt_prj_coloc[1], ps_curr_layer, ps_coarse_layer, pos_x + 2, pos_y + 2, i1_ref_idx, resultid); i4_num_srch_cands++; } resultid = 0; if(ME_MEDIUM_SPEED <= e_me_quality_presets) { pf_hme_project_coloc_candt( ps_candt_prj_t[0], ps_curr_layer, ps_coarse_layer, pos_x, pos_y - prev_blk_offset, i1_ref_idx, resultid); i4_num_srch_cands++; } { pf_hme_project_coloc_candt( ps_candt_prj_br[0], ps_curr_layer, ps_coarse_layer, pos_x + next_blk_offset, pos_y + next_blk_offset, i1_ref_idx, resultid); pf_hme_project_coloc_candt( ps_candt_prj_bl[0], ps_curr_layer, 
ps_coarse_layer, pos_x - prev_blk_offset, pos_y + next_blk_offset, i1_ref_idx, resultid); pf_hme_project_coloc_candt( ps_candt_prj_r[0], ps_curr_layer, ps_coarse_layer, pos_x + next_blk_offset, pos_y, i1_ref_idx, resultid); pf_hme_project_coloc_candt( ps_candt_prj_b[0], ps_curr_layer, ps_coarse_layer, pos_x, pos_y + next_blk_offset, i1_ref_idx, resultid); i4_num_srch_cands += 4; if(ME_MEDIUM_SPEED <= e_me_quality_presets) { pf_hme_project_coloc_candt( ps_candt_prj_tr[0], ps_curr_layer, ps_coarse_layer, pos_x + next_blk_offset, pos_y - prev_blk_offset, i1_ref_idx, resultid); pf_hme_project_coloc_candt( ps_candt_prj_tl[0], ps_curr_layer, ps_coarse_layer, pos_x - prev_blk_offset, pos_y - prev_blk_offset, i1_ref_idx, resultid); i4_num_srch_cands += 2; } } if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED)) { resultid = 1; pf_hme_project_coloc_candt( ps_candt_prj_br[1], ps_curr_layer, ps_coarse_layer, pos_x + next_blk_offset, pos_y + next_blk_offset, i1_ref_idx, resultid); pf_hme_project_coloc_candt( ps_candt_prj_bl[1], ps_curr_layer, ps_coarse_layer, pos_x - prev_blk_offset, pos_y + next_blk_offset, i1_ref_idx, resultid); pf_hme_project_coloc_candt( ps_candt_prj_r[1], ps_curr_layer, ps_coarse_layer, pos_x + next_blk_offset, pos_y, i1_ref_idx, resultid); pf_hme_project_coloc_candt( ps_candt_prj_b[1], ps_curr_layer, ps_coarse_layer, pos_x, pos_y + next_blk_offset, i1_ref_idx, resultid); i4_num_srch_cands += 4; pf_hme_project_coloc_candt( ps_candt_prj_tr[1], ps_curr_layer, ps_coarse_layer, pos_x + next_blk_offset, pos_y - prev_blk_offset, i1_ref_idx, resultid); pf_hme_project_coloc_candt( ps_candt_prj_tl[1], ps_curr_layer, ps_coarse_layer, pos_x - prev_blk_offset, pos_y - prev_blk_offset, i1_ref_idx, resultid); pf_hme_project_coloc_candt( ps_candt_prj_t[1], ps_curr_layer, ps_coarse_layer, pos_x, pos_y - prev_blk_offset, i1_ref_idx, resultid); i4_num_srch_cands += 3; } /* Note this block also clips the MV range for all candidates */ #ifdef _DEBUG { 
S32 candt; range_prms_t *ps_range_prms; S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past; for(candt = 0; candt < i4_num_srch_cands; candt++) { search_node_t *ps_search_node; ps_search_node = s_search_prms_blk.ps_search_candts[candt].ps_search_node; ps_range_prms = s_search_prms_blk.aps_mv_range[0]; if((ps_search_node->i1_ref_idx >= num_ref_valid) || (ps_search_node->i1_ref_idx < 0)) { ASSERT(0); } } } #endif { S32 srch_cand; S32 num_unique_nodes = 0; S32 num_nodes_searched = 0; S32 num_best_cand = 0; S08 i1_grid_enable = 0; search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2]; /* has list of valid partition to search terminated by -1 */ S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1]; S32 center_x; S32 center_y; /* indicates if the centre point of grid needs to be explicitly added for search */ S32 add_centre = 0; memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map)); center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx; center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy; for(srch_cand = 0; (srch_cand < i4_num_srch_cands) && (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts); srch_cand++) { search_node_t s_search_node_temp = s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0]; s_search_node_temp.i1_ref_idx = i1_ref_idx; //TEMP FIX; /* Clip the motion vectors as well here since after clipping two candidates can become same and they will be removed during deduplication */ CLIP_MV_WITHIN_RANGE( s_search_node_temp.s_mv.i2_mvx, s_search_node_temp.s_mv.i2_mvy, s_search_prms_blk.aps_mv_range[0], ps_refine_prms->i4_num_steps_fpel_refine, ps_refine_prms->i4_num_steps_hpel_refine, ps_refine_prms->i4_num_steps_qpel_refine); /* PT_C */ INSERT_NEW_NODE( as_unique_search_nodes, num_unique_nodes, s_search_node_temp, 0, au4_unique_node_map, center_x, center_y, 1); num_nodes_searched += 1; } num_unique_nodes = MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts); /* If number of candidates projected/number of candidates to be refined are more 
than 2, then filter out and choose the best two here */ if(num_unique_nodes >= 2) { S32 num_results; S32 cnt; S32 *pi4_valid_part_ids; s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0]; s_search_prms_blk.i4_num_search_nodes = num_unique_nodes; pi4_valid_part_ids = &ai4_valid_part_ids[0]; /* pi4_valid_part_ids is updated inside */ hme_pred_search_no_encode( &s_search_prms_blk, ps_curr_layer, &ps_ctxt->s_wt_pred, pi4_valid_part_ids, 1, e_me_quality_presets, i1_grid_enable, (ihevce_me_optimised_function_list_t *) ps_ctxt->pv_me_optimised_function_list ); num_best_cand = 0; cnt = 0; num_results = ps_search_results->u1_num_results_per_part; while((id = pi4_valid_part_ids[cnt++]) >= 0) { num_results = MIN(ps_refine_prms->pu1_num_best_results[id], num_results); for(i = 0; i < num_results; i++) { search_node_t s_search_node_temp; s_search_node_temp = *(ps_search_results->aps_part_results[i1_ref_idx][id] + i); if(s_search_node_temp.i1_ref_idx >= 0) { INSERT_NEW_NODE_NOMAP( as_best_two_proj_node, num_best_cand, s_search_node_temp, 0); } } } } else { add_centre = 1; num_best_cand = num_unique_nodes; as_best_two_proj_node[0] = as_unique_search_nodes[0]; } num_unique_nodes = 0; num_nodes_searched = 0; if(1 == num_best_cand) { search_node_t s_search_node_temp = as_best_two_proj_node[0]; S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx; S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy; S08 i1_ref_idx = s_search_node_temp.i1_ref_idx; i1_grid_enable = 1; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1; as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1; as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = 
i2_mv_y - 1; as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y; as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y; as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1; as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1; as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1; as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; if(add_centre) { as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x; as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y; as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; } } else { /* For the candidates where refinement was required, choose the best two */ for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++) { search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand]; WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx; WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy; /* Because there may not be two best unique candidates (because of clipping), second best candidate can be uninitialized, ignore that */ if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV || s_search_node_temp.i1_ref_idx < 0) { num_nodes_searched++; continue; } /* PT_C */ /* Since the center point has already be evaluated and best results are persistent, it will not be evaluated again */ 
if(add_centre) /* centre point added explicitly again if search results is not updated */ { INSERT_NEW_NODE( as_unique_search_nodes, num_unique_nodes, s_search_node_temp, 0, au4_unique_node_map, center_x, center_y, 1); } /* PT_L */ s_search_node_temp.s_mv.i2_mvx = mv_x - 1; s_search_node_temp.s_mv.i2_mvy = mv_y; INSERT_NEW_NODE( as_unique_search_nodes, num_unique_nodes, s_search_node_temp, 0, au4_unique_node_map, center_x, center_y, 1); /* PT_T */ s_search_node_temp.s_mv.i2_mvx = mv_x; s_search_node_temp.s_mv.i2_mvy = mv_y - 1; INSERT_NEW_NODE( as_unique_search_nodes, num_unique_nodes, s_search_node_temp, 0, au4_unique_node_map, center_x, center_y, 1); /* PT_R */ s_search_node_temp.s_mv.i2_mvx = mv_x + 1; s_search_node_temp.s_mv.i2_mvy = mv_y; INSERT_NEW_NODE( as_unique_search_nodes, num_unique_nodes, s_search_node_temp, 0, au4_unique_node_map, center_x, center_y, 1); /* PT_B */ s_search_node_temp.s_mv.i2_mvx = mv_x; s_search_node_temp.s_mv.i2_mvy = mv_y + 1; INSERT_NEW_NODE( as_unique_search_nodes, num_unique_nodes, s_search_node_temp, 0, au4_unique_node_map, center_x, center_y, 1); /* PT_TL */ s_search_node_temp.s_mv.i2_mvx = mv_x - 1; s_search_node_temp.s_mv.i2_mvy = mv_y - 1; INSERT_NEW_NODE( as_unique_search_nodes, num_unique_nodes, s_search_node_temp, 0, au4_unique_node_map, center_x, center_y, 1); /* PT_TR */ s_search_node_temp.s_mv.i2_mvx = mv_x + 1; s_search_node_temp.s_mv.i2_mvy = mv_y - 1; INSERT_NEW_NODE( as_unique_search_nodes, num_unique_nodes, s_search_node_temp, 0, au4_unique_node_map, center_x, center_y, 1); /* PT_BL */ s_search_node_temp.s_mv.i2_mvx = mv_x - 1; s_search_node_temp.s_mv.i2_mvy = mv_y + 1; INSERT_NEW_NODE( as_unique_search_nodes, num_unique_nodes, s_search_node_temp, 0, au4_unique_node_map, center_x, center_y, 1); /* PT_BR */ s_search_node_temp.s_mv.i2_mvx = mv_x + 1; s_search_node_temp.s_mv.i2_mvy = mv_y + 1; INSERT_NEW_NODE( as_unique_search_nodes, num_unique_nodes, s_search_node_temp, 0, au4_unique_node_map, center_x, center_y, 
1); } } s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0]; s_search_prms_blk.i4_num_search_nodes = num_unique_nodes; /*****************************************************************/ /* Call the search algorithm, this includes: */ /* Pre-Search-Refinement (for coarse candts) */ /* Search on each candidate */ /* Post Search Refinement on winners/other new candidates */ /*****************************************************************/ hme_pred_search_no_encode( &s_search_prms_blk, ps_curr_layer, &ps_ctxt->s_wt_pred, ai4_valid_part_ids, 0, e_me_quality_presets, i1_grid_enable, (ihevce_me_optimised_function_list_t *) ps_ctxt->pv_me_optimised_function_list); i1_grid_enable = 0; } } /* for non encode layer update MV and end processing for block */ { WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0; search_node_t *ps_search_node; /* now update the reqd results back to the layer mv bank. */ if(1 == ps_refine_prms->i4_layer_id) { hme_update_mv_bank_in_l1_me( ps_search_results, ps_curr_layer->ps_layer_mvbank, blk_x, blk_y, &s_mv_update_prms); } else { hme_update_mv_bank_noencode( ps_search_results, ps_curr_layer->ps_layer_mvbank, blk_x, blk_y, &s_mv_update_prms); } /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */ /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) { WORD32 i4_j; layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank; //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size) /* Not considering this for Dyn. 
Search Update */ { for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref; i4_ref_id++) { ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++) { hme_update_dynamic_search_params( &ps_ctxt->s_coarse_dyn_range_prms .as_dyn_range_prms[ps_refine_prms->i4_layer_id] [i4_ref_id], ps_search_node->s_mv.i2_mvy); ps_search_node++; } } } } if(1 == ps_refine_prms->i4_layer_id) { WORD32 wt_pred_val, log_wt_pred_val; WORD32 ref_id_of_nearest_poc = 0; WORD32 max_val = 0x7fffffff; WORD32 max_l0_val = 0x7fffffff; WORD32 max_l1_val = 0x7fffffff; WORD32 cur_val; WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred; WORD32 bestl0_sad = 0x7fffffff; WORD32 bestl1_sad = 0x7fffffff; search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL; for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref; i4_ref_id++) { wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id]; log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc; ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) + ((1 << log_wt_pred_val) >> 1)) >> log_wt_pred_val; i4_local_cost_weighted_pred = i4_local_weighted_sad + (ps_search_node->i4_tot_cost - ps_search_node->i4_sad); //the loop is redundant as the results are already sorted based on total cost //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++) { if(i4_local_cost_weighted_pred < min_cost) { min_cost = i4_local_cost_weighted_pred; min_sad = i4_local_weighted_sad; } } /* For P frame, calculate the nearest poc which is either P or I frame*/ if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) { if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]) { cur_val = ABS(ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]); if(cur_val < max_val) { max_val = cur_val; ref_id_of_nearest_poc = i4_ref_id; } } } } 
/*Store me cost wrt. to past frame only for P frame */ if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) { if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc]) { WORD16 i2_mvx, i2_mvy; WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4); WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4); WORD32 z_scan_idx = gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr]; WORD32 wt, log_wt; /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc]) <= (1 + ps_ctxt->num_b_frms));*/ /*obtain mvx and mvy */ i2_mvx = ps_search_results ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N] ->s_mv.i2_mvx; i2_mvy = ps_search_results ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N] ->s_mv.i2_mvy; /*register the min cost for l1 me in blk context */ wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc]; log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc; /*register the min cost for l1 me in blk context */ ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] = ((ps_search_results ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N] ->i4_sad * wt) + ((1 << log_wt) >> 1)) >> log_wt; ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] = ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] + (ps_search_results ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N] ->i4_tot_cost - ps_search_results ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N] ->i4_sad); /*for complexity change detection*/ ps_ctxt->i4_num_blks++; if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] > (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms))) { ps_ctxt->i4_num_blks_high_sad++; } } } } /* EIID: Early inter intra decisions */ /* tap L1 level SAD for inter intra decisions */ if((e_me_quality_presets >= ME_MEDIUM_SPEED) && (!ps_ctxt->s_frm_prms .is_i_pic)) //for high-quality preset->disable early decisions { if(1 == ps_refine_prms->i4_layer_id) { WORD32 i4_min_sad_cost_8x8_block = 
min_cost; ihevce_ed_blk_t *ps_curr_ed_blk_ctxt; WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4); WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4); WORD32 z_scan_idx = gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr]; ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx; /*register the min cost for l1 me in blk context */ ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] = i4_min_sad_cost_8x8_block; i4_num_comparisions++; /* take early inter-intra decision here */ ps_curr_ed_blk_ctxt->intra_or_inter = 3; /*init saying eval both */ #if DISABLE_INTRA_IN_BPICS if((e_me_quality_presets == ME_XTREME_SPEED_25) && (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)) { ps_curr_ed_blk_ctxt->intra_or_inter = 2; /*eval only inter if inter cost is less */ i4_num_inter_wins++; } else #endif { if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] < ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] * i4_threshold_multiplier) / i4_threshold_divider)) { ps_curr_ed_blk_ctxt->intra_or_inter = 2; /*eval only inter if inter cost is less */ i4_num_inter_wins++; } } //{ // DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n", // blk_x,blk_y, // i4_ctb_blk_ctr, i4_ctb_row_ctr, // ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe, // i4_min_sad_cost_8x8_block // ); //} } //end of layer-1 } //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED) else { if(1 == ps_refine_prms->i4_layer_id) { WORD32 i4_min_sad_cost_8x8_block = min_cost; WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4); WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4); WORD32 z_scan_idx = gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr]; /*register the min cost for l1 me in blk context */ ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] = i4_min_sad_cost_8x8_block; } } if(1 == ps_refine_prms->i4_layer_id) { WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4); 
WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4); WORD32 z_scan_idx = gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr]; ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] = min_sad; if(min_cost < ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2]) { ps_ctxt->i4_L1_hme_best_cost += min_cost; ps_ctxt->i4_L1_hme_sad += min_sad; ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad; } else { ps_ctxt->i4_L1_hme_best_cost += ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2]; ps_ctxt->i4_L1_hme_sad += ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2]; ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2]; } } } } /* Update the number of blocks processed in the current row */ if((ME_MEDIUM_SPEED > e_me_quality_presets)) { ihevce_dmgr_set_row_row_sync( pv_hme_dep_mngr, (i4_ctb_x + 1), blk_y, 0 /* Col Tile No. : Not supported in PreEnc*/); } } /* set the output dependency after completion of row */ ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong); } }