1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /*!
22 ******************************************************************************
23 * \file ihevce_enc_loop_inter_mode_sifter.c
24 *
25 * \brief
26 *    This file contains functions for selecting best inter candidates for RDOPT evaluation
27 *
28 * \date
29 *    10/09/2014
30 *
31 ******************************************************************************
32 */
33 
34 /*****************************************************************************/
35 /* File Includes                                                             */
36 /*****************************************************************************/
37 /* System include files */
38 #include <stdio.h>
39 #include <string.h>
40 #include <stdlib.h>
41 #include <assert.h>
42 #include <stdarg.h>
43 #include <math.h>
44 #include <limits.h>
45 
46 /* User include files */
47 #include "ihevc_typedefs.h"
48 #include "itt_video_api.h"
49 #include "ihevce_api.h"
50 
51 #include "rc_cntrl_param.h"
52 #include "rc_frame_info_collector.h"
53 #include "rc_look_ahead_params.h"
54 
55 #include "ihevc_defs.h"
56 #include "ihevc_macros.h"
57 #include "ihevc_debug.h"
58 #include "ihevc_structs.h"
59 #include "ihevc_platform_macros.h"
60 #include "ihevc_deblk.h"
61 #include "ihevc_itrans_recon.h"
62 #include "ihevc_chroma_itrans_recon.h"
63 #include "ihevc_chroma_intra_pred.h"
64 #include "ihevc_intra_pred.h"
65 #include "ihevc_inter_pred.h"
66 #include "ihevc_mem_fns.h"
67 #include "ihevc_padding.h"
68 #include "ihevc_weighted_pred.h"
69 #include "ihevc_sao.h"
70 #include "ihevc_resi_trans.h"
71 #include "ihevc_quant_iquant_ssd.h"
72 #include "ihevc_cabac_tables.h"
73 
74 #include "ihevce_defs.h"
75 #include "ihevce_hle_interface.h"
76 #include "ihevce_lap_enc_structs.h"
77 #include "ihevce_multi_thrd_structs.h"
78 #include "ihevce_multi_thrd_funcs.h"
79 #include "ihevce_me_common_defs.h"
80 #include "ihevce_had_satd.h"
81 #include "ihevce_error_codes.h"
82 #include "ihevce_bitstream.h"
83 #include "ihevce_cabac.h"
84 #include "ihevce_rdoq_macros.h"
85 #include "ihevce_function_selector.h"
86 #include "ihevce_enc_structs.h"
87 #include "ihevce_entropy_structs.h"
88 #include "ihevce_cmn_utils_instr_set_router.h"
89 #include "ihevce_ipe_instr_set_router.h"
90 #include "ihevce_decomp_pre_intra_structs.h"
91 #include "ihevce_decomp_pre_intra_pass.h"
92 #include "ihevce_enc_loop_structs.h"
93 #include "ihevce_global_tables.h"
94 #include "ihevce_nbr_avail.h"
95 #include "ihevce_enc_loop_utils.h"
96 #include "ihevce_bs_compute_ctb.h"
97 #include "ihevce_cabac_rdo.h"
98 #include "ihevce_dep_mngr_interface.h"
99 #include "ihevce_enc_loop_pass.h"
100 #include "ihevce_rc_enc_structs.h"
101 #include "ihevce_common_utils.h"
102 #include "ihevce_stasino_helpers.h"
103 
104 #include "hme_datatype.h"
105 #include "hme_common_defs.h"
106 #include "hme_common_utils.h"
107 #include "hme_interface.h"
108 #include "hme_defs.h"
109 #include "ihevce_me_instr_set_router.h"
110 #include "hme_err_compute.h"
111 #include "hme_globals.h"
112 #include "ihevce_mv_pred.h"
113 #include "ihevce_mv_pred_merge.h"
114 #include "ihevce_inter_pred.h"
115 #include "ihevce_enc_loop_inter_mode_sifter.h"
116 
117 /*****************************************************************************/
118 /* Function Definitions                                                      */
119 /*****************************************************************************/
ihevce_get_num_part_types_in_me_cand_list(cu_inter_cand_t * ps_me_cand_list,UWORD8 * pu1_part_type_ref_cand,UWORD8 * pu1_idx_ref_cand,UWORD8 * pu1_diff_skip_cand_flag,WORD8 * pi1_skip_cand_from_merge_idx,WORD8 * pi1_final_skip_cand_merge_idx,UWORD8 u1_max_num_part_types_to_select,UWORD8 u1_num_me_cands)120 static WORD32 ihevce_get_num_part_types_in_me_cand_list(
121     cu_inter_cand_t *ps_me_cand_list,
122     UWORD8 *pu1_part_type_ref_cand,
123     UWORD8 *pu1_idx_ref_cand,
124     UWORD8 *pu1_diff_skip_cand_flag,
125     WORD8 *pi1_skip_cand_from_merge_idx,
126     WORD8 *pi1_final_skip_cand_merge_idx,
127     UWORD8 u1_max_num_part_types_to_select,
128     UWORD8 u1_num_me_cands)
129 {
130     UWORD8 i, j;
131     UWORD8 u1_num_unique_parts = 0;
132 
133     for(i = 0; i < u1_num_me_cands; i++)
134     {
135         UWORD8 u1_cur_part_type = ps_me_cand_list[i].b3_part_size;
136         UWORD8 u1_is_unique = 1;
137 
138         if(u1_num_unique_parts >= u1_max_num_part_types_to_select)
139         {
140             return u1_num_unique_parts;
141         }
142 
143         /* loop to check if the current cand is already present in the list */
144         for(j = 0; j < u1_num_unique_parts; j++)
145         {
146             if(u1_cur_part_type == pu1_part_type_ref_cand[j])
147             {
148                 u1_is_unique = 0;
149                 break;
150             }
151         }
152 
153         if(u1_is_unique)
154         {
155             if(SIZE_2Nx2N == u1_cur_part_type)
156             {
157                 *pu1_diff_skip_cand_flag = 0;
158                 *pi1_skip_cand_from_merge_idx = u1_num_unique_parts;
159                 *pi1_final_skip_cand_merge_idx = u1_num_unique_parts;
160             }
161 
162             pu1_part_type_ref_cand[u1_num_unique_parts] = u1_cur_part_type;
163             pu1_idx_ref_cand[u1_num_unique_parts] = i;
164             u1_num_unique_parts++;
165         }
166     }
167 
168     return u1_num_unique_parts;
169 }
170 
/*!
******************************************************************************
* \brief
*    Generates the luma inter prediction for a PU and, when requested,
*    evaluates the supplied error function between source and prediction.
*
* \param[in]  ps_mc_ctxt : context forwarded to pf_luma_inter_pred_pu
* \param[in]  pf_luma_inter_pred_pu : prediction function; always invoked
* \param[in]  pf_sad_func : error function; invoked only when
*             u1_compute_error is non-zero and the prediction call returned
*             IV_SUCCESS
* \param[in]  ps_pu : PU to predict; the block size handed to pf_sad_func is
*             (b4_wd + 1) << 2 by (b4_ht + 1) << 2
* \param[in]  pv_src : source block, passed on as both pu1_inp and pu2_inp
* \param[out] pv_pred : prediction destination, also passed on as the
*             reference block (pu1_ref and pu2_ref)
* \param[in]  i4_src_stride : stride of pv_src
* \param[in]  i4_pred_stride : stride of pv_pred
* \param[in]  u1_compute_error : 0 skips the error computation
* \param[in]  ps_cmn_utils_optimised_function_list : forwarded to pf_sad_func
*             via err_prms_t
*
* \return
*    The value pf_sad_func stores through pi4_sad_grid, or INT_MAX when the
*    error is not requested or the prediction call did not return IV_SUCCESS
******************************************************************************
*/
static WORD32 ihevce_compute_inter_pred_and_cost(
    inter_pred_ctxt_t *ps_mc_ctxt,
    PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu,
    PF_SAD_FXN_T pf_sad_func,
    pu_t *ps_pu,
    void *pv_src,
    void *pv_pred,
    WORD32 i4_src_stride,
    WORD32 i4_pred_stride,
    UWORD8 u1_compute_error,
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
{
    err_prms_t s_sad_prms;
    WORD32 i4_sad;

    /* The prediction is produced whether or not the error is wanted */
    IV_API_CALL_STATUS_T e_pred_status =
        pf_luma_inter_pred_pu(ps_mc_ctxt, ps_pu, pv_pred, i4_pred_stride, 0);

    if((!u1_compute_error) || (IV_SUCCESS != e_pred_status))
    {
        /* No error requested, or prediction failed: report the max 32 bit cost */
        return INT_MAX;
    }

    /* Source side */
    s_sad_prms.pu1_inp = (UWORD8 *)pv_src;
    s_sad_prms.pu2_inp = (UWORD16 *)pv_src;
    s_sad_prms.i4_inp_stride = i4_src_stride;

    /* Prediction side */
    s_sad_prms.pu1_ref = (UWORD8 *)pv_pred;
    s_sad_prms.pu2_ref = (UWORD16 *)pv_pred;
    s_sad_prms.i4_ref_stride = i4_pred_stride;

    /* Block geometry */
    s_sad_prms.i4_blk_wd = (ps_pu->b4_wd + 1) << 2;
    s_sad_prms.i4_blk_ht = (ps_pu->b4_ht + 1) << 2;

    /* Result location and function table */
    s_sad_prms.pi4_sad_grid = &i4_sad;
    s_sad_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;

    pf_sad_func(&s_sad_prms);

    return i4_sad;
}
219 
/*!
******************************************************************************
* \brief
*    Evaluates the merge candidates in ps_prms->ps_list for one partition and
*    selects the one with the lowest cost.
*
* \param[in]     ps_prms : merge evaluation parameters (candidate list,
*                prediction/error functions, prediction buffers, noise and
*                variance tables, merge index cost inputs)
* \param[in,out] ps_pu_merge : PU used as scratch for each candidate; when a
*                best candidate exists its mv, b2_pred_mode and b3_merge_idx
*                are set to that candidate's values on return. Its
*                b1_merge_flag is asserted to be set.
* \param[in]     ps_pu_me : ME-derived PU; a merge candidate with a matching
*                mv/pred mode (per ihevce_compare_pu_mv_t) reuses
*                i4_me_cand_cost instead of being predicted again
* \param[in]     pv_src : source block handed to the error function
* \param[in]     i4_me_cand_cost : cost reused for candidates matching
*                ps_pu_me
* \param[in]     i4_pred_buf_offset : byte offset added to the prediction
*                buffer base before predicting
* \param[in]     u1_num_cands : number of entries of ps_prms->ps_list to try
* \param[in]     u1_part_id : partition index used for the per-partition
*                tables
* \param[in]     u1_force_pred_evaluation : non-zero disables reuse of the ME
*                cost, so every candidate is predicted
*
* \return
*    Lowest cost found, or INT_MAX when no candidate was accepted
******************************************************************************
*/
static WORD32 ihevce_determine_best_merge_pu(
    merge_prms_t *ps_prms,
    pu_t *ps_pu_merge,
    pu_t *ps_pu_me,
    void *pv_src,
    WORD32 i4_me_cand_cost,
    WORD32 i4_pred_buf_offset,
    UWORD8 u1_num_cands,
    UWORD8 u1_part_id,
    UWORD8 u1_force_pred_evaluation)
{
    /* Function pointers and contexts */
    inter_pred_ctxt_t *ps_mc_ctxt = ps_prms->ps_mc_ctxt;
    PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu = ps_prms->pf_luma_inter_pred_pu;
    PF_SAD_FXN_T pf_sad_fxn = ps_prms->pf_sad_fxn;
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
        ps_prms->ps_cmn_utils_optimised_function_list;

    /* Candidate list and buffers */
    merge_cand_list_t *ps_merge_cands = ps_prms->ps_list;
    UWORD8 *pu1_valid_merge_indices = ps_prms->au1_valid_merge_indices;
    void **ppv_pred_buf_list = ps_prms->ppv_pred_buf_list;
    UWORD8 *pu1_merge_pred_buf_array = ps_prms->pu1_merge_pred_buf_array;
    UWORD8(*pau1_best_pred_buf_id)[MAX_NUM_INTER_PARTS] = ps_prms->pau1_best_pred_buf_id;
    WORD32 i4_src_stride = ps_prms->i4_src_stride;
    WORD32 i4_pred_stride = ps_prms->i4_pred_stride;
    UWORD8 u1_is_hbd = ps_prms->u1_is_hbd;

    /* Noise weighting inputs */
    WORD32(*pai4_noise_term)[MAX_NUM_INTER_PARTS] = ps_prms->pai4_noise_term;
    UWORD32(*pau4_pred_variance)[MAX_NUM_INTER_PARTS] = ps_prms->pau4_pred_variance;
    UWORD32 *pu4_src_variance = ps_prms->pu4_src_variance;
    WORD32 i4_alpha_stim_multiplier = ps_prms->i4_alpha_stim_multiplier;
    /* Weighting applies only for a noisy CU with a non-zero multiplier */
    UWORD8 u1_use_noise_term = (ps_prms->u1_is_cu_noisy && i4_alpha_stim_multiplier) ? 1 : 0;

    /* Merge index cost inputs */
    UWORD8 u1_merge_idx_cabac_model = ps_prms->u1_merge_idx_cabac_model;
    WORD32 i4_lambda = ps_prms->i4_lambda;
    UWORD8 u1_max_cands = ps_prms->u1_max_cands;

    /* Two buffers are ping-ponged: one keeps the best prediction so far */
    UWORD8 u1_best_buf_id = pu1_merge_pred_buf_array[0];
    UWORD8 u1_cur_buf_id = pu1_merge_pred_buf_array[1];

    /* Running best */
    UWORD8 u1_best_cand_id = UCHAR_MAX;
    UWORD8 u1_best_pred_mode;
    WORD32 i4_best_cost = INT_MAX;
    WORD32 i4_best_noise_term = 0;
    UWORD32 u4_best_variance;

    /* Per-candidate scratch */
    WORD32 i4_cur_noise_term = 0;
    UWORD32 u4_cur_variance;
    WORD32 i4_mean;
    UWORD8 u1_cand;

    ASSERT(ps_pu_merge->b1_merge_flag);

    for(u1_cand = 0; u1_cand < u1_num_cands; u1_cand++)
    {
        merge_cand_list_t *ps_cand = &ps_merge_cands[u1_cand];
        WORD32 i4_cur_cost;
        UWORD8 u1_reuse_me_pred;

        /* Candidates taken from the top row may be disallowed */
        if(ps_prms->pu1_is_top_used[u1_cand] && !ps_prms->u1_use_merge_cand_from_top_row)
        {
            continue;
        }

        /* Load the candidate into the scratch PU */
        ps_pu_merge->mv = ps_cand->mv;
        ps_pu_merge->b3_merge_idx = pu1_valid_merge_indices[u1_cand];
        ps_pu_merge->b2_pred_mode =
            ps_cand->u1_pred_flag_l0 ? (ps_cand->u1_pred_flag_l1 ? PRED_BI : PRED_L0) : PRED_L1;

        /* 8x8 SMPs should not have bipred mode as per std */
        if((PRED_BI == ps_pu_merge->b2_pred_mode) &&
           ((((ps_pu_merge->b4_wd + 1) << 2) + ((ps_pu_merge->b4_ht + 1) << 2)) < 16))
        {
            continue;
        }

        /* A candidate identical to the ME result needs no new prediction */
        u1_reuse_me_pred = (!u1_force_pred_evaluation) &&
                           (ihevce_compare_pu_mv_t(
                               &ps_pu_merge->mv,
                               &ps_pu_me->mv,
                               ps_pu_merge->b2_pred_mode,
                               ps_pu_me->b2_pred_mode));

        if(u1_reuse_me_pred)
        {
            i4_cur_cost = i4_me_cand_cost;

            if(u1_use_noise_term && (i4_cur_cost < INT_MAX))
            {
                /* Reuse the values stored for the ME candidate */
                u4_cur_variance = pau4_pred_variance[ME_OR_SKIP_DERIVED][u1_part_id];
                i4_cur_noise_term = pai4_noise_term[ME_OR_SKIP_DERIVED][u1_part_id];
            }
        }
        else
        {
            /* Predict into the scratch buffer and measure the error */
            void *pv_pred = (UWORD8 *)ppv_pred_buf_list[u1_cur_buf_id] + i4_pred_buf_offset;

            i4_cur_cost = ihevce_compute_inter_pred_and_cost(
                ps_mc_ctxt,
                pf_luma_inter_pred_pu,
                pf_sad_fxn,
                ps_pu_merge,
                pv_src,
                pv_pred,
                i4_src_stride,
                i4_pred_stride,
                1,
                ps_cmn_utils_optimised_function_list);

            if(u1_use_noise_term && (i4_cur_cost < INT_MAX))
            {
                ihevce_calc_variance(
                    pv_pred,
                    i4_pred_stride,
                    &i4_mean,
                    &u4_cur_variance,
                    (ps_pu_merge->b4_ht + 1) << 2,
                    (ps_pu_merge->b4_wd + 1) << 2,
                    u1_is_hbd,
                    0);

                i4_cur_noise_term = ihevce_compute_noise_term(
                    i4_alpha_stim_multiplier, pu4_src_variance[u1_part_id], u4_cur_variance);

                MULTIPLY_STIM_WITH_DISTORTION(
                    i4_cur_cost, i4_cur_noise_term, STIM_Q_FORMAT, ALPHA_Q_FORMAT);
            }
        }

        /* Valid costs additionally pay for signalling the merge index */
        if(i4_cur_cost < INT_MAX)
        {
            WORD32 i4_merge_idx_cost = 0;
            COMPUTE_MERGE_IDX_COST(
                u1_merge_idx_cabac_model, u1_cand, u1_max_cands, i4_lambda, i4_merge_idx_cost);
            i4_cur_cost += i4_merge_idx_cost;
        }

        if(i4_cur_cost >= i4_best_cost)
        {
            /* Not an improvement */
            continue;
        }

        /* New best candidate */
        i4_best_cost = i4_cur_cost;
        u1_best_cand_id = u1_cand;
        u1_best_pred_mode = ps_pu_merge->b2_pred_mode;

        if(u1_use_noise_term)
        {
            u4_best_variance = u4_cur_variance;
            i4_best_noise_term = i4_cur_noise_term;
        }

        if(u1_reuse_me_pred)
        {
            /* The prediction already lives in the ME candidate's buffer */
            pau1_best_pred_buf_id[MERGE_DERIVED][u1_part_id] =
                pau1_best_pred_buf_id[ME_OR_SKIP_DERIVED][u1_part_id];
        }
        else
        {
            /* Keep the buffer just written; the old best becomes scratch */
            SWAP(u1_best_buf_id, u1_cur_buf_id);
            pau1_best_pred_buf_id[MERGE_DERIVED][u1_part_id] = u1_best_buf_id;
        }
    }

    if(UCHAR_MAX != u1_best_cand_id)
    {
        /* Restore the winning candidate into the output PU */
        ps_pu_merge->mv = ps_merge_cands[u1_best_cand_id].mv;
        ps_pu_merge->b2_pred_mode = u1_best_pred_mode;
        ps_pu_merge->b3_merge_idx = pu1_valid_merge_indices[u1_best_cand_id];

        if(u1_use_noise_term)
        {
            pai4_noise_term[MERGE_DERIVED][u1_part_id] = i4_best_noise_term;
            pau4_pred_variance[MERGE_DERIVED][u1_part_id] = u4_best_variance;
        }
    }

    return i4_best_cost;
}
413 
ihevce_merge_cand_pred_buffer_preparation(void ** ppv_pred_buf_list,cu_inter_cand_t * ps_cand,UWORD8 (* pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],WORD32 i4_pred_stride,UWORD8 u1_cu_size,UWORD8 u1_part_type,UWORD8 u1_num_bytes_per_pel,FT_COPY_2D * pf_copy_2d)414 static WORD8 ihevce_merge_cand_pred_buffer_preparation(
415     void **ppv_pred_buf_list,
416     cu_inter_cand_t *ps_cand,
417     UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
418     WORD32 i4_pred_stride,
419     UWORD8 u1_cu_size,
420     UWORD8 u1_part_type,
421     UWORD8 u1_num_bytes_per_pel,
422     FT_COPY_2D *pf_copy_2d)
423 {
424     WORD32 i4_part_wd;
425     WORD32 i4_part_ht;
426     WORD32 i4_part_wd_pu2;
427     WORD32 i4_part_ht_pu2;
428     WORD32 i4_buf_offset;
429     UWORD8 *pu1_pred_src;
430     UWORD8 *pu1_pred_dst;
431     WORD8 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
432 
433     WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel;
434 
435     if((0 == u1_part_type) ||
436        (pau1_final_pred_buf_id[MERGE_DERIVED][0] == pau1_final_pred_buf_id[MERGE_DERIVED][1]))
437     {
438         ps_cand->pu1_pred_data =
439             (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
440         ps_cand->pu2_pred_data =
441             (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
442         ps_cand->i4_pred_data_stride = i4_pred_stride;
443 
444         i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
445     }
446     else if(pau1_final_pred_buf_id[MERGE_DERIVED][0] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0])
447     {
448         i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
449         i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
450 
451         i4_buf_offset = 0;
452 
453         pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]] +
454                        i4_buf_offset;
455         pu1_pred_dst =
456             (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] + i4_buf_offset;
457 
458         pf_copy_2d(
459             pu1_pred_dst,
460             i4_stride,
461             pu1_pred_src,
462             i4_stride,
463             i4_part_wd * u1_num_bytes_per_pel,
464             i4_part_ht);
465 
466         ps_cand->pu1_pred_data =
467             (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
468         ps_cand->pu2_pred_data =
469             (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
470         ps_cand->i4_pred_data_stride = i4_pred_stride;
471 
472         i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
473     }
474     else if(pau1_final_pred_buf_id[MERGE_DERIVED][1] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1])
475     {
476         i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
477         i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
478 
479         i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
480                         (i4_part_wd < u1_cu_size) * i4_part_wd;
481 
482         i4_buf_offset *= u1_num_bytes_per_pel;
483 
484         i4_part_wd = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
485         i4_part_ht = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
486 
487         pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
488                        i4_buf_offset;
489         pu1_pred_dst =
490             (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]] + i4_buf_offset;
491 
492         pf_copy_2d(
493             pu1_pred_dst,
494             i4_stride,
495             pu1_pred_src,
496             i4_stride,
497             i4_part_wd * u1_num_bytes_per_pel,
498             i4_part_ht);
499 
500         ps_cand->pu1_pred_data =
501             (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
502         ps_cand->pu2_pred_data =
503             (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
504         ps_cand->i4_pred_data_stride = i4_pred_stride;
505 
506         i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
507     }
508     else
509     {
510         i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
511         i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
512 
513         i4_part_wd_pu2 = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
514         i4_part_ht_pu2 = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
515 
516         switch((PART_TYPE_T)u1_part_type)
517         {
518         case PRT_2NxN:
519         case PRT_Nx2N:
520         case PRT_2NxnU:
521         case PRT_nLx2N:
522         {
523             pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
524             pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
525 
526             ps_cand->pu1_pred_data =
527                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
528             ps_cand->pu2_pred_data =
529                 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
530 
531             i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
532 
533             break;
534         }
535         case PRT_nRx2N:
536         case PRT_2NxnD:
537         {
538             i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
539                             (i4_part_wd < u1_cu_size) * i4_part_wd;
540 
541             i4_buf_offset *= u1_num_bytes_per_pel;
542 
543             pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
544                            i4_buf_offset;
545             pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]] +
546                            i4_buf_offset;
547 
548             i4_part_wd = i4_part_wd_pu2;
549             i4_part_ht = i4_part_ht_pu2;
550 
551             ps_cand->pu1_pred_data =
552                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
553             ps_cand->pu2_pred_data =
554                 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
555 
556             i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
557 
558             break;
559         }
560         }
561 
562         pf_copy_2d(
563             pu1_pred_dst,
564             i4_stride,
565             pu1_pred_src,
566             i4_stride,
567             i4_part_wd * u1_num_bytes_per_pel,
568             i4_part_ht);
569 
570         ps_cand->i4_pred_data_stride = i4_pred_stride;
571     }
572 
573     return i1_retval;
574 }
575 
ihevce_mixed_mode_cand_type1_pred_buffer_preparation(void ** ppv_pred_buf_list,cu_inter_cand_t * ps_cand,UWORD8 (* pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],UWORD8 * pu1_merge_pred_buf_idx_array,WORD32 i4_pred_stride,UWORD8 u1_me_pred_buf_id,UWORD8 u1_merge_pred_buf_id,UWORD8 u1_type0_cand_is_valid,UWORD8 u1_cu_size,UWORD8 u1_part_type,UWORD8 u1_num_bytes_per_pel,FT_COPY_2D * pf_copy_2d)576 static WORD8 ihevce_mixed_mode_cand_type1_pred_buffer_preparation(
577     void **ppv_pred_buf_list,
578     cu_inter_cand_t *ps_cand,
579     UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
580     UWORD8 *pu1_merge_pred_buf_idx_array,
581     WORD32 i4_pred_stride,
582     UWORD8 u1_me_pred_buf_id,
583     UWORD8 u1_merge_pred_buf_id,
584     UWORD8 u1_type0_cand_is_valid,
585     UWORD8 u1_cu_size,
586     UWORD8 u1_part_type,
587     UWORD8 u1_num_bytes_per_pel,
588     FT_COPY_2D *pf_copy_2d)
589 {
590     WORD32 i4_part_wd;
591     WORD32 i4_part_ht;
592     WORD32 i4_part_wd_pu2;
593     WORD32 i4_part_ht_pu2;
594     UWORD8 *pu1_pred_src;
595     UWORD8 *pu1_pred_dst = NULL;
596     WORD8 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
597 
598     WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel;
599 
600     ASSERT(0 != u1_part_type);
601 
602     i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
603     i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
604 
605     i4_part_wd_pu2 = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
606     i4_part_ht_pu2 = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
607 
608     if(pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1])
609     {
610         ps_cand->pu1_pred_data =
611             (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
612         ps_cand->pu2_pred_data =
613             (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
614         ps_cand->i4_pred_data_stride = i4_pred_stride;
615 
616         i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
617 
618         return i1_retval;
619     }
620     else
621     {
622         UWORD8 u1_bitfield = ((u1_merge_pred_buf_id == UCHAR_MAX) << 3) |
623                              ((u1_me_pred_buf_id == UCHAR_MAX) << 2) |
624                              ((!u1_type0_cand_is_valid) << 1) |
625                              (pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1] ==
626                               pau1_final_pred_buf_id[MERGE_DERIVED][1]);
627 
628         WORD32 i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
629                                (i4_part_wd < u1_cu_size) * i4_part_wd;
630 
631         i4_buf_offset *= u1_num_bytes_per_pel;
632 
633         switch(u1_bitfield)
634         {
635         case 15:
636         case 14:
637         case 6:
638         {
639             switch((PART_TYPE_T)u1_part_type)
640             {
641             case PRT_2NxN:
642             case PRT_Nx2N:
643             case PRT_2NxnU:
644             case PRT_nLx2N:
645             {
646                 pu1_pred_src =
647                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
648                 pu1_pred_dst =
649                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1]];
650 
651                 i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1];
652 
653                 break;
654             }
655             case PRT_nRx2N:
656             case PRT_2NxnD:
657             {
658                 pu1_pred_src =
659                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1]] +
660                     i4_buf_offset;
661                 pu1_pred_dst =
662                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]] +
663                     i4_buf_offset;
664 
665                 i4_part_wd = i4_part_wd_pu2;
666                 i4_part_ht = i4_part_ht_pu2;
667 
668                 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
669 
670                 break;
671             }
672             }
673 
674             ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
675             ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
676             ps_cand->i4_pred_data_stride = i4_pred_stride;
677 
678             pf_copy_2d(
679                 pu1_pred_dst,
680                 i4_stride,
681                 pu1_pred_src,
682                 i4_stride,
683                 i4_part_wd * u1_num_bytes_per_pel,
684                 i4_part_ht);
685 
686             break;
687         }
688         case 13:
689         case 9:
690         case 5:
691         {
692             UWORD8 i;
693 
694             for(i = 0; i < 3; i++)
695             {
696                 if((pu1_merge_pred_buf_idx_array[i] != pau1_final_pred_buf_id[MERGE_DERIVED][1]) &&
697                    (pu1_merge_pred_buf_idx_array[i] != pau1_final_pred_buf_id[MERGE_DERIVED][0]))
698                 {
699                     pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pu1_merge_pred_buf_idx_array[i]] +
700                                    i4_buf_offset;
701 
702                     i1_retval = pu1_merge_pred_buf_idx_array[i];
703 
704                     break;
705                 }
706             }
707 
708             pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
709                            i4_buf_offset;
710 
711             pf_copy_2d(
712                 pu1_pred_dst,
713                 i4_stride,
714                 pu1_pred_src,
715                 i4_stride,
716                 i4_part_wd_pu2 * u1_num_bytes_per_pel,
717                 i4_part_ht_pu2);
718             /* Copy PU1 */
719             pu1_pred_src =
720                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
721             pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[i1_retval];
722 
723             pf_copy_2d(
724                 pu1_pred_dst,
725                 i4_stride,
726                 pu1_pred_src,
727                 i4_stride,
728                 i4_part_wd * u1_num_bytes_per_pel,
729                 i4_part_ht);
730 
731             ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
732             ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
733             ps_cand->i4_pred_data_stride = i4_pred_stride;
734 
735             break;
736         }
737         case 12:
738         case 10:
739         case 8:
740         case 4:
741         case 2:
742         case 0:
743         {
744             pu1_pred_src =
745                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
746             pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1]];
747 
748             i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1];
749 
750             ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
751             ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
752             ps_cand->i4_pred_data_stride = i4_pred_stride;
753 
754             pf_copy_2d(
755                 pu1_pred_dst,
756                 i4_stride,
757                 pu1_pred_src,
758                 i4_stride,
759                 i4_part_wd * u1_num_bytes_per_pel,
760                 i4_part_ht);
761 
762             break;
763         }
764         case 11:
765         {
766             pu1_pred_src =
767                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
768             pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
769 
770             i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
771 
772             ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
773             ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
774             ps_cand->i4_pred_data_stride = i4_pred_stride;
775 
776             pf_copy_2d(
777                 pu1_pred_dst,
778                 i4_stride,
779                 pu1_pred_src,
780                 i4_stride,
781                 i4_part_wd * u1_num_bytes_per_pel,
782                 i4_part_ht);
783 
784             break;
785         }
786         case 7:
787         {
788             pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
789                            i4_buf_offset;
790             pu1_pred_dst =
791                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
792                 i4_buf_offset;
793 
794             i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1];
795 
796             ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
797             ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
798             ps_cand->i4_pred_data_stride = i4_pred_stride;
799 
800             pf_copy_2d(
801                 pu1_pred_dst,
802                 i4_stride,
803                 pu1_pred_src,
804                 i4_stride,
805                 i4_part_wd_pu2 * u1_num_bytes_per_pel,
806                 i4_part_ht_pu2);
807 
808             break;
809         }
810         case 3:
811         case 1:
812         {
813             if((u1_merge_pred_buf_id == pau1_final_pred_buf_id[MERGE_DERIVED][0]) &&
814                (u1_merge_pred_buf_id != pau1_final_pred_buf_id[MERGE_DERIVED][1]))
815             {
816                 pu1_pred_src =
817                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
818                 pu1_pred_dst =
819                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
820 
821                 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
822 
823                 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
824                 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
825                 ps_cand->i4_pred_data_stride = i4_pred_stride;
826 
827                 pf_copy_2d(
828                     pu1_pred_dst,
829                     i4_stride,
830                     pu1_pred_src,
831                     i4_stride,
832                     i4_part_wd * u1_num_bytes_per_pel,
833                     i4_part_ht);
834             }
835             else
836             {
837                 UWORD8 i;
838 
839                 for(i = 0; i < 3; i++)
840                 {
841                     if((pu1_merge_pred_buf_idx_array[i] !=
842                         pau1_final_pred_buf_id[MERGE_DERIVED][1]) &&
843                        (pu1_merge_pred_buf_idx_array[i] !=
844                         pau1_final_pred_buf_id[MERGE_DERIVED][0]))
845                     {
846                         pu1_pred_dst =
847                             (UWORD8 *)ppv_pred_buf_list[pu1_merge_pred_buf_idx_array[i]] +
848                             i4_buf_offset;
849 
850                         i1_retval = pu1_merge_pred_buf_idx_array[i];
851 
852                         break;
853                     }
854                 }
855 
856                 pu1_pred_src =
857                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
858                     i4_buf_offset;
859 
860                 pf_copy_2d(
861                     pu1_pred_dst,
862                     i4_stride,
863                     pu1_pred_src,
864                     i4_stride,
865                     i4_part_wd_pu2 * u1_num_bytes_per_pel,
866                     i4_part_ht_pu2);
867 
868                 /* Copy PU1 */
869                 pu1_pred_src =
870                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
871                 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[i1_retval];
872 
873                 pf_copy_2d(
874                     pu1_pred_dst,
875                     i4_stride,
876                     pu1_pred_src,
877                     i4_stride,
878                     i4_part_wd * u1_num_bytes_per_pel,
879                     i4_part_ht);
880 
881                 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
882                 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
883                 ps_cand->i4_pred_data_stride = i4_pred_stride;
884 
885                 break;
886             }
887         }
888         }
889     }
890 
891     return i1_retval;
892 }
893 
/*!
******************************************************************************
* \brief
*    Makes sure one prediction buffer holds the complete prediction of the
*    MIXED_MODE_TYPE0 candidate of a two-partition CU and points ps_cand at it.
*
*    If the buffer id recorded for MIXED_MODE_TYPE0 PU1 is the same as the one
*    recorded for ME_OR_SKIP_DERIVED PU1, that buffer is used without copying.
*    Otherwise one or two pf_copy_2d calls move partition data between
*    buffers. The buffers involved are selected from a 3-bit field built from
*    the three buffer-id arguments (see u1_bitfield below).
*
* \param[in]     ppv_pred_buf_list : prediction buffers, indexed by buffer id
* \param[in,out] ps_cand : candidate; as_inter_pu[0] / as_inter_pu[1] sizes are
*                read, pu1_pred_data / pu2_pred_data / i4_pred_data_stride are
*                written
* \param[in]     pau1_final_pred_buf_id : buffer ids indexed as
*                [candidate id][partition]
* \param[in]     pu1_merge_pred_buf_idx_array : buffer ids; the first three
*                entries are searched for a spare buffer in the fallback path
* \param[in]     u1_me_pred_buf_id : buffer id, tested against UCHAR_MAX
* \param[in]     u1_merge_pred_buf_id : buffer id, tested against UCHAR_MAX
*                and against the MERGE_DERIVED table entries
* \param[in]     u1_mixed_tyep1_pred_buf_id : buffer id, tested against the
*                ME_OR_SKIP_DERIVED PU1 table entry
* \param[in]     i4_pred_stride : buffer stride in pixels (scaled by
*                u1_num_bytes_per_pel to get the byte stride)
* \param[in]     u1_cu_size : CU size the partition sizes are compared with
* \param[in]     u1_part_type : partition type, must not be 0
* \param[in]     u1_num_bytes_per_pel : bytes per pixel
* \param[in]     pf_copy_2d : 2D copy routine, called as
*                (dst, dst stride, src, src stride, width in bytes, height)
*
* \return
*    Id of the buffer ps_cand has been pointed at.
*
* \note
*    NOTE(review): UCHAR_MAX presumably marks a buffer id that has not been
*    assigned; confirm against the caller.
******************************************************************************
*/
static WORD8 ihevce_mixed_mode_cand_type0_pred_buffer_preparation(
    void **ppv_pred_buf_list,
    cu_inter_cand_t *ps_cand,
    UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
    UWORD8 *pu1_merge_pred_buf_idx_array,
    UWORD8 u1_me_pred_buf_id,
    UWORD8 u1_merge_pred_buf_id,
    UWORD8 u1_mixed_tyep1_pred_buf_id,
    WORD32 i4_pred_stride,
    UWORD8 u1_cu_size,
    UWORD8 u1_part_type,
    UWORD8 u1_num_bytes_per_pel,
    FT_COPY_2D *pf_copy_2d)
{
    WORD32 i4_part_wd;
    WORD32 i4_part_ht;
    WORD32 i4_part_wd_pu2;
    WORD32 i4_part_ht_pu2;
    WORD32 i4_buf_offset;
    UWORD8 *pu1_pred_src;
    UWORD8 *pu1_pred_dst = NULL;
    WORD8 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];

    /* Buffer stride in bytes */
    WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel;

    /* Both as_inter_pu[0] and as_inter_pu[1] are read below, so a part type */
    /* of 0 (presumably the single-partition case) is not accepted here */
    ASSERT(0 != u1_part_type);

    /* b4_wd and b4_ht hold (size in units of 4 pixels) - 1 */
    i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
    i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
    i4_part_wd_pu2 = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
    i4_part_ht_pu2 = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;

    /* Position of PU2 inside a CU sized buffer: PU1 height rows down when */
    /* PU1 is shorter than the CU, PU1 width pixels right when it is narrower */
    i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
                    (i4_part_wd < u1_cu_size) * i4_part_wd;

    /* Pixel offset to byte offset */
    i4_buf_offset *= u1_num_bytes_per_pel;

    if(pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0])
    {
        /* Same buffer id for PU1 of both candidates: use it as is, no copy */
        ps_cand->pu1_pred_data =
            (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
        ps_cand->pu2_pred_data =
            (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
        ps_cand->i4_pred_data_stride = i4_pred_stride;

        i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
    }
    else
    {
        /* bit 2 : u1_merge_pred_buf_id is UCHAR_MAX */
        /* bit 1 : u1_me_pred_buf_id is UCHAR_MAX */
        /* bit 0 : u1_mixed_tyep1_pred_buf_id differs from the */
        /*         ME_OR_SKIP_DERIVED PU1 buffer id */
        UWORD8 u1_bitfield =
            ((u1_merge_pred_buf_id == UCHAR_MAX) << 2) | ((u1_me_pred_buf_id == UCHAR_MAX) << 1) |
            (u1_mixed_tyep1_pred_buf_id != pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]);

        switch(u1_bitfield)
        {
        case 7:
        {
            /* All three bits set: a single copy is done, and the part type */
            /* decides which partition is moved and in which direction */
            /* NOTE(review): there is no default label, so for any other */
            /* part type pu1_pred_src is uninitialised and pu1_pred_dst is */
            /* NULL when pf_copy_2d is called below - confirm such part */
            /* types cannot reach this point */
            switch((PART_TYPE_T)u1_part_type)
            {
            case PRT_2NxN:
            case PRT_Nx2N:
            case PRT_2NxnU:
            case PRT_nLx2N:
            {
                /* Move PU1 from the MIXED_MODE_TYPE0 PU1 buffer into the */
                /* ME_OR_SKIP_DERIVED PU2 buffer */
                pu1_pred_src =
                    (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]];
                pu1_pred_dst =
                    (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]];

                /* NOTE(review): the id reported is the MIXED_MODE_TYPE0 PU2 */
                /* entry while the copy targets the ME_OR_SKIP_DERIVED PU2 */
                /* entry; presumably both name the same buffer - confirm */
                i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE0][1];

                break;
            }
            case PRT_nRx2N:
            case PRT_2NxnD:
            {
                /* Move PU2 from the ME_OR_SKIP_DERIVED PU2 buffer into the */
                /* MIXED_MODE_TYPE0 PU1 buffer */
                pu1_pred_src =
                    (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
                    i4_buf_offset;
                pu1_pred_dst =
                    (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]] +
                    i4_buf_offset;

                /* The shared copy call below uses i4_part_wd / i4_part_ht, */
                /* so load them with the PU2 size */
                i4_part_wd = i4_part_wd_pu2;
                i4_part_ht = i4_part_ht_pu2;

                i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0];

                break;
            }
            }

            ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
            ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
            ps_cand->i4_pred_data_stride = i4_pred_stride;

            pf_copy_2d(
                pu1_pred_dst,
                i4_stride,
                pu1_pred_src,
                i4_stride,
                i4_part_wd * u1_num_bytes_per_pel,
                i4_part_ht);

            break;
        }
        case 6:
        case 5:
        case 4:
        {
            /* Bit 2 set, bits 1 and 0 not both set: move PU2 from the */
            /* ME_OR_SKIP_DERIVED PU2 buffer into the MIXED_MODE_TYPE0 PU1 */
            /* buffer and use the latter */
            pu1_pred_src =
                (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
                i4_buf_offset;
            pu1_pred_dst =
                (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]] +
                i4_buf_offset;

            i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0];

            ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
            ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
            ps_cand->i4_pred_data_stride = i4_pred_stride;

            pf_copy_2d(
                pu1_pred_dst,
                i4_stride,
                pu1_pred_src,
                i4_stride,
                i4_part_wd_pu2 * u1_num_bytes_per_pel,
                i4_part_ht_pu2);
            break;
        }
        case 3:
        {
            /* Bit 2 clear, bits 1 and 0 set: move PU1 from the */
            /* MIXED_MODE_TYPE0 PU1 buffer into the ME_OR_SKIP_DERIVED PU2 */
            /* buffer and use the latter */
            pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]];
            pu1_pred_dst =
                (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]];

            i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1];

            ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
            ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
            ps_cand->i4_pred_data_stride = i4_pred_stride;

            pf_copy_2d(
                pu1_pred_dst,
                i4_stride,
                pu1_pred_src,
                i4_stride,
                i4_part_wd * u1_num_bytes_per_pel,
                i4_part_ht);

            break;
        }
        case 2:
        case 1:
        case 0:
        {
            if((u1_merge_pred_buf_id == pau1_final_pred_buf_id[MERGE_DERIVED][1]) &&
               (u1_merge_pred_buf_id != pau1_final_pred_buf_id[MERGE_DERIVED][0]))
            {
                /* u1_merge_pred_buf_id is the MERGE_DERIVED PU2 buffer but */
                /* not the PU1 buffer: move PU2 from the ME_OR_SKIP_DERIVED */
                /* PU2 buffer into the MERGE_DERIVED PU1 buffer and use that */
                pu1_pred_src =
                    (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
                    i4_buf_offset;
                pu1_pred_dst =
                    (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]] +
                    i4_buf_offset;

                i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];

                ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
                ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
                ps_cand->i4_pred_data_stride = i4_pred_stride;

                pf_copy_2d(
                    pu1_pred_dst,
                    i4_stride,
                    pu1_pred_src,
                    i4_stride,
                    i4_part_wd_pu2 * u1_num_bytes_per_pel,
                    i4_part_ht_pu2);
            }
            else
            {
                UWORD8 i;

                /* Take the first of the three listed buffers that is */
                /* neither u1_merge_pred_buf_id nor u1_mixed_tyep1_pred_buf_id */
                /* NOTE(review): if none qualifies, pu1_pred_dst stays NULL */
                /* and i1_retval keeps its initial value - confirm that */
                /* cannot happen */
                for(i = 0; i < 3; i++)
                {
                    if((pu1_merge_pred_buf_idx_array[i] != u1_merge_pred_buf_id) &&
                       (pu1_merge_pred_buf_idx_array[i] != u1_mixed_tyep1_pred_buf_id))
                    {
                        pu1_pred_dst =
                            (UWORD8 *)ppv_pred_buf_list[pu1_merge_pred_buf_idx_array[i]] +
                            i4_buf_offset;

                        i1_retval = pu1_merge_pred_buf_idx_array[i];

                        break;
                    }
                }

                /* Copy PU2 from the ME_OR_SKIP_DERIVED PU2 buffer */
                pu1_pred_src =
                    (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
                    i4_buf_offset;

                pf_copy_2d(
                    pu1_pred_dst,
                    i4_stride,
                    pu1_pred_src,
                    i4_stride,
                    i4_part_wd_pu2 * u1_num_bytes_per_pel,
                    i4_part_ht_pu2);

                /* Copy PU1 */
                pu1_pred_src =
                    (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
                pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[i1_retval];

                pf_copy_2d(
                    pu1_pred_dst,
                    i4_stride,
                    pu1_pred_src,
                    i4_stride,
                    i4_part_wd * u1_num_bytes_per_pel,
                    i4_part_ht);

                ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
                ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
                ps_cand->i4_pred_data_stride = i4_pred_stride;

                break;
            }
        }
        }
    }

    return i1_retval;
}
1132 
/*!
******************************************************************************
* \brief
*    Returns the position of the largest value in a cost array.
*
* \param[in] pu4_cost_array : costs to scan
* \param[in] u1_array_size  : number of entries to scan
*
* \return
*    Index of the largest cost. When several entries share the largest value
*    the lowest of their indices is returned. 0 is returned when
*    u1_array_size is 0 or 1.
******************************************************************************
*/
static UWORD8 ihevce_find_idx_of_worst_cost(UWORD32 *pu4_cost_array, UWORD8 u1_array_size)
{
    UWORD8 u1_max_pos = 0;
    WORD32 i4_pos = 1;

    while(i4_pos < u1_array_size)
    {
        /* Strict comparison: an equal cost never displaces an earlier entry */
        if(pu4_cost_array[u1_max_pos] < pu4_cost_array[i4_pos])
        {
            u1_max_pos = i4_pos;
        }

        i4_pos++;
    }

    return u1_max_pos;
}
1149 
/*!
******************************************************************************
* \brief
*    Marks as free the prediction buffers that are no longer needed once the
*    candidates of the current evaluation have been placed.
*
*    Which buffers are considered depends on u1_eval_skip, u1_eval_merge and
*    u1_part_type. In every path the ids listed in pu1_buf_id_to_free are
*    released too, except those equal to u1_me_buf_id (and, in the merge
*    paths, to the first two merge buffer ids), which are handled separately.
*
* \param[in,out] pu4_pred_buf_usage_indicator : buffer usage state, updated
*                through ihevce_set_pred_buf_as_free and the
*                COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO macro
* \param[in]     pu1_merge_pred_buf_idx_array : merge buffer ids; entries 0-1
*                are always used, entry 2 only in the last path
* \param[in]     pu1_buf_id_in_use : buffer id per candidate id; only the
*                ME_OR_SKIP_DERIVED entry is read directly here
* \param[in]     pu1_buf_id_to_free : buffer ids queued for release
* \param[in]     u1_me_buf_id : id of the ME prediction buffer
* \param[in]     u1_num_available_cands : not referenced in this function
* \param[in]     u1_num_bufs_to_free : number of entries in pu1_buf_id_to_free
* \param[in]     u1_eval_merge : non-zero when merge was evaluated
* \param[in]     u1_eval_skip : non-zero when skip was evaluated
* \param[in]     u1_part_type : partition type; only tested against 0
*
* \note
*    NOTE(review): COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO is defined
*    outside this view; going by its name it presumably frees the given buffer
*    when no entry of pu1_buf_id_in_use refers to it - confirm.
******************************************************************************
*/
static void ihevce_free_unused_buf_indices(
    UWORD32 *pu4_pred_buf_usage_indicator,
    UWORD8 *pu1_merge_pred_buf_idx_array,
    UWORD8 *pu1_buf_id_in_use,
    UWORD8 *pu1_buf_id_to_free,
    UWORD8 u1_me_buf_id,
    UWORD8 u1_num_available_cands,
    UWORD8 u1_num_bufs_to_free,
    UWORD8 u1_eval_merge,
    UWORD8 u1_eval_skip,
    UWORD8 u1_part_type)
{
    UWORD8 i;

    if(u1_eval_skip)
    {
        /* Of the first two merge buffers keep only the one recorded as in */
        /* use for ME_OR_SKIP_DERIVED; free both when neither is */
        if(pu1_buf_id_in_use[ME_OR_SKIP_DERIVED] == pu1_merge_pred_buf_idx_array[0])
        {
            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_merge_pred_buf_idx_array[1]);
        }
        else if(pu1_buf_id_in_use[ME_OR_SKIP_DERIVED] == pu1_merge_pred_buf_idx_array[1])
        {
            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_merge_pred_buf_idx_array[0]);
        }
        else
        {
            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_merge_pred_buf_idx_array[0]);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_merge_pred_buf_idx_array[1]);
        }

        /* Release the queued buffers; the ME buffer is never freed here */
        for(i = 0; i < u1_num_bufs_to_free; i++)
        {
            if(pu1_buf_id_to_free[i] != u1_me_buf_id)
            {
                ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, pu1_buf_id_to_free[i]);
            }
        }
    }
    else if((!u1_eval_merge) && (!u1_eval_skip) && (pu1_buf_id_in_use[ME_OR_SKIP_DERIVED] == UCHAR_MAX))
    {
        /* Neither merge nor skip evaluated and no buffer recorded for */
        /* ME_OR_SKIP_DERIVED: the ME buffer itself is released */
        ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, u1_me_buf_id);

        /* The ME buffer was freed above, so skip it in the queue */
        for(i = 0; i < u1_num_bufs_to_free; i++)
        {
            if(pu1_buf_id_to_free[i] != u1_me_buf_id)
            {
                ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, pu1_buf_id_to_free[i]);
            }
        }
    }
    else if((!u1_eval_merge) && (!u1_eval_skip) && (pu1_buf_id_in_use[ME_OR_SKIP_DERIVED] != UCHAR_MAX))
    {
        /* A buffer is recorded for ME_OR_SKIP_DERIVED: the ME buffer is */
        /* left alone and only the other queued buffers are released */
        for(i = 0; i < u1_num_bufs_to_free; i++)
        {
            if(pu1_buf_id_to_free[i] != u1_me_buf_id)
            {
                ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, pu1_buf_id_to_free[i]);
            }
        }
    }
    else if((u1_eval_merge) && (0 == u1_part_type))
    {
        /* Merge evaluated with part type 0: the ME buffer and the first */
        /* two merge buffers go through the reference-count macro */

        /* ME pred buf */
        COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
            u1_me_buf_id,
            pu1_buf_id_in_use,
            pu1_buf_id_to_free,
            4,
            u1_num_bufs_to_free,
            pu4_pred_buf_usage_indicator);

        /* Merge pred buf 0 */
        COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
            pu1_merge_pred_buf_idx_array[0],
            pu1_buf_id_in_use,
            pu1_buf_id_to_free,
            4,
            u1_num_bufs_to_free,
            pu4_pred_buf_usage_indicator);

        /* Merge pred buf 1 */
        COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
            pu1_merge_pred_buf_idx_array[1],
            pu1_buf_id_in_use,
            pu1_buf_id_to_free,
            4,
            u1_num_bufs_to_free,
            pu4_pred_buf_usage_indicator);

        /* Queued buffers already passed to the macro above are skipped */
        for(i = 0; i < u1_num_bufs_to_free; i++)
        {
            if((pu1_buf_id_to_free[i] != u1_me_buf_id) &&
               (pu1_merge_pred_buf_idx_array[0] != pu1_buf_id_to_free[i]) &&
               (pu1_merge_pred_buf_idx_array[1] != pu1_buf_id_to_free[i]))
            {
                ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, pu1_buf_id_to_free[i]);
            }
        }
    }
    else if((u1_eval_merge) || (u1_eval_skip))
    {
        /* Reached only with u1_eval_merge set, u1_eval_skip clear and a */
        /* non-zero part type; a third merge buffer is handled as well */

        /* ME pred buf */
        COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
            u1_me_buf_id,
            pu1_buf_id_in_use,
            pu1_buf_id_to_free,
            4,
            u1_num_bufs_to_free,
            pu4_pred_buf_usage_indicator);

        /* Merge pred buf 0 */
        COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
            pu1_merge_pred_buf_idx_array[0],
            pu1_buf_id_in_use,
            pu1_buf_id_to_free,
            4,
            u1_num_bufs_to_free,
            pu4_pred_buf_usage_indicator);

        /* Merge pred buf 1 */
        COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
            pu1_merge_pred_buf_idx_array[1],
            pu1_buf_id_in_use,
            pu1_buf_id_to_free,
            4,
            u1_num_bufs_to_free,
            pu4_pred_buf_usage_indicator);

        /* Merge pred buf 2 */
        COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
            pu1_merge_pred_buf_idx_array[2],
            pu1_buf_id_in_use,
            pu1_buf_id_to_free,
            4,
            u1_num_bufs_to_free,
            pu4_pred_buf_usage_indicator);

        /* NOTE(review): merge buffer 2 is passed to the macro above but is */
        /* not excluded below, unlike merge buffers 0 and 1 - confirm this */
        /* asymmetry is intended */
        for(i = 0; i < u1_num_bufs_to_free; i++)
        {
            if((pu1_buf_id_to_free[i] != u1_me_buf_id) &&
               (pu1_merge_pred_buf_idx_array[0] != pu1_buf_id_to_free[i]) &&
               (pu1_merge_pred_buf_idx_array[1] != pu1_buf_id_to_free[i]))
            {
                ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, pu1_buf_id_to_free[i]);
            }
        }
    }
}
1303 
/*!
******************************************************************************
* \brief
*    Tells whether a prediction buffer is referenced by exactly one of the
*    previously added candidates.
*
* \param[in] pu1_pred_id_of_winners : prediction buffer id of every
*            previously added candidate
* \param[in] u1_idx_of_worst_cost_in_pred_buf_array : buffer id to look for
* \param[in] u1_num_cands_previously_added : number of entries to inspect
*
* \return
*    0 when two or more entries hold the buffer id, 1 otherwise. Finding no
*    entry at all is treated as a programming error (ASSERT).
******************************************************************************
*/
static UWORD8 ihevce_check_if_buf_can_be_freed(
    UWORD8 *pu1_pred_id_of_winners,
    UWORD8 u1_idx_of_worst_cost_in_pred_buf_array,
    UWORD8 u1_num_cands_previously_added)
{
    UWORD8 u1_num_users = 0;
    UWORD8 u1_cand;

    for(u1_cand = 0; u1_cand < u1_num_cands_previously_added; u1_cand++)
    {
        if(pu1_pred_id_of_winners[u1_cand] != u1_idx_of_worst_cost_in_pred_buf_array)
        {
            continue;
        }

        /* A second user means the buffer is shared and must be kept */
        if(++u1_num_users >= 2)
        {
            return 0;
        }
    }

    ASSERT(u1_num_users > 0);

    return 1;
}
1330 
/*!
******************************************************************************
* \brief
*    Collects the u1_num_cands_to_pick largest entries of a cost array,
*    starting with the entry the caller designates as the worst one.
*
*    Entries already picked are recognised through pu1_worst_dst_cand_idx
*    rather than by overwriting them with 0 in the source. Genuine zero costs
*    therefore cannot cause the same slot to be picked twice, and
*    pu4_cost_src is never written.
*
* \param[in]  pu4_cost_src : costs to select from; left unmodified
* \param[out] pu4_cost_dst : picked costs. Entry 0 is the cost at
*             u1_worst_cost_idx_in_dst_array; each later entry is the largest
*             cost among the entries not picked so far
* \param[out] pu1_worst_dst_cand_idx : index within pu4_cost_src of every
*             picked cost, parallel to pu4_cost_dst
* \param[in]  u1_src_array_length : number of entries in pu4_cost_src
* \param[in]  u1_num_cands_to_pick : number of entries to write to both
*             output arrays; nothing is written when it is 0
* \param[in]  u1_worst_cost_idx_in_dst_array : index of the first entry to pick
*
* \note
*    Equal costs are resolved in favour of the lowest index. When more picks
*    are requested than pu4_cost_src has entries, the surplus outputs are
*    filled with index 0 and cost 0.
******************************************************************************
*/
static void ihevce_get_worst_costs_and_indices(
    UWORD32 *pu4_cost_src,
    UWORD32 *pu4_cost_dst,
    UWORD8 *pu1_worst_dst_cand_idx,
    UWORD8 u1_src_array_length,
    UWORD8 u1_num_cands_to_pick,
    UWORD8 u1_worst_cost_idx_in_dst_array)
{
    WORD32 i4_pick;

    /* Nothing requested: leave all three arrays untouched */
    if(0 == u1_num_cands_to_pick)
    {
        return;
    }

    /* The first pick is dictated by the caller */
    pu4_cost_dst[0] = pu4_cost_src[u1_worst_cost_idx_in_dst_array];
    pu1_worst_dst_cand_idx[0] = u1_worst_cost_idx_in_dst_array;

    for(i4_pick = 1; i4_pick < u1_num_cands_to_pick; i4_pick++)
    {
        WORD32 i4_src;
        WORD32 i4_best = -1;

        for(i4_src = 0; i4_src < u1_src_array_length; i4_src++)
        {
            WORD32 i4_prev;
            WORD32 i4_already_picked = 0;

            for(i4_prev = 0; i4_prev < i4_pick; i4_prev++)
            {
                if(pu1_worst_dst_cand_idx[i4_prev] == i4_src)
                {
                    i4_already_picked = 1;
                    break;
                }
            }

            /* Strict '>' keeps the lowest index among equal costs */
            if((!i4_already_picked) &&
               ((i4_best < 0) || (pu4_cost_src[i4_src] > pu4_cost_src[i4_best])))
            {
                i4_best = i4_src;
            }
        }

        if(i4_best < 0)
        {
            /* Every source entry has been picked already */
            pu1_worst_dst_cand_idx[i4_pick] = 0;
            pu4_cost_dst[i4_pick] = 0;
        }
        else
        {
            pu1_worst_dst_cand_idx[i4_pick] = (UWORD8)i4_best;
            pu4_cost_dst[i4_pick] = pu4_cost_src[i4_best];
        }
    }
}
1359 
/*!
******************************************************************************
* \brief
*    Decides which of the newly evaluated candidates enter the list of best
*    candidates when that list cannot take all of them, and which list slot
*    each one goes to.
*
*    New candidates are first sorted by cost together with their ids. The
*    slots still free in the destination list are handed to the candidates at
*    the front of the sorted order without any comparison. The remaining new
*    candidates are compared with the worst costs currently in the list and
*    may displace them.
*
* \param[in,out] pu4_cost_src : costs of the new candidates; sorted in place
* \param[in,out] pu4_cost_dst : costs of the candidates already in the list
* \param[in,out] pe_cand_id : on entry the ids of the new candidates, parallel
*                to pu4_cost_src; on return its leading entries hold the ids
*                of the selected candidates, in the fixed order
*                ME_OR_SKIP_DERIVED, MERGE_DERIVED, MIXED_MODE_TYPE1,
*                MIXED_MODE_TYPE0
* \param[out]    pu1_cand_idx_in_dst_array : destination slot for every
*                selected candidate
* \param[in,out] pu1_buf_id_to_free : receives the prediction buffer id of a
*                displaced list entry when no other list entry uses it
* \param[in,out] pu1_pred_id_of_winners : prediction buffer id per list entry;
*                set to UCHAR_MAX for a displaced entry whose buffer is shared
* \param[in,out] pu1_num_bufs_to_free : number of ids in pu1_buf_id_to_free
* \param[in]     i4_max_num_inter_rdopt_cands : capacity of the list
* \param[in]     u1_num_cands_previously_added : entries already in the list
* \param[in]     u1_num_available_cands : number of new candidates
* \param[in]     u1_worst_cost_idx_in_dst_array : index of the worst cost in
*                pu4_cost_dst
*
* \return
*    Number of new candidates selected.
*
* \note
*    NOTE(review): the sort macro is defined outside this view; the code
*    treats low indices as the preferred candidates, so it presumably sorts
*    in ascending cost order - confirm.
*    au1_valid_src_cands is indexed by candidate id, which assumes the four
*    INTER_CANDIDATE_ID_T values used here are smaller than 4.
******************************************************************************
*/
static UWORD8 ihevce_select_cands_to_replace_previous_worst(
    UWORD32 *pu4_cost_src,
    UWORD32 *pu4_cost_dst,
    INTER_CANDIDATE_ID_T *pe_cand_id,
    UWORD8 *pu1_cand_idx_in_dst_array,
    UWORD8 *pu1_buf_id_to_free,
    UWORD8 *pu1_pred_id_of_winners,
    UWORD8 *pu1_num_bufs_to_free,
    WORD32 i4_max_num_inter_rdopt_cands,
    UWORD8 u1_num_cands_previously_added,
    UWORD8 u1_num_available_cands,
    UWORD8 u1_worst_cost_idx_in_dst_array)
{
    WORD32 i, j, k;
    UWORD32 au4_worst_dst_costs[4];
    UWORD8 au1_worst_dst_cand_idx[4];

    INTER_CANDIDATE_ID_T ae_default_cand_id[4] = {
        ME_OR_SKIP_DERIVED, MERGE_DERIVED, MIXED_MODE_TYPE1, MIXED_MODE_TYPE0
    };
    const WORD32 i4_num_default_cand_ids =
        (WORD32)(sizeof(ae_default_cand_id) / sizeof(ae_default_cand_id[0]));

    /* Free slots left in the list: filled without looking at costs */
    UWORD8 u1_num_cands_to_add_wo_comparisons =
        i4_max_num_inter_rdopt_cands - u1_num_cands_previously_added;
    /* New candidates that can only enter by displacing a list entry */
    UWORD8 u1_num_cands_to_add_after_comparisons =
        u1_num_available_cands - u1_num_cands_to_add_wo_comparisons;
    UWORD8 u1_num_cands_to_add = 0;
    UWORD8 au1_valid_src_cands[4] = { 0, 0, 0, 0 };

    /* The difference above is stored in an unsigned type, so testing it */
    /* against 0 can never fail; check the operands instead */
    ASSERT(u1_num_available_cands >= u1_num_cands_to_add_wo_comparisons);

    /* Sorting src costs */
    SORT_PRIMARY_INTTYPE_ARRAY_AND_REORDER_GENERIC_COMPANION_ARRAY(
        pu4_cost_src, pe_cand_id, u1_num_available_cands, INTER_CANDIDATE_ID_T);

    for(i = 0; i < u1_num_cands_to_add_wo_comparisons; i++)
    {
        pu1_cand_idx_in_dst_array[u1_num_cands_to_add++] = u1_num_cands_previously_added + i;
        au1_valid_src_cands[pe_cand_id[i]] = 1;
    }

    if(u1_num_cands_previously_added)
    {
        WORD8 i1_last_index = 0;

        /* Worst list costs, entry 0 being the overall worst */
        ihevce_get_worst_costs_and_indices(
            pu4_cost_dst,
            au4_worst_dst_costs,
            au1_worst_dst_cand_idx,
            u1_num_cands_previously_added,
            u1_num_cands_to_add_after_comparisons,
            u1_worst_cost_idx_in_dst_array);

        /* Walk the remaining new candidates from the last sorted position */
        /* backwards and look for a worst list cost that each one beats */
        for(i = u1_num_available_cands - 1; i >= u1_num_cands_to_add_wo_comparisons; i--)
        {
            for(j = u1_num_cands_to_add_after_comparisons - 1; j >= i1_last_index; j--)
            {
                if((pu4_cost_src[i] < au4_worst_dst_costs[j]))
                {
                    if((i - u1_num_cands_to_add_wo_comparisons) <= j)
                    {
                        /* New candidate i and all remaining ones sorted */
                        /* before it are accepted; they take the slots of */
                        /* the worst list entries 0..(i - wo_comparisons) */
                        for(k = 0; k <= (i - u1_num_cands_to_add_wo_comparisons); k++)
                        {
                            pu1_cand_idx_in_dst_array[u1_num_cands_to_add++] =
                                au1_worst_dst_cand_idx[k];
                            au1_valid_src_cands[pe_cand_id[u1_num_cands_to_add_wo_comparisons + k]] =
                                1;

                            /* Queue the displaced entry's buffer for */
                            /* release only if no other entry uses it */
                            if(1 == ihevce_check_if_buf_can_be_freed(
                                        pu1_pred_id_of_winners,
                                        pu1_pred_id_of_winners[au1_worst_dst_cand_idx[k]],
                                        u1_num_cands_previously_added))
                            {
                                pu1_buf_id_to_free[(*pu1_num_bufs_to_free)++] =
                                    pu1_pred_id_of_winners[au1_worst_dst_cand_idx[k]];
                            }
                            else
                            {
                                pu1_pred_id_of_winners[au1_worst_dst_cand_idx[k]] = UCHAR_MAX;
                            }
                        }

                        /* Selection is complete */
                        i1_last_index = -1;
                    }
                    else
                    {
                        /* Lower bound of the search for the next candidate */
                        i1_last_index = j;
                    }

                    break;
                }
            }

            if(-1 == i1_last_index)
            {
                break;
            }
        }
    }

    /* Rewrite pe_cand_id with the selected ids in default order. All default */
    /* ids are visited: when the caller has dropped invalid candidates, */
    /* u1_num_available_cands is smaller than the id table and bounding the */
    /* loop by it would miss a selected id that sits late in the table */
    for(i = 0, j = 0; i < i4_num_default_cand_ids; i++)
    {
        if(au1_valid_src_cands[ae_default_cand_id[i]])
        {
            pe_cand_id[j++] = ae_default_cand_id[i];
        }
    }

    return u1_num_cands_to_add;
}
1469 
ihevce_merge_cands_with_existing_best(inter_cu_mode_info_t * ps_mode_info,cu_inter_cand_t ** pps_cand_src,pu_mv_t (* pas_mvp_winner)[NUM_INTER_PU_PARTS],UWORD32 (* pau4_cost)[MAX_NUM_INTER_PARTS],void ** ppv_pred_buf_list,UWORD8 (* pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],UWORD32 * pu4_pred_buf_usage_indicator,UWORD8 * pu1_num_merge_cands,UWORD8 * pu1_num_skip_cands,UWORD8 * pu1_num_mixed_mode_type0_cands,UWORD8 * pu1_num_mixed_mode_type1_cands,UWORD8 * pu1_merge_pred_buf_idx_array,FT_COPY_2D * pf_copy_2d,WORD32 i4_pred_stride,WORD32 i4_max_num_inter_rdopt_cands,UWORD8 u1_cu_size,UWORD8 u1_part_type,UWORD8 u1_eval_merge,UWORD8 u1_eval_skip,UWORD8 u1_num_bytes_per_pel)1470 static UWORD8 ihevce_merge_cands_with_existing_best(
1471     inter_cu_mode_info_t *ps_mode_info,
1472     cu_inter_cand_t **pps_cand_src,
1473     pu_mv_t (*pas_mvp_winner)[NUM_INTER_PU_PARTS],
1474     UWORD32 (*pau4_cost)[MAX_NUM_INTER_PARTS],
1475     void **ppv_pred_buf_list,
1476     UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
1477     UWORD32 *pu4_pred_buf_usage_indicator,
1478     UWORD8 *pu1_num_merge_cands,
1479     UWORD8 *pu1_num_skip_cands,
1480     UWORD8 *pu1_num_mixed_mode_type0_cands,
1481     UWORD8 *pu1_num_mixed_mode_type1_cands,
1482     UWORD8 *pu1_merge_pred_buf_idx_array,
1483 
1484     FT_COPY_2D *pf_copy_2d,
1485 
1486     WORD32 i4_pred_stride,
1487     WORD32 i4_max_num_inter_rdopt_cands,
1488     UWORD8 u1_cu_size,
1489     UWORD8 u1_part_type,
1490     UWORD8 u1_eval_merge,
1491     UWORD8 u1_eval_skip,
1492     UWORD8 u1_num_bytes_per_pel)
1493 {
1494     UWORD32 au4_cost_src[4];
1495     WORD32 i;
1496     WORD32 u1_num_available_cands;
1497     UWORD8 au1_buf_id_in_use[4];
1498     UWORD8 au1_buf_id_to_free[4];
1499     UWORD8 au1_cand_idx_in_dst_array[4];
1500 
1501     INTER_CANDIDATE_ID_T ae_cand_id[4] = {
1502         ME_OR_SKIP_DERIVED, MERGE_DERIVED, MIXED_MODE_TYPE1, MIXED_MODE_TYPE0
1503     };
1504 
1505     cu_inter_cand_t **pps_cand_dst = ps_mode_info->aps_cu_data;
1506 
1507     UWORD8 u1_num_cands_previously_added = ps_mode_info->u1_num_inter_cands;
1508     UWORD8 u1_worst_cost_idx = ps_mode_info->u1_idx_of_worst_cost_in_cost_array;
1509     UWORD8 u1_idx_of_worst_cost_in_pred_buf_array =
1510         ps_mode_info->u1_idx_of_worst_cost_in_pred_buf_array;
1511     UWORD32 *pu4_cost_dst = ps_mode_info->au4_cost;
1512     UWORD8 *pu1_pred_id_of_winners = ps_mode_info->au1_pred_buf_idx;
1513     UWORD8 u1_num_bufs_to_free = 0;
1514     UWORD8 u1_skip_or_merge_cand_is_valid = 0;
1515     UWORD8 u1_num_invalid_cands = 0;
1516 
1517     memset(au1_buf_id_in_use, UCHAR_MAX, sizeof(au1_buf_id_in_use));
1518 
1519     u1_num_available_cands = (u1_eval_merge) ? 2 + ((u1_part_type != 0) + 1) : 1;
1520 
1521     for(i = 0; i < u1_num_available_cands; i++)
1522     {
1523         WORD32 i4_idx = i - u1_num_invalid_cands;
1524 
1525         if(u1_part_type == 0)
1526         {
1527             au4_cost_src[i4_idx] = pau4_cost[ae_cand_id[i4_idx]][0];
1528         }
1529         else
1530         {
1531             au4_cost_src[i4_idx] =
1532                 pau4_cost[ae_cand_id[i4_idx]][0] + pau4_cost[ae_cand_id[i4_idx]][1];
1533         }
1534 
1535         if(au4_cost_src[i4_idx] >= INT_MAX)
1536         {
1537             memmove(
1538                 &ae_cand_id[i4_idx],
1539                 &ae_cand_id[i4_idx + 1],
1540                 sizeof(INTER_CANDIDATE_ID_T) * (u1_num_available_cands - i - 1));
1541 
1542             u1_num_invalid_cands++;
1543         }
1544     }
1545 
1546     u1_num_available_cands -= u1_num_invalid_cands;
1547 
1548     if((u1_num_cands_previously_added + u1_num_available_cands) > i4_max_num_inter_rdopt_cands)
1549     {
1550         u1_num_available_cands = ihevce_select_cands_to_replace_previous_worst(
1551             au4_cost_src,
1552             pu4_cost_dst,
1553             ae_cand_id,
1554             au1_cand_idx_in_dst_array,
1555             au1_buf_id_to_free,
1556             pu1_pred_id_of_winners,
1557             &u1_num_bufs_to_free,
1558             i4_max_num_inter_rdopt_cands,
1559             u1_num_cands_previously_added,
1560             u1_num_available_cands,
1561             u1_worst_cost_idx);
1562     }
1563     else
1564     {
1565         for(i = 0; i < u1_num_available_cands; i++)
1566         {
1567             au1_cand_idx_in_dst_array[i] = u1_num_cands_previously_added + i;
1568         }
1569     }
1570 
1571     for(i = 0; i < u1_num_available_cands; i++)
1572     {
1573         UWORD8 u1_dst_array_idx = au1_cand_idx_in_dst_array[i];
1574 
1575         if(u1_part_type == 0)
1576         {
1577             au4_cost_src[i] = pau4_cost[ae_cand_id[i]][0];
1578         }
1579         else
1580         {
1581             au4_cost_src[i] = pau4_cost[ae_cand_id[i]][0] + pau4_cost[ae_cand_id[i]][1];
1582         }
1583 
1584         pps_cand_dst[u1_dst_array_idx] = pps_cand_src[ae_cand_id[i]];
1585 
1586         /* Adding a skip candidate identical to the merge winner */
1587         if((u1_eval_merge) && (0 == u1_part_type) && (MIXED_MODE_TYPE1 == ae_cand_id[i]))
1588         {
1589             (*pu1_num_skip_cands)++;
1590 
1591             pu4_cost_dst[u1_dst_array_idx] = au4_cost_src[i];
1592 
1593             if(u1_num_cands_previously_added >= i4_max_num_inter_rdopt_cands)
1594             {
1595                 u1_worst_cost_idx =
1596                     ihevce_find_idx_of_worst_cost(pu4_cost_dst, u1_num_cands_previously_added);
1597 
1598                 u1_idx_of_worst_cost_in_pred_buf_array = pu1_pred_id_of_winners[u1_worst_cost_idx];
1599             }
1600             else
1601             {
1602                 u1_num_cands_previously_added++;
1603             }
1604 
1605             if(u1_skip_or_merge_cand_is_valid)
1606             {
1607                 pps_cand_dst[u1_dst_array_idx]->pu1_pred_data =
1608                     (UWORD8 *)ppv_pred_buf_list[au1_buf_id_in_use[MERGE_DERIVED]];
1609                 pps_cand_dst[u1_dst_array_idx]->pu2_pred_data =
1610                     (UWORD16 *)ppv_pred_buf_list[au1_buf_id_in_use[MERGE_DERIVED]];
1611                 pps_cand_dst[u1_dst_array_idx]->i4_pred_data_stride = i4_pred_stride;
1612 
1613                 au1_buf_id_in_use[MIXED_MODE_TYPE1] = au1_buf_id_in_use[MERGE_DERIVED];
1614                 pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MERGE_DERIVED];
1615             }
1616             else
1617             {
1618                 u1_skip_or_merge_cand_is_valid = 1;
1619 
1620                 au1_buf_id_in_use[MIXED_MODE_TYPE1] = ihevce_merge_cand_pred_buffer_preparation(
1621                     ppv_pred_buf_list,
1622                     pps_cand_dst[u1_dst_array_idx],
1623                     pau1_final_pred_buf_id,
1624                     i4_pred_stride,
1625                     u1_cu_size,
1626                     u1_part_type,
1627                     u1_num_bytes_per_pel,
1628                     pf_copy_2d);
1629 
1630                 pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MIXED_MODE_TYPE1];
1631             }
1632 
1633             continue;
1634         }
1635 
1636         if(u1_num_cands_previously_added < i4_max_num_inter_rdopt_cands)
1637         {
1638             if(u1_num_cands_previously_added)
1639             {
1640                 if(au4_cost_src[i] > pu4_cost_dst[u1_worst_cost_idx])
1641                 {
1642                     u1_worst_cost_idx = u1_num_cands_previously_added;
1643                 }
1644             }
1645 
1646             pu4_cost_dst[u1_dst_array_idx] = au4_cost_src[i];
1647 
1648             u1_num_cands_previously_added++;
1649         }
1650         else
1651         {
1652             pu4_cost_dst[u1_dst_array_idx] = au4_cost_src[i];
1653 
1654             u1_worst_cost_idx = ihevce_find_idx_of_worst_cost(
1655                 ps_mode_info->au4_cost, u1_num_cands_previously_added);
1656 
1657             u1_idx_of_worst_cost_in_pred_buf_array = pu1_pred_id_of_winners[u1_worst_cost_idx];
1658         }
1659 
1660         switch(ae_cand_id[i])
1661         {
1662         case ME_OR_SKIP_DERIVED:
1663         {
1664             (*pu1_num_skip_cands) += u1_eval_skip;
1665 
1666             pps_cand_dst[u1_dst_array_idx]->pu1_pred_data =
1667                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
1668             pps_cand_dst[u1_dst_array_idx]->pu2_pred_data =
1669                 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
1670             pps_cand_dst[u1_dst_array_idx]->i4_pred_data_stride = i4_pred_stride;
1671 
1672             if(u1_worst_cost_idx == u1_dst_array_idx)
1673             {
1674                 u1_idx_of_worst_cost_in_pred_buf_array =
1675                     pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
1676             }
1677 
1678             u1_skip_or_merge_cand_is_valid = u1_eval_skip;
1679 
1680             au1_buf_id_in_use[ME_OR_SKIP_DERIVED] = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
1681             pu1_pred_id_of_winners[u1_dst_array_idx] =
1682                 pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
1683 
1684             break;
1685         }
1686         case MERGE_DERIVED:
1687         {
1688             (*pu1_num_merge_cands)++;
1689 
1690             au1_buf_id_in_use[MERGE_DERIVED] = ihevce_merge_cand_pred_buffer_preparation(
1691                 ppv_pred_buf_list,
1692                 pps_cand_dst[u1_dst_array_idx],
1693                 pau1_final_pred_buf_id,
1694                 i4_pred_stride,
1695                 u1_cu_size,
1696                 u1_part_type,
1697                 u1_num_bytes_per_pel,
1698                 pf_copy_2d
1699 
1700             );
1701 
1702             pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MERGE_DERIVED];
1703 
1704             if(u1_worst_cost_idx == u1_dst_array_idx)
1705             {
1706                 u1_idx_of_worst_cost_in_pred_buf_array = au1_buf_id_in_use[MERGE_DERIVED];
1707             }
1708 
1709             u1_skip_or_merge_cand_is_valid = 1;
1710 
1711             break;
1712         }
1713         case MIXED_MODE_TYPE1:
1714         {
1715             (*pu1_num_mixed_mode_type1_cands)++;
1716 
1717             au1_buf_id_in_use[MIXED_MODE_TYPE1] =
1718                 ihevce_mixed_mode_cand_type1_pred_buffer_preparation(
1719                     ppv_pred_buf_list,
1720                     pps_cand_dst[u1_dst_array_idx],
1721                     pau1_final_pred_buf_id,
1722                     pu1_merge_pred_buf_idx_array,
1723                     i4_pred_stride,
1724                     au1_buf_id_in_use[ME_OR_SKIP_DERIVED],
1725                     au1_buf_id_in_use[MERGE_DERIVED],
1726                     (u1_num_available_cands - i) > 1,
1727                     u1_cu_size,
1728                     u1_part_type,
1729                     u1_num_bytes_per_pel,
1730                     pf_copy_2d
1731 
1732                 );
1733 
1734             pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MIXED_MODE_TYPE1];
1735 
1736             if(u1_worst_cost_idx == u1_dst_array_idx)
1737             {
1738                 u1_idx_of_worst_cost_in_pred_buf_array = au1_buf_id_in_use[MIXED_MODE_TYPE1];
1739             }
1740 
1741             break;
1742         }
1743         case MIXED_MODE_TYPE0:
1744         {
1745             (*pu1_num_mixed_mode_type0_cands)++;
1746 
1747             au1_buf_id_in_use[MIXED_MODE_TYPE0] =
1748                 ihevce_mixed_mode_cand_type0_pred_buffer_preparation(
1749                     ppv_pred_buf_list,
1750                     pps_cand_dst[u1_dst_array_idx],
1751                     pau1_final_pred_buf_id,
1752                     pu1_merge_pred_buf_idx_array,
1753                     au1_buf_id_in_use[ME_OR_SKIP_DERIVED],
1754                     au1_buf_id_in_use[MERGE_DERIVED],
1755                     au1_buf_id_in_use[MIXED_MODE_TYPE1],
1756                     i4_pred_stride,
1757                     u1_cu_size,
1758                     u1_part_type,
1759                     u1_num_bytes_per_pel,
1760                     pf_copy_2d);
1761 
1762             pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MIXED_MODE_TYPE0];
1763 
1764             if(u1_worst_cost_idx == u1_dst_array_idx)
1765             {
1766                 u1_idx_of_worst_cost_in_pred_buf_array = au1_buf_id_in_use[MIXED_MODE_TYPE0];
1767             }
1768 
1769             break;
1770         }
1771         }
1772     }
1773 
1774     ihevce_free_unused_buf_indices(
1775         pu4_pred_buf_usage_indicator,
1776         pu1_merge_pred_buf_idx_array,
1777         au1_buf_id_in_use,
1778         au1_buf_id_to_free,
1779         pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0],
1780         u1_num_available_cands,
1781         u1_num_bufs_to_free,
1782         u1_eval_merge,
1783         u1_eval_skip,
1784         u1_part_type);
1785 
1786     ps_mode_info->u1_idx_of_worst_cost_in_cost_array = u1_worst_cost_idx;
1787     ps_mode_info->u1_num_inter_cands = u1_num_cands_previously_added;
1788     ps_mode_info->u1_idx_of_worst_cost_in_pred_buf_array = u1_idx_of_worst_cost_in_pred_buf_array;
1789 
1790     return u1_skip_or_merge_cand_is_valid;
1791 }
1792 
/*!
******************************************************************************
* \brief
*    Points the entries of pps_cands at the candidate containers needed for
*    the requested evaluation mode and requests the prediction buffer indices
*    that go with them.
*
* \param[in]  ps_ctxt : sifter context; supplies the ME candidate list, the
*                 merge/skip and mixed-mode stores and the pred buffer data
* \param[out] pps_cands : array indexed by ME_OR_SKIP_DERIVED, MERGE_DERIVED,
*                 MIXED_MODE_TYPE0 and MIXED_MODE_TYPE1
* \param[out] pu1_merge_pred_buf_idx_array : receives the buffer indices
*                 requested for the merge/skip path (2 or 3 entries)
* \param[out] pu1_me_pred_buf_idx : receives the buffer index requested for
*                 the ME candidate (not requested in skip-only mode)
* \param[in]  u1_part_type : 0 selects the two-buffer merge setup; any other
*                 value also sets up both mixed-mode containers
* \param[in]  u1_me_cand_list_idx : index into ps_ctxt->ps_me_cands
* \param[in]  u1_eval_merge : non-zero when merge candidates are evaluated
* \param[in]  u1_eval_skip : non-zero when a skip candidate is evaluated
*                 (only looked at when u1_eval_merge is zero)
*
* \return
*    0 when any buffer request returned fewer indices than were asked for;
*    otherwise the count returned by the last buffer request that was made.
******************************************************************************
*/
static UWORD8 ihevce_prepare_cand_containers(
    ihevce_inter_cand_sifter_prms_t *ps_ctxt,
    cu_inter_cand_t **pps_cands,
    UWORD8 *pu1_merge_pred_buf_idx_array,
    UWORD8 *pu1_me_pred_buf_idx,
    UWORD8 u1_part_type,
    UWORD8 u1_me_cand_list_idx,
    UWORD8 u1_eval_merge,
    UWORD8 u1_eval_skip)
{
    cu_inter_cand_t *ps_cand;
    UWORD8 u1_bufs_wanted;
    UWORD8 u1_bufs_got;

    WORD32 i4_stride = ps_ctxt->ps_pred_buf_data->i4_pred_stride;

    /* Skip-only evaluation : no ME candidate is involved */
    if(!u1_eval_merge && u1_eval_skip)
    {
        UWORD8 u1_size = ps_ctxt->u1_cu_size;
        UWORD8 u1_pos_x = ps_ctxt->u1_cu_pos_x;
        UWORD8 u1_pos_y = ps_ctxt->u1_cu_pos_y;

        u1_bufs_got = ihevce_get_free_pred_buf_indices(
            pu1_merge_pred_buf_idx_array, &ps_ctxt->ps_pred_buf_data->u4_is_buf_in_use, 2);

        if(u1_bufs_got < 2)
        {
            return 0;
        }

        /* Skip candidates are placed from the end of the merge/skip array */
        ps_cand = &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
                       [MAX_NUM_CU_MERGE_SKIP_CAND - 1 -
                        ps_ctxt->ps_cu_inter_merge_skip->u1_num_skip_cands];

        pps_cands[ME_OR_SKIP_DERIVED] = ps_cand;

        /* CU level attributes of the skip candidate */
        ps_cand->b1_skip_flag = 1;
        ps_cand->b1_eval_mark = 1;
        ps_cand->b1_eval_tx_cusize = 1;
        ps_cand->b1_eval_tx_cusize_by2 = 1;
        ps_cand->b1_intra_has_won = 0;
        ps_cand->b3_part_size = 0;
        ps_cand->i4_pred_data_stride = i4_stride;

        /* The first PU spans the whole CU; position and size are in 4x4 units */
        ps_cand->as_inter_pu[0].b1_intra_flag = 0;
        ps_cand->as_inter_pu[0].b1_merge_flag = 1;
        ps_cand->as_inter_pu[0].b4_pos_x = u1_pos_x >> 2;
        ps_cand->as_inter_pu[0].b4_pos_y = u1_pos_y >> 2;
        ps_cand->as_inter_pu[0].b4_wd = (u1_size >> 2) - 1;
        ps_cand->as_inter_pu[0].b4_ht = (u1_size >> 2) - 1;

        /* The merge slot refers to the very same container */
        pps_cands[MERGE_DERIVED] = ps_cand;

        return u1_bufs_got;
    }

    /* Every remaining mode starts from the ME candidate and one buffer for it */
    u1_bufs_got = ihevce_get_free_pred_buf_indices(
        pu1_me_pred_buf_idx, &ps_ctxt->ps_pred_buf_data->u4_is_buf_in_use, 1);

    if(u1_bufs_got < 1)
    {
        return 0;
    }

    ps_cand = &ps_ctxt->ps_me_cands[u1_me_cand_list_idx];
    pps_cands[ME_OR_SKIP_DERIVED] = ps_cand;

    /* Plain ME evaluation : attach the pred buffer to the candidate and finish */
    if(!u1_eval_merge)
    {
        void **ppv_bufs = ps_ctxt->ps_pred_buf_data->apv_inter_pred_data;

        ps_cand->i4_pred_data_stride = i4_stride;
        ps_cand->pu1_pred_data = (UWORD8 *)ppv_bufs[*pu1_me_pred_buf_idx];
        ps_cand->pu2_pred_data = (UWORD16 *)ppv_bufs[*pu1_me_pred_buf_idx];

        return u1_bufs_got;
    }

    /* Merge evaluation : 3 buffers when u1_part_type is non-zero, else 2 */
    u1_bufs_wanted = (u1_part_type > 0) ? 3 : 2;

    u1_bufs_got = ihevce_get_free_pred_buf_indices(
        pu1_merge_pred_buf_idx_array,
        &ps_ctxt->ps_pred_buf_data->u4_is_buf_in_use,
        u1_bufs_wanted);

    if(u1_bufs_got < u1_bufs_wanted)
    {
        return 0;
    }

    /* Merge candidates are placed from the start of the merge/skip array */
    pps_cands[MERGE_DERIVED] = &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
                                    [ps_ctxt->ps_cu_inter_merge_skip->u1_num_merge_cands];

    if(u1_part_type > 0)
    {
        /* Type0 cands are placed from the start of the store, type1 from its end */
        pps_cands[MIXED_MODE_TYPE0] =
            &ps_ctxt->ps_mixed_modes_datastore
                 ->as_cu_data[ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type0_cands];

        pps_cands[MIXED_MODE_TYPE1] =
            &ps_ctxt->ps_mixed_modes_datastore->as_cu_data
                 [MAX_NUM_MIXED_MODE_INTER_RDO_CANDS - 1 -
                  ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type1_cands];
    }

    /* Each derived container begins as a copy of the ME candidate */
    *pps_cands[MERGE_DERIVED] = *pps_cands[ME_OR_SKIP_DERIVED];

    if(u1_part_type > 0)
    {
        *pps_cands[MIXED_MODE_TYPE0] = *pps_cands[ME_OR_SKIP_DERIVED];
        *pps_cands[MIXED_MODE_TYPE1] = *pps_cands[ME_OR_SKIP_DERIVED];
    }

    /* Both PUs of the merge candidate are flagged as merge */
    pps_cands[MERGE_DERIVED]->as_inter_pu[0].b1_merge_flag = 1;
    pps_cands[MERGE_DERIVED]->as_inter_pu[1].b1_merge_flag = 1;

    return u1_bufs_got;
}
1922 
ihevce_merge_prms_init(merge_prms_t * ps_prms,merge_cand_list_t * ps_list,inter_pred_ctxt_t * ps_mc_ctxt,mv_pred_ctxt_t * ps_mv_pred_ctxt,PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu,PF_SAD_FXN_T pf_sad_fxn,void ** ppv_pred_buf_list,ihevce_cmn_opt_func_t * ps_cmn_utils_optimised_function_list,UWORD8 * pu1_merge_pred_buf_array,UWORD8 (* pau1_best_pred_buf_id)[MAX_NUM_INTER_PARTS],UWORD8 * pu1_is_top_used,WORD32 (* pai4_noise_term)[MAX_NUM_INTER_PARTS],UWORD32 (* pau4_pred_variance)[MAX_NUM_INTER_PARTS],UWORD32 * pu4_src_variance,WORD32 i4_alpha_stim_multiplier,WORD32 i4_src_stride,WORD32 i4_pred_stride,WORD32 i4_lambda,UWORD8 u1_is_cu_noisy,UWORD8 u1_is_hbd,UWORD8 u1_max_cands,UWORD8 u1_merge_idx_cabac_model,UWORD8 u1_use_merge_cand_from_top_row)1923 static __inline void ihevce_merge_prms_init(
1924     merge_prms_t *ps_prms,
1925     merge_cand_list_t *ps_list,
1926     inter_pred_ctxt_t *ps_mc_ctxt,
1927     mv_pred_ctxt_t *ps_mv_pred_ctxt,
1928     PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu,
1929     PF_SAD_FXN_T pf_sad_fxn,
1930     void **ppv_pred_buf_list,
1931     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
1932     UWORD8 *pu1_merge_pred_buf_array,
1933     UWORD8 (*pau1_best_pred_buf_id)[MAX_NUM_INTER_PARTS],
1934     UWORD8 *pu1_is_top_used,
1935     WORD32 (*pai4_noise_term)[MAX_NUM_INTER_PARTS],
1936     UWORD32 (*pau4_pred_variance)[MAX_NUM_INTER_PARTS],
1937     UWORD32 *pu4_src_variance,
1938     WORD32 i4_alpha_stim_multiplier,
1939     WORD32 i4_src_stride,
1940     WORD32 i4_pred_stride,
1941     WORD32 i4_lambda,
1942     UWORD8 u1_is_cu_noisy,
1943     UWORD8 u1_is_hbd,
1944     UWORD8 u1_max_cands,
1945     UWORD8 u1_merge_idx_cabac_model,
1946     UWORD8 u1_use_merge_cand_from_top_row)
1947 {
1948     ps_prms->ps_list = ps_list;
1949     ps_prms->ps_mc_ctxt = ps_mc_ctxt;
1950     ps_prms->ps_mv_pred_ctxt = ps_mv_pred_ctxt;
1951     ps_prms->pf_luma_inter_pred_pu = pf_luma_inter_pred_pu;
1952     ps_prms->pf_sad_fxn = pf_sad_fxn;
1953     ps_prms->ppv_pred_buf_list = ppv_pred_buf_list;
1954     ps_prms->ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
1955 
1956     ps_prms->pu1_merge_pred_buf_array = pu1_merge_pred_buf_array;
1957     ps_prms->pau1_best_pred_buf_id = pau1_best_pred_buf_id;
1958     ps_prms->pu1_is_top_used = pu1_is_top_used;
1959     ps_prms->pai4_noise_term = pai4_noise_term;
1960     ps_prms->pau4_pred_variance = pau4_pred_variance;
1961     ps_prms->pu4_src_variance = pu4_src_variance;
1962     ps_prms->i4_alpha_stim_multiplier = i4_alpha_stim_multiplier;
1963     ps_prms->i4_src_stride = i4_src_stride;
1964     ps_prms->i4_pred_stride = i4_pred_stride;
1965     ps_prms->i4_lambda = i4_lambda;
1966     ps_prms->u1_is_cu_noisy = u1_is_cu_noisy;
1967     ps_prms->u1_is_hbd = u1_is_hbd;
1968     ps_prms->u1_max_cands = u1_max_cands;
1969     ps_prms->u1_merge_idx_cabac_model = u1_merge_idx_cabac_model;
1970     ps_prms->u1_use_merge_cand_from_top_row = u1_use_merge_cand_from_top_row;
1971 }
1972 
/*!
******************************************************************************
* \brief
*    Decides how many entries of the merge candidate list are to be evaluated.
*
* \param[in] ps_nbr : neighbour availability flags; only u1_bot_lt_avail and
*                u1_left_avail are read
* \param[in] ps_merge_cand : merge candidate list (not referenced here)
* \param[in] pu1_is_top_used : per-candidate flags; only entry 0 is read
* \param[in] u1_num_merge_cands : number of candidates in the list
* \param[in] u1_use_merge_cand_from_top_row : non-zero leaves the count as is
*
* \return
*    u1_num_merge_cands when u1_use_merge_cand_from_top_row is non-zero.
*    Otherwise 1 when the left or bottom-left neighbour is available and
*    pu1_is_top_used[0] is zero, and 0 in every other case.
******************************************************************************
*/
static UWORD8 ihevce_merge_candidate_seive(
    nbr_avail_flags_t *ps_nbr,
    merge_cand_list_t *ps_merge_cand,
    UWORD8 *pu1_is_top_used,
    UWORD8 u1_num_merge_cands,
    UWORD8 u1_use_merge_cand_from_top_row)
{
    /* No restriction on the top row : the whole list stays eligible */
    if(u1_use_merge_cand_from_top_row)
    {
        return u1_num_merge_cands;
    }

    /* Neither the left nor the bottom-left neighbour is available */
    if(!ps_nbr->u1_bot_lt_avail && !ps_nbr->u1_left_avail)
    {
        return 0;
    }

    /* Only the first candidate is kept, and only if its top-used flag is clear */
    return !pu1_is_top_used[0];
}
1996 
ihevce_compute_pred_and_populate_modes(ihevce_inter_cand_sifter_prms_t * ps_ctxt,PF_SAD_FXN_T pf_sad_func,UWORD32 * pu4_src_variance,UWORD8 u1_part_type,UWORD8 u1_me_cand_list_idx,UWORD8 u1_eval_merge,UWORD8 u1_eval_skip)1997 static UWORD8 ihevce_compute_pred_and_populate_modes(
1998     ihevce_inter_cand_sifter_prms_t *ps_ctxt,
1999     PF_SAD_FXN_T pf_sad_func,
2000     UWORD32 *pu4_src_variance,
2001     UWORD8 u1_part_type,
2002     UWORD8 u1_me_cand_list_idx,
2003     UWORD8 u1_eval_merge,
2004     UWORD8 u1_eval_skip)
2005 {
2006     cu_inter_cand_t *aps_cands[4];
2007     pu_mv_t as_mvp_winner[4][NUM_INTER_PU_PARTS];
2008     merge_prms_t s_merge_prms;
2009     merge_cand_list_t as_merge_cand[MAX_NUM_MERGE_CAND];
2010 
2011     UWORD8 i, j;
2012     UWORD32 au4_cost[4][NUM_INTER_PU_PARTS];
2013     UWORD8 au1_final_pred_buf_id[4][NUM_INTER_PU_PARTS];
2014     UWORD8 au1_merge_pred_buf_idx_array[3];
2015     UWORD8 au1_is_top_used[MAX_NUM_MERGE_CAND];
2016     UWORD8 u1_me_pred_buf_idx;
2017     UWORD8 u1_num_bufs_currently_allocated;
2018     WORD32 i4_mean;
2019     UWORD32 au4_pred_variance[4][NUM_INTER_PU_PARTS];
2020     WORD32 ai4_noise_term[4][NUM_INTER_PU_PARTS];
2021 
2022     UWORD8 u1_cu_pos_x = ps_ctxt->u1_cu_pos_x;
2023     UWORD8 u1_cu_pos_y = ps_ctxt->u1_cu_pos_y;
2024 
2025     inter_cu_mode_info_t *ps_cu_mode_info = ps_ctxt->ps_inter_cu_mode_info;
2026     inter_pred_ctxt_t *ps_mc_ctxt = ps_ctxt->ps_mc_ctxt;
2027     nbr_4x4_t *ps_cu_nbr_buf = ps_ctxt->aps_cu_nbr_buf[0];
2028     nbr_4x4_t *ps_pu_left_nbr = ps_ctxt->ps_left_nbr_4x4;
2029     nbr_4x4_t *ps_pu_top_nbr = ps_ctxt->ps_top_nbr_4x4;
2030     nbr_4x4_t *ps_pu_topleft_nbr = ps_ctxt->ps_topleft_nbr_4x4;
2031 
2032     ihevce_inter_pred_buf_data_t *ps_pred_buf_info = ps_ctxt->ps_pred_buf_data;
2033     mv_pred_ctxt_t *ps_mv_pred_ctxt = ps_ctxt->ps_mv_pred_ctxt;
2034 
2035     PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu = ps_ctxt->pf_luma_inter_pred_pu;
2036 
2037     void *pv_src = ps_ctxt->pv_src;
2038     WORD32 i4_src_stride = ps_ctxt->i4_src_strd;
2039     WORD32 i4_pred_stride = ps_ctxt->ps_pred_buf_data->i4_pred_stride;
2040     UWORD8 u1_num_parts = (u1_part_type != PRT_2Nx2N) + 1;
2041     UWORD8 u1_num_bytes_per_pel = ps_ctxt->u1_is_hbd + 1;
2042     void **ppv_pred_buf_list = ps_ctxt->ps_pred_buf_data->apv_inter_pred_data;
2043     UWORD8 u1_cu_size = ps_ctxt->u1_cu_size;
2044     WORD32 i4_nbr_4x4_left_stride = ps_ctxt->i4_nbr_4x4_left_strd;
2045     UWORD8 *pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
2046     WORD32 i4_nbr_map_stride = ps_ctxt->i4_ctb_nbr_map_stride;
2047     UWORD8 u1_max_merge_candidates = ps_ctxt->u1_max_merge_candidates;
2048     WORD32 i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
2049     WORD32 i4_pred_buf_offset = 0;
2050     WORD32 i4_src_buf_offset = 0;
2051     UWORD8 u1_single_mcl_flag =
2052         ((8 == u1_cu_size) && (ps_mv_pred_ctxt->i4_log2_parallel_merge_level_minus2 > 0));
2053     UWORD8 u1_skip_or_merge_cand_is_valid = 0;
2054     WORD32 i4_lambda_qf = ps_ctxt->i4_lambda_qf;
2055     UWORD8 u1_is_cu_noisy = ps_ctxt->u1_is_cu_noisy;
2056 
2057     ASSERT(0 == (u1_eval_skip && u1_eval_merge));
2058     ASSERT(u1_me_cand_list_idx < ps_ctxt->u1_num_me_cands);
2059 
2060     /*
2061     Algorithm -
2062     1. Determine pred and satd for ME cand.
2063     2. Determine merge winner for PU1.
2064     3. Determine pred and satd for mixed_type0 cand.
2065     4. Determine merge winner for PU2 and hence derive pred and satd for merge cand.
2066     5. Determine merge winner for PU2 assuming ME cand as PU1 winner and hence derive
2067     pred and satd for mixed_type1 cand.
2068     6. Sort the 4 preceding costs and hence, the cand list.
2069     7. Merge the sorted lists with the final cand list.
2070 
2071     PS : 2 - 7 will be relevant only if u1_eval_merge = 1 and u1_eval_skip = 0
2072     PPS : 1 will not be relevant if u1_eval_skip = 1
2073     */
2074 
2075     /*
2076     Explanatory notes -
2077     1. Motion Vector Merge candidates and nbr's in all merge mode (RealD)
2078     2. Motion Vector Merge candidates and nbr's in mixed mode (AltD)
2079     */
2080 
2081     u1_num_bufs_currently_allocated = ihevce_prepare_cand_containers(
2082         ps_ctxt,
2083         aps_cands,
2084         au1_merge_pred_buf_idx_array,
2085         &u1_me_pred_buf_idx,
2086         u1_part_type,
2087         u1_me_cand_list_idx,
2088         u1_eval_merge,
2089         u1_eval_skip);
2090 
2091     if(0 == u1_num_bufs_currently_allocated)
2092     {
2093         return 0;
2094     }
2095 
2096     if((u1_eval_merge) || (u1_eval_skip))
2097     {
2098         ihevce_merge_prms_init(
2099             &s_merge_prms,
2100             as_merge_cand,
2101             ps_mc_ctxt,
2102             ps_mv_pred_ctxt,
2103             pf_luma_inter_pred_pu,
2104             pf_sad_func,
2105             ppv_pred_buf_list,
2106             ps_ctxt->ps_cmn_utils_optimised_function_list,
2107             au1_merge_pred_buf_idx_array,
2108             au1_final_pred_buf_id,
2109             au1_is_top_used,
2110             ai4_noise_term,
2111             au4_pred_variance,
2112             pu4_src_variance,
2113             ps_ctxt->i4_alpha_stim_multiplier,
2114             i4_src_stride,
2115             i4_pred_stride,
2116             i4_lambda_qf,
2117             u1_is_cu_noisy,
2118             ps_ctxt->u1_is_hbd,
2119             u1_max_merge_candidates,
2120             ps_ctxt->u1_merge_idx_cabac_model,
2121             ps_ctxt->u1_use_merge_cand_from_top_row);
2122     }
2123 
2124     for(i = 0; i < u1_num_parts; i++)
2125     {
2126         nbr_avail_flags_t s_nbr;
2127 
2128         UWORD8 u1_part_wd;
2129         UWORD8 u1_part_ht;
2130         UWORD8 u1_pu_pos_x_4x4;
2131         UWORD8 u1_pu_pos_y_4x4;
2132 
2133         pu_t *ps_pu = &aps_cands[MERGE_DERIVED]->as_inter_pu[i];
2134 
2135         PART_SIZE_E e_part_size = (PART_SIZE_E)aps_cands[ME_OR_SKIP_DERIVED]->b3_part_size;
2136 
2137         void *pv_pu_src = (UWORD8 *)pv_src + i4_src_buf_offset;
2138         UWORD8 u1_num_merge_cands = 0;
2139 
2140         u1_part_wd = (aps_cands[0]->as_inter_pu[i].b4_wd + 1) << 2;
2141         u1_part_ht = (aps_cands[0]->as_inter_pu[i].b4_ht + 1) << 2;
2142         u1_pu_pos_x_4x4 = aps_cands[0]->as_inter_pu[i].b4_pos_x;
2143         u1_pu_pos_y_4x4 = aps_cands[0]->as_inter_pu[i].b4_pos_y;
2144 
2145         /* Inter cand pred and satd */
2146         if(!u1_eval_skip)
2147         {
2148             void *pv_pu_pred = (UWORD8 *)ppv_pred_buf_list[u1_me_pred_buf_idx] + i4_pred_buf_offset;
2149 
2150             if(ps_ctxt->u1_reuse_me_sad)
2151             {
2152                 ihevce_compute_inter_pred_and_cost(
2153                     ps_mc_ctxt,
2154                     pf_luma_inter_pred_pu,
2155                     pf_sad_func,
2156                     &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i],
2157                     pv_pu_src,
2158                     pv_pu_pred,
2159                     i4_src_stride,
2160                     i4_pred_stride,
2161                     0,
2162                     ps_ctxt->ps_cmn_utils_optimised_function_list);
2163 
2164                 au4_cost[ME_OR_SKIP_DERIVED][i] =
2165                     ps_ctxt->pai4_me_err_metric[u1_me_cand_list_idx][i];
2166             }
2167             else
2168             {
2169                 au4_cost[ME_OR_SKIP_DERIVED][i] = ihevce_compute_inter_pred_and_cost(
2170                     ps_mc_ctxt,
2171                     pf_luma_inter_pred_pu,
2172                     pf_sad_func,
2173                     &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i],
2174                     pv_pu_src,
2175                     pv_pu_pred,
2176                     i4_src_stride,
2177                     i4_pred_stride,
2178                     1,
2179                     ps_ctxt->ps_cmn_utils_optimised_function_list);
2180             }
2181 
2182             au1_final_pred_buf_id[ME_OR_SKIP_DERIVED][i] = u1_me_pred_buf_idx;
2183 
2184             if(u1_is_cu_noisy && ps_ctxt->i4_alpha_stim_multiplier)
2185             {
2186                 ihevce_calc_variance(
2187                     pv_pu_pred,
2188                     i4_pred_stride,
2189                     &i4_mean,
2190                     &au4_pred_variance[ME_OR_SKIP_DERIVED][i],
2191                     u1_part_ht,
2192                     u1_part_wd,
2193                     ps_ctxt->u1_is_hbd,
2194                     0);
2195 
2196                 ai4_noise_term[ME_OR_SKIP_DERIVED][i] = ihevce_compute_noise_term(
2197                     ps_ctxt->i4_alpha_stim_multiplier,
2198                     pu4_src_variance[i],
2199                     au4_pred_variance[ME_OR_SKIP_DERIVED][i]);
2200 
2201                 MULTIPLY_STIM_WITH_DISTORTION(
2202                     au4_cost[ME_OR_SKIP_DERIVED][i],
2203                     ai4_noise_term[ME_OR_SKIP_DERIVED][i],
2204                     STIM_Q_FORMAT,
2205                     ALPHA_Q_FORMAT);
2206             }
2207         }
2208 
2209         if(u1_eval_skip || u1_eval_merge)
2210         {
2211             pu_t s_pu, *ps_pu_merge;
2212 
2213             UWORD8 u1_is_any_top_available = 1;
2214             UWORD8 u1_are_valid_merge_cands_available = 1;
2215 
2216             /* get the neighbour availability flags */
2217             if((u1_num_parts > 1) && u1_single_mcl_flag)
2218             { /* 8x8 SMPs take the 2Nx2N neighbours */
2219                 ihevce_get_only_nbr_flag(
2220                     &s_nbr,
2221                     pu1_ctb_nbr_map,
2222                     i4_nbr_map_stride,
2223                     aps_cands[0]->as_inter_pu[0].b4_pos_x,
2224                     aps_cands[0]->as_inter_pu[0].b4_pos_y,
2225                     u1_cu_size >> 2,
2226                     u1_cu_size >> 2);
2227 
2228                 /* Make the PU width and height as 8 */
2229                 memcpy(&s_pu, ps_pu, sizeof(pu_t));
2230                 s_pu.b4_pos_x = u1_cu_pos_x >> 2;
2231                 s_pu.b4_pos_y = u1_cu_pos_y >> 2;
2232                 s_pu.b4_wd = (u1_cu_size >> 2) - 1;
2233                 s_pu.b4_ht = (u1_cu_size >> 2) - 1;
2234 
2235                 /* Give the local PU structure to MV merge */
2236                 ps_pu_merge = &s_pu;
2237             }
2238             else
2239             {
2240                 ihevce_get_only_nbr_flag(
2241                     &s_nbr,
2242                     pu1_ctb_nbr_map,
2243                     i4_nbr_map_stride,
2244                     u1_pu_pos_x_4x4,
2245                     u1_pu_pos_y_4x4,
2246                     u1_part_wd >> 2,
2247                     u1_part_ht >> 2);
2248 
2249                 u1_is_any_top_available = s_nbr.u1_top_avail || s_nbr.u1_top_rt_avail ||
2250                                           s_nbr.u1_top_lt_avail;
2251 
2252                 if(!ps_ctxt->u1_use_merge_cand_from_top_row)
2253                 {
2254                     if(u1_is_any_top_available)
2255                     {
2256                         if(s_nbr.u1_left_avail || s_nbr.u1_bot_lt_avail)
2257                         {
2258                             s_nbr.u1_top_avail = 0;
2259                             s_nbr.u1_top_rt_avail = 0;
2260                             s_nbr.u1_top_lt_avail = 0;
2261                         }
2262                         else
2263                         {
2264                             u1_are_valid_merge_cands_available = 0;
2265                         }
2266                     }
2267                 }
2268 
2269                 /* Actual PU passed to MV merge */
2270                 ps_pu_merge = ps_pu;
2271             }
2272             if(u1_are_valid_merge_cands_available)
2273             {
2274                 u1_num_merge_cands = ihevce_mv_pred_merge(
2275                     ps_mv_pred_ctxt,
2276                     ps_pu_top_nbr,
2277                     ps_pu_left_nbr,
2278                     ps_pu_topleft_nbr,
2279                     i4_nbr_4x4_left_stride,
2280                     &s_nbr,
2281                     NULL,
2282                     ps_pu_merge,
2283                     e_part_size,
2284                     i,
2285                     u1_single_mcl_flag,
2286                     as_merge_cand,
2287                     au1_is_top_used);
2288 
2289                 if(u1_num_merge_cands > u1_max_merge_candidates)
2290                 {
2291                     u1_num_merge_cands = u1_max_merge_candidates;
2292                 }
2293 
2294                 u1_num_merge_cands = ihevce_merge_candidate_seive(
2295                     &s_nbr,
2296                     as_merge_cand,
2297                     au1_is_top_used,
2298                     u1_num_merge_cands,
2299                     ps_ctxt->u1_use_merge_cand_from_top_row || !u1_is_any_top_available);
2300 
2301                 for(j = 0; j < u1_num_merge_cands; j++)
2302                 {
2303                     s_merge_prms.au1_valid_merge_indices[j] = j;
2304                 }
2305 
2306                 au4_cost[MERGE_DERIVED][i] = ihevce_determine_best_merge_pu(
2307                     &s_merge_prms,
2308                     &aps_cands[MERGE_DERIVED]->as_inter_pu[i],
2309                     &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i],
2310                     pv_pu_src,
2311                     au4_cost[ME_OR_SKIP_DERIVED][i],
2312                     i4_pred_buf_offset,
2313                     u1_num_merge_cands,
2314                     i,
2315                     u1_eval_skip);
2316             }
2317             else
2318             {
2319                 au4_cost[MERGE_DERIVED][i] = INT_MAX;
2320             }
2321 
2322             au4_cost[(i) ? MIXED_MODE_TYPE1 : MIXED_MODE_TYPE0][i] = au4_cost[MERGE_DERIVED][i];
2323 
2324             if(u1_eval_skip)
2325             {
2326                 /* This statement ensures that the skip candidate is always added */
2327                 au4_cost[ME_OR_SKIP_DERIVED][i] =
2328                     (au4_cost[MERGE_DERIVED][0] < INT_MAX) ? SKIP_MODE_COST : INT_MAX;
2329                 au1_final_pred_buf_id[ME_OR_SKIP_DERIVED][i] =
2330                     au1_final_pred_buf_id[MERGE_DERIVED][i];
2331             }
2332             else
2333             {
2334                 au4_cost[ME_OR_SKIP_DERIVED][i] += ps_ctxt->pai4_mv_cost[u1_me_cand_list_idx][i];
2335                 au4_cost[(i) ? MIXED_MODE_TYPE0 : MIXED_MODE_TYPE1][i] =
2336                     au4_cost[ME_OR_SKIP_DERIVED][i];
2337             }
2338 
2339             au1_final_pred_buf_id[(i) ? MIXED_MODE_TYPE1 : MIXED_MODE_TYPE0][i] =
2340                 au1_final_pred_buf_id[MERGE_DERIVED][i];
2341             au1_final_pred_buf_id[(i) ? MIXED_MODE_TYPE0 : MIXED_MODE_TYPE1][i] =
2342                 au1_final_pred_buf_id[ME_OR_SKIP_DERIVED][i];
2343         }
2344         else
2345         {
2346             au4_cost[ME_OR_SKIP_DERIVED][i] += ps_ctxt->pai4_mv_cost[u1_me_cand_list_idx][i];
2347         }
2348 
2349         if(!i && (u1_num_parts > 1) && u1_eval_merge)
2350         {
2351             ihevce_set_inter_nbr_map(
2352                 pu1_ctb_nbr_map,
2353                 i4_nbr_map_stride,
2354                 u1_pu_pos_x_4x4,
2355                 u1_pu_pos_y_4x4,
2356                 (u1_part_wd >> 2),
2357                 (u1_part_ht >> 2),
2358                 1);
2359             ihevce_populate_nbr_4x4_with_pu_data(
2360                 ps_cu_nbr_buf, &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i], u1_cu_size >> 2);
2361 
2362             if(u1_part_wd < u1_cu_size)
2363             {
2364                 i4_pred_buf_offset = i4_src_buf_offset = u1_part_wd;
2365 
2366                 if(!u1_single_mcl_flag) /* 8x8 SMPs take the 2Nx2N neighbours */
2367                 {
2368                     ps_cu_nbr_buf += (u1_part_wd >> 2);
2369                     ps_pu_left_nbr = ps_cu_nbr_buf - 1;
2370                     ps_pu_top_nbr += (u1_part_wd >> 2);
2371                     ps_pu_topleft_nbr = ps_pu_top_nbr - 1;
2372 
2373                     i4_nbr_4x4_left_stride = (u1_cu_size >> 2);
2374                 }
2375             }
2376             else if(u1_part_ht < u1_cu_size)
2377             {
2378                 i4_pred_buf_offset = u1_part_ht * i4_pred_stride;
2379                 i4_src_buf_offset = u1_part_ht * i4_src_stride;
2380 
2381                 if(!u1_single_mcl_flag) /* 8x8 SMPs take the 2Nx2N neighbours */
2382                 {
2383                     ps_cu_nbr_buf += (u1_part_ht >> 2) * (u1_cu_size >> 2);
2384                     ps_pu_left_nbr += (u1_part_ht >> 2) * i4_nbr_4x4_left_stride;
2385                     ps_pu_top_nbr = ps_cu_nbr_buf - (u1_cu_size >> 2);
2386                     ps_pu_topleft_nbr = ps_pu_left_nbr - i4_nbr_4x4_left_stride;
2387                 }
2388             }
2389 
2390             i4_pred_buf_offset *= u1_num_bytes_per_pel;
2391             i4_src_buf_offset *= u1_num_bytes_per_pel;
2392 
2393             aps_cands[MIXED_MODE_TYPE0]->as_inter_pu[0] = aps_cands[MERGE_DERIVED]->as_inter_pu[0];
2394         }
2395         else if(!i && (u1_num_parts > 1) && (!u1_eval_merge))
2396         {
2397             if(u1_part_wd < u1_cu_size)
2398             {
2399                 i4_pred_buf_offset = i4_src_buf_offset = u1_part_wd;
2400             }
2401             else if(u1_part_ht < u1_cu_size)
2402             {
2403                 i4_pred_buf_offset = u1_part_ht * i4_pred_stride;
2404                 i4_src_buf_offset = u1_part_ht * i4_src_stride;
2405             }
2406 
2407             i4_pred_buf_offset *= u1_num_bytes_per_pel;
2408             i4_src_buf_offset *= u1_num_bytes_per_pel;
2409         }
2410         else if(i && (u1_num_parts > 1) && u1_eval_merge)
2411         {
2412             aps_cands[MIXED_MODE_TYPE1]->as_inter_pu[1] = aps_cands[MERGE_DERIVED]->as_inter_pu[1];
2413         }
2414     }
2415 
2416     /* Adding a skip candidate */
2417     if((u1_eval_merge) && (0 == u1_part_type))
2418     {
2419         cu_inter_cand_t *ps_cand = &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
2420                                         [MAX_NUM_CU_MERGE_SKIP_CAND - 1 -
2421                                          ps_ctxt->ps_cu_inter_merge_skip->u1_num_skip_cands];
2422 
2423         (*ps_cand) = (*aps_cands[MERGE_DERIVED]);
2424 
2425         ps_cand->b1_skip_flag = 1;
2426 
2427         aps_cands[MIXED_MODE_TYPE1] = ps_cand;
2428         au4_cost[MIXED_MODE_TYPE1][0] = (au4_cost[MERGE_DERIVED][0] < INT_MAX) ? SKIP_MODE_COST
2429                                                                                : INT_MAX;
2430     }
2431 
2432     /* Sort and populate */
2433     u1_skip_or_merge_cand_is_valid = ihevce_merge_cands_with_existing_best(
2434         ps_cu_mode_info,
2435         aps_cands,
2436         as_mvp_winner,
2437         au4_cost,
2438         ppv_pred_buf_list,
2439         au1_final_pred_buf_id,
2440         &ps_pred_buf_info->u4_is_buf_in_use,
2441         &ps_ctxt->ps_cu_inter_merge_skip->u1_num_merge_cands,
2442         &ps_ctxt->ps_cu_inter_merge_skip->u1_num_skip_cands,
2443         &ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type0_cands,
2444         &ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type1_cands,
2445         au1_merge_pred_buf_idx_array,
2446         ps_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d,
2447 
2448         i4_pred_stride,
2449         i4_max_num_inter_rdopt_cands,
2450         u1_cu_size,
2451         u1_part_type,
2452         u1_eval_merge,
2453         u1_eval_skip,
2454         u1_num_bytes_per_pel);
2455 
2456     return u1_skip_or_merge_cand_is_valid;
2457 }
2458 
/*!
******************************************************************************
* \if Function name : ihevce_redundant_candidate_pruner \endif
*
* \brief
*    Drops inter candidates that are made redundant by another candidate of
*    the same partition type which differs only in its use of merge
*
* \param[in,out] ps_inter_cu_mode_info
*    Candidate store. 'aps_cu_data', 'au4_cost' and 'au1_pred_buf_idx' are
*    compacted in place and 'u1_num_inter_cands' is decremented once for
*    every candidate that is dropped
*
* \remarks
*    Candidates with 'b1_skip_flag' set are neither compared nor dropped.
*    For every other pair (a, b) with a < b and equal 'b3_part_size', the
*    merge flags of the PUs accepted by ihevce_compare_pu_mv_t (presumably
*    "same motion" - confirm against its definition) are summed as
*    (flag of a) - (flag of b). A positive sum drops b, a negative sum
*    drops a, and a zero sum keeps both.
*
*****************************************************************************
*/
static __inline void ihevce_redundant_candidate_pruner(inter_cu_mode_info_t *ps_inter_cu_mode_info)
{
    WORD8 i1_ref, i1_oth;
    WORD8 i1_merge_balance;

    /* Non-zero entry => the candidate at that index has to be dropped */
    UWORD8 au1_drop_cand[MAX_NUM_INTER_RDO_CANDS] = { 0 };

    /* Pass 1 : pairwise comparison. Candidates are only marked here */
    for(i1_ref = 0; i1_ref < (ps_inter_cu_mode_info->u1_num_inter_cands - 1); i1_ref++)
    {
        if(au1_drop_cand[i1_ref] || ps_inter_cu_mode_info->aps_cu_data[i1_ref]->b1_skip_flag)
        {
            continue;
        }

        for(i1_oth = i1_ref + 1; i1_oth < ps_inter_cu_mode_info->u1_num_inter_cands; i1_oth++)
        {
            if(au1_drop_cand[i1_oth] ||
               ps_inter_cu_mode_info->aps_cu_data[i1_oth]->b1_skip_flag)
            {
                continue;
            }

            /* Only candidates with the same partition type are compared */
            if(ps_inter_cu_mode_info->aps_cu_data[i1_oth]->b3_part_size !=
               ps_inter_cu_mode_info->aps_cu_data[i1_ref]->b3_part_size)
            {
                continue;
            }

            /* The first PU has to be accepted before anything is counted */
            if(!ihevce_compare_pu_mv_t(
                   &ps_inter_cu_mode_info->aps_cu_data[i1_ref]->as_inter_pu[0].mv,
                   &ps_inter_cu_mode_info->aps_cu_data[i1_oth]->as_inter_pu[0].mv,
                   ps_inter_cu_mode_info->aps_cu_data[i1_ref]->as_inter_pu[0].b2_pred_mode,
                   ps_inter_cu_mode_info->aps_cu_data[i1_oth]->as_inter_pu[0].b2_pred_mode))
            {
                continue;
            }

            i1_merge_balance =
                ps_inter_cu_mode_info->aps_cu_data[i1_ref]->as_inter_pu[0].b1_merge_flag -
                ps_inter_cu_mode_info->aps_cu_data[i1_oth]->as_inter_pu[0].b1_merge_flag;

            /* The second PU is looked at only for a non-zero partition type */
            /* NOTE(review): a second PU that is not accepted does not stop  */
            /* the pair from being pruned on the first PU alone - confirm    */
            /* that this is intended                                         */
            if(ps_inter_cu_mode_info->aps_cu_data[i1_ref]->b3_part_size &&
               ihevce_compare_pu_mv_t(
                   &ps_inter_cu_mode_info->aps_cu_data[i1_ref]->as_inter_pu[1].mv,
                   &ps_inter_cu_mode_info->aps_cu_data[i1_oth]->as_inter_pu[1].mv,
                   ps_inter_cu_mode_info->aps_cu_data[i1_ref]->as_inter_pu[1].b2_pred_mode,
                   ps_inter_cu_mode_info->aps_cu_data[i1_oth]->as_inter_pu[1].b2_pred_mode))
            {
                i1_merge_balance +=
                    ps_inter_cu_mode_info->aps_cu_data[i1_ref]->as_inter_pu[1].b1_merge_flag -
                    ps_inter_cu_mode_info->aps_cu_data[i1_oth]->as_inter_pu[1].b1_merge_flag;
            }

            /* The candidate with fewer merge flags on the counted PUs goes */
            if(i1_merge_balance > 0)
            {
                au1_drop_cand[i1_oth] = 1;
            }
            else if(i1_merge_balance < 0)
            {
                au1_drop_cand[i1_ref] = 1;
            }
        }
    }

    /* Pass 2 : close the gaps left by the marked candidates */
    i1_ref = 0;

    while(i1_ref < ps_inter_cu_mode_info->u1_num_inter_cands)
    {
        size_t num_trailing;

        if(!au1_drop_cand[i1_ref])
        {
            i1_ref++;
            continue;
        }

        /* Number of entries that follow the one being removed */
        num_trailing = (size_t)(ps_inter_cu_mode_info->u1_num_inter_cands - i1_ref - 1);

        memmove(
            &ps_inter_cu_mode_info->aps_cu_data[i1_ref],
            &ps_inter_cu_mode_info->aps_cu_data[i1_ref + 1],
            num_trailing * sizeof(ps_inter_cu_mode_info->aps_cu_data[i1_ref]));

        memmove(
            &ps_inter_cu_mode_info->au4_cost[i1_ref],
            &ps_inter_cu_mode_info->au4_cost[i1_ref + 1],
            num_trailing * sizeof(ps_inter_cu_mode_info->au4_cost[i1_ref]));

        memmove(
            &ps_inter_cu_mode_info->au1_pred_buf_idx[i1_ref],
            &ps_inter_cu_mode_info->au1_pred_buf_idx[i1_ref + 1],
            num_trailing * sizeof(ps_inter_cu_mode_info->au1_pred_buf_idx[i1_ref]));

        /* The marker array is shifted too, so that it stays aligned with */
        /* the candidate arrays                                           */
        memmove(
            &au1_drop_cand[i1_ref],
            &au1_drop_cand[i1_ref + 1],
            num_trailing * sizeof(au1_drop_cand[i1_ref]));

        ps_inter_cu_mode_info->u1_num_inter_cands--;

        /* The index is not advanced : the entry that moved into this slot */
        /* is examined next                                                */
    }
}
2551 
2552 /*!
2553 ******************************************************************************
2554 * \if Function name : ihevce_inter_cand_sifter \endif
2555 *
2556 * \brief
2557 *    Selects the best inter candidate modes amongst ME, merge,
2558 *    skip and mixed modes. Also computes corresponding preds
2559 *
2560 * \author
2561 *  Ittiam
2562 *
2563 *****************************************************************************
2564 */
ihevce_inter_cand_sifter(ihevce_inter_cand_sifter_prms_t * ps_ctxt)2565 void ihevce_inter_cand_sifter(ihevce_inter_cand_sifter_prms_t *ps_ctxt)
2566 {
2567     PF_SAD_FXN_T pf_sad_func;
2568 
2569     UWORD8 au1_final_cand_idx[MAX_INTER_CU_CANDIDATES];
2570     UWORD8 au1_part_types_evaluated[MAX_INTER_CU_CANDIDATES];
2571     UWORD8 u1_num_unique_parts;
2572     UWORD8 i, j;
2573     UWORD32 au4_src_variance[NUM_INTER_PU_PARTS];
2574     WORD32 i4_mean;
2575 
2576     cu_inter_cand_t *ps_me_cands = ps_ctxt->ps_me_cands;
2577     inter_cu_mode_info_t *ps_cu_mode_info = ps_ctxt->ps_inter_cu_mode_info;
2578 
2579     UWORD8 u1_diff_skip_cand_flag = 1;
2580     WORD8 i1_skip_cand_from_merge_idx = -1;
2581     WORD8 i1_final_skip_cand_merge_idx = -1;
2582     UWORD8 u1_max_num_part_types_to_select = MAX_INTER_CU_CANDIDATES;
2583     UWORD8 u1_num_me_cands = ps_ctxt->u1_num_me_cands;
2584     UWORD8 u1_num_parts_evaluated_for_merge = 0;
2585     UWORD8 u1_is_cu_noisy = ps_ctxt->u1_is_cu_noisy;
2586 
2587     if((ps_ctxt->u1_quality_preset >= IHEVCE_QUALITY_P3) && (ps_ctxt->i1_slice_type == BSLICE))
2588     {
2589         u1_max_num_part_types_to_select = 1;
2590     }
2591 
2592     {
2593         pf_sad_func = (ps_ctxt->u1_use_satd_for_merge_eval) ? compute_satd_8bit
2594                                                             : ps_ctxt->pf_evalsad_pt_npu_mxn_8bit;
2595     }
2596 
2597     u1_num_unique_parts = ihevce_get_num_part_types_in_me_cand_list(
2598         ps_me_cands,
2599         au1_part_types_evaluated,
2600         au1_final_cand_idx,
2601         &u1_diff_skip_cand_flag,
2602         &i1_skip_cand_from_merge_idx,
2603         &i1_final_skip_cand_merge_idx,
2604         u1_max_num_part_types_to_select,
2605         u1_num_me_cands);
2606 
2607     if((u1_num_me_cands + u1_diff_skip_cand_flag) && u1_is_cu_noisy &&
2608        ps_ctxt->i4_alpha_stim_multiplier)
2609     {
2610         ihevce_calc_variance(
2611             ps_ctxt->pv_src,
2612             ps_ctxt->i4_src_strd,
2613             &i4_mean,
2614             &ps_cu_mode_info->u4_src_variance,
2615             ps_ctxt->u1_cu_size,
2616             ps_ctxt->u1_cu_size,
2617             ps_ctxt->u1_is_hbd,
2618             0);
2619     }
2620 
2621     if(DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2622     {
2623         u1_diff_skip_cand_flag = 0;
2624     }
2625     else if(!DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2626     {
2627         if(ps_ctxt->u1_cu_size > MAX_CU_SIZE_WHERE_MERGE_AND_SKIPS_ENABLED_AND_WHEN_NOISY)
2628         {
2629             u1_diff_skip_cand_flag = 0;
2630         }
2631     }
2632 
2633     for(i = 0; i < u1_num_me_cands + u1_diff_skip_cand_flag; i++)
2634     {
2635         UWORD8 u1_part_type;
2636         UWORD8 u1_eval_skip;
2637         UWORD8 u1_eval_merge;
2638         UWORD8 u1_valid_cand;
2639 
2640         if(i == u1_num_me_cands)
2641         {
2642             u1_eval_skip = 1;
2643             u1_eval_merge = 0;
2644             u1_part_type = 0;
2645         }
2646         else
2647         {
2648             u1_eval_skip = 0;
2649             u1_part_type = ps_me_cands[i].b3_part_size;
2650 
2651             if(u1_num_parts_evaluated_for_merge >= u1_num_unique_parts)
2652             {
2653                 u1_eval_merge = 0;
2654                 u1_num_parts_evaluated_for_merge = u1_num_unique_parts;
2655             }
2656             else
2657             {
2658                 u1_eval_merge = (i == au1_final_cand_idx[u1_num_parts_evaluated_for_merge]);
2659             }
2660 
2661             for(j = 0; (j < u1_num_parts_evaluated_for_merge) && (u1_eval_merge); j++)
2662             {
2663                 if(u1_part_type == au1_part_types_evaluated[j])
2664                 {
2665                     u1_eval_merge = 0;
2666                     break;
2667                 }
2668             }
2669         }
2670 
2671         if(u1_is_cu_noisy && u1_part_type && ps_ctxt->i4_alpha_stim_multiplier)
2672         {
2673             void *pv_src = ps_ctxt->pv_src;
2674             UWORD8 u1_pu_wd = (ps_me_cands[i].as_inter_pu[0].b4_wd + 1) << 2;
2675             UWORD8 u1_pu_ht = (ps_me_cands[i].as_inter_pu[0].b4_ht + 1) << 2;
2676 
2677             ihevce_calc_variance(
2678                 pv_src,
2679                 ps_ctxt->i4_src_strd,
2680                 &i4_mean,
2681                 &au4_src_variance[0],
2682                 u1_pu_ht,
2683                 u1_pu_wd,
2684                 ps_ctxt->u1_is_hbd,
2685                 0);
2686 
2687             pv_src = (void *) (((UWORD8 *) pv_src) +
2688                 ((ps_ctxt->u1_cu_size == u1_pu_wd) ? ps_ctxt->i4_src_strd * u1_pu_ht : u1_pu_wd)
2689                 * (ps_ctxt->u1_is_hbd + 1));
2690             u1_pu_wd = (ps_me_cands[i].as_inter_pu[1].b4_wd + 1) << 2;
2691             u1_pu_ht = (ps_me_cands[i].as_inter_pu[1].b4_ht + 1) << 2;
2692 
2693             ihevce_calc_variance(
2694                 pv_src,
2695                 ps_ctxt->i4_src_strd,
2696                 &i4_mean,
2697                 &au4_src_variance[1],
2698                 u1_pu_ht,
2699                 u1_pu_wd,
2700                 ps_ctxt->u1_is_hbd,
2701                 0);
2702         }
2703         else if(u1_is_cu_noisy && !u1_part_type && ps_ctxt->i4_alpha_stim_multiplier)
2704         {
2705             au4_src_variance[0] = ps_cu_mode_info->u4_src_variance;
2706         }
2707 
2708         if(DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2709         {
2710             u1_eval_merge = 0;
2711         }
2712         else if(!DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2713         {
2714             if(ps_ctxt->u1_cu_size > MAX_CU_SIZE_WHERE_MERGE_AND_SKIPS_ENABLED_AND_WHEN_NOISY)
2715             {
2716                 u1_eval_merge = 0;
2717             }
2718         }
2719 
2720         u1_valid_cand = ihevce_compute_pred_and_populate_modes(
2721             ps_ctxt,
2722             pf_sad_func,
2723             au4_src_variance,
2724             u1_part_type,
2725             MIN(i, (u1_num_me_cands - 1)),
2726             u1_eval_merge,
2727             u1_eval_skip);
2728 
2729         u1_num_parts_evaluated_for_merge += u1_eval_merge;
2730 
2731         /* set the neighbour map to 0 */
2732         if(u1_part_type)
2733         {
2734             ihevce_set_nbr_map(
2735                 ps_ctxt->pu1_ctb_nbr_map,
2736                 ps_ctxt->i4_ctb_nbr_map_stride,
2737                 (ps_ctxt->u1_cu_pos_x >> 2),
2738                 (ps_ctxt->u1_cu_pos_y >> 2),
2739                 (ps_ctxt->u1_cu_size >> 2),
2740                 0);
2741         }
2742     }
2743 
2744     ihevce_redundant_candidate_pruner(ps_ctxt->ps_inter_cu_mode_info);
2745 }
2746