1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /*!
22 ******************************************************************************
23 * \file ihevce_me_pass.c
24 *
25 * \brief
*    Glue layer that translates encoder style API calls into ME (motion
*    estimation) style API calls. The layer keeps no state of its own; it
*    only adapts the parameters and forwards them to the HME functions.
29 *
30 * \date
31 *    22/10/2012
32 *
33 * \author
34 *    Ittiam
35 *
36 *
37 * List of Functions
38 *
39 *
40 ******************************************************************************
41 */
42 
43 /*****************************************************************************/
44 /* File Includes                                                             */
45 /*****************************************************************************/
46 /* System include files */
47 #include <stdio.h>
48 #include <string.h>
49 #include <stdlib.h>
50 #include <assert.h>
51 #include <stdarg.h>
52 #include <math.h>
53 
54 /* User include files */
55 #include "ihevc_typedefs.h"
56 #include "itt_video_api.h"
57 #include "ihevce_api.h"
58 
59 #include "rc_cntrl_param.h"
60 #include "rc_frame_info_collector.h"
61 #include "rc_look_ahead_params.h"
62 
63 #include "ihevc_debug.h"
64 #include "ihevc_defs.h"
65 #include "ihevc_structs.h"
66 #include "ihevc_platform_macros.h"
67 #include "ihevc_deblk.h"
68 #include "ihevc_itrans_recon.h"
69 #include "ihevc_chroma_itrans_recon.h"
70 #include "ihevc_chroma_intra_pred.h"
71 #include "ihevc_intra_pred.h"
72 #include "ihevc_inter_pred.h"
73 #include "ihevc_mem_fns.h"
74 #include "ihevc_padding.h"
75 #include "ihevc_weighted_pred.h"
76 #include "ihevc_sao.h"
77 #include "ihevc_resi_trans.h"
78 #include "ihevc_quant_iquant_ssd.h"
79 #include "ihevc_cabac_tables.h"
80 
81 #include "ihevce_defs.h"
82 #include "ihevce_lap_enc_structs.h"
83 #include "ihevce_multi_thrd_structs.h"
84 #include "ihevce_me_common_defs.h"
85 #include "ihevce_had_satd.h"
86 #include "ihevce_error_codes.h"
87 #include "ihevce_bitstream.h"
88 #include "ihevce_cabac.h"
89 #include "ihevce_rdoq_macros.h"
90 #include "ihevce_function_selector.h"
91 #include "ihevce_enc_structs.h"
92 #include "ihevce_entropy_structs.h"
93 #include "ihevce_cmn_utils_instr_set_router.h"
94 #include "ihevce_enc_loop_structs.h"
95 #include "ihevce_inter_pred.h"
96 
97 #include "hme_datatype.h"
98 #include "hme_interface.h"
99 #include "hme_common_defs.h"
100 #include "hme_defs.h"
101 #include "ihevce_me_instr_set_router.h"
102 #include "hme_utils.h"
103 #include "hme_coarse.h"
104 #include "hme_refine.h"
105 #include "hme_function_selector.h"
106 #include "ihevce_me_pass.h"
107 
108 #include "cast_types.h"
109 #include "osal.h"
110 #include "osal_defaults.h"
111 
112 /*****************************************************************************/
113 /* Macros                                                                    */
114 /*****************************************************************************/
115 
/** Scaler selection: original simple five tap scaler (non-zero selects it) */
#define FIVE_TAP_ORIG_SCALER 0

/** Scaler selection: simple gaussian filter, blurs the image a bit */
#define SIMPLE_GAUSSIAN_SCALER 0

/** Scaler selection: lanczos scaler, gives sharper images */
#define LANCZOS_SCALER 1

// Saturated addition z = x + y
// Overflow is detected after the fact as z < x or z < y; in that case z is
// replaced by MAX_INTRA_COST_IPE.
// NOTE(review): the z < x / z < y test presumes operands for which wraparound
// is well defined (unsigned, or known non-negative) - confirm at call sites.
// The macro expands to a bare brace block and evaluates z, x and y more than
// once, so the arguments must not have side effects.
#define SATURATED_ADD(z, x, y)                                                                     \
    {                                                                                              \
        (z) = (x) + (y);                                                                           \
        if(((z) < (x)) || ((z) < (y)))                                                             \
            (z) = MAX_INTRA_COST_IPE;                                                              \
    }

// Saturated subtraction z = x - y, with z forced to 0 when the difference
// compares below zero.
// NOTE(review): the (z) < 0 test can only fire when z has a signed type -
// confirm at call sites.
// Same expansion caveats as SATURATED_ADD: bare brace block, arguments are
// evaluated more than once.
#define SATURATED_SUB(z, x, y)                                                                     \
    {                                                                                              \
        (z) = (x) - (y);                                                                           \
        if((z) < 0) /*if (((z) > (x)) || ((z) > (y))) */                                           \
            (z) = 0;                                                                               \
    }

/* Compile time guard: the build is rejected when more than one of the three */
/* scaler selection macros is enabled. A sum of zero is not rejected here.   */
#if(FIVE_TAP_ORIG_SCALER + SIMPLE_GAUSSIAN_SCALER + LANCZOS_SCALER) > 1
#error "HME ERROR: Only one scaler can be enabled at a time"
#endif
144 
145 /*****************************************************************************/
146 /* Function Definitions                                                      */
147 /*****************************************************************************/
148 
149 /*!
150 ******************************************************************************
151 * \if Function name : ihevce_me_get_num_mem_recs \endif
152 *
153 * \brief
154 *    Number of memory records are returned for ME module
155 *    Note : Include TOT MEM. req. for ME + TOT MEM. req. for Dep Mngr for L0 ME
156 *
157 * \return
158 *    Number of memory records
159 *
160 * \author
161 *  Ittiam
162 *
163 *****************************************************************************
164 */
ihevce_me_get_num_mem_recs(WORD32 i4_num_me_frm_pllel)165 WORD32 ihevce_me_get_num_mem_recs(WORD32 i4_num_me_frm_pllel)
166 {
167     WORD32 me_mem_recs = hme_enc_num_alloc(i4_num_me_frm_pllel);
168 
169     return (me_mem_recs);
170 }
171 
/*!
******************************************************************************
* \if Function name : ihevce_derive_me_init_prms \endif
*
* \brief
*    Fills the HME init parameter structure from the encoder's create time
*    static parameters for one target resolution.
*
* \param[in]  ps_init_prms : Create time static parameters
* \param[out] ps_hme_init_prms : HME init parameters written by this function
* \param[in]  i4_num_proc_thrds : Number of processing threads, stored as is
* \param[in]  i4_resolution_id : Index of the target resolution whose width,
*                                height and quality preset are used
*
* \return
*    None
*
*****************************************************************************
*/
void ihevce_derive_me_init_prms(
    ihevce_static_cfg_params_t *ps_init_prms,
    hme_init_prms_t *ps_hme_init_prms,
    S32 i4_num_proc_thrds,
    S32 i4_resolution_id)
{
    /* Values read more than once are fetched a single time up front */
    WORD32 i4_src_field_pic = ps_init_prms->s_src_prms.i4_field_pic;
    WORD32 i4_preset =
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
    WORD32 i4_tgt_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
    WORD32 i4_tgt_ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
    WORD32 i4_vq_toggles = ps_init_prms->s_coding_tools_prms.i4_vqet;

    /* min cu size in pixels, derived from its log2 in the config params */
    WORD32 i4_min_cu_dim = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;

    /* max number of ref frames. This should be > ref frms sent any frm */
    ps_hme_init_prms->max_num_ref = ((DEFAULT_MAX_REFERENCE_PICS) << i4_src_field_pic);

    /* Width and height of the encoded layer, each extended by the amount */
    /* SET_CTB_ALIGN reports for the min cu size                          */
    ps_hme_init_prms->a_wd[0] = i4_tgt_wd + SET_CTB_ALIGN(i4_tgt_wd, i4_min_cu_dim);
    ps_hme_init_prms->a_ht[0] = i4_tgt_ht + SET_CTB_ALIGN(i4_tgt_ht, i4_min_cu_dim);

    /* 4 results per blk in the coarsest layer: 8x4L, 8x4R, 4x8T, 4x8B */
    ps_hme_init_prms->max_num_results_coarse = 4;

    /* at most 2 results per partition in every refinement layer */
    ps_hme_init_prms->max_num_results = 2;

    /* Assuming abt 4 layers for 1080p, explicit search across all ref   */
    /* frames is done in all but the final layer                         */
    ps_hme_init_prms->num_layers_explicit_search = 3;

    /* max_tr_depth for Inter */
    ps_hme_init_prms->u1_max_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI;

    /* CTB size; only a log2 size of 6 is accepted */
    ps_hme_init_prms->log_ctb_size = ps_init_prms->s_config_prms.i4_max_log2_cu_size;
    ASSERT(ps_hme_init_prms->log_ctb_size == 6);

    /* currently encoding only 1 layer */
    ps_hme_init_prms->num_simulcast_layers = 1;

    /* this feature not yet supported */
    ps_hme_init_prms->segment_higher_layers = 0;

    /* 4x4 in the intermediate refinement layers: switched off for the */
    /* presets P4, P5, P6 and P7, switched on for everything else      */
#if !OLD_XTREME_SPEED
    ps_hme_init_prms->use_4x4 =
        !((IHEVCE_QUALITY_P4 == i4_preset) || (IHEVCE_QUALITY_P5 == i4_preset) ||
          (IHEVCE_QUALITY_P6 == i4_preset) || (IHEVCE_QUALITY_P7 == i4_preset));
#else
    ps_hme_init_prms->use_4x4 = 1;
#endif

    ps_hme_init_prms->num_b_frms =
        (1 << ps_init_prms->s_coding_tools_prms.i4_max_temporal_layers) - 1;

    ps_hme_init_prms->i4_num_proc_thrds = i4_num_proc_thrds;

    /* Encoder quality preset -> ME quality preset and subpel refine steps */
    switch(i4_preset)
    {
    case IHEVCE_QUALITY_P0:
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_PRISTINE_QUALITY;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 3;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 3;
        break;

    case IHEVCE_QUALITY_P2:
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_HIGH_QUALITY;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 3;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 3;
        break;

    case IHEVCE_QUALITY_P3:
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_MEDIUM_SPEED;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 2;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 2;
        break;

    case IHEVCE_QUALITY_P4:
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_HIGH_SPEED;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 1;
        break;

    case IHEVCE_QUALITY_P5:
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_XTREME_SPEED;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 1;
        break;

    case IHEVCE_QUALITY_P6:
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_XTREME_SPEED_25;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 1;
        break;

    case IHEVCE_QUALITY_P7:
        /* same ME preset as P6, but without any qpel refinement step */
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_XTREME_SPEED_25;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 0;
        break;

    default:
        /* Any other preset value: the three fields above are left unwritten */
        break;
    }

    ps_hme_init_prms->s_me_coding_tools.u1_l0_me_controlled_via_cmd_line = 0;

    /* Search range, architecture and interlace flag straight from static params */
    ps_hme_init_prms->max_horz_search_range = ps_init_prms->s_config_prms.i4_max_search_range_horz;
    ps_hme_init_prms->max_vert_search_range = ps_init_prms->s_config_prms.i4_max_search_range_vert;
    ps_hme_init_prms->e_arch_type = ps_init_prms->e_arch_type;
    ps_hme_init_prms->is_interlaced = (i4_src_field_pic == IV_INTERLACED);

    /* Enabled only when both the control toggler bit and the noise */
    /* preservation bit are set in the vqet word                    */
    ps_hme_init_prms->u1_is_stasino_enabled =
        ((i4_vq_toggles & (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
         (i4_vq_toggles & (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)));
}
317 
318 /*!
319 ******************************************************************************
320 * \if Function name : ihevce_me_get_mem_recs \endif
321 *
322 * \brief
323 *    Memory requirements are returned for ME.
324 *
325 * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
326 * \param[in] ps_init_prms : Create time static parameters
327 * \param[in] i4_num_proc_thrds : Number of processing threads for this module
328 * \param[in] i4_mem_space : memspace in whihc memory request should be done
329 *
330 * \return
331 *    Number of records
332 *
333 * \author
334 *  Ittiam
335 *
336 *****************************************************************************
337 */
ihevce_me_get_mem_recs(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,WORD32 i4_mem_space,WORD32 i4_resolution_id,WORD32 i4_num_me_frm_pllel)338 WORD32 ihevce_me_get_mem_recs(
339     iv_mem_rec_t *ps_mem_tab,
340     ihevce_static_cfg_params_t *ps_init_prms,
341     WORD32 i4_num_proc_thrds,
342     WORD32 i4_mem_space,
343     WORD32 i4_resolution_id,
344     WORD32 i4_num_me_frm_pllel)
345 {
346     hme_memtab_t as_memtabs[MAX_HME_ENC_TOT_MEMTABS];
347     WORD32 n_tabs, i;
348 
349     /* Init prms structure specific to HME */
350     hme_init_prms_t s_hme_init_prms;
351 
352     /*************************************************************************/
353     /* code flow: we call hme alloc function and then remap those memtabs    */
354     /* to a different type of memtab structure.                              */
355     /*************************************************************************/
356     if(i4_num_me_frm_pllel > 1)
357     {
358         ASSERT(MAX_HME_ENC_TOT_MEMTABS >= hme_enc_num_alloc(i4_num_me_frm_pllel));
359     }
360     else
361     {
362         ASSERT(MIN_HME_ENC_TOT_MEMTABS >= hme_enc_num_alloc(i4_num_me_frm_pllel));
363     }
364 
365     /*************************************************************************/
366     /* POPULATE THE HME INIT PRMS                                            */
367     /*************************************************************************/
368     ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id);
369 
370     /*************************************************************************/
371     /* CALL THE ME FUNCTION TO GET MEMTABS                                   */
372     /*************************************************************************/
373     n_tabs = hme_enc_alloc(&as_memtabs[0], &s_hme_init_prms, i4_num_me_frm_pllel);
374     ASSERT(n_tabs == hme_enc_num_alloc(i4_num_me_frm_pllel));
375 
376     /*************************************************************************/
377     /* REMAP RESULTS TO ENCODER MEMTAB STRUCTURE                             */
378     /*************************************************************************/
379     for(i = 0; i < n_tabs; i++)
380     {
381         ps_mem_tab[i].i4_mem_size = as_memtabs[i].size;
382         ps_mem_tab[i].i4_mem_alignment = as_memtabs[i].align;
383         ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
384         ps_mem_tab[i].i4_size = sizeof(iv_mem_rec_t);
385     }
386 
387     /*************************************************************************/
388     /* --- L0 ME sync Dep Mngr Mem requests --                               */
389     /*************************************************************************/
390     ps_mem_tab += n_tabs;
391 
392     return (n_tabs);
393 }
394 
395 /*!
396 ******************************************************************************
397 * \if Function name : ihevce_me_init \endif
398 *
399 * \brief
400 *    Intialization for ME context state structure .
401 *
402 * \param[in] ps_mem_tab : pointer to memory descriptors table
403 * \param[in] ps_init_prms : Create time static parameters
404 * \param[in] pv_osal_handle : Osal handle
405 *
406 * \return
407 *    Handle to the ME context
408 *
409 * \author
410 *  Ittiam
411 *
412 *****************************************************************************
413 */
ihevce_me_init(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,void * pv_osal_handle,rc_quant_t * ps_rc_quant_ctxt,void * pv_tile_params_base,WORD32 i4_resolution_id,WORD32 i4_num_me_frm_pllel,UWORD8 u1_is_popcnt_available)414 void *ihevce_me_init(
415     iv_mem_rec_t *ps_mem_tab,
416     ihevce_static_cfg_params_t *ps_init_prms,
417     WORD32 i4_num_proc_thrds,
418     void *pv_osal_handle,
419     rc_quant_t *ps_rc_quant_ctxt,
420     void *pv_tile_params_base,
421     WORD32 i4_resolution_id,
422     WORD32 i4_num_me_frm_pllel,
423     UWORD8 u1_is_popcnt_available)
424 {
425     /* ME handle to be returned */
426     void *pv_me_ctxt;
427     WORD32 status;
428     me_master_ctxt_t *ps_me_ctxt;
429     IV_ARCH_T e_arch_type;
430 
431     /* Init prms structure specific to HME */
432     hme_init_prms_t s_hme_init_prms;
433 
434     /* memtabs to be passed to hme */
435     hme_memtab_t as_memtabs[MAX_HME_ENC_TOT_MEMTABS];
436     WORD32 n_tabs, i;
437 
438     /*************************************************************************/
439     /* POPULATE THE HME INIT PRMS                                            */
440     /*************************************************************************/
441     ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id);
442 
443     /*************************************************************************/
444     /* Ensure local declaration is sufficient                                */
445     /*************************************************************************/
446     n_tabs = hme_enc_num_alloc(i4_num_me_frm_pllel);
447 
448     if(i4_num_me_frm_pllel > 1)
449     {
450         ASSERT(MAX_HME_ENC_TOT_MEMTABS >= n_tabs);
451     }
452     else
453     {
454         ASSERT(MIN_HME_ENC_TOT_MEMTABS >= n_tabs);
455     }
456 
457     /*************************************************************************/
458     /* MAP RESULTS TO HME MEMTAB STRUCTURE                                   */
459     /*************************************************************************/
460     for(i = 0; i < n_tabs; i++)
461     {
462         as_memtabs[i].size = ps_mem_tab[i].i4_mem_size;
463         as_memtabs[i].align = ps_mem_tab[i].i4_mem_alignment;
464         as_memtabs[i].pu1_mem = (U08 *)ps_mem_tab[i].pv_base;
465     }
466     /*************************************************************************/
467     /* CALL THE ME FUNCTION TO GET MEMTABS                                   */
468     /*************************************************************************/
469     pv_me_ctxt = (void *)as_memtabs[0].pu1_mem;
470     ps_me_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
471     /* Store Tile params base into ME context */
472     ps_me_ctxt->pv_tile_params_base = pv_tile_params_base;
473 
474     status = hme_enc_init(
475         pv_me_ctxt, &as_memtabs[0], &s_hme_init_prms, ps_rc_quant_ctxt, i4_num_me_frm_pllel);
476 
477     if(status == -1)
478         return NULL;
479 
480     /*************************************************************************/
481     /* --- L0 ME sync Dep Mngr Mem init --                                     */
482     /*************************************************************************/
483     /* Update numer of ME frames running in parallel in me master context */
484     ps_me_ctxt->i4_num_me_frm_pllel = i4_num_me_frm_pllel;
485 
486     e_arch_type = ps_init_prms->e_arch_type;
487 
488     hme_init_function_ptr(ps_me_ctxt, e_arch_type);
489 
490     ihevce_me_instr_set_router(
491         (ihevce_me_optimised_function_list_t *)ps_me_ctxt->pv_me_optimised_function_list,
492         e_arch_type);
493 
494     ihevce_cmn_utils_instr_set_router(
495         &ps_me_ctxt->s_cmn_opt_func, u1_is_popcnt_available, e_arch_type);
496 
497     ps_mem_tab += n_tabs;
498 
499     return (pv_me_ctxt);
500 }
501 
502 /**
503 *******************************************************************************
504 * \if Function name : ihevce_me_set_resolution \endif
505 *
506 * \brief
507 *    Sets the resolution for ME state
508 *
509 * \par Description:
510 *    ME requires information of resolution to prime up its layer descriptors
511 *    and contexts. This API is called whenever a control call from application
512 *    causes a change of resolution. Has to be called once initially before
513 *    processing any frame. Again this is just a glue function and calls the
514 *    actual ME API for the same.
515 *
516 * \param[in,out] pv_me_ctxt: Handle to the ME context
517 * \param[in] n_enc_layers: Number of layers getting encoded
518 * \param[in] p_wd : Pointer containing widths of each layer getting encoded.
519 * \param[in] p_ht : Pointer containing heights of each layer getting encoded.
520 *
521 * \returns
522 *  none
523 *
524 * \author
525 *  Ittiam
526 *
527 *******************************************************************************
528 */
ihevce_me_set_resolution(void * pv_me_ctxt,WORD32 n_enc_layers,WORD32 * p_wd,WORD32 * p_ht)529 void ihevce_me_set_resolution(void *pv_me_ctxt, WORD32 n_enc_layers, WORD32 *p_wd, WORD32 *p_ht)
530 {
531     /* local variables */
532     me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
533     WORD32 thrds;
534     WORD32 i;
535 
536     for(thrds = 0; thrds < ps_master_ctxt->i4_num_proc_thrds; thrds++)
537     {
538         me_ctxt_t *ps_me_thrd_ctxt;
539 
540         ps_me_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrds];
541 
542         for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
543         {
544             hme_set_resolution((void *)ps_me_thrd_ctxt, n_enc_layers, p_wd, p_ht, i);
545         }
546     }
547 }
548 
/*!
******************************************************************************
* \if Function name : ihevce_populate_me_ctb_data \endif
*
* \brief
*    Walks a CU tree node and its descendants and records, per block, the
*    number of best results in the ME CTB data.
*
* \par Description:
*    For the node's CU size (64, 32, 16 or 8) the matching block data entry
*    receives the result count from the frame context when the node is valid
*    and 0 otherwise. The four children are then visited when the node is
*    larger than 8 and is either invalid, or valid with the ME quality preset
*    being ME_PRISTINE_QUALITY.
*
* \param[in] ps_ctxt : ME context, read for the ME quality preset
* \param[in] ps_frm_ctxt : ME frame context holding the per block CU results
* \param[in] ps_cu_tree : CU tree node to process
* \param[in,out] ps_me_ctb_data : ME CTB data being filled
* \param[in] e_grandparent_blk_pos : Position of the grandparent block
*                                    (used for the 8x8 block index)
* \param[in] e_parent_blk_pos : Position of the parent block
*                               (used for the 16x16 and 8x8 block index)
* \param[in] e_cur_blk_pos : Position of this block within its parent
*
* \return
*    None
*
*****************************************************************************
*/
void ihevce_populate_me_ctb_data(
    me_ctxt_t *ps_ctxt,
    me_frm_ctxt_t *ps_frm_ctxt,
    cur_ctb_cu_tree_t *ps_cu_tree,
    me_ctb_data_t *ps_me_ctb_data,
    CU_POS_T e_grandparent_blk_pos,
    CU_POS_T e_parent_blk_pos,
    CU_POS_T e_cur_blk_pos)
{
    /* Step 1: store the result count for the block this node describes */
    switch(ps_cu_tree->u1_cu_size)
    {
    case 64:
        ps_me_ctb_data->s_64x64_block_data.num_best_results =
            (ps_cu_tree->is_node_valid) ? ps_frm_ctxt->s_cu64x64_results.u1_num_best_results : 0;
        break;

    case 32:
        ps_me_ctb_data->as_32x32_block_data[e_cur_blk_pos].num_best_results =
            (ps_cu_tree->is_node_valid)
                ? ps_frm_ctxt->as_cu32x32_results[e_cur_blk_pos].u1_num_best_results
                : 0;
        break;

    case 16:
    {
        /* 16x16 index: 4 blocks per 32x32 parent */
        WORD32 i4_idx_16 = e_cur_blk_pos + (e_parent_blk_pos << 2);

        ps_me_ctb_data->as_block_data[i4_idx_16].num_best_results =
            (ps_cu_tree->is_node_valid)
                ? ps_frm_ctxt->as_cu16x16_results[i4_idx_16].u1_num_best_results
                : 0;
        break;
    }

    case 8:
    {
        /* 8x8 index: 4 blocks per 16x16 parent, 16 per 32x32 grandparent */
        WORD32 i4_idx_8 = e_cur_blk_pos + (e_parent_blk_pos << 2) + (e_grandparent_blk_pos << 4);

        ps_me_ctb_data->as_8x8_block_data[i4_idx_8].num_best_results =
            (ps_cu_tree->is_node_valid)
                ? ps_frm_ctxt->as_cu8x8_results[i4_idx_8].u1_num_best_results
                : 0;
        break;
    }

    default:
        /* any other size: nothing is recorded for this node */
        break;
    }

    /* Step 2: descend into the children. An invalid node always descends, */
    /* a valid node only in pristine quality; 8x8 nodes never descend.     */
    if((!ps_cu_tree->is_node_valid ||
        (ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets == ME_PRISTINE_QUALITY)) &&
       (ps_cu_tree->u1_cu_size != 8))
    {
        /* For the children, this node's parent becomes the grandparent */
        /* and this node becomes the parent                             */
        ihevce_populate_me_ctb_data(
            ps_ctxt,
            ps_frm_ctxt,
            ps_cu_tree->ps_child_node_tl,
            ps_me_ctb_data,
            e_parent_blk_pos,
            e_cur_blk_pos,
            POS_TL);

        ihevce_populate_me_ctb_data(
            ps_ctxt,
            ps_frm_ctxt,
            ps_cu_tree->ps_child_node_tr,
            ps_me_ctb_data,
            e_parent_blk_pos,
            e_cur_blk_pos,
            POS_TR);

        ihevce_populate_me_ctb_data(
            ps_ctxt,
            ps_frm_ctxt,
            ps_cu_tree->ps_child_node_bl,
            ps_me_ctb_data,
            e_parent_blk_pos,
            e_cur_blk_pos,
            POS_BL);

        ihevce_populate_me_ctb_data(
            ps_ctxt,
            ps_frm_ctxt,
            ps_cu_tree->ps_child_node_br,
            ps_me_ctb_data,
            e_parent_blk_pos,
            e_cur_blk_pos,
            POS_BR);
    }
}
689 
/*!
******************************************************************************
* \if Function name : ihevce_me_update_ctb_results \endif
*
* \brief
*    Hooks the CU tree and ME CTB data of one CTB into its CTB analyse
*    output entry and fills the ME CTB data from the CU tree.
*
* \param[in] pv_me_ctxt : Handle to the ME context
* \param[in,out] pv_me_frm_ctxt : Handle to the ME frame context; supplies the
*                                 current row pointers that are indexed
* \param[in] i4_ctb_x : CTB index within the current row
* \param[in] i4_ctb_y : Not used by this function
*
* \return
*    None
*
*****************************************************************************
*/
void ihevce_me_update_ctb_results(
    void *pv_me_ctxt, void *pv_me_frm_ctxt, WORD32 i4_ctb_x, WORD32 i4_ctb_y)
{
    me_frm_ctxt_t *ps_frm = (me_frm_ctxt_t *)pv_me_frm_ctxt;

    /* Entries of this CTB in the three current-row arrays */
    ctb_analyse_t *ps_analyse = ps_frm->ps_ctb_analyse_curr_row + i4_ctb_x;
    me_ctb_data_t *ps_ctb_data = ps_frm->ps_me_ctb_data_curr_row + i4_ctb_x;
    /* each CTB owns MAX_NUM_NODES_CU_TREE consecutive tree nodes */
    cur_ctb_cu_tree_t *ps_tree_root =
        ps_frm->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);

    (void)i4_ctb_y;

    ps_analyse->ps_cu_tree = ps_tree_root;
    ps_analyse->ps_me_ctb_data = ps_ctb_data;

    /* Root of the tree: no parent or grandparent position applies */
    ihevce_populate_me_ctb_data(
        (me_ctxt_t *)pv_me_ctxt, ps_frm, ps_tree_root, ps_ctb_data, POS_NA, POS_NA, POS_NA);
}
711 
/*!
******************************************************************************
* \if Function name : ihevce_me_find_poc_in_list \endif
*
* \brief
*    Searches a recon picture list for the entry matching both a POC and an
*    IDR GOP number.
*
* \param[in] pps_rec_list : List of recon picture buffer pointers
* \param[in] poc : POC to look for
* \param[in] i4_idr_gop_num : IDR GOP number to look for
* \param[in] num_ref : Number of entries of the list to examine
*
* \return
*    Index of the first matching entry. When nothing matches, ASSERT(0) is
*    hit and -1 is returned.
*
*****************************************************************************
*/
WORD32 ihevce_me_find_poc_in_list(
    recon_pic_buf_t **pps_rec_list, WORD32 poc, WORD32 i4_idr_gop_num, WORD32 num_ref)
{
    WORD32 i4_idx = 0;

    while(i4_idx < num_ref)
    {
        recon_pic_buf_t *ps_candidate = pps_rec_list[i4_idx];

        /* both the POC and the GOP number have to agree */
        if((ps_candidate->i4_poc == poc) && (ps_candidate->i4_idr_gop_num == i4_idr_gop_num))
        {
            return i4_idx;
        }

        i4_idx++;
    }

    /* the picture is expected to be in the list; reaching here is an error */
    ASSERT(0);
    return -1;
}
/*!
******************************************************************************
* \if Function name : ihevc_me_update_ref_desc \endif
*
* \brief
*    Fills one ME reference descriptor from a recon picture buffer.
*
* \par Description:
*    Only entry 0 of the descriptor's ref info array is written. Plane
*    pointers are taken from the recon picture without adding any offset.
*
* \param[out] ps_ref_desc : ME reference descriptor to fill
* \param[in] ps_recon_pic : Recon picture buffer describing the reference
* \param[in] ref_id_l0 : Reference id in list L0
* \param[in] ref_id_l1 : Reference id in list L1
* \param[in] ref_id_lc : Reference id in the combined list
* \param[in] is_fwd : Whether the picture is in the past (fwd) or the
*                     future (bck)
*
* \return
*    None
*
*****************************************************************************
*/
void ihevc_me_update_ref_desc(
    hme_ref_desc_t *ps_ref_desc,
    recon_pic_buf_t *ps_recon_pic,
    WORD32 ref_id_l0,
    WORD32 ref_id_l1,
    WORD32 ref_id_lc,
    WORD32 is_fwd)
{
    iv_enc_yuv_buf_t *ps_recon_yuv = (iv_enc_yuv_buf_t *)&ps_recon_pic->s_yuv_buf_desc;
    iv_enc_yuv_buf_t *ps_source_yuv = (iv_enc_yuv_buf_t *)&ps_recon_pic->s_yuv_buf_desc_src;
    hme_ref_buf_info_t *ps_buf_info = &ps_ref_desc->as_ref_info[0];

    /* ---------------- descriptor level fields ---------------- */

    /* L0, L1 and LC id. */
    ps_ref_desc->i1_ref_id_l0 = ref_id_l0;
    ps_ref_desc->i1_ref_id_l1 = ref_id_l1;
    ps_ref_desc->i1_ref_id_lc = ref_id_lc;

    /* POC, display number and GOP number of the ref pic */
    ps_ref_desc->i4_poc = ps_recon_pic->i4_poc;
    ps_ref_desc->i4_display_num = ps_recon_pic->i4_display_num;
    ps_ref_desc->i4_GOP_num = ps_recon_pic->i4_idr_gop_num;

    /* Past (fwd) or future (bck) picture */
    ps_ref_desc->u1_is_fwd = is_fwd;

    /* Luma weight and offset of the reference picture */
    ps_ref_desc->i2_weight = ps_recon_pic->s_weight_offset.i2_luma_weight;
    ps_ref_desc->i2_offset = ps_recon_pic->s_weight_offset.i2_luma_offset;

    /* ---------------- buffer level fields ---------------- */

    /* Padding beyond 64 is not of use to ME */
    ps_buf_info->u1_pad_x = MIN(64, PAD_HORZ);
    ps_buf_info->u1_pad_y = MIN(64, PAD_VERT);

    /* Luma: stride from the recon buffer, offset fixed at 0 */
    ps_buf_info->luma_stride = ps_recon_yuv->i4_y_strd;
    ps_buf_info->luma_offset = 0;

    /* Chroma: same treatment as luma */
    ps_buf_info->chroma_stride = ps_recon_yuv->i4_uv_strd;
    ps_buf_info->chroma_offset = 0;

    /* 4 luma planes: fxfy is the direct recon buf, the others come from  */
    /* the subpel planes. No padding offset is applied to any of them     */
    /* (a luma_stride * PAD_VERT + PAD_HORZ offset was used at one time). */
    ps_buf_info->pu1_rec_fxfy = (UWORD8 *)ps_recon_yuv->pv_y_buf;
    ps_buf_info->pu1_rec_hxfy = ps_recon_pic->apu1_y_sub_pel_planes[0];
    ps_buf_info->pu1_rec_fxhy = ps_recon_pic->apu1_y_sub_pel_planes[1];
    ps_buf_info->pu1_rec_hxhy = ps_recon_pic->apu1_y_sub_pel_planes[2];

    /* Source luma plane of the reference */
    ps_buf_info->pu1_ref_src = (UWORD8 *)ps_source_yuv->pv_y_buf;

    /* U V ptrs though they are not used */
    ps_buf_info->pu1_rec_u = (U08 *)ps_recon_yuv->pv_u_buf;
    ps_buf_info->pu1_rec_v = (U08 *)ps_recon_yuv->pv_v_buf;

    /* Dependency manager handle of the recon picture */
    ps_buf_info->pv_dep_mngr = ps_recon_pic->pv_dep_mngr_recon;
}
790 
791 /* Create the reference map for ME */
ihevce_me_create_ref_map(recon_pic_buf_t ** pps_rec_list_l0,recon_pic_buf_t ** pps_rec_list_l1,WORD32 num_ref_l0_active,WORD32 num_ref_l1_active,WORD32 num_ref,hme_ref_map_t * ps_ref_map)792 void ihevce_me_create_ref_map(
793     recon_pic_buf_t **pps_rec_list_l0,
794     recon_pic_buf_t **pps_rec_list_l1,
795     WORD32 num_ref_l0_active,
796     WORD32 num_ref_l1_active,
797     WORD32 num_ref,
798     hme_ref_map_t *ps_ref_map)
799 {
800     WORD32 min_ref, i, poc, ref_id_l0, ref_id_l1;
801 
802     /* tracks running count of ref pics */
803     WORD32 ref_count = 0, i4_idr_gop_num;
804 
805     /* points to One instance of a ref pic structure */
806     recon_pic_buf_t *ps_recon_pic;
807 
808     /* points to one instance of ref desc str used by ME */
809     hme_ref_desc_t *ps_ref_desc;
810 
811     min_ref = MIN(num_ref_l0_active, num_ref_l1_active);
812 
813     for(i = 0; i < min_ref; i++)
814     {
815         /* Create interleaved L0 and L1 entries */
816         ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
817         ps_recon_pic = pps_rec_list_l0[i];
818         poc = ps_recon_pic->i4_poc;
819         i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
820         ref_id_l0 = i;
821         ref_id_l1 = ihevce_me_find_poc_in_list(pps_rec_list_l1, poc, i4_idr_gop_num, num_ref);
822         ihevc_me_update_ref_desc(ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * i, 1);
823 
824         ref_count++;
825 
826         ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
827         ps_recon_pic = pps_rec_list_l1[i];
828         poc = ps_recon_pic->i4_poc;
829         i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
830         ref_id_l1 = i;
831         ref_id_l0 = ihevce_me_find_poc_in_list(pps_rec_list_l0, poc, i4_idr_gop_num, num_ref);
832         ihevc_me_update_ref_desc(ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * i + 1, 0);
833 
834         ref_count++;
835     }
836 
837     if(num_ref_l0_active > min_ref)
838     {
839         for(i = 0; i < (num_ref_l0_active - min_ref); i++)
840         {
841             ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
842             ref_id_l0 = i + min_ref;
843             ps_recon_pic = pps_rec_list_l0[ref_id_l0];
844             poc = ps_recon_pic->i4_poc;
845             i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
846             ref_id_l1 = ihevce_me_find_poc_in_list(pps_rec_list_l1, poc, i4_idr_gop_num, num_ref);
847             ihevc_me_update_ref_desc(
848                 ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * min_ref + i, 1);
849             ref_count++;
850         }
851     }
852     else
853     {
854         for(i = 0; i < (num_ref_l1_active - min_ref); i++)
855         {
856             ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
857             ref_id_l1 = i + min_ref;
858             ps_recon_pic = pps_rec_list_l1[ref_id_l1];
859             poc = ps_recon_pic->i4_poc;
860             i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
861             ref_id_l0 = ihevce_me_find_poc_in_list(pps_rec_list_l0, poc, i4_idr_gop_num, num_ref);
862             ihevc_me_update_ref_desc(
863                 ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * min_ref + i, 0);
864             ref_count++;
865         }
866     }
867 
868     ps_ref_map->i4_num_ref = ref_count;
869     ASSERT(ref_count == (num_ref_l0_active + num_ref_l1_active));
870 
871     /* TODO : Fill better values in lambda depending on ref dist */
872     for(i = 0; i < ps_ref_map->i4_num_ref; i++)
873         ps_ref_map->as_ref_desc[i].lambda = 20;
874 }
875 
876 /*!
877 ******************************************************************************
878 * \if Function name : ihevce_me_process \endif
879 *
880 * \brief
881 *    Frame level ME function
882 *
883 * \par Description:
884 *    Processing of all layers starting from coarse and going
885 *    to the refinement layers, all layers
886 *    that are encoded go CTB by CTB. Outputs of this function are populated
887 *    ctb_analyse_t structures, one per CTB.
888 *
889 * \param[in] pv_ctxt : pointer to ME module
890 * \param[in] ps_enc_lap_inp  : pointer to input yuv buffer (frame buffer)
891 * \param[in,out] ps_ctb_out : pointer to CTB analyse output structure (frame buffer)
892 * \param[out] ps_cu_out : pointer to CU analyse output structure (frame buffer)
893 * \param[in]  pd_intra_costs : pointerto intra cost buffer
894 * \param[in]  ps_multi_thrd_ctxt : pointer to multi thread ctxt
895 * \param[in]  thrd_id : Thread id of the current thrd in which function is executed
896 *
897 * \return
898 *    None
899 *
900 * \author
901 *  Ittiam
902 *
903 *****************************************************************************
904 */
ihevce_me_process(void * pv_me_ctxt,ihevce_lap_enc_buf_t * ps_enc_lap_inp,ctb_analyse_t * ps_ctb_out,me_enc_rdopt_ctxt_t * ps_cur_out_me_prms,double * pd_intra_costs,ipe_l0_ctb_analyse_for_me_t * ps_ipe_analyse_ctb,pre_enc_L0_ipe_encloop_ctxt_t * ps_l0_ipe_input,void * pv_coarse_layer,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,WORD32 i4_frame_parallelism_level,WORD32 thrd_id,WORD32 i4_me_frm_id)905 void ihevce_me_process(
906     void *pv_me_ctxt,
907     ihevce_lap_enc_buf_t *ps_enc_lap_inp,
908     ctb_analyse_t *ps_ctb_out,
909     me_enc_rdopt_ctxt_t *ps_cur_out_me_prms,
910     double *pd_intra_costs,
911     ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb,
912     pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input,
913     void *pv_coarse_layer,
914     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
915     WORD32 i4_frame_parallelism_level,
916     WORD32 thrd_id,
917     WORD32 i4_me_frm_id)
918 {
919     me_ctxt_t *ps_thrd_ctxt;
920     me_frm_ctxt_t *ps_ctxt;
921 
922     PF_EXT_UPDATE_FXN_T pf_ext_update_fxn;
923 
924     me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
925     cur_ctb_cu_tree_t *ps_cu_tree_out = ps_cur_out_me_prms->ps_cur_ctb_cu_tree;
926     me_ctb_data_t *ps_me_ctb_data_out = ps_cur_out_me_prms->ps_cur_ctb_me_data;
927     layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer;
928 
929     pf_ext_update_fxn = (PF_EXT_UPDATE_FXN_T)ihevce_me_update_ctb_results;
930 
931     /* get the current thread ctxt pointer */
932     ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrd_id];
933     ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
934     ps_ctxt->thrd_id = thrd_id;
935 
936     /* store the ctb out and cu out base pointers */
937     ps_ctxt->ps_ctb_analyse_base = ps_ctb_out;
938 
939     ps_ctxt->ps_cu_tree_base = ps_cu_tree_out;
940     ps_ctxt->ps_ipe_l0_ctb_frm_base = ps_ipe_analyse_ctb;
941     ps_ctxt->ps_me_ctb_data_base = ps_me_ctb_data_out;
942     ps_ctxt->ps_func_selector = &ps_master_ctxt->s_func_selector;
943 
944     /** currently in master context. Copying that to me context **/
945     /* frame level processing function */
946     hme_process_frm(
947         (void *)ps_thrd_ctxt,
948         ps_l0_ipe_input,
949         &ps_master_ctxt->as_ref_map[i4_me_frm_id],
950         &pd_intra_costs,
951         &ps_master_ctxt->as_frm_prms[i4_me_frm_id],
952         pf_ext_update_fxn,
953         ps_coarse_layer,
954         ps_multi_thrd_ctxt,
955         i4_frame_parallelism_level,
956         thrd_id,
957         i4_me_frm_id);
958 }
959 /*!
960 ******************************************************************************
961 * \if Function name : ihevce_me_frame_dpb_update \endif
962 *
963 * \brief
964 *    Frame level ME initialisation function
965 *
966 * \par Description:
967 *   Updation of ME's internal DPB
968 *    based on available ref list information
969 *
970 * \param[in] pv_ctxt : pointer to ME module
971 * \param[in] num_ref_l0 : Number of reference pics in L0 list
972 * \param[in] num_ref_l1 : Number of reference pics in L1 list
973 * \param[in] pps_rec_list_l0 : List of recon pics in L0 list
974 * \param[in] pps_rec_list_l1 : List of recon pics in L1 list
975 *
976 * \return
977 *    None
978 *
979 * \author
980 *  Ittiam
981 *
982 *****************************************************************************
983 */
ihevce_me_frame_dpb_update(void * pv_me_ctxt,WORD32 num_ref_l0,WORD32 num_ref_l1,recon_pic_buf_t ** pps_rec_list_l0,recon_pic_buf_t ** pps_rec_list_l1,WORD32 i4_thrd_id)984 void ihevce_me_frame_dpb_update(
985     void *pv_me_ctxt,
986     WORD32 num_ref_l0,
987     WORD32 num_ref_l1,
988     recon_pic_buf_t **pps_rec_list_l0,
989     recon_pic_buf_t **pps_rec_list_l1,
990     WORD32 i4_thrd_id)
991 {
992     me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
993     me_ctxt_t *ps_thrd0_ctxt;
994     WORD32 a_pocs_to_remove[MAX_NUM_REF + 2];
995     WORD32 i, i4_is_buffer_full;
996     WORD32 i4_least_POC = 0x7FFFFFFF;
997     WORD32 i4_least_GOP_num = 0x7FFFFFFF;
998     me_ctxt_t *ps_ctxt;
999 
1000     /* All processing done using shared / common memory across */
1001     /* threads is done using thrd ctxt */
1002     ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];
1003 
1004     ps_ctxt = (me_ctxt_t *)ps_thrd0_ctxt;
1005     a_pocs_to_remove[0] = INVALID_POC;
1006     /*************************************************************************/
1007     /* Updation of ME's DPB list. This involves the following steps:         */
1008     /* 1. Obtain list of active POCs maintained within ME.                   */
1009     /* 2. Search each of them in the ref list. Whatever is not found goes to */
1010     /*     the list to be removed. Note: a_pocs_buffered_in_me holds the     */
1011     /*    currently active POC list within ME. a_pocs_to_remove holds the    */
1012     /*    list of POCs to be removed, terminated by -1.                      */
1013     /*************************************************************************/
1014     i4_is_buffer_full =
1015         hme_get_active_pocs_list((void *)ps_thrd0_ctxt, ps_master_ctxt->i4_num_me_frm_pllel);
1016 
1017     if(i4_is_buffer_full)
1018     {
1019         /* remove if any non-reference pictures are present */
1020         for(i = 0;
1021             i <
1022             (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * ps_master_ctxt->i4_num_me_frm_pllel) + 1;
1023             i++)
1024         {
1025             if(ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_reference == 0 &&
1026                ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_non_ref_free == 1)
1027             {
1028                 i4_least_POC = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc;
1029                 i4_least_GOP_num = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num;
1030             }
1031         }
1032         /* if all non reference pictures are removed, then find the least poc
1033         in the least gop number*/
1034         if(i4_least_POC == 0x7FFFFFFF)
1035         {
1036             ASSERT(i4_least_GOP_num == 0x7FFFFFFF);
1037             for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref *
1038                             ps_master_ctxt->i4_num_me_frm_pllel) +
1039                                1;
1040                 i++)
1041             {
1042                 if(i4_least_GOP_num > ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num)
1043                 {
1044                     i4_least_GOP_num = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num;
1045                 }
1046             }
1047             for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref *
1048                             ps_master_ctxt->i4_num_me_frm_pllel) +
1049                                1;
1050                 i++)
1051             {
1052                 if(i4_least_POC > ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc &&
1053                    ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num == i4_least_GOP_num)
1054                 {
1055                     i4_least_POC = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc;
1056                 }
1057             }
1058         }
1059         ASSERT(i4_least_POC != 0x7FFFFFFF);
1060         a_pocs_to_remove[0] = i4_least_POC;
1061         a_pocs_to_remove[1] = INVALID_POC;
1062     }
1063 
1064     /* Call the ME API to remove "outdated" POCs */
1065     hme_discard_frm(
1066         ps_thrd0_ctxt, a_pocs_to_remove, i4_least_GOP_num, ps_master_ctxt->i4_num_me_frm_pllel);
1067 }
1068 /*!
1069 ******************************************************************************
1070 * \if Function name : ihevce_me_frame_init \endif
1071 *
1072 * \brief
1073 *    Frame level ME initialisation function
1074 *
1075 * \par Description:
1076 *    The following pre-conditions exist for this function: a. We have the input
1077 *    pic ready for encode, b. We have the reference list with POC, L0/L1 IDs
1078 *    and ref ptrs ready for this picture and c. ihevce_me_set_resolution has
1079 *    been called atleast once. Once these are supplied, the following are
1080 *    done here: a. Input pyramid creation, b. Updation of ME's internal DPB
1081 *    based on available ref list information
1082 *
1083 * \param[in] pv_ctxt : pointer to ME module
1084 * \param[in] ps_frm_ctb_prms : CTB characteristics parameters
1085 * \param[in] ps_frm_lamda : Frame level Lambda params
1086 * \param[in] num_ref_l0 : Number of reference pics in L0 list
1087 * \param[in] num_ref_l1 : Number of reference pics in L1 list
1088 * \param[in] num_ref_l0_active : Active reference pics in L0 dir for current frame (shall be <= num_ref_l0)
1089 * \param[in] num_ref_l1_active : Active reference pics in L1 dir for current frame (shall be <= num_ref_l1)
1090 * \param[in] pps_rec_list_l0 : List of recon pics in L0 list
1091 * \param[in] pps_rec_list_l1 : List of recon pics in L1 list
1092 * \param[in] ps_enc_lap_inp  : pointer to input yuv buffer (frame buffer)
1093 * \param[in] i4_frm_qp       : current picture QP
1094 *
1095 * \return
1096 *    None
1097 *
1098 * \author
1099 *  Ittiam
1100 *
1101 *****************************************************************************
1102 */
ihevce_me_frame_init(void * pv_me_ctxt,me_enc_rdopt_ctxt_t * ps_cur_out_me_prms,ihevce_static_cfg_params_t * ps_stat_prms,frm_ctb_ctxt_t * ps_frm_ctb_prms,frm_lambda_ctxt_t * ps_frm_lamda,WORD32 num_ref_l0,WORD32 num_ref_l1,WORD32 num_ref_l0_active,WORD32 num_ref_l1_active,recon_pic_buf_t ** pps_rec_list_l0,recon_pic_buf_t ** pps_rec_list_l1,recon_pic_buf_t * (* aps_ref_list)[HEVCE_MAX_REF_PICS * 2],func_selector_t * ps_func_selector,ihevce_lap_enc_buf_t * ps_enc_lap_inp,void * pv_coarse_layer,WORD32 i4_me_frm_id,WORD32 i4_thrd_id,WORD32 i4_frm_qp,WORD32 i4_temporal_layer_id,WORD8 i1_cu_qp_delta_enabled_flag,void * pv_dep_mngr_encloop_dep_me)1103 void ihevce_me_frame_init(
1104     void *pv_me_ctxt,
1105     me_enc_rdopt_ctxt_t *ps_cur_out_me_prms,
1106     ihevce_static_cfg_params_t *ps_stat_prms,
1107     frm_ctb_ctxt_t *ps_frm_ctb_prms,
1108     frm_lambda_ctxt_t *ps_frm_lamda,
1109     WORD32 num_ref_l0,
1110     WORD32 num_ref_l1,
1111     WORD32 num_ref_l0_active,
1112     WORD32 num_ref_l1_active,
1113     recon_pic_buf_t **pps_rec_list_l0,
1114     recon_pic_buf_t **pps_rec_list_l1,
1115     recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
1116     func_selector_t *ps_func_selector,
1117     ihevce_lap_enc_buf_t *ps_enc_lap_inp,
1118     void *pv_coarse_layer,
1119     WORD32 i4_me_frm_id,
1120     WORD32 i4_thrd_id,
1121     WORD32 i4_frm_qp,
1122     WORD32 i4_temporal_layer_id,
1123     WORD8 i1_cu_qp_delta_enabled_flag,
1124     void *pv_dep_mngr_encloop_dep_me)
1125 {
1126     me_ctxt_t *ps_thrd_ctxt;
1127     me_ctxt_t *ps_thrd0_ctxt;
1128     me_frm_ctxt_t *ps_ctxt;
1129     hme_inp_desc_t s_inp_desc;
1130 
1131     WORD32 inp_poc, num_ref;
1132     WORD32 i;
1133 
1134     me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
1135     layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer;
1136 
1137     /* Input POC is derived from input buffer */
1138     inp_poc = ps_enc_lap_inp->s_lap_out.i4_poc;
1139     num_ref = num_ref_l0 + num_ref_l1;
1140 
1141     /* All processing done using shared / common memory across */
1142     /* threads is done using thrd ctxt */
1143     ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];
1144 
1145     ps_ctxt = ps_thrd0_ctxt->aps_me_frm_prms[i4_me_frm_id];
1146 
1147     /* Update the paarameters "num_ref_l0_active" and "num_ref_l1_active" in hme_frm_prms */
1148     ps_master_ctxt->as_frm_prms[i4_me_frm_id].u1_num_active_ref_l0 = num_ref_l0_active;
1149     ps_master_ctxt->as_frm_prms[i4_me_frm_id].u1_num_active_ref_l1 = num_ref_l1_active;
1150 
1151     /*************************************************************************/
1152     /* Add the current input to ME's DPB. This will also create the pyramids */
1153     /* for the HME layers tha are not "encoded".                             */
1154     /*************************************************************************/
1155     s_inp_desc.i4_poc = inp_poc;
1156     s_inp_desc.i4_idr_gop_num = ps_enc_lap_inp->s_lap_out.i4_idr_gop_num;
1157     s_inp_desc.i4_is_reference = ps_enc_lap_inp->s_lap_out.i4_is_ref_pic;
1158     s_inp_desc.s_layer_desc[0].pu1_y = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_y_buf;
1159     s_inp_desc.s_layer_desc[0].pu1_u = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_u_buf;
1160     s_inp_desc.s_layer_desc[0].pu1_v = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_v_buf;
1161 
1162     s_inp_desc.s_layer_desc[0].luma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_y_strd;
1163     s_inp_desc.s_layer_desc[0].chroma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_uv_strd;
1164 
1165     hme_add_inp(pv_me_ctxt, &s_inp_desc, i4_me_frm_id, i4_thrd_id);
1166 
1167     /* store the frm ctb ctxt to all the thrd ctxt */
1168     {
1169         WORD32 num_thrds;
1170 
1171         /* initialise the parameters for all the threads */
1172         for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
1173         {
1174             me_frm_ctxt_t *ps_me_tmp_frm_ctxt;
1175 
1176             ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1177 
1178             ps_me_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
1179 
1180             ps_thrd_ctxt->pv_ext_frm_prms = (void *)ps_frm_ctb_prms;
1181             ps_me_tmp_frm_ctxt->i4_l0me_qp_mod = ps_stat_prms->s_config_prms.i4_cu_level_rc & 1;
1182 
1183             /* intialize the inter pred (MC) context at frame level */
1184             ps_me_tmp_frm_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list;
1185             ps_me_tmp_frm_ctxt->s_mc_ctxt.i1_weighted_pred_flag =
1186                 ps_enc_lap_inp->s_lap_out.i1_weighted_pred_flag;
1187             ps_me_tmp_frm_ctxt->s_mc_ctxt.i1_weighted_bipred_flag =
1188                 ps_enc_lap_inp->s_lap_out.i1_weighted_bipred_flag;
1189             ps_me_tmp_frm_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom =
1190                 ps_enc_lap_inp->s_lap_out.i4_log2_luma_wght_denom;
1191             ps_me_tmp_frm_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom =
1192                 ps_enc_lap_inp->s_lap_out.i4_log2_chroma_wght_denom;
1193             ps_me_tmp_frm_ctxt->s_mc_ctxt.i4_bit_depth = 8;
1194             ps_me_tmp_frm_ctxt->s_mc_ctxt.u1_chroma_array_type = 1;
1195             ps_me_tmp_frm_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector;
1196             /* Initiallization for non-distributed mode */
1197             memset(
1198                 ps_me_tmp_frm_ctxt->s_mc_ctxt.ai4_tile_xtra_pel,
1199                 0,
1200                 sizeof(ps_me_tmp_frm_ctxt->s_mc_ctxt.ai4_tile_xtra_pel));
1201 
1202             ps_me_tmp_frm_ctxt->i4_pic_type = ps_enc_lap_inp->s_lap_out.i4_pic_type;
1203 
1204             ps_me_tmp_frm_ctxt->i4_rc_pass = ps_stat_prms->s_pass_prms.i4_pass;
1205             ps_me_tmp_frm_ctxt->i4_temporal_layer = ps_enc_lap_inp->s_lap_out.i4_temporal_lyr_id;
1206             ps_me_tmp_frm_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER;
1207             ps_me_tmp_frm_ctxt->i4_use_const_lamda_modifier =
1208                 ps_ctxt->i4_use_const_lamda_modifier ||
1209                 ((ps_stat_prms->s_coding_tools_prms.i4_vqet &
1210                   (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
1211                  ((ps_stat_prms->s_coding_tools_prms.i4_vqet &
1212                    (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
1213                   (ps_stat_prms->s_coding_tools_prms.i4_vqet &
1214                    (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
1215                   (ps_stat_prms->s_coding_tools_prms.i4_vqet &
1216                    (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
1217                   (ps_stat_prms->s_coding_tools_prms.i4_vqet &
1218                    (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
1219             {
1220                 ps_me_tmp_frm_ctxt->f_i_pic_lamda_modifier =
1221                     ps_enc_lap_inp->s_lap_out.f_i_pic_lamda_modifier;
1222             }
1223             /* weighted pred enable flag */
1224             ps_me_tmp_frm_ctxt->i4_wt_pred_enable_flag =
1225                 ps_enc_lap_inp->s_lap_out.i1_weighted_pred_flag |
1226                 ps_enc_lap_inp->s_lap_out.i1_weighted_bipred_flag;
1227 
1228             if(1 == ps_me_tmp_frm_ctxt->i4_wt_pred_enable_flag)
1229             {
1230                 /* log2 weight denom  */
1231                 ps_me_tmp_frm_ctxt->s_wt_pred.wpred_log_wdc =
1232                     ps_enc_lap_inp->s_lap_out.i4_log2_luma_wght_denom;
1233             }
1234             else
1235             {
1236                 /* default value */
1237                 ps_me_tmp_frm_ctxt->s_wt_pred.wpred_log_wdc = DENOM_DEFAULT;
1238             }
1239 
1240             ps_me_tmp_frm_ctxt->u1_is_curFrame_a_refFrame = ps_enc_lap_inp->s_lap_out.i4_is_ref_pic;
1241 
1242             ps_thrd_ctxt->pv_me_optimised_function_list =
1243                 ps_master_ctxt->pv_me_optimised_function_list;
1244             ps_thrd_ctxt->ps_cmn_utils_optimised_function_list = &ps_master_ctxt->s_cmn_opt_func;
1245         }
1246     }
1247 
1248     /* Create the reference map for ME */
1249     ihevce_me_create_ref_map(
1250         pps_rec_list_l0,
1251         pps_rec_list_l1,
1252         num_ref_l0_active,
1253         num_ref_l1_active,
1254         num_ref,
1255         &ps_master_ctxt->as_ref_map[i4_me_frm_id]);
1256 
1257     /** Remember the pointers to recon list parmas for L0 and L1 lists in the context */
1258     ps_ctxt->ps_hme_ref_map->pps_rec_list_l0 = pps_rec_list_l0;
1259     ps_ctxt->ps_hme_ref_map->pps_rec_list_l1 = pps_rec_list_l1;
1260 
1261     /*************************************************************************/
1262     /* Call the ME frame level processing for further actiion.               */
1263     /* ToDo: Support Row Level API.                                          */
1264     /*************************************************************************/
1265     ps_master_ctxt->as_frm_prms[i4_me_frm_id].i2_mv_range_x =
1266         ps_thrd0_ctxt->s_init_prms.max_horz_search_range;
1267     ps_master_ctxt->as_frm_prms[i4_me_frm_id].i2_mv_range_y =
1268         ps_thrd0_ctxt->s_init_prms.max_vert_search_range;
1269     ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_i_pic = 0;
1270     ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_pic_second_field =
1271         (!(ps_enc_lap_inp->s_input_buf.i4_bottom_field ^
1272            ps_enc_lap_inp->s_input_buf.i4_topfield_first));
1273     ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_temporal_layer_id = i4_temporal_layer_id;
1274     {
1275         S32 pic_type = ps_enc_lap_inp->s_lap_out.i4_pic_type;
1276 
1277         /*********************************************************************/
1278         /* For I Pic, we do not call update fn at ctb level, instead we do   */
1279         /* one shot update for entire picture.                               */
1280         /*********************************************************************/
1281         if((pic_type == IV_I_FRAME) || (pic_type == IV_II_FRAME) || (pic_type == IV_IDR_FRAME))
1282         {
1283             ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_i_pic = 1;
1284             ps_master_ctxt->as_frm_prms[i4_me_frm_id].bidir_enabled = 0;
1285         }
1286 
1287         else if((pic_type == IV_P_FRAME) || (pic_type == IV_PP_FRAME))
1288         {
1289             ps_master_ctxt->as_frm_prms[i4_me_frm_id].bidir_enabled = 0;
1290         }
1291         else if((pic_type == IV_B_FRAME) || (pic_type == IV_BB_FRAME))
1292         {
1293             ps_master_ctxt->as_frm_prms[i4_me_frm_id].bidir_enabled = 1;
1294         }
1295         else
1296         {
1297             /* not sure whether we need to handle mixed frames like IP, */
1298             /* they should ideally come as single field. */
1299             /* TODO : resolve thsi ambiguity */
1300             ASSERT(0);
1301         }
1302     }
1303     /************************************************************************/
1304     /* Lambda calculations moved outside ME and to one place, so as to have */
1305     /* consistent lambda across ME, IPE, CL RDOPT etc                       */
1306     /************************************************************************/
1307 
1308     {
1309         double d_q_factor;
1310 
1311         d_q_factor = pow(2.0, (i4_frm_qp / 6.)) * 5.0 / 8.0;
1312         ps_master_ctxt->as_frm_prms[i4_me_frm_id].qstep = (WORD32)(d_q_factor + .5);
1313         ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_frame_qp = i4_frm_qp;
1314 
1315         /* Qstep multiplied by 256, to work at higher precision:
1316         5/6 is the rounding factor. Multiplied by 2 for the Had vs DCT
1317         cost variation */
1318         ps_master_ctxt->as_frm_prms[i4_me_frm_id].qstep_ls8 =
1319             (WORD32)((((d_q_factor * 256) * 5) / 3) + .5);
1320     }
1321 
1322     /* Frame level init of all threads of ME */
1323     {
1324         WORD32 num_thrds;
1325 
1326         /* initialise the parameters for all the threads */
1327         for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
1328         {
1329             me_frm_ctxt_t *ps_tmp_frm_ctxt;
1330 
1331             ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1332 
1333             ps_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
1334 
1335             hme_process_frm_init(
1336                 (void *)ps_thrd_ctxt,
1337                 ps_tmp_frm_ctxt->ps_hme_ref_map,
1338                 ps_tmp_frm_ctxt->ps_hme_frm_prms,
1339                 i4_me_frm_id,
1340                 ps_master_ctxt->i4_num_me_frm_pllel);
1341 
1342             ps_tmp_frm_ctxt->s_frm_lambda_ctxt = *ps_frm_lamda;
1343             ps_tmp_frm_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me;
1344         }
1345     }
1346 
1347     ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_cl_sad_lambda_qf =
1348         ps_frm_lamda->i4_cl_sad_lambda_qf;
1349     ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_cl_satd_lambda_qf =
1350         ps_frm_lamda->i4_cl_satd_lambda_qf;
1351     ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_ol_sad_lambda_qf =
1352         ps_frm_lamda->i4_ol_sad_lambda_qf;
1353     ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_ol_satd_lambda_qf =
1354         ps_frm_lamda->i4_ol_satd_lambda_qf;
1355     ps_master_ctxt->as_frm_prms[i4_me_frm_id].lambda_q_shift = LAMBDA_Q_SHIFT;
1356 
1357     ps_master_ctxt->as_frm_prms[i4_me_frm_id].u1_is_cu_qp_delta_enabled =
1358         i1_cu_qp_delta_enabled_flag;
1359 
1360     /*************************************************************************/
1361     /* If num ref is 0, that means that it has to be coded as I. Do nothing  */
1362     /* However mv bank update needs to happen with "intra" mv.               */
1363     /*************************************************************************/
1364     if(ps_master_ctxt->as_ref_map[i4_me_frm_id].i4_num_ref == 0 ||
1365        ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_i_pic)
1366     {
1367         for(i = 0; i < 1; i++)
1368         {
1369             layer_ctxt_t *ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
1370             BLK_SIZE_T e_blk_size;
1371             S32 use_4x4;
1372 
1373             /* The mv bank is filled with "intra" mv */
1374             use_4x4 = hme_get_mv_blk_size(
1375                 ps_thrd0_ctxt->s_init_prms.use_4x4, i, ps_ctxt->num_layers, ps_ctxt->u1_encode[i]);
1376             e_blk_size = use_4x4 ? BLK_4x4 : BLK_8x8;
1377             hme_init_mv_bank(ps_layer_ctxt, e_blk_size, 2, 1, ps_ctxt->u1_encode[i]);
1378             hme_fill_mvbank_intra(ps_layer_ctxt);
1379 
1380             /* Clear out the global mvs */
1381             memset(
1382                 ps_layer_ctxt->s_global_mv,
1383                 0,
1384                 sizeof(hme_mv_t) * ps_ctxt->max_num_ref * NUM_GMV_LOBES);
1385         }
1386 
1387         return;
1388     }
1389 
1390     /*************************************************************************/
1391     /* Encode layer frame init                                               */
1392     /*************************************************************************/
1393     {
1394         refine_prms_t s_refine_prms;
1395         layer_ctxt_t *ps_curr_layer;
1396         S16 i2_max;
1397         S32 layer_id;
1398 
1399         layer_id = 0;
1400         i2_max = ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x;
1401         i2_max = MAX(i2_max, ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y);
1402 
1403         ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[layer_id];
1404 
1405         {
1406             hme_set_refine_prms(
1407                 &s_refine_prms,
1408                 ps_ctxt->u1_encode[layer_id],
1409                 ps_master_ctxt->as_ref_map[i4_me_frm_id].i4_num_ref,
1410                 layer_id,
1411                 ps_ctxt->num_layers,
1412                 ps_ctxt->num_layers_explicit_search,
1413                 ps_thrd0_ctxt->s_init_prms.use_4x4,
1414                 &ps_master_ctxt->as_frm_prms[i4_me_frm_id],
1415                 NULL,
1416                 &ps_thrd0_ctxt->s_init_prms
1417                      .s_me_coding_tools); /* during frm init Intra cost Pointer is not required */
1418 
1419             hme_refine_frm_init(ps_curr_layer, &s_refine_prms, ps_coarse_layer);
1420         }
1421     }
1422 }
1423 
1424 /*!
1425 ******************************************************************************
1426 * \if Function name : ihevce_l0_me_frame_end \endif
1427 *
1428 * \brief
1429 *    End of frame update function performs
1430 *       - Dynamic Search Range collation
1431 *
1432 * \param[in] pv_ctxt : pointer to ME module
1433 *
1434 * \return
1435 *    None
1436 *
1437 * \author
1438 *  Ittiam
1439 *
1440 *****************************************************************************
1441 */
1442 
void ihevce_l0_me_frame_end(
    void *pv_me_ctxt, WORD32 i4_idx_dvsr_p, WORD32 i4_display_num, WORD32 me_frm_id)
{
    /*
     * End-of-frame collation of the L0 dynamic search range:
     *   1. merge every thread's per-reference max/min y into thread 0,
     *   2. derive the max/min y per POC distance over all active references,
     *   3. publish the per-POC values and display number to the other threads,
     *   4. copy thread 0's collated params into all the other ME frame contexts.
     * Nothing is done for I pics or for pics with bidir enabled.
     */
    WORD32 i4_num_ref = 0, num_ref, num_thrds, cur_poc, frm_num;

    me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
    me_ctxt_t *ps_thrd0_ctxt;
    me_frm_ctxt_t *ps_frm_ctxt;
    l0_dyn_range_prms_t *ps_l0_prms_thrd0;

    ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
    ps_frm_ctxt = ps_thrd0_ctxt->aps_me_frm_prms[me_frm_id];

    /* Thread 0 entry that accumulates the collated result for this frame */
    ps_l0_prms_thrd0 = &ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p];

    /* No processing is required if current pic is I pic */
    if(1 == ps_master_ctxt->as_frm_prms[me_frm_id].is_i_pic)
    {
        return;
    }

    /* For a B/b pic nothing is collated here; the contexts are left untouched */
    if(1 == ps_frm_ctxt->s_frm_prms.bidir_enabled)
    {
        return;
    }

    /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
    ASSERT(ps_frm_ctxt->s_frm_prms.is_i_pic == ps_frm_ctxt->s_frm_prms.bidir_enabled);

    /* Getting the max num references value across all the threads */
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    {
        i4_num_ref =
            MAX(i4_num_ref,
                ps_master_ctxt->aps_me_ctxt[num_thrds]
                    ->aps_me_frm_prms[me_frm_id]
                    ->as_l0_dyn_range_prms[i4_idx_dvsr_p]
                    .i4_num_act_ref_in_l0);
    }

    /* use thrd 0 ctxt to collate the Dynamic Search Range across all threads */
    for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
    {
        dyn_range_prms_t *ps_dyn_range_prms_thrd0;

        ps_dyn_range_prms_thrd0 = &ps_l0_prms_thrd0->as_dyn_range_prms[num_ref];

        /* run a loop over all the other threads to update the dynamical search range */
        for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
        {
            me_frm_ctxt_t *ps_me_tmp_frm_ctxt;

            dyn_range_prms_t *ps_dyn_range_prms;

            ps_me_tmp_frm_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[me_frm_id];

            /* get current thrd dynamical search range param. pointer */
            ps_dyn_range_prms =
                &ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[num_ref];

            /* TODO : This calls can be optimized further. No need for min in 1st call and max in 2nd call */
            hme_update_dynamic_search_params(
                ps_dyn_range_prms_thrd0, ps_dyn_range_prms->i2_dyn_max_y);

            hme_update_dynamic_search_params(
                ps_dyn_range_prms_thrd0, ps_dyn_range_prms->i2_dyn_min_y);
        }
    }

    /*************************************************************************/
    /* Get the MAX/MIN per POC distance based on the all the ref. pics       */
    /*************************************************************************/
    cur_poc = ps_frm_ctxt->i4_curr_poc;
    ps_l0_prms_thrd0->i2_dyn_max_y_per_poc = 0;
    ps_l0_prms_thrd0->i2_dyn_min_y_per_poc = 0;
    /* populate display num */
    ps_l0_prms_thrd0->i4_display_num = i4_display_num;

    for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
    {
        WORD16 i2_mv_per_poc;
        WORD32 ref_poc, poc_diff;
        dyn_range_prms_t *ps_dyn_range_prms_thrd0;

        ps_dyn_range_prms_thrd0 = &ps_l0_prms_thrd0->as_dyn_range_prms[num_ref];

        ref_poc = ps_dyn_range_prms_thrd0->i4_poc;
        /* Should be cleaned up for ME llsm */
        poc_diff = (cur_poc - ref_poc);
        /* clamp to at least 1 so the divisions below never divide by zero */
        poc_diff = MAX(1, poc_diff);

        /* cur. ref. pic. max y per POC; the (poc_diff - 1) bias rounds the   */
        /* magnitude up before the truncating division (assumes max y >= 0)  */
        i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_max_y + (poc_diff - 1)) / poc_diff;
        /* update the max y per POC */
        ps_l0_prms_thrd0->i2_dyn_max_y_per_poc =
            MAX(ps_l0_prms_thrd0->i2_dyn_max_y_per_poc, i2_mv_per_poc);

        /* cur. ref. pic. min y per POC; same bias, mirrored (assumes min y <= 0) */
        i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_min_y - (poc_diff - 1)) / poc_diff;
        /* update the min y per POC */
        ps_l0_prms_thrd0->i2_dyn_min_y_per_poc =
            MIN(ps_l0_prms_thrd0->i2_dyn_min_y_per_poc, i2_mv_per_poc);
    }

    /*************************************************************************/
    /* Populate the results to all thread ctxt                               */
    /*************************************************************************/
    for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    {
        l0_dyn_range_prms_t *ps_l0_prms_tmp;

        ps_l0_prms_tmp = &ps_master_ctxt->aps_me_ctxt[num_thrds]
                              ->aps_me_frm_prms[me_frm_id]
                              ->as_l0_dyn_range_prms[i4_idx_dvsr_p];

        ps_l0_prms_tmp->i2_dyn_max_y_per_poc = ps_l0_prms_thrd0->i2_dyn_max_y_per_poc;

        ps_l0_prms_tmp->i2_dyn_min_y_per_poc = ps_l0_prms_thrd0->i2_dyn_min_y_per_poc;

        ps_l0_prms_tmp->i4_display_num = ps_l0_prms_thrd0->i4_display_num;
    }

    /* Copy the dynamic search parameters of thread 0 into the other frame    */
    /* contexts (every frame id except me_frm_id) of every thread             */
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    {
        for(frm_num = 0; frm_num < MAX_NUM_ME_PARALLEL; frm_num++)
        {
            if(me_frm_id != frm_num)
            {
                me_frm_ctxt_t *ps_me_tmp_frm_ctxt;

                l0_dyn_range_prms_t *ps_dyn_range_prms;

                ps_me_tmp_frm_ctxt =
                    ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[frm_num];

                /* get current thrd dynamical search range param. pointer */
                ps_dyn_range_prms = &ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p];

                memcpy(ps_dyn_range_prms, ps_l0_prms_thrd0, sizeof(l0_dyn_range_prms_t));
            }
        }
    }
}
1602