1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /*!
22 ******************************************************************************
23 * \file ihevce_coarse_me_pass.c
24 *
25 * \brief
26 *    Converts the language of the encoder to language of me. This is an i/f
27 *    between the encoder style APIs and ME style APIs. This is basically
28 *    a memoryless glue layer.
29 *
30 * \date
31 *    22/10/2012
32 *
33 * \author
34 *    Ittiam
35 *
36 *
37 * List of Functions
38 *
39 *
40 ******************************************************************************
41 */
42 
43 /*****************************************************************************/
44 /* File Includes                                                             */
45 /*****************************************************************************/
46 /* System include files */
47 #include <stdio.h>
48 #include <string.h>
49 #include <stdlib.h>
50 #include <assert.h>
51 #include <stdarg.h>
52 #include <math.h>
53 
54 /* User include files */
55 #include "ihevc_typedefs.h"
56 #include "itt_video_api.h"
57 #include "ihevce_api.h"
58 
59 #include "rc_cntrl_param.h"
60 #include "rc_frame_info_collector.h"
61 #include "rc_look_ahead_params.h"
62 
63 #include "ihevc_defs.h"
64 #include "ihevc_structs.h"
65 #include "ihevc_platform_macros.h"
66 #include "ihevc_deblk.h"
67 #include "ihevc_itrans_recon.h"
68 #include "ihevc_chroma_itrans_recon.h"
69 #include "ihevc_chroma_intra_pred.h"
70 #include "ihevc_intra_pred.h"
71 #include "ihevc_inter_pred.h"
72 #include "ihevc_mem_fns.h"
73 #include "ihevc_padding.h"
74 #include "ihevc_weighted_pred.h"
75 #include "ihevc_sao.h"
76 #include "ihevc_resi_trans.h"
77 #include "ihevc_quant_iquant_ssd.h"
78 #include "ihevc_cabac_tables.h"
79 
80 #include "ihevce_defs.h"
81 #include "ihevce_lap_enc_structs.h"
82 #include "ihevce_multi_thrd_structs.h"
83 #include "ihevce_me_common_defs.h"
84 #include "ihevce_had_satd.h"
85 #include "ihevce_error_codes.h"
86 #include "ihevce_bitstream.h"
87 #include "ihevce_cabac.h"
88 #include "ihevce_rdoq_macros.h"
89 #include "ihevce_function_selector.h"
90 #include "ihevce_enc_structs.h"
91 #include "ihevce_entropy_structs.h"
92 #include "ihevce_cmn_utils_instr_set_router.h"
93 #include "ihevce_enc_loop_structs.h"
94 #include "ihevce_bs_compute_ctb.h"
95 #include "ihevce_global_tables.h"
96 #include "ihevce_dep_mngr_interface.h"
97 #include "hme_datatype.h"
98 #include "hme_interface.h"
99 #include "hme_common_defs.h"
100 #include "hme_defs.h"
101 #include "ihevce_me_instr_set_router.h"
102 #include "ihevce_ipe_instr_set_router.h"
103 #include "ihevce_ipe_structs.h"
104 #include "hme_globals.h"
105 #include "hme_utils.h"
106 #include "hme_coarse.h"
107 #include "hme_refine.h"
108 #include "ihevce_me_pass.h"
109 #include "ihevce_coarse_me_pass.h"
110 
111 /*****************************************************************************/
112 /* Function Definitions                                                      */
113 /*****************************************************************************/
114 
115 /*!
116 ******************************************************************************
117 * \if Function name : ihevce_coarse_me_get_num_mem_recs \endif
118 *
119 * \brief
120 *    Number of memory records are returned for ME module
121 *    Note : Include total mem. req. for HME + Total mem. req. for Dep Mngr for HME
122 *
123 * \return
124 *    Number of memory records
125 *
126 * \author
127 *  Ittiam
128 *
129 *****************************************************************************
130 */
ihevce_coarse_me_get_num_mem_recs()131 WORD32 ihevce_coarse_me_get_num_mem_recs()
132 {
133     WORD32 hme_mem_recs = hme_coarse_num_alloc();
134     WORD32 hme_dep_mngr_mem_recs = hme_coarse_dep_mngr_num_alloc();
135 
136     return ((hme_mem_recs + hme_dep_mngr_mem_recs));
137 }
138 
139 /*!
140 ******************************************************************************
141 * \if Function name : ihevce_coarse_me_get_mem_recs \endif
142 *
143 * \brief
144 *    Memory requirements are returned for coarse ME.
145 *
146 * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
147 * \param[in] ps_init_prms : Create time static parameters
148 * \param[in] i4_num_proc_thrds : Number of processing threads for this module
149 * \param[in] i4_mem_space : memspace in whihc memory request should be done
150 *
151 * \return
152 *    Number of records
153 *
154 * \author
155 *  Ittiam
156 *
157 *****************************************************************************
158 */
ihevce_coarse_me_get_mem_recs(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,WORD32 i4_mem_space,WORD32 i4_resolution_id)159 WORD32 ihevce_coarse_me_get_mem_recs(
160     iv_mem_rec_t *ps_mem_tab,
161     ihevce_static_cfg_params_t *ps_init_prms,
162     WORD32 i4_num_proc_thrds,
163     WORD32 i4_mem_space,
164     WORD32 i4_resolution_id)
165 {
166     hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS];
167     WORD32 n_tabs, i;
168 
169     /* Init prms structure specific to HME */
170     hme_init_prms_t s_hme_init_prms;
171 
172     //return (ihevce_coarse_me_get_num_mem_recs());
173     /*************************************************************************/
174     /* code flow: we call hme alloc function and then remap those memtabs    */
175     /* to a different type of memtab structure.                              */
176     /*************************************************************************/
177     ASSERT(HME_COARSE_TOT_MEMTABS >= hme_coarse_num_alloc());
178 
179     /*************************************************************************/
180     /* POPULATE THE HME INIT PRMS                                            */
181     /*************************************************************************/
182     ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id);
183 
184     /*************************************************************************/
185     /* CALL THE ME FUNCTION TO GET MEMTABS                                   */
186     /*************************************************************************/
187     n_tabs = hme_coarse_alloc(&as_memtabs[0], &s_hme_init_prms);
188     ASSERT(n_tabs == hme_coarse_num_alloc());
189 
190     /*************************************************************************/
191     /* REMAP RESULTS TO ENCODER MEMTAB STRUCTURE                             */
192     /*************************************************************************/
193     for(i = 0; i < n_tabs; i++)
194     {
195         ps_mem_tab[i].i4_mem_size = as_memtabs[i].size;
196         ps_mem_tab[i].i4_mem_alignment = as_memtabs[i].align;
197         ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
198         ps_mem_tab[i].i4_size = sizeof(iv_mem_rec_t);
199     }
200 
201     /*************************************************************************/
202     /* --- HME Coarse sync Dep Mngr Mem requests --                          */
203     /*************************************************************************/
204     {
205         WORD32 n_dep_tabs;
206 
207         ps_mem_tab += n_tabs;
208 
209         n_dep_tabs = hme_coarse_dep_mngr_alloc(
210             ps_mem_tab, ps_init_prms, i4_mem_space, i4_num_proc_thrds, i4_resolution_id);
211 
212         ASSERT(n_dep_tabs == hme_coarse_dep_mngr_num_alloc());
213 
214         /* Update the total no. of mem tabs */
215         n_tabs += n_dep_tabs;
216     }
217 
218     return (n_tabs);
219 }
220 
221 /*!
222 ******************************************************************************
223 * \if Function name : ihevce_coarse_me_init \endif
224 *
225 * \brief
226 *    Intialization for ME context state structure .
227 *
228 * \param[in] ps_mem_tab : pointer to memory descriptors table
229 * \param[in] ps_init_prms : Create time static parameters
230 * \param[in] pv_osal_handle : Osal handle
231 *
232 * \return
233 *    Handle to the ME context
234 *
235 * \author
236 *  Ittiam
237 *
238 *****************************************************************************
239 */
ihevce_coarse_me_init(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,void * pv_osal_handle,WORD32 i4_resolution_id,UWORD8 u1_is_popcnt_available)240 void *ihevce_coarse_me_init(
241     iv_mem_rec_t *ps_mem_tab,
242     ihevce_static_cfg_params_t *ps_init_prms,
243     WORD32 i4_num_proc_thrds,
244     void *pv_osal_handle,
245     WORD32 i4_resolution_id,
246     UWORD8 u1_is_popcnt_available)
247 {
248     /* ME handle to be returned */
249     void *pv_me_ctxt;
250     WORD32 status;
251     coarse_me_master_ctxt_t *ps_ctxt;
252 
253     /* Init prms structure specific to HME */
254     hme_init_prms_t s_hme_init_prms;
255 
256     /* memtabs to be passed to hme */
257     hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS];
258     WORD32 n_tabs, n_dep_tabs, i;
259 
260     /*************************************************************************/
261     /* POPULATE THE HME INIT PRMS                                            */
262     /*************************************************************************/
263     ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id);
264 
265     /*************************************************************************/
266     /* Ensure local declaration is sufficient                                */
267     /*************************************************************************/
268     n_tabs = hme_coarse_num_alloc();
269     ASSERT(HME_COARSE_TOT_MEMTABS >= n_tabs);
270 
271     /*************************************************************************/
272     /* MAP RESULTS TO HME MEMTAB STRUCTURE                                   */
273     /*************************************************************************/
274     for(i = 0; i < n_tabs; i++)
275     {
276         as_memtabs[i].size = ps_mem_tab[i].i4_mem_size;
277         as_memtabs[i].align = ps_mem_tab[i].i4_mem_alignment;
278         as_memtabs[i].pu1_mem = (U08 *)ps_mem_tab[i].pv_base;
279     }
280     /*************************************************************************/
281     /* CALL THE ME FUNCTION TO GET MEMTABS                                   */
282     /*************************************************************************/
283     pv_me_ctxt = (void *)as_memtabs[0].pu1_mem;
284     status = hme_coarse_init(pv_me_ctxt, &as_memtabs[0], &s_hme_init_prms);
285     ps_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
286     if(status == -1)
287         return NULL;
288 
289     /*************************************************************************/
290     /* --- HME sync Dep Mngr Mem init --                                     */
291     /*************************************************************************/
292 
293     ps_mem_tab += n_tabs;
294 
295     n_dep_tabs = hme_coarse_dep_mngr_init(
296         ps_mem_tab, ps_init_prms, pv_me_ctxt, pv_osal_handle, i4_num_proc_thrds, i4_resolution_id);
297     ASSERT(n_dep_tabs <= hme_coarse_dep_mngr_num_alloc());
298 
299     n_tabs += n_dep_tabs;
300 
301     ihevce_me_instr_set_router(
302         (ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list,
303         ps_init_prms->e_arch_type);
304 
305     ihevce_cmn_utils_instr_set_router(
306         &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
307 
308     return (pv_me_ctxt);
309 }
310 
311 /*!
312 ******************************************************************************
313 * \if Function name : ihevce_coarse_me_reg_thrds_sem \endif
314 *
315 * \brief
316 *    Intialization for ME context state structure with semaphores .
317 *
318 * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
319 * \param[in] ppv_sem_hdls : Array of semaphore handles
320 * \param[in] i4_num_proc_thrds : Number of processing threads
321 *
322 * \return
323 *   none
324 *
325 * \author
326 *  Ittiam
327 *
328 *****************************************************************************
329 */
ihevce_coarse_me_reg_thrds_sem(void * pv_me_ctxt,void ** ppv_sem_hdls,WORD32 i4_num_proc_thrds)330 void ihevce_coarse_me_reg_thrds_sem(void *pv_me_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
331 {
332     hme_coarse_dep_mngr_reg_sem(pv_me_ctxt, ppv_sem_hdls, i4_num_proc_thrds);
333 
334     return;
335 }
336 
337 /*!
338 ******************************************************************************
339 * \if Function name : ihevce_coarse_me_delete \endif
340 *
341 * \brief
342 *    Destroy Coarse ME module
343 * Note : Only Destroys the resources allocated in the module like
344 *   semaphore,etc. Memory free is done Separately using memtabs
345 *
346 * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
347 * \param[in] ps_init_prms : Create time static parameters
348 * \param[in] pv_osal_handle : Osal handle
349 *
350 * \return
351 *    None
352 *
353 * \author
354 *  Ittiam
355 *
356 *****************************************************************************
357 */
ihevce_coarse_me_delete(void * pv_me_ctxt,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_resolution_id)358 void ihevce_coarse_me_delete(
359     void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id)
360 {
361     /* --- HME sync Dep Mngr Delete --*/
362     hme_coarse_dep_mngr_delete(pv_me_ctxt, ps_init_prms, i4_resolution_id);
363 }
364 
365 /**
366 *******************************************************************************
367 * \if Function name : ihevce_coarse_me_set_resolution \endif
368 *
369 * \brief
370 *    Sets the resolution for ME state
371 *
372 * \par Description:
373 *    ME requires information of resolution to prime up its layer descriptors
374 *    and contexts. This API is called whenever a control call from application
375 *    causes a change of resolution. Has to be called once initially before
376 *    processing any frame. Again this is just a glue function and calls the
377 *    actual ME API for the same.
378 *
379 * \param[in,out] pv_me_ctxt: Handle to the ME context
380 * \param[in] n_enc_layers: Number of layers getting encoded
381 * \param[in] p_wd : Pointer containing widths of each layer getting encoded.
382 * \param[in] p_ht : Pointer containing heights of each layer getting encoded.
383 *
384 * \returns
385 *  none
386 *
387 * \author
388 *  Ittiam
389 *
390 *******************************************************************************
391 */
ihevce_coarse_me_set_resolution(void * pv_me_ctxt,WORD32 n_enc_layers,WORD32 * p_wd,WORD32 * p_ht)392 void ihevce_coarse_me_set_resolution(
393     void *pv_me_ctxt, WORD32 n_enc_layers, WORD32 *p_wd, WORD32 *p_ht)
394 {
395     /* local variables */
396     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
397     WORD32 thrds;
398 
399     for(thrds = 0; thrds < ps_master_ctxt->i4_num_proc_thrds; thrds++)
400     {
401         coarse_me_ctxt_t *ps_me_thrd_ctxt;
402 
403         ps_me_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrds];
404 
405         hme_coarse_set_resolution((void *)ps_me_thrd_ctxt, n_enc_layers, p_wd, p_ht);
406     }
407 }
/*!
******************************************************************************
* \if Function name : ihevce_coarse_me_get_rc_param \endif
*
* \brief
*    Collects the per-thread coarse ME statistics for rate control.
*
* \par Description:
*    The L1 HME best cost and L1 HME SAD of every thread context are summed
*    into outputs that are reset to 0 first. The high-SAD block count and the
*    total block count are ADDED to whatever the caller passed in (they are
*    not reset here), and only when is_i_pic equals bidir_enabled in the
*    thread's frame params.
*
* \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
* \param[out] i8_acc_frame_hme_cost : sum of i4_L1_hme_best_cost over threads
* \param[out] i8_acc_frame_hme_sad : sum of i4_L1_hme_sad over threads
* \param[in,out] i8_acc_num_blks_higher_sad : incremented by
*                i4_num_blks_high_sad of each qualifying thread
* \param[in,out] i8_total_blks : incremented by i4_num_blks of each
*                qualifying thread
* \param[in] i4_is_prev_pic_same_scene : not read by this function
*
* \return
*    None
*
* \author
*  Ittiam
*
*****************************************************************************
*/
void ihevce_coarse_me_get_rc_param(
    void *pv_me_ctxt,
    LWORD64 *i8_acc_frame_hme_cost,
    LWORD64 *i8_acc_frame_hme_sad,
    LWORD64 *i8_acc_num_blks_higher_sad,
    LWORD64 *i8_total_blks,
    WORD32 i4_is_prev_pic_same_scene)
{
    coarse_me_master_ctxt_t *ps_master = (coarse_me_master_ctxt_t *)pv_me_ctxt;
    WORD32 thrd_idx;

    /* Only the cost and SAD accumulators start from zero */
    *i8_acc_frame_hme_cost = 0;
    *i8_acc_frame_hme_sad = 0;

    for(thrd_idx = 0; thrd_idx < ps_master->i4_num_proc_thrds; thrd_idx++)
    {
        coarse_me_ctxt_t *ps_thrd = ps_master->aps_me_ctxt[thrd_idx];
        WORD32 flags_match;

        *i8_acc_frame_hme_cost += ps_thrd->i4_L1_hme_best_cost;

        /* Calculate me cost wrt. to ref only for P frame */
        flags_match = (ps_thrd->s_frm_prms.is_i_pic == ps_thrd->s_frm_prms.bidir_enabled);
        if(flags_match)
        {
            *i8_acc_num_blks_higher_sad += ps_thrd->i4_num_blks_high_sad;
            *i8_total_blks += ps_thrd->i4_num_blks;
        }

        *i8_acc_frame_hme_sad += ps_thrd->i4_L1_hme_sad;
    }
}
438 
439 /*!
440 ******************************************************************************
441 * \if Function name : ihevce_coarse_me_process \endif
442 *
443 * \brief
444 *    Frame level ME function
445 *
446 * \par Description:
447 *    Processing of all layers starting from coarse and going
448 *    to the refinement layers, except enocde layer
449 *
450 * \param[in] pv_ctxt : pointer to ME module
451 * \param[in] ps_enc_lap_inp  : pointer to input yuv buffer (frame buffer)
452 * \param[in,out] ps_ctb_out : pointer to CTB analyse output structure (frame buffer)
453 * \param[out] ps_cu_out : pointer to CU analyse output structure (frame buffer)
454 * \param[in]  pd_intra_costs : pointerto intra cost buffer
455 * \param[in]  ps_multi_thrd_ctxt : pointer to multi thread ctxt
456 * \param[in]  thrd_id : Thread id of the current thrd in which function is executed
457 *
458 * \return
459 *    None
460 *
461 * \author
462 *  Ittiam
463 *
464 *****************************************************************************
465 */
ihevce_coarse_me_process(void * pv_me_ctxt,ihevce_lap_enc_buf_t * ps_enc_lap_inp,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,WORD32 thrd_id,WORD32 i4_ping_pong)466 void ihevce_coarse_me_process(
467     void *pv_me_ctxt,
468     ihevce_lap_enc_buf_t *ps_enc_lap_inp,
469     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
470     WORD32 thrd_id,
471     WORD32 i4_ping_pong)
472 
473 {
474     /* local variables */
475     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
476     coarse_me_ctxt_t *ps_thrd_ctxt;
477 
478     /* get the current thread ctxt pointer */
479     ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrd_id];
480     ps_thrd_ctxt->thrd_id = thrd_id;
481 
482     /* frame level processing function */
483     hme_coarse_process_frm(
484         (void *)ps_thrd_ctxt,
485         &ps_master_ctxt->s_ref_map,
486         &ps_master_ctxt->s_frm_prms,
487         ps_multi_thrd_ctxt,
488         i4_ping_pong,
489         &ps_master_ctxt->apv_dep_mngr_hme_sync[0]);
490 
491     return;
492 }
493 
494 /*!
495 ******************************************************************************
496 * \if Function name : ihevce_coarse_me_frame_end \endif
497 *
498 * \brief
499 *    End of frame update function performs
500 *       - GMV collation
501 *       - Dynamic Search Range collation
502 *
503 * \param[in] pv_ctxt : pointer to ME module
504 *
505 * \return
506 *    None
507 *
508 * \author
509 *  Ittiam
510 *
511 *****************************************************************************
512 */
ihevce_coarse_me_frame_end(void * pv_me_ctxt)513 void ihevce_coarse_me_frame_end(void *pv_me_ctxt)
514 {
515     /* local variables */
516     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
517     coarse_me_ctxt_t *ps_thrd0_ctxt;
518     layer_ctxt_t *ps_curr_layer;
519     WORD32 num_ref, num_thrds, cur_poc;
520     WORD32 coarse_layer_id;
521     WORD32 i4_num_ref;
522     ME_QUALITY_PRESETS_T e_me_quality_preset;
523 
524     /* GMV collation is done for coarse Layer only */
525     ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
526     coarse_layer_id = ps_thrd0_ctxt->num_layers - 1;
527     ps_curr_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[coarse_layer_id];
528     i4_num_ref = ps_master_ctxt->s_ref_map.i4_num_ref;
529     e_me_quality_preset = ps_thrd0_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
530 
531     /* No processing is required if current pic is I pic */
532     if(1 == ps_master_ctxt->s_frm_prms.is_i_pic)
533     {
534         return;
535     }
536 
537     /* use thrd 0 ctxt to collate the GMVs histogram and Dynamic Search Range */
538     /* across all threads */
539     for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
540     {
541         WORD32 i4_offset, i4_lobe_size, i4_layer_id;
542         mv_hist_t *ps_hist_thrd0;
543         dyn_range_prms_t *aps_dyn_range_prms_thrd0[MAX_NUM_LAYERS];
544 
545         ps_hist_thrd0 = ps_thrd0_ctxt->aps_mv_hist[num_ref];
546 
547         /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
548         if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled)
549         {
550             for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--)
551             {
552                 aps_dyn_range_prms_thrd0[i4_layer_id] =
553                     &ps_thrd0_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref];
554             }
555         }
556 
557         i4_lobe_size = ps_hist_thrd0->i4_lobe1_size;
558         i4_offset = i4_lobe_size >> 1;
559 
560         /* run a loop over all the other threads to add up the histogram */
561         /* and to update the dynamical search range                      */
562         for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
563         {
564             dyn_range_prms_t *ps_dyn_range_prms;
565 
566             if(ME_XTREME_SPEED_25 != e_me_quality_preset)
567             {
568                 mv_hist_t *ps_hist;
569                 WORD32 i4_y, i4_x;
570                 /* get current thrd histogram pointer */
571                 ps_hist = ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_mv_hist[num_ref];
572 
573                 /* Accumalate the Bin count for all the thread */
574                 for(i4_y = 0; i4_y < ps_hist_thrd0->i4_num_rows; i4_y++)
575                 {
576                     for(i4_x = 0; i4_x < ps_hist_thrd0->i4_num_cols; i4_x++)
577                     {
578                         S32 i4_bin_id;
579 
580                         i4_bin_id = i4_x + (i4_y * ps_hist_thrd0->i4_num_cols);
581 
582                         ps_hist_thrd0->ai4_bin_count[i4_bin_id] +=
583                             ps_hist->ai4_bin_count[i4_bin_id];
584                     }
585                 }
586             }
587 
588             /* Update the dynamical search range for each Layer              */
589             /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
590             if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled)
591             {
592                 for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--)
593                 {
594                     /* get current thrd, layer dynamical search range param. pointer */
595                     ps_dyn_range_prms =
596                         &ps_master_ctxt->aps_me_ctxt[num_thrds]
597                              ->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref];
598                     /* TODO : This calls can be optimized further. No need for min in 1st call and max in 2nd call */
599                     hme_update_dynamic_search_params(
600                         aps_dyn_range_prms_thrd0[i4_layer_id], ps_dyn_range_prms->i2_dyn_max_y);
601 
602                     hme_update_dynamic_search_params(
603                         aps_dyn_range_prms_thrd0[i4_layer_id], ps_dyn_range_prms->i2_dyn_min_y);
604                 }
605             }
606         }
607     }
608 
609     /*************************************************************************/
610     /* Get the MAX/MIN per POC distance based on the all the ref. pics       */
611     /*************************************************************************/
612     /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
613     if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled)
614     {
615         WORD32 i4_layer_id;
616         cur_poc = ps_thrd0_ctxt->i4_curr_poc;
617 
618         for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--)
619         {
620             ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] = 0;
621             ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] = 0;
622         }
623 
624         for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
625         {
626             for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--)
627             {
628                 WORD16 i2_mv_per_poc;
629                 WORD32 ref_poc, poc_diff;
630                 dyn_range_prms_t *ps_dyn_range_prms_thrd0;
631 
632                 ps_dyn_range_prms_thrd0 =
633                     &ps_thrd0_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref];
634 
635                 ref_poc = ps_dyn_range_prms_thrd0->i4_poc;
636                 ASSERT(ref_poc < cur_poc);
637                 poc_diff = (cur_poc - ref_poc);
638 
639                 /* cur. ref. pic. max y per POC */
640                 i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_max_y + (poc_diff - 1)) / poc_diff;
641                 /* update the max y per POC */
642                 ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] =
643                     MAX(ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id],
644                         i2_mv_per_poc);
645 
646                 /* cur. ref. pic. min y per POC */
647                 i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_min_y - (poc_diff - 1)) / poc_diff;
648                 /* update the min y per POC */
649                 ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] =
650                     MIN(ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id],
651                         i2_mv_per_poc);
652             }
653         }
654 
655         /*************************************************************************/
656         /* Populate the results to all thread ctxt                               */
657         /*************************************************************************/
658         for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
659         {
660             for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--)
661             {
662                 ps_master_ctxt->aps_me_ctxt[num_thrds]
663                     ->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] =
664                     ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id];
665 
666                 ps_master_ctxt->aps_me_ctxt[num_thrds]
667                     ->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] =
668                     ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id];
669             }
670         }
671     }
672 
673     if(ME_XTREME_SPEED_25 != e_me_quality_preset)
674     {
675         /* call the function which calcualtes the GMV    */
676         /* layer pointer is shared across all threads    */
677         /* hence all threads will have access to updated */
678         /* GMVs populated using thread 0 ctxt            */
679         for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
680         {
681             hme_calculate_global_mv(
682                 ps_thrd0_ctxt->aps_mv_hist[num_ref],
683                 &ps_curr_layer->s_global_mv[num_ref][GMV_THICK_LOBE],
684                 GMV_THICK_LOBE);
685         }
686     }
687     return;
688 }
689 
690 /*!
691 ******************************************************************************
692 * \if Function name : ihevce_coarse_me_frame_dpb_update \endif
693 *
694 * \brief
695 *    Frame level ME initialisation function
696 *
697 * \par Description:
698 *   Updation of ME's internal DPB
699 *    based on available ref list information
700 *
701 * \param[in] pv_ctxt : pointer to ME module
702 * \param[in] num_ref_l0 : Number of reference pics in L0 list
703 * \param[in] num_ref_l1 : Number of reference pics in L1 list
704 * \param[in] pps_rec_list_l0 : List of recon pics in L0 list
705 * \param[in] pps_rec_list_l1 : List of recon pics in L1 list
706 *
707 * \return
708 *    None
709 *
710 * \author
711 *  Ittiam
712 *
713 *****************************************************************************
714 */
ihevce_coarse_me_frame_dpb_update(void * pv_me_ctxt,WORD32 num_ref_l0,WORD32 num_ref_l1,recon_pic_buf_t ** pps_rec_list_l0,recon_pic_buf_t ** pps_rec_list_l1)715 void ihevce_coarse_me_frame_dpb_update(
716     void *pv_me_ctxt,
717     WORD32 num_ref_l0,
718     WORD32 num_ref_l1,
719     recon_pic_buf_t **pps_rec_list_l0,
720     recon_pic_buf_t **pps_rec_list_l1)
721 {
722     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
723     coarse_me_ctxt_t *ps_thrd0_ctxt;
724     WORD32 a_pocs_buffered_in_me[MAX_NUM_REF + 1];
725     WORD32 a_pocs_to_remove[MAX_NUM_REF + 2];
726     WORD32 poc_remove_id = 0;
727     WORD32 i, count;
728 
729     /* All processing done using shared / common memory across */
730     /* threads is done using thrd ctxt */
731     ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
732 
733     /*************************************************************************/
734     /* Updation of ME's DPB list. This involves the following steps:         */
735     /* 1. Obtain list of active POCs maintained within ME.                   */
736     /* 2. Search each of them in the ref list. Whatever is not found goes to */
737     /*     the list to be removed. Note: a_pocs_buffered_in_me holds the     */
738     /*    currently active POC list within ME. a_pocs_to_remove holds the    */
739     /*    list of POCs to be removed, terminated by -1.                      */
740     /*************************************************************************/
741     hme_coarse_get_active_pocs_list((void *)ps_thrd0_ctxt, a_pocs_buffered_in_me);
742 
743     count = 0;
744     while(a_pocs_buffered_in_me[count] != -1)
745     {
746         WORD32 poc_to_search = a_pocs_buffered_in_me[count];
747         WORD32 match_found_flag = 0;
748 
749         /*********************************************************************/
750         /* Search in any one list (L0/L1) since both lists contain all the   */
751         /* active ref pics.                                                  */
752         /*********************************************************************/
753         for(i = 0; i < num_ref_l0; i++)
754         {
755             if(poc_to_search == pps_rec_list_l0[i]->i4_poc)
756             {
757                 match_found_flag = 1;
758                 break;
759             }
760         }
761         for(i = 0; i < num_ref_l1; i++)
762         {
763             if(poc_to_search == pps_rec_list_l1[i]->i4_poc)
764             {
765                 match_found_flag = 1;
766                 break;
767             }
768         }
769 
770         if(0 == match_found_flag)
771         {
772             /*****************************************************************/
773             /* POC buffered inside ME but not part of ref list given by DPB  */
774             /* Hence this needs to be flagged to ME for removal.             */
775             /*****************************************************************/
776             a_pocs_to_remove[poc_remove_id] = poc_to_search;
777             poc_remove_id++;
778         }
779         count++;
780     }
781 
782     /* List termination */
783     a_pocs_to_remove[poc_remove_id] = -1;
784 
785     /* Call the ME API to remove "outdated" POCs */
786     hme_coarse_discard_frm(ps_thrd0_ctxt, a_pocs_to_remove);
787 }
788 
789 /*!
790 ******************************************************************************
791 * \if Function name : ihevce_coarse_me_frame_init \endif
792 *
793 * \brief
794 *    Coarse Frame level ME initialisation function
795 *
796 * \par Description:
797 *    The following pre-conditions exist for this function: a. We have the input
798 *    pic ready for encode, b. We have the reference list with POC, L0/L1 IDs
799 *    and ref ptrs ready for this picture and c. ihevce_me_set_resolution has
800 *    been called atleast once. Once these are supplied, the following are
801 *    done here: a. Input pyramid creation, b. Updation of ME's internal DPB
802 *    based on available ref list information
803 *
804 * \param[in] pv_ctxt : pointer to ME module
805 * \param[in] ps_frm_ctb_prms : CTB characteristics parameters
806 * \param[in] ps_frm_lamda : Frame level Lambda params
807 * \param[in] num_ref_l0 : Number of reference pics in L0 list
808 * \param[in] num_ref_l1 : Number of reference pics in L1 list
809 * \param[in] num_ref_l0_active : Active reference pics in L0 dir for current frame (shall be <= num_ref_l0)
810 * \param[in] num_ref_l1_active : Active reference pics in L1 dir for current frame (shall be <= num_ref_l1)
811 * \param[in] pps_rec_list_l0 : List of recon pics in L0 list
812 * \param[in] pps_rec_list_l1 : List of recon pics in L1 list
813 * \param[in] ps_enc_lap_inp  : pointer to input yuv buffer (frame buffer)
814 * \param[in] i4_frm_qp       : current picture QP
815 *
816 * \return
817 *    None
818 *
819 * \author
820 *  Ittiam
821 *
822 *****************************************************************************
823 */
ihevce_coarse_me_frame_init(void * pv_me_ctxt,ihevce_static_cfg_params_t * ps_stat_prms,frm_ctb_ctxt_t * ps_frm_ctb_prms,frm_lambda_ctxt_t * ps_frm_lamda,WORD32 num_ref_l0,WORD32 num_ref_l1,WORD32 num_ref_l0_active,WORD32 num_ref_l1_active,recon_pic_buf_t ** pps_rec_list_l0,recon_pic_buf_t ** pps_rec_list_l1,ihevce_lap_enc_buf_t * ps_enc_lap_inp,WORD32 i4_frm_qp,ihevce_ed_blk_t * ps_layer1_buf,ihevce_ed_ctb_l1_t * ps_ed_ctb_l1,UWORD8 * pu1_me_reverse_map_info,WORD32 i4_temporal_layer_id)824 void ihevce_coarse_me_frame_init(
825     void *pv_me_ctxt,
826     ihevce_static_cfg_params_t *ps_stat_prms,
827     frm_ctb_ctxt_t *ps_frm_ctb_prms,
828     frm_lambda_ctxt_t *ps_frm_lamda,
829     WORD32 num_ref_l0,
830     WORD32 num_ref_l1,
831     WORD32 num_ref_l0_active,
832     WORD32 num_ref_l1_active,
833     recon_pic_buf_t **pps_rec_list_l0,
834     recon_pic_buf_t **pps_rec_list_l1,
835     ihevce_lap_enc_buf_t *ps_enc_lap_inp,
836     WORD32 i4_frm_qp,
837     ihevce_ed_blk_t *ps_layer1_buf,  //EIID
838     ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
839     UWORD8 *pu1_me_reverse_map_info,
840     WORD32 i4_temporal_layer_id)
841 {
842     /* local variables */
843     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
844     coarse_me_ctxt_t *ps_ctxt;
845     coarse_me_ctxt_t *ps_thrd0_ctxt;
846     WORD32 inp_poc, num_ref;
847     WORD32 i;
848 
849     /* Input POC is derived from input buffer */
850     inp_poc = ps_enc_lap_inp->s_lap_out.i4_poc;
851     num_ref = num_ref_l0 + num_ref_l1;
852 
853     /* All processing done using shared / common memory across */
854     /* threads is done using thrd 0 ctxt */
855     ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
856 
857     ps_master_ctxt->s_frm_prms.u1_num_active_ref_l0 = num_ref_l0_active;
858     ps_master_ctxt->s_frm_prms.u1_num_active_ref_l1 = num_ref_l1_active;
859 
860     /* store the frm ctb ctxt to all the thrd ctxt */
861     {
862         WORD32 num_thrds;
863 
864         /* initialise the parameters for all the threads */
865         for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
866         {
867             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
868             ps_ctxt->pv_ext_frm_prms = (void *)ps_frm_ctb_prms;
869             /*EIID: early decision buffer pointer */
870             ps_ctxt->ps_ed_blk = ps_layer1_buf;
871             ps_ctxt->ps_ed_ctb_l1 = ps_ed_ctb_l1;
872 
873             /* weighted pred enable flag */
874             ps_ctxt->i4_wt_pred_enable_flag = ps_enc_lap_inp->s_lap_out.i1_weighted_pred_flag |
875                                               ps_enc_lap_inp->s_lap_out.i1_weighted_bipred_flag;
876 
877             if(1 == ps_ctxt->i4_wt_pred_enable_flag)
878             {
879                 /* log2 weight denom  */
880                 ps_ctxt->s_wt_pred.wpred_log_wdc =
881                     ps_enc_lap_inp->s_lap_out.i4_log2_luma_wght_denom;
882             }
883             else
884             {
885                 /* default value */
886                 ps_ctxt->s_wt_pred.wpred_log_wdc = DENOM_DEFAULT;
887             }
888             ps_ctxt->i4_L1_hme_best_cost = 0;
889             ps_ctxt->i4_L1_hme_sad = 0;
890             ps_ctxt->i4_num_blks_high_sad = 0;
891             ps_ctxt->i4_num_blks = 0;
892 
893             ps_ctxt->pv_me_optimised_function_list = ps_master_ctxt->pv_me_optimised_function_list;
894             ps_ctxt->ps_cmn_utils_optimised_function_list = &ps_master_ctxt->s_cmn_opt_func;
895         }
896     }
897     /* Create the reference map for ME */
898     ihevce_me_create_ref_map(
899         pps_rec_list_l0,
900         pps_rec_list_l1,
901         num_ref_l0_active,
902         num_ref_l1_active,
903         num_ref,
904         &ps_master_ctxt->s_ref_map);
905     /*************************************************************************/
906     /* Call the ME frame level processing for further actiion.               */
907     /* ToDo: Support Row Level API.                                          */
908     /*************************************************************************/
909     ps_master_ctxt->s_frm_prms.i2_mv_range_x = ps_thrd0_ctxt->s_init_prms.max_horz_search_range;
910     ps_master_ctxt->s_frm_prms.i2_mv_range_y = ps_thrd0_ctxt->s_init_prms.max_vert_search_range;
911 
912     ps_master_ctxt->s_frm_prms.is_i_pic = 0;
913     ps_master_ctxt->s_frm_prms.i4_temporal_layer_id = i4_temporal_layer_id;
914 
915     ps_master_ctxt->s_frm_prms.is_pic_second_field =
916         (!(ps_enc_lap_inp->s_input_buf.i4_bottom_field ^
917            ps_enc_lap_inp->s_input_buf.i4_topfield_first));
918     {
919         S32 pic_type = ps_enc_lap_inp->s_lap_out.i4_pic_type;
920 
921         /*********************************************************************/
922         /* For I Pic, we do not call update fn at ctb level, instead we do   */
923         /* one shot update for entire picture.                               */
924         /*********************************************************************/
925         if((pic_type == IV_I_FRAME) || (pic_type == IV_II_FRAME) || (pic_type == IV_IDR_FRAME))
926         {
927             ps_master_ctxt->s_frm_prms.is_i_pic = 1;
928             ps_master_ctxt->s_frm_prms.bidir_enabled = 0;
929         }
930         else if((pic_type == IV_P_FRAME) || (pic_type == IV_PP_FRAME))
931         {
932             ps_master_ctxt->s_frm_prms.bidir_enabled = 0;
933         }
934         else if((pic_type == IV_B_FRAME) || (pic_type == IV_BB_FRAME))
935         {
936             ps_master_ctxt->s_frm_prms.bidir_enabled = 1;
937         }
938         else
939         {
940             /* not sure whether we need to handle mixed frames like IP, */
941             /* they should ideally come as single field. */
942             /* TODO : resolve thsi ambiguity */
943             ASSERT(0);
944         }
945     }
946     /************************************************************************/
947     /* Lambda calculations moved outside ME and to one place, so as to have */
948     /* consistent lambda across ME, IPE, CL RDOPT etc                       */
949     /************************************************************************/
950 
951     {
952 #define CLIP3_F(min, max, val) (((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val)))
953         double q_steps[6] = { 0.625, 0.703, 0.79, 0.889, 1.0, 1.125 };
954         double d_b_pic_factor;
955         double d_q_factor;
956         //double d_lambda;
957         UWORD8 u1_temp_hier = ps_enc_lap_inp->s_lap_out.i4_temporal_lyr_id;
958 
959         if(u1_temp_hier)
960         {
961             d_b_pic_factor = CLIP3_F(2.0, 4.0, (i4_frm_qp - 12.0) / 6.0);
962         }
963         else
964             d_b_pic_factor = 1.0;
965 
966         d_q_factor = (1 << (i4_frm_qp / 6)) * q_steps[i4_frm_qp % 6];
967         ps_master_ctxt->s_frm_prms.qstep = (WORD32)d_q_factor;
968         ps_master_ctxt->s_frm_prms.i4_frame_qp = i4_frm_qp;
969     }
970 
971     /* HME Dependency Manager : Reset the num ctb processed in every row */
972     /* for ME sync in every layer                                        */
973     {
974         WORD32 ctr;
975         for(ctr = 1; ctr < ps_thrd0_ctxt->num_layers; ctr++)
976         {
977             void *pv_dep_mngr_state;
978             pv_dep_mngr_state = ps_master_ctxt->apv_dep_mngr_hme_sync[ctr - 1];
979 
980             ihevce_dmgr_rst_row_row_sync(pv_dep_mngr_state);
981         }
982     }
983 
984     /* Frame level init of all threads of ME */
985     {
986         WORD32 num_thrds;
987 
988         /* initialise the parameters for all the threads */
989         for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
990         {
991             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
992 
993             hme_coarse_process_frm_init(
994                 (void *)ps_ctxt, ps_ctxt->ps_hme_ref_map, ps_ctxt->ps_hme_frm_prms);
995         }
996     }
997 
998     ps_master_ctxt->s_frm_prms.i4_cl_sad_lambda_qf = ps_frm_lamda->i4_cl_sad_lambda_qf;
999     ps_master_ctxt->s_frm_prms.i4_cl_satd_lambda_qf = ps_frm_lamda->i4_cl_satd_lambda_qf;
1000     ps_master_ctxt->s_frm_prms.i4_ol_sad_lambda_qf = ps_frm_lamda->i4_ol_sad_lambda_qf;
1001     ps_master_ctxt->s_frm_prms.i4_ol_satd_lambda_qf = ps_frm_lamda->i4_ol_satd_lambda_qf;
1002     ps_master_ctxt->s_frm_prms.lambda_q_shift = LAMBDA_Q_SHIFT;
1003 
1004     ps_master_ctxt->s_frm_prms.pf_interp_fxn = NULL;
1005 
1006     /*************************************************************************/
1007     /* If num ref is 0, that means that it has to be coded as I. Do nothing  */
1008     /* However mv bank update needs to happen with "intra" mv.               */
1009     /*************************************************************************/
1010     if(ps_master_ctxt->s_ref_map.i4_num_ref == 0 || ps_master_ctxt->s_frm_prms.is_i_pic)
1011     {
1012         for(i = 1; i < ps_thrd0_ctxt->num_layers; i++)
1013         {
1014             layer_ctxt_t *ps_layer_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[i];
1015             BLK_SIZE_T e_blk_size;
1016             S32 use_4x4;
1017 
1018             /* The mv bank is filled with "intra" mv */
1019             use_4x4 = hme_get_mv_blk_size(
1020                 ps_thrd0_ctxt->s_init_prms.use_4x4,
1021                 i,
1022                 ps_thrd0_ctxt->num_layers,
1023                 ps_thrd0_ctxt->u1_encode[i]);
1024             e_blk_size = use_4x4 ? BLK_4x4 : BLK_8x8;
1025             hme_init_mv_bank(ps_layer_ctxt, e_blk_size, 2, 1, ps_ctxt->u1_encode[i]);
1026             hme_fill_mvbank_intra(ps_layer_ctxt);
1027 
1028             /* Clear out the global mvs */
1029             memset(
1030                 ps_layer_ctxt->s_global_mv,
1031                 0,
1032                 sizeof(hme_mv_t) * ps_thrd0_ctxt->max_num_ref * NUM_GMV_LOBES);
1033         }
1034 
1035         return;
1036     }
1037 
1038     /*************************************************************************/
1039     /* Coarse & refine Layer frm init (layer mem is common across thrds)     */
1040     /*************************************************************************/
1041     {
1042         coarse_prms_t s_coarse_prms;
1043         refine_prms_t s_refine_prms;
1044         S16 i2_max;
1045         S32 layer_id;
1046 
1047         layer_id = ps_thrd0_ctxt->num_layers - 1;
1048         i2_max = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x;
1049         i2_max = MAX(i2_max, ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y);
1050         s_coarse_prms.i4_layer_id = layer_id;
1051 
1052         {
1053             S32 log_start_step;
1054             /* Based on Preset, set the starting step size for Refinement */
1055             if(ME_MEDIUM_SPEED > ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets)
1056             {
1057                 log_start_step = 0;
1058             }
1059             else
1060             {
1061                 log_start_step = 1;
1062             }
1063             s_coarse_prms.i4_max_iters = i2_max >> log_start_step;
1064             s_coarse_prms.i4_start_step = 1 << log_start_step;
1065         }
1066         s_coarse_prms.i4_num_ref = ps_master_ctxt->s_ref_map.i4_num_ref;
1067         s_coarse_prms.do_full_search = 1;
1068         s_coarse_prms.num_results = ps_thrd0_ctxt->max_num_results_coarse;
1069 
1070         hme_coarse_frm_init(ps_thrd0_ctxt, &s_coarse_prms);
1071 
1072         layer_id--;
1073 
1074         /*************************************************************************/
1075         /* This loop will run for all refine layers (non- encode layers)          */
1076         /*************************************************************************/
1077         while(layer_id > 0)
1078         {
1079             layer_ctxt_t *ps_curr_layer;
1080             layer_ctxt_t *ps_coarse_layer;
1081 
1082             ps_coarse_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id + 1];
1083 
1084             ps_curr_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id];
1085 
1086             hme_set_refine_prms(
1087                 &s_refine_prms,
1088                 ps_thrd0_ctxt->u1_encode[layer_id],
1089                 ps_master_ctxt->s_ref_map.i4_num_ref,
1090                 layer_id,
1091                 ps_thrd0_ctxt->num_layers,
1092                 ps_thrd0_ctxt->num_layers_explicit_search,
1093                 ps_thrd0_ctxt->s_init_prms.use_4x4,
1094                 &ps_master_ctxt->s_frm_prms,
1095                 NULL,
1096                 &ps_thrd0_ctxt->s_init_prms.s_me_coding_tools);
1097 
1098             hme_refine_frm_init(ps_curr_layer, &s_refine_prms, ps_coarse_layer);
1099 
1100             layer_id--;
1101         }
1102     }
1103 
1104     return;
1105 }
1106 
1107 /*!
1108 ******************************************************************************
1109 * \if Function name : ihevce_decomp_pre_intra_frame_init \endif
1110 *
1111 * \brief
1112 *    Frame Intialization for Decomp intra pre analysis.
1113 *
1114 * \param[in] pv_ctxt : pointer to module ctxt
1115 * \param[in] ppu1_decomp_lyr_bufs : pointer to array of layer buffer pointers
1116 * \param[in] pi4_lyr_buf_stride : pointer to array of layer buffer strides
1117 *
1118 * \return
1119 *    None
1120 *
1121 * \author
1122 *  Ittiam
1123 *
1124 *****************************************************************************
1125 */
ihevce_coarse_me_get_lyr_buf_desc(void * pv_me_ctxt,UWORD8 ** ppu1_decomp_lyr_bufs,WORD32 * pi4_lyr_buf_stride)1126 WORD32 ihevce_coarse_me_get_lyr_buf_desc(
1127     void *pv_me_ctxt, UWORD8 **ppu1_decomp_lyr_bufs, WORD32 *pi4_lyr_buf_stride)
1128 {
1129     /* local variables */
1130     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
1131     coarse_me_ctxt_t *ps_thrd0_ctxt;
1132     WORD32 lyr_no;
1133     layers_descr_t *ps_curr_descr;
1134     WORD32 i4_free_idx;
1135 
1136     /* All processing done using shared / common memory across */
1137     /* threads is done using thrd0  ctxt */
1138     ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
1139 
1140     /* Obtain an empty layer descriptor */
1141     i4_free_idx = hme_coarse_find_free_descr_idx((void *)ps_thrd0_ctxt);
1142 
1143     ps_curr_descr = &ps_thrd0_ctxt->as_ref_descr[i4_free_idx];
1144 
1145     /* export all the layer buffers except Layer 0 (encode layer) */
1146     for(lyr_no = 1; lyr_no < ps_thrd0_ctxt->num_layers; lyr_no++)
1147     {
1148         pi4_lyr_buf_stride[lyr_no - 1] = ps_curr_descr->aps_layers[lyr_no]->i4_inp_stride;
1149         ppu1_decomp_lyr_bufs[lyr_no - 1] = ps_curr_descr->aps_layers[lyr_no]->pu1_inp;
1150     }
1151 
1152     return (i4_free_idx);
1153 }
1154 
1155 /*!
1156 ******************************************************************************
1157 * \if Function name : ihevce_coarse_me_get_lyr_prms_job_que \endif
1158 *
1159 * \brief Returns to the caller key attributes related to dependency between layers
1160 *          for multi-thread execution
1161 *
1162 *
1163 * \par Description:
1164 *    This function requires the precondition that the width and ht of encode
1165 *    layer is known, and ME API ihevce_me_set_resolution() API called with
1166 *    this info. Based on this, ME populates useful information for the encoder
1167 *    to execute the multi-thread (concurrent across layers) in this API.
1168 *    The number of layers, number of vertical units in each layer, and for
1169 *    each vertial unit in each layer, its dependency on previous layer's units
1170 *    From ME's perspective, a vertical unit is one which is smallest min size
1171 *    vertically (and spans the entire row horizontally). This is CTB for encode
1172 *    layer, and 8x8 / 4x4 for non encode layers.
1173 *
1174 * \param[in] pv_ctxt : ME handle
1175 * \param[in] ps_curr_inp : Input buffer descriptor
1176 * \param[out] pi4_num_hme_lyrs : Num of HME layers (ME updates)
1177 * \param[out] pi4_num_vert_units_in_lyr : Array of size N (num layers), each
1178 *                     entry has num vertical units in that particular layer
1179 * \param[in] ps_me_job_q_prms : Array of job queue prms, one for each unit in a
1180 *                 layer. Note that this is contiguous in order of processing
1181 *                 All k units of layer N-1 from top to bottom, followed by
1182 *                 all m units of layer N-2 .... ends with X units of layer 0
1183 *
1184 * \return
1185 *    None
1186 *
1187 * \author
1188 *  Ittiam
1189 *
1190 *****************************************************************************
1191 */
ihevce_coarse_me_get_lyr_prms_job_que(void * pv_me_ctxt,ihevce_lap_enc_buf_t * ps_curr_inp,WORD32 * pi4_num_hme_lyrs,WORD32 * pi4_num_vert_units_in_lyr,multi_thrd_me_job_q_prms_t * ps_me_job_q_prms)1192 void ihevce_coarse_me_get_lyr_prms_job_que(
1193     void *pv_me_ctxt,
1194     ihevce_lap_enc_buf_t *ps_curr_inp,
1195     WORD32 *pi4_num_hme_lyrs,
1196     WORD32 *pi4_num_vert_units_in_lyr,
1197     multi_thrd_me_job_q_prms_t *ps_me_job_q_prms)
1198 {
1199     coarse_me_ctxt_t *ps_ctxt;
1200     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
1201 
1202     /* These arrays and ptrs track input dependencies for units of a layer */
1203     /* This is a ping poing design, while using one part, we update other part */
1204     U08 au1_inp_dep[2][MAX_NUM_VERT_UNITS_FRM];
1205     U08 *pu1_inp_dep_c, *pu1_inp_dep_n;
1206 
1207     /* Height of current and next layers */
1208     S32 ht_c, ht_n;
1209 
1210     /* Blk ht at a given layer and next layer*/
1211     S32 unit_ht_c, unit_ht_n, blk_ht_c, blk_ht_n;
1212 
1213     /* Number of vertical units in current and next layer */
1214     S32 num_vert_c, num_vert_n;
1215 
1216     S32 ctb_size = 64, num_layers, i, j, k;
1217 
1218     /* since same layer desc pointer is stored in all thread ctxt */
1219     /* a free idx is obtained using 0th thread ctxt pointer */
1220     ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
1221 
1222     /* Set the number of layers */
1223     num_layers = ps_ctxt->num_layers;
1224     *pi4_num_hme_lyrs = num_layers;
1225 
1226     pu1_inp_dep_c = &au1_inp_dep[0][0];
1227     pu1_inp_dep_n = &au1_inp_dep[1][0];
1228 
1229     ASSERT(num_layers >= 2);
1230 
1231     ht_n = ps_ctxt->a_ht[num_layers - 2];
1232     ht_c = ps_ctxt->a_ht[num_layers - 1];
1233 
1234     /* compute blk ht and unit ht for c and n */
1235     if(ps_ctxt->u1_encode[num_layers - 1])
1236     {
1237         blk_ht_c = 16;
1238         unit_ht_c = ctb_size;
1239     }
1240     else
1241     {
1242         blk_ht_c = hme_get_blk_size(ps_ctxt->s_init_prms.use_4x4, num_layers - 1, num_layers, 0);
1243         unit_ht_c = blk_ht_c;
1244     }
1245 
1246     num_vert_c = (ht_c + unit_ht_c - 1) / unit_ht_c;
1247 
1248     /* For new design in Coarsest HME layer we need */
1249     /* one additional row extra at the end of frame */
1250     /* hence num_vert_c is incremented by 1         */
1251     num_vert_c++;
1252 
1253     /* Dummy initialization outside loop, not used first time */
1254     memset(pu1_inp_dep_c, 0, num_vert_c);
1255 
1256     /*************************************************************************/
1257     /* Run through each layer, set the number of vertical units and job queue*/
1258     /* attrs for each vert unit in the layer                                 */
1259     /*************************************************************************/
1260     for(i = num_layers - 1; i > 0; i--)
1261     {
1262         /* 0th entry is actually layer id num_layers - 1 */
1263         /* and entry num_layers-1 equals the biggest layer (id = 0) */
1264         pi4_num_vert_units_in_lyr[num_layers - 1 - i] = num_vert_c;
1265         /* "n" is computed for first time */
1266         ht_n = ps_ctxt->a_ht[i - 1];
1267         blk_ht_n = hme_get_blk_size(ps_ctxt->s_init_prms.use_4x4, i - 1, num_layers, 0);
1268         unit_ht_n = blk_ht_n;
1269         if(ps_ctxt->u1_encode[i - 1])
1270             unit_ht_n = ctb_size;
1271 
1272         num_vert_n = (ht_n + unit_ht_n - 1) / unit_ht_n;
1273         /* Initialize all units' inp dep in next layer to 0 */
1274         memset(pu1_inp_dep_n, 0, num_vert_n * sizeof(U08));
1275 
1276         /* Evaluate dependencies for this layer */
1277         for(j = 0; j < num_vert_c; j++)
1278         {
1279             S32 v1, v2;
1280 
1281             /* Output dependencies. When one unit in current layer finishes, */
1282             /* how many in the next layer it affects?. Assuming that the top */
1283             /* of this vertical unit and bottom of this vertical unit project*/
1284             /* somewhere in the next layer. The top of this vertical unit    */
1285             /* becomes the bottom right point for somebody, and the bottom of*/
1286             /* this vertical unit becomes the colocated pt for somebody, this*/
1287             /* is the extremum.                                              */
1288 
1289             /* for the initial unit affected by j in "c" layer, take j-1th   */
1290             /* unit top and project it.                                      */
1291             v1 = (j - 1) * unit_ht_c * ht_n;
1292             v1 /= (ht_c * unit_ht_n);
1293             v1 -= 1;
1294 
1295             /* for the final unit affected by j in "c" layer, take jth unit  */
1296             /* bottom and project it.                                        */
1297 
1298             v2 = (j + 1) * unit_ht_c * ht_n;
1299             v2 /= (ht_c * unit_ht_n);
1300             v2 += 1;
1301 
1302             /* Clip to be within valid limits */
1303             v1 = HME_CLIP(v1, 0, (num_vert_n - 1));
1304             v2 = HME_CLIP(v2, 0, (num_vert_n - 1));
1305 
1306             /* In the layer "n", units starting at offset v1, and upto v2 are*/
1307             /* dependent on unit j of layer "c". So for each of these units  */
1308             /* increment the dependency by 1 corresponding to "jth" unit in  */
1309             /* layer "c"                                                     */
1310             ps_me_job_q_prms->i4_num_output_dep = v2 - v1 + 1;
1311             ASSERT(ps_me_job_q_prms->i4_num_output_dep <= MAX_OUT_DEP);
1312             for(k = v1; k <= v2; k++)
1313                 pu1_inp_dep_n[k]++;
1314 
1315             /* Input dependency would have been calculated in prev run */
1316             ps_me_job_q_prms->i4_num_inp_dep = pu1_inp_dep_c[j];
1317             ASSERT(ps_me_job_q_prms->i4_num_inp_dep <= MAX_OUT_DEP);
1318 
1319             /* Offsets */
1320             for(k = v1; k <= v2; k++)
1321                 ps_me_job_q_prms->ai4_out_dep_unit_off[k - v1] = k;
1322 
1323             ps_me_job_q_prms++;
1324         }
1325 
1326         /* Compute the blk size and vert unit size in each layer             */
1327         /* "c" denotes curr layer, and "n" denotes the layer to which result */
1328         /* is projected to                                                   */
1329         ht_c = ht_n;
1330         blk_ht_c = blk_ht_n;
1331         unit_ht_c = unit_ht_n;
1332         num_vert_c = num_vert_n;
1333 
1334         /* Input dep count for next layer was computed this iteration. */
1335         /* Swap so that p_inp_dep_n becomes current for next iteration, */
1336         /* and p_inp_dep_c will become update area during next iteration */
1337         /* for next to next.                                             */
1338         {
1339             U08 *pu1_tmp = pu1_inp_dep_n;
1340             pu1_inp_dep_n = pu1_inp_dep_c;
1341             pu1_inp_dep_c = pu1_tmp;
1342         }
1343     }
1344 
1345     /* LAYER 0 OR ENCODE LAYER UPDATE : NO OUTPUT DEPS */
1346 
1347     /* set the numebr of vertical units */
1348     pi4_num_vert_units_in_lyr[num_layers - 1] = num_vert_c;
1349     for(j = 0; j < num_vert_c; j++)
1350     {
1351         /* Here there is no output dependency for ME. However this data is used for encode, */
1352         /* and there is a 1-1 correspondence between this and the encode     */
1353         /* Hence we set output dependency of 1 */
1354         ps_me_job_q_prms->i4_num_output_dep = 1;
1355         ps_me_job_q_prms->ai4_out_dep_unit_off[0] = j;
1356         ps_me_job_q_prms->i4_num_inp_dep = pu1_inp_dep_c[j];
1357         ASSERT(ps_me_job_q_prms->i4_num_inp_dep <= MAX_OUT_DEP);
1358         ps_me_job_q_prms++;
1359     }
1360 
1361     return;
1362 }
1363 
1364 /*!
1365 ******************************************************************************
1366 * \if Function name : ihevce_coarse_me_set_lyr1_mv_bank \endif
1367 *
1368 * \brief
1369 *    Frame level ME initialisation of MV bank of penultimate layer
1370 *
1371 * \par Description:
1372 *    Updates the Layer1 context with the given buffers
1373 *
1374 * \param[in] pv_me_ctxt : pointer to ME module
1375 * \param[in] pu1_mv_bank : MV bank buffer pointer
1376 * \param[in] pu1_ref_idx_bank : refrence bank buffer pointer
1377 *
1378 * \return
1379 *    None
1380 *
1381 * \author
1382 *  Ittiam
1383 *
1384 *****************************************************************************
1385 */
ihevce_coarse_me_set_lyr1_mv_bank(void * pv_me_ctxt,ihevce_lap_enc_buf_t * ps_enc_lap_inp,void * pv_mv_bank,void * pv_ref_idx_bank,WORD32 i4_curr_idx)1386 void ihevce_coarse_me_set_lyr1_mv_bank(
1387     void *pv_me_ctxt,
1388     ihevce_lap_enc_buf_t *ps_enc_lap_inp,
1389     void *pv_mv_bank,
1390     void *pv_ref_idx_bank,
1391     WORD32 i4_curr_idx)
1392 {
1393     coarse_me_ctxt_t *ps_thrd0_ctxt;
1394     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
1395     layer_ctxt_t *ps_lyr1_ctxt;
1396 
1397     /* Input descriptor that is updated and passed to ME */
1398     hme_inp_desc_t s_inp_desc;
1399 
1400     /*************************************************************************/
1401     /* Add the current input to ME's DPB. This will also create the pyramids */
1402     /* for the HME layers tha are not "encoded".                             */
1403     /*************************************************************************/
1404     s_inp_desc.i4_poc = ps_enc_lap_inp->s_lap_out.i4_poc;
1405     s_inp_desc.s_layer_desc[0].pu1_y = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_y_buf;
1406     s_inp_desc.s_layer_desc[0].pu1_u = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_u_buf;
1407     s_inp_desc.s_layer_desc[0].pu1_v = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_v_buf;
1408 
1409     s_inp_desc.s_layer_desc[0].luma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_y_strd;
1410     s_inp_desc.s_layer_desc[0].chroma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_uv_strd;
1411 
1412     hme_coarse_add_inp(pv_me_ctxt, &s_inp_desc, i4_curr_idx);
1413 
1414     /* All processing done using shared / common memory across */
1415     /* threads is done using thrd 0 ctxt since layer ctxt is shared accross all threads */
1416     ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
1417 
1418     ps_lyr1_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[1];
1419 
1420     /* register the mv bank & ref idx bank pointer */
1421     ps_lyr1_ctxt->ps_layer_mvbank->pi1_ref_idx_base = (S08 *)pv_ref_idx_bank;
1422     ps_lyr1_ctxt->ps_layer_mvbank->ps_mv_base = (hme_mv_t *)pv_mv_bank;
1423 
1424     return;
1425 }
1426 
1427 /*!
1428 ******************************************************************************
1429 * \if Function name : ihevce_coarse_me_get_lyr1_ctxt \endif
1430 *
1431 * \brief
1432 *    function to get teh Layer 1 properties to be passed on the encode layer
1433 *
1434 * \par Description:
1435 *    Ucopies the enitre layer ctxt emory to the destination
1436 *
1437 * \param[in] pv_me_ctxt : pointer to ME module
1438 * \param[in] pu1_mv_bank : MV bank buffer pointer
1439 * \param[in] pu1_ref_idx_bank : refrence bank buffer pointer
1440 *
1441 * \return
1442 *    None
1443 *
1444 * \author
1445 *  Ittiam
1446 *
1447 *****************************************************************************
1448 */
ihevce_coarse_me_get_lyr1_ctxt(void * pv_me_ctxt,void * pv_layer_ctxt,void * pv_layer_mv_bank_ctxt)1449 void ihevce_coarse_me_get_lyr1_ctxt(
1450     void *pv_me_ctxt, void *pv_layer_ctxt, void *pv_layer_mv_bank_ctxt)
1451 {
1452     coarse_me_ctxt_t *ps_thrd0_ctxt;
1453     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
1454     layer_ctxt_t *ps_lyr1_ctxt;
1455 
1456     /* All processing done using shared / common memory across */
1457     /* threads is done using thrd 0 ctxt since layer ctxt is shared accross all threads */
1458     ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
1459 
1460     /* get the context of layer 1 */
1461     ps_lyr1_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[1];
1462 
1463     /* copy the layer ctxt eve registerd mv bank & ref idx bank also goes in */
1464     memcpy(pv_layer_ctxt, ps_lyr1_ctxt, sizeof(layer_ctxt_t));
1465 
1466     /* copy the layer mv bank contents */
1467     memcpy(pv_layer_mv_bank_ctxt, ps_lyr1_ctxt->ps_layer_mvbank, sizeof(layer_mv_t));
1468 
1469     /* register the MV bank pointer in the layer ctxt*/
1470     ((layer_ctxt_t *)pv_layer_ctxt)->ps_layer_mvbank = (layer_mv_t *)pv_layer_mv_bank_ctxt;
1471 
1472     return;
1473 }
1474