1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /*!
22 ******************************************************************************
23 * \file ihevce_enc_loop_pass.c
24 *
25 * \brief
26 *    This file contains Encoder normative loop pass related functions
27 *
28 * \date
29 *    18/09/2012
30 *
31 * \author
32 *    Ittiam
33 *
34 *
35 * List of Functions
36 *
37 *
38 ******************************************************************************
39 */
40 
41 /*****************************************************************************/
42 /* File Includes                                                             */
43 /*****************************************************************************/
44 /* System include files */
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <assert.h>
49 #include <stdarg.h>
50 #include <math.h>
51 #include <limits.h>
52 
53 /* User include files */
54 #include "ihevc_typedefs.h"
55 #include "itt_video_api.h"
56 #include "ihevce_api.h"
57 
58 #include "rc_cntrl_param.h"
59 #include "rc_frame_info_collector.h"
60 #include "rc_look_ahead_params.h"
61 
62 #include "ihevc_defs.h"
63 #include "ihevc_macros.h"
64 #include "ihevc_debug.h"
65 #include "ihevc_structs.h"
66 #include "ihevc_platform_macros.h"
67 #include "ihevc_deblk.h"
68 #include "ihevc_itrans_recon.h"
69 #include "ihevc_chroma_itrans_recon.h"
70 #include "ihevc_chroma_intra_pred.h"
71 #include "ihevc_intra_pred.h"
72 #include "ihevc_inter_pred.h"
73 #include "ihevc_mem_fns.h"
74 #include "ihevc_padding.h"
75 #include "ihevc_weighted_pred.h"
76 #include "ihevc_sao.h"
77 #include "ihevc_resi_trans.h"
78 #include "ihevc_quant_iquant_ssd.h"
79 #include "ihevc_cabac_tables.h"
80 #include "ihevc_common_tables.h"
81 #include "ihevc_quant_tables.h"
82 
83 #include "ihevce_defs.h"
84 #include "ihevce_hle_interface.h"
85 #include "ihevce_lap_enc_structs.h"
86 #include "ihevce_multi_thrd_structs.h"
87 #include "ihevce_multi_thrd_funcs.h"
88 #include "ihevce_me_common_defs.h"
89 #include "ihevce_had_satd.h"
90 #include "ihevce_error_codes.h"
91 #include "ihevce_bitstream.h"
92 #include "ihevce_cabac.h"
93 #include "ihevce_rdoq_macros.h"
94 #include "ihevce_function_selector.h"
95 #include "ihevce_enc_structs.h"
96 #include "ihevce_entropy_structs.h"
97 #include "ihevce_cmn_utils_instr_set_router.h"
98 #include "ihevce_ipe_instr_set_router.h"
99 #include "ihevce_decomp_pre_intra_structs.h"
100 #include "ihevce_decomp_pre_intra_pass.h"
101 #include "ihevce_enc_loop_structs.h"
102 #include "ihevce_nbr_avail.h"
103 #include "ihevce_enc_loop_utils.h"
104 #include "ihevce_sub_pic_rc.h"
105 #include "ihevce_global_tables.h"
106 #include "ihevce_bs_compute_ctb.h"
107 #include "ihevce_cabac_rdo.h"
108 #include "ihevce_deblk.h"
109 #include "ihevce_frame_process.h"
110 #include "ihevce_rc_enc_structs.h"
111 #include "hme_datatype.h"
112 #include "hme_interface.h"
113 #include "hme_common_defs.h"
114 #include "hme_defs.h"
115 #include "ihevce_me_instr_set_router.h"
116 #include "ihevce_enc_subpel_gen.h"
117 #include "ihevce_inter_pred.h"
118 #include "ihevce_mv_pred.h"
119 #include "ihevce_mv_pred_merge.h"
120 #include "ihevce_enc_loop_inter_mode_sifter.h"
121 #include "ihevce_enc_cu_recursion.h"
122 #include "ihevce_enc_loop_pass.h"
123 #include "ihevce_common_utils.h"
124 #include "ihevce_dep_mngr_interface.h"
125 #include "ihevce_sao.h"
126 #include "ihevce_tile_interface.h"
127 #include "ihevce_profile.h"
128 
129 #include "cast_types.h"
130 #include "osal.h"
131 #include "osal_defaults.h"
132 
133 /*****************************************************************************/
134 /* Globals                                                                   */
135 /*****************************************************************************/
/* Tables declared extern: this file only declares them, it does not define them here. */
/* NOTE(review): judging by the dimension macros, presumably indexed as           */
/* [partition type][part index] and yielding a PART_ID_T - confirm at definition. */
136 extern PART_ID_T ge_part_type_to_part_id[MAX_PART_TYPES][MAX_NUM_PARTS];
137 
/* NOTE(review): presumably the number of parts for each partition type - confirm at definition. */
138 extern UWORD8 gau1_num_parts_in_part_type[MAX_PART_TYPES];
139 
140 /*****************************************************************************/
141 /* Constant Macros                                                           */
142 /*****************************************************************************/
/* NOTE(review): the three constants below are file-local; their intended meaning */
/* is inferred from the names only (a CTB-level QP update setting and SIMD        */
/* padding amounts for intra prediction) - confirm at their points of use.        */
143 #define UPDATE_QP_AT_CTB 6
144 #define INTRAPRED_SIMD_LEFT_PADDING 16
145 #define INTRAPRED_SIMD_RIGHT_PADDING 8
146 
147 /*****************************************************************************/
148 /* Function Definitions                                                      */
149 /*****************************************************************************/
150 
151 /*!
152 ******************************************************************************
153 * \if Function name : ihevce_enc_loop_ctb_left_copy \endif
154 *
155 * \brief
156 *    This function copy the right data of CTB to context buffers
157 *
158 * \date
159 *    18/09/2012
160 *
161 * \author
162 *    Ittiam
163 *
164 * \return
165 *
166 * List of Functions
167 *
168 *
169 ******************************************************************************
170 */
ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms)171 void ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms)
172 {
173     /* ------------------------------------------------------------------ */
174     /* copy the right coloum data to the context buffers                  */
175     /* ------------------------------------------------------------------ */
176 
177     nbr_4x4_t *ps_left_nbr;
178     nbr_4x4_t *ps_nbr;
179     UWORD8 *pu1_buff;
180     WORD32 num_pels;
181     UWORD8 *pu1_luma_left, *pu1_chrm_left;
182 
183     UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
184 
185     pu1_luma_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
186     pu1_chrm_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
187     ps_left_nbr = &ps_ctxt->as_left_col_nbr[0];
188 
189     /* copy right luma data */
190     pu1_buff = ps_cu_prms->pu1_luma_recon + ps_cu_prms->i4_ctb_size - 1;
191 
192     for(num_pels = 0; num_pels < ps_cu_prms->i4_ctb_size; num_pels++)
193     {
194         WORD32 i4_indx = ps_cu_prms->i4_luma_recon_stride * num_pels;
195 
196         pu1_luma_left[num_pels] = pu1_buff[i4_indx];
197     }
198 
199     /* copy right chroma data */
200     pu1_buff = ps_cu_prms->pu1_chrm_recon + ps_cu_prms->i4_ctb_size - 2;
201 
202     for(num_pels = 0; num_pels < (ps_cu_prms->i4_ctb_size >> (0 == u1_is_422)); num_pels++)
203     {
204         WORD32 i4_indx = ps_cu_prms->i4_chrm_recon_stride * num_pels;
205 
206         *pu1_chrm_left++ = pu1_buff[i4_indx];
207         *pu1_chrm_left++ = pu1_buff[i4_indx + 1];
208     }
209 
210     /* store the nbr 4x4 data at ctb level */
211     {
212         WORD32 ctr;
213         WORD32 nbr_strd;
214 
215         nbr_strd = ps_cu_prms->i4_ctb_size >> 2;
216 
217         /* copy right nbr data */
218         ps_nbr = &ps_ctxt->as_ctb_nbr_arr[0];
219         ps_nbr += ((ps_cu_prms->i4_ctb_size >> 2) - 1);
220 
221         for(ctr = 0; ctr < (ps_cu_prms->i4_ctb_size >> 2); ctr++)
222         {
223             WORD32 i4_indx = nbr_strd * ctr;
224 
225             ps_left_nbr[ctr] = ps_nbr[i4_indx];
226         }
227     }
228     return;
229 }
230 
231 /*!
232 ******************************************************************************
233 * \if Function name : ihevce_mark_all_modes_to_evaluate \endif
234 *
235 * \brief
236 *   Mark all modes for inter/intra for evaluation. This function will be
237 *   called by ref instance
238 *
239 * \param[in] pv_ctxt : pointer to enc_loop module
240 * \param[in] ps_cu_analyse : pointer to cu analyse
241 *
242 * \return
243 *    None
244 *
245 * \author
246 *  Ittiam
247 *
248 *****************************************************************************
249 */
ihevce_mark_all_modes_to_evaluate(void * pv_ctxt,cu_analyse_t * ps_cu_analyse)250 void ihevce_mark_all_modes_to_evaluate(void *pv_ctxt, cu_analyse_t *ps_cu_analyse)
251 {
252     UWORD8 ctr;
253     WORD32 i4_part;
254 
255     (void)pv_ctxt;
256     /* run a loop over all Inter cands */
257     for(ctr = 0; ctr < MAX_INTER_CU_CANDIDATES; ctr++)
258     {
259         ps_cu_analyse->as_cu_inter_cand[ctr].b1_eval_mark = 1;
260     }
261 
262     /* run a loop over all intra candidates */
263     if(0 != ps_cu_analyse->u1_num_intra_rdopt_cands)
264     {
265         for(ctr = 0; ctr < MAX_INTRA_CU_CANDIDATES + 1; ctr++)
266         {
267             ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr] = 1;
268             ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr] = 1;
269 
270             for(i4_part = 0; i4_part < NUM_PU_PARTS; i4_part++)
271             {
272                 ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[i4_part][ctr] = 1;
273             }
274         }
275     }
276 }
277 
/*!
******************************************************************************
* \if Function name : ihevce_cu_mode_decide \endif
*
* \brief
*    Coding Unit mode decide function. Performs RD opt and decides the best mode
*
* \param[in] ps_ctxt : pointer to enc_loop module
* \param[in] ps_cu_prms : pointer to coding unit params (position, buffer pointers)
* \param[in] ps_cu_analyse : pointer to cu analyse
* \param[out] ps_final_mode_state : pointer to final mode state (cast to void
*                                   at the start of the function)
* \param[out] pu1_ecd_data : pointer to store coeff data for ECD
* \param[out] ps_col_pu : colocated pu buffer pointer
* \param[out] pu1_col_pu_map : colocated pu map buffer pointer
* \param[in] col_start_pu_idx : pu index start value
*
* \return
*    LWORD64 value. NOTE(review): presumably the RD opt cost of the best
*    mode - confirm against the return statement of the function
*
* \author
*  Ittiam
*
*****************************************************************************
*/
ihevce_cu_mode_decide(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,cu_analyse_t * ps_cu_analyse,final_mode_state_t * ps_final_mode_state,UWORD8 * pu1_ecd_data,pu_col_mv_t * ps_col_pu,UWORD8 * pu1_col_pu_map,WORD32 col_start_pu_idx)303 LWORD64 ihevce_cu_mode_decide(
304     ihevce_enc_loop_ctxt_t *ps_ctxt,
305     enc_loop_cu_prms_t *ps_cu_prms,
306     cu_analyse_t *ps_cu_analyse,
307     final_mode_state_t *ps_final_mode_state,
308     UWORD8 *pu1_ecd_data,
309     pu_col_mv_t *ps_col_pu,
310     UWORD8 *pu1_col_pu_map,
311     WORD32 col_start_pu_idx)
312 {
313     enc_loop_chrm_cu_buf_prms_t s_chrm_cu_buf_prms;
314     cu_nbr_prms_t s_cu_nbr_prms;
315     inter_cu_mode_info_t s_inter_cu_mode_info;
316     cu_inter_cand_t *ps_best_inter_cand = NULL;
317     UWORD8 *pu1_cu_top;
318     UWORD8 *pu1_cu_top_left;
319     UWORD8 *pu1_cu_left;
320     UWORD8 *pu1_final_recon = NULL;
321     UWORD8 *pu1_curr_src = NULL;
322     void *pv_curr_src = NULL;
323     void *pv_cu_left = NULL;
324     void *pv_cu_top = NULL;
325     void *pv_cu_top_left = NULL;
326 
327     WORD32 cu_left_stride = 0;
328     WORD32 ctr;
329     WORD32 rd_opt_best_idx;
330     LWORD64 rd_opt_least_cost;
331     WORD32 rd_opt_curr_idx;
332     WORD32 num_4x4_in_ctb;
333     WORD32 nbr_4x4_left_strd = 0;
334 
335     nbr_4x4_t *ps_topleft_nbr_4x4;
336     nbr_4x4_t *ps_left_nbr_4x4 = NULL;
337     nbr_4x4_t *ps_top_nbr_4x4 = NULL;
338     nbr_4x4_t *ps_curr_nbr_4x4;
339     WORD32 enable_intra_eval_flag;
340     WORD32 i4_best_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1;
341     WORD32 curr_cu_pos_in_row;
342     WORD32 cu_top_right_offset;
343     WORD32 cu_top_right_dep_pos;
344     WORD32 i4_ctb_x_off, i4_ctb_y_off;
345 
346     UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
347     (void)ps_final_mode_state;
348     /* default init */
349     rd_opt_least_cost = MAX_COST_64;
350     ps_ctxt->as_cu_prms[0].i8_best_rdopt_cost = MAX_COST_64;
351     ps_ctxt->as_cu_prms[1].i8_best_rdopt_cost = MAX_COST_64;
352 
353     /* Zero cbf tool is enabled by default for all presets */
354     ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
355 
356     rd_opt_best_idx = 1;
357     rd_opt_curr_idx = 0;
358     enable_intra_eval_flag = 1;
359 
360     /* CU params in enc ctxt*/
361     ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
362     ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
363     ps_ctxt->ps_enc_out_ctxt->u1_cu_size = ps_cu_analyse->u1_cu_size;
364 
365     num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
366     ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
367     ps_curr_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
368     ps_curr_nbr_4x4 += ((ps_cu_analyse->b3_cu_pos_y << 1) * num_4x4_in_ctb);
369 
370     /* CB and Cr are pixel interleaved */
371     s_chrm_cu_buf_prms.i4_chrm_recon_stride = ps_cu_prms->i4_chrm_recon_stride;
372 
373     s_chrm_cu_buf_prms.i4_chrm_src_stride = ps_cu_prms->i4_chrm_src_stride;
374 
375     if(!ps_ctxt->u1_is_input_data_hbd)
376     {
377         /* --------------------------------------- */
378         /* ----- Luma Pointers Derivation -------- */
379         /* --------------------------------------- */
380 
381         /* based on CU position derive the pointers */
382         pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
383 
384         pu1_curr_src = ps_cu_prms->pu1_luma_src + (ps_cu_analyse->b3_cu_pos_x << 3);
385 
386         pu1_final_recon += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
387 
388         pu1_curr_src += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_src_stride);
389 
390         pv_curr_src = pu1_curr_src;
391 
392         /* CU left */
393         if(0 == ps_cu_analyse->b3_cu_pos_x)
394         {
395             /* CTB boundary */
396             pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
397             pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << 3);
398             cu_left_stride = 1;
399 
400             ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
401             ps_left_nbr_4x4 += ps_cu_analyse->b3_cu_pos_y << 1;
402             nbr_4x4_left_strd = 1;
403         }
404         else
405         {
406             /* inside CTB */
407             pu1_cu_left = pu1_final_recon - 1;
408             cu_left_stride = ps_cu_prms->i4_luma_recon_stride;
409 
410             ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
411             nbr_4x4_left_strd = num_4x4_in_ctb;
412         }
413 
414         pv_cu_left = pu1_cu_left;
415 
416         /* CU top */
417         if(0 == ps_cu_analyse->b3_cu_pos_y)
418         {
419             /* CTB boundary */
420             pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_luma;
421             pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
422             pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
423 
424             ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
425             ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
426             ps_top_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
427         }
428         else
429         {
430             /* inside CTB */
431             pu1_cu_top = pu1_final_recon - ps_cu_prms->i4_luma_recon_stride;
432 
433             ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
434         }
435 
436         pv_cu_top = pu1_cu_top;
437 
438         /* CU top left */
439         if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
440         {
441             /* left ctb boundary but not first row */
442             pu1_cu_top_left = pu1_cu_left - 1; /* stride is 1 */
443             ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - 1; /* stride is 1 */
444         }
445         else
446         {
447             /* rest all cases topleft is top -1 */
448             pu1_cu_top_left = pu1_cu_top - 1;
449             ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
450         }
451 
452         pv_cu_top_left = pu1_cu_top_left;
453 
454         /* Store the CU nbr information in the ctxt for final reconstruction fun. */
455         s_cu_nbr_prms.nbr_4x4_left_strd = nbr_4x4_left_strd;
456         s_cu_nbr_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
457         s_cu_nbr_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
458         s_cu_nbr_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
459         s_cu_nbr_prms.pu1_cu_left = pu1_cu_left;
460         s_cu_nbr_prms.pu1_cu_top = pu1_cu_top;
461         s_cu_nbr_prms.pu1_cu_top_left = pu1_cu_top_left;
462         s_cu_nbr_prms.cu_left_stride = cu_left_stride;
463 
464         /* ------------------------------------------------------------ */
465         /* -- Initialize the number of neigbour skip cu count for rdo --*/
466         /* ------------------------------------------------------------ */
467         {
468             nbr_avail_flags_t s_nbr;
469             WORD32 i4_num_nbr_skip_cus = 0;
470 
471             /* get the neighbour availability flags for current cu  */
472             ihevce_get_nbr_intra(
473                 &s_nbr,
474                 ps_ctxt->pu1_ctb_nbr_map,
475                 ps_ctxt->i4_nbr_map_strd,
476                 (ps_cu_analyse->b3_cu_pos_x << 1),
477                 (ps_cu_analyse->b3_cu_pos_y << 1),
478                 (ps_cu_analyse->u1_cu_size >> 2));
479             if(s_nbr.u1_top_avail)
480             {
481                 i4_num_nbr_skip_cus += ps_top_nbr_4x4->b1_skip_flag;
482             }
483 
484             if(s_nbr.u1_left_avail)
485             {
486                 i4_num_nbr_skip_cus += ps_left_nbr_4x4->b1_skip_flag;
487             }
488             ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0].i4_num_nbr_skip_cus =
489                 i4_num_nbr_skip_cus;
490             ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1].i4_num_nbr_skip_cus =
491                 i4_num_nbr_skip_cus;
492         }
493 
494         /* --------------------------------------- */
495         /* --- Chroma Pointers Derivation -------- */
496         /* --------------------------------------- */
497 
498         /* based on CU position derive the pointers */
499         s_chrm_cu_buf_prms.pu1_final_recon =
500             ps_cu_prms->pu1_chrm_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
501 
502         s_chrm_cu_buf_prms.pu1_curr_src =
503             ps_cu_prms->pu1_chrm_src + (ps_cu_analyse->b3_cu_pos_x << 3);
504 
505         s_chrm_cu_buf_prms.pu1_final_recon +=
506             ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_recon_stride);
507 
508         s_chrm_cu_buf_prms.pu1_curr_src +=
509             ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_src_stride);
510 
511         /* CU left */
512         if(0 == ps_cu_analyse->b3_cu_pos_x)
513         {
514             /* CTB boundary */
515             s_chrm_cu_buf_prms.pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
516             s_chrm_cu_buf_prms.pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 3));
517             s_chrm_cu_buf_prms.i4_cu_left_stride = 2;
518         }
519         else
520         {
521             /* inside CTB */
522             s_chrm_cu_buf_prms.pu1_cu_left = s_chrm_cu_buf_prms.pu1_final_recon - 2;
523             s_chrm_cu_buf_prms.i4_cu_left_stride = ps_cu_prms->i4_chrm_recon_stride;
524         }
525 
526         /* CU top */
527         if(0 == ps_cu_analyse->b3_cu_pos_y)
528         {
529             /* CTB boundary */
530             s_chrm_cu_buf_prms.pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_chroma;
531             s_chrm_cu_buf_prms.pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
532             s_chrm_cu_buf_prms.pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
533         }
534         else
535         {
536             /* inside CTB */
537             s_chrm_cu_buf_prms.pu1_cu_top =
538                 s_chrm_cu_buf_prms.pu1_final_recon - ps_cu_prms->i4_chrm_recon_stride;
539         }
540 
541         /* CU top left */
542         if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
543         {
544             /* left ctb boundary but not first row */
545             s_chrm_cu_buf_prms.pu1_cu_top_left =
546                 s_chrm_cu_buf_prms.pu1_cu_left - 2; /* stride is 1 (2 pixels) */
547         }
548         else
549         {
550             /* rest all cases topleft is top -2 */
551             s_chrm_cu_buf_prms.pu1_cu_top_left = s_chrm_cu_buf_prms.pu1_cu_top - 2;
552         }
553     }
554 
555     /* Set Variables for Dep. Checking and Setting */
556     i4_ctb_x_off = (ps_cu_prms->i4_ctb_pos << 6);
557 
558     i4_ctb_y_off = ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y;
559     ps_ctxt->i4_satd_buf_idx = rd_opt_curr_idx;
560 
561     /* Set the pred pointer count for ME/intra to 0 to start */
562     ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count = 0;
563 
564     ASSERT(
565         (ps_cu_analyse->u1_num_inter_cands > 0) || (ps_cu_analyse->u1_num_intra_rdopt_cands > 0));
566 
567     ASSERT(ps_cu_analyse->u1_num_inter_cands <= MAX_INTER_CU_CANDIDATES);
568     s_inter_cu_mode_info.u1_num_inter_cands = 0;
569     s_inter_cu_mode_info.u1_idx_of_worst_cost_in_cost_array = 0;
570     s_inter_cu_mode_info.u1_idx_of_worst_cost_in_pred_buf_array = 0;
571 
572     ps_ctxt->s_cu_inter_merge_skip.u1_num_merge_cands = 0;
573     ps_ctxt->s_cu_inter_merge_skip.u1_num_skip_cands = 0;
574     ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type0_cands = 0;
575     ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type1_cands = 0;
576     ps_ctxt->s_pred_buf_data.i4_pred_stride = ps_cu_analyse->u1_cu_size;
577     if(0 != ps_cu_analyse->u1_num_inter_cands)
578     {
579         ihevce_inter_cand_sifter_prms_t s_prms;
580 
581         UWORD8 u1_enable_top_row_sync;
582 
583         if(ps_ctxt->u1_disable_intra_eval)
584         {
585             u1_enable_top_row_sync = !DISABLE_TOP_SYNC;
586         }
587         else
588         {
589             u1_enable_top_row_sync = 1;
590         }
591 
592         if((!ps_ctxt->u1_use_top_at_ctb_boundary) && u1_enable_top_row_sync)
593         {
594             /* Wait till top data is ready          */
595             /* Currently checking till top right CU */
596             curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
597 
598             if(i4_ctb_y_off == 0)
599             {
600                 /* No wait for 1st row */
601                 cu_top_right_offset = -(MAX_CTB_SIZE);
602                 {
603                     ihevce_tile_params_t *ps_col_tile_params =
604                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
605                          ps_ctxt->i4_tile_col_idx);
606                     /* No wait for 1st row */
607                     cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
608                 }
609                 cu_top_right_dep_pos = 0;
610             }
611             else
612             {
613                 cu_top_right_offset = (ps_cu_analyse->u1_cu_size) + 4;
614                 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
615             }
616 
617             if(0 == ps_cu_analyse->b3_cu_pos_y)
618             {
619                 ihevce_dmgr_chk_row_row_sync(
620                     ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
621                     curr_cu_pos_in_row,
622                     cu_top_right_offset,
623                     cu_top_right_dep_pos,
624                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
625                     ps_ctxt->thrd_id);
626             }
627         }
628 
629         if(ps_ctxt->i1_cu_qp_delta_enable)
630         {
631             ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, 4, 0);
632         }
633 
634         s_prms.i4_ctb_nbr_map_stride = ps_ctxt->i4_nbr_map_strd;
635         s_prms.i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
636         s_prms.i4_nbr_4x4_left_strd = nbr_4x4_left_strd;
637         s_prms.i4_src_strd = ps_cu_prms->i4_luma_src_stride;
638         s_prms.ps_cu_inter_merge_skip = &ps_ctxt->s_cu_inter_merge_skip;
639         s_prms.aps_cu_nbr_buf[0] = &ps_ctxt->as_cu_nbr[ps_ctxt->i4_satd_buf_idx][0];
640         s_prms.aps_cu_nbr_buf[1] = &ps_ctxt->as_cu_nbr[!ps_ctxt->i4_satd_buf_idx][0];
641         s_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
642         s_prms.ps_mc_ctxt = &ps_ctxt->s_mc_ctxt;
643         s_prms.ps_me_cands = ps_cu_analyse->as_cu_inter_cand;
644         s_prms.ps_mixed_modes_datastore = &ps_ctxt->s_mixed_mode_inter_cu;
645         s_prms.ps_mv_pred_ctxt = &ps_ctxt->s_mv_pred_ctxt;
646         s_prms.ps_pred_buf_data = &ps_ctxt->s_pred_buf_data;
647         s_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
648         s_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
649         s_prms.pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
650         s_prms.pv_src = pv_curr_src;
651         s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 3;
652         s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 3;
653         s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
654         s_prms.u1_max_merge_candidates = ps_ctxt->i4_max_merge_candidates;
655         s_prms.u1_num_me_cands = ps_cu_analyse->u1_num_inter_cands;
656         s_prms.u1_use_satd_for_merge_eval = ps_ctxt->i4_use_satd_for_merge_eval;
657         s_prms.u1_quality_preset = ps_ctxt->i4_quality_preset;
658         s_prms.i1_slice_type = ps_ctxt->i1_slice_type;
659         s_prms.ps_cu_me_intra_pred_prms = &ps_ctxt->s_cu_me_intra_pred_prms;
660         s_prms.u1_is_hbd = (ps_ctxt->u1_bit_depth > 8);
661         s_prms.ps_inter_cu_mode_info = &s_inter_cu_mode_info;
662         s_prms.pai4_mv_cost = ps_cu_analyse->ai4_mv_cost;
663         s_prms.i4_lambda_qf = ps_ctxt->i4_sad_lamda;
664         s_prms.u1_use_merge_cand_from_top_row =
665             (u1_enable_top_row_sync || (s_prms.u1_cu_pos_y > 0));
666         s_prms.u1_merge_idx_cabac_model =
667             ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[IHEVC_CAB_MERGE_IDX_EXT];
668 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
669         s_prms.pai4_me_err_metric = ps_cu_analyse->ai4_err_metric;
670         s_prms.u1_reuse_me_sad = 1;
671 #else
672         s_prms.u1_reuse_me_sad = 0;
673 #endif
674 
675         if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_type != PSLICE)
676         {
677             if(ps_ctxt->i4_temporal_layer == 1)
678             {
679                 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_BREF;
680             }
681             else
682             {
683                 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
684             }
685         }
686         else
687         {
688             s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_P;
689         }
690         s_prms.u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
691 
692         if(s_prms.u1_is_cu_noisy)
693         {
694             s_prms.i4_lambda_qf =
695                 ((float)s_prms.i4_lambda_qf) * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f;
696         }
697         s_prms.pf_luma_inter_pred_pu = ihevce_luma_inter_pred_pu;
698 
699         s_prms.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
700 
701         s_prms.pf_evalsad_pt_npu_mxn_8bit = (FT_SAD_EVALUATOR *)ps_ctxt->pv_evalsad_pt_npu_mxn_8bit;
702         ihevce_inter_cand_sifter(&s_prms);
703     }
704     if(u1_is_422)
705     {
706         UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX - 1];
707         UWORD8 u1_num_bufs_allocated;
708 
709         u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
710             au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX - 1);
711 
712         ASSERT(u1_num_bufs_allocated == (NUM_CU_ME_INTRA_PRED_IDX - 1));
713 
714         for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
715             ctr++)
716         {
717             {
718                 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
719                     (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
720             }
721 
722             ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
723 
724             ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
725         }
726 
727         {
728             ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
729                 (UWORD8 *)ps_ctxt->pv_422_chroma_intra_pred_buf;
730         }
731 
732         ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
733 
734         ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
735     }
736     else
737     {
738         UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX];
739         UWORD8 u1_num_bufs_allocated;
740 
741         u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
742             au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX);
743 
744         ASSERT(u1_num_bufs_allocated == NUM_CU_ME_INTRA_PRED_IDX);
745 
746         for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
747             ctr++)
748         {
749             {
750                 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
751                     (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
752             }
753 
754             ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
755 
756             ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
757         }
758     }
759 
760     ihevce_mark_all_modes_to_evaluate(ps_ctxt, ps_cu_analyse);
761 
762     ps_ctxt->as_cu_prms[0].s_recon_datastore.u1_is_lumaRecon_available = 0;
763     ps_ctxt->as_cu_prms[1].s_recon_datastore.u1_is_lumaRecon_available = 0;
764     ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
765     ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
766     ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
767     ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
768     ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
769     ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
770     /* --------------------------------------- */
771     /* ------ Inter RD OPT stage ------------- */
772     /* --------------------------------------- */
773     if(0 != s_inter_cu_mode_info.u1_num_inter_cands)
774     {
775         UWORD8 u1_ssd_bit_info_ctr = 0;
776 
777         /* -- run a loop over all Inter rd opt cands ------ */
778         for(ctr = 0; ctr < s_inter_cu_mode_info.u1_num_inter_cands; ctr++)
779         {
780             cu_inter_cand_t *ps_inter_cand;
781 
782             LWORD64 rd_opt_cost = 0;
783 
784             ps_inter_cand = s_inter_cu_mode_info.aps_cu_data[ctr];
785 
786             if((ps_inter_cand->b1_skip_flag) || (ps_inter_cand->as_inter_pu[0].b1_merge_flag) ||
787                (ps_inter_cand->b3_part_size && ps_inter_cand->as_inter_pu[1].b1_merge_flag))
788             {
789                 ps_inter_cand->b1_eval_mark = 1;
790             }
791 
792             /****************************************************************/
793             /* This check is only valid for derived instances.              */
794             /* check if this mode needs to be evaluated or not.             */
795             /* if it is a skip candidate, go ahead and evaluate it even if  */
796             /* it has not been marked while sorting.                        */
797             /****************************************************************/
798             if((0 == ps_inter_cand->b1_eval_mark) && (0 == ps_inter_cand->b1_skip_flag))
799             {
800                 continue;
801             }
802 
803             /* RDOPT related copies and settings */
804             ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
805 
806             /* RDOPT copy States : Prev Cu best to current init */
807             COPY_CABAC_STATES(
808                 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
809                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
810                 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
811             /* MVP ,MVD calc and Motion compensation */
812             rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
813                 ps_ctxt,
814                 ps_inter_cand,
815                 ps_cu_analyse->u1_cu_size,
816                 ps_cu_analyse->b3_cu_pos_x,
817                 ps_cu_analyse->b3_cu_pos_y,
818                 ps_left_nbr_4x4,
819                 ps_top_nbr_4x4,
820                 ps_topleft_nbr_4x4,
821                 nbr_4x4_left_strd,
822                 rd_opt_curr_idx);
823 
824 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
825             if((ps_ctxt->u1_bit_depth == 8) && (!ps_inter_cand->b1_skip_flag))
826             {
827                 ihevce_determine_tu_tree_distribution(
828                     ps_inter_cand,
829                     (me_func_selector_t *)ps_ctxt->pv_err_func_selector,
830                     ps_ctxt->ai2_scratch,
831                     (UWORD8 *)pv_curr_src,
832                     ps_cu_prms->i4_luma_src_stride,
833                     ps_ctxt->i4_satd_lamda,
834                     LAMBDA_Q_SHIFT,
835                     ps_cu_analyse->u1_cu_size,
836                     ps_ctxt->u1_max_tr_depth);
837             }
838 #endif
839 #if DISABLE_ZERO_ZBF_IN_INTER
840             ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
841 #else
842             ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
843 #endif
844             /* Recon loop with different TUs based on partition type*/
845             rd_opt_cost += ((pf_inter_rdopt_cu_ntu)ps_ctxt->pv_inter_rdopt_cu_ntu)(
846                 ps_ctxt,
847                 ps_cu_prms,
848                 pv_curr_src,
849                 ps_cu_analyse->u1_cu_size,
850                 ps_cu_analyse->b3_cu_pos_x,
851                 ps_cu_analyse->b3_cu_pos_y,
852                 rd_opt_curr_idx,
853                 &s_chrm_cu_buf_prms,
854                 ps_inter_cand,
855                 ps_cu_analyse,
856                 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
857                                        : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
858                                           (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
859                                              100.0);
860 
861 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
862             if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
863             {
864                 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
865                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
866                     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
867             }
868 #endif
869 
870             /* based on the rd opt cost choose the best and current index */
871             if(rd_opt_cost < rd_opt_least_cost)
872             {
873                 /* swap the best and current indx */
874                 rd_opt_best_idx = !rd_opt_best_idx;
875                 rd_opt_curr_idx = !rd_opt_curr_idx;
876 
877                 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
878                 rd_opt_least_cost = rd_opt_cost;
879                 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
880 
881                 /* Store the best Inter cand. for final_recon function */
882                 ps_best_inter_cand = ps_inter_cand;
883             }
884 
885             /* set the neighbour map to 0 */
886             ihevce_set_nbr_map(
887                 ps_ctxt->pu1_ctb_nbr_map,
888                 ps_ctxt->i4_nbr_map_strd,
889                 (ps_cu_analyse->b3_cu_pos_x << 1),
890                 (ps_cu_analyse->b3_cu_pos_y << 1),
891                 (ps_cu_analyse->u1_cu_size >> 2),
892                 0);
893 
894         } /* end of loop for all the Inter RD OPT cand */
895     }
896     /* --------------------------------------- */
897     /* ---- Conditional Eval of Intra -------- */
898     /* --------------------------------------- */
899     {
900         enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
901         ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
902 
903         /* check if inter candidates are valid */
904         if(0 != ps_cu_analyse->u1_num_inter_cands)
905         {
906             /* if skip or no residual inter candidates has won then */
907             /* evaluation of intra candidates is disabled           */
908             if((1 == ps_enc_loop_bestprms->u1_skip_flag) ||
909                (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
910             {
911                 enable_intra_eval_flag = 0;
912             }
913         }
914         /* Disable Intra Gating for HIGH QUALITY PRESET */
915 #if !ENABLE_INTRA_GATING_FOR_HQ
916         if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
917         {
918             enable_intra_eval_flag = 1;
919 
920 #if DISABLE_LARGE_INTRA_PQ
921             if((IHEVCE_QUALITY_P0 == ps_ctxt->i4_quality_preset) && (ps_cu_prms->u1_is_cu_noisy) &&
922                (ps_ctxt->i1_slice_type != ISLICE) && (0 != s_inter_cu_mode_info.u1_num_inter_cands))
923             {
924                 if(ps_cu_analyse->u1_cu_size > 16)
925                 {
926                     /* Disable 32x32 / 64x64 Intra in PQ P and B pics */
927                     enable_intra_eval_flag = 0;
928                 }
929                 else if(ps_cu_analyse->u1_cu_size == 16)
930                 {
931                     /* Disable tu equal to cu mode in 16x16 Intra in PQ P and B pics */
932                     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
933                 }
934             }
935 #endif
936         }
937 #endif
938     }
939 
940     /* --------------------------------------- */
941     /* ------ Intra RD OPT stage ------------- */
942     /* --------------------------------------- */
943 
944     /* -- run a loop over all Intra rd opt cands ------ */
945     if((0 != ps_cu_analyse->u1_num_intra_rdopt_cands) && (1 == enable_intra_eval_flag))
946     {
947         LWORD64 rd_opt_cost;
948         WORD32 end_flag = 0;
949         WORD32 cu_eval_done = 0;
950         WORD32 subcu_eval_done = 0;
951         WORD32 subpu_eval_done = 0;
952         WORD32 max_trans_size;
953         WORD32 sync_wait_stride;
954         max_trans_size = MIN(MAX_TU_SIZE, (ps_cu_analyse->u1_cu_size));
955         sync_wait_stride = (ps_cu_analyse->u1_cu_size) + max_trans_size;
956 
957         if(!ps_ctxt->u1_use_top_at_ctb_boundary)
958         {
959             /* Wait till top data is ready          */
960             /* Currently checking till top right CU */
961             curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
962 
963             if(i4_ctb_y_off == 0)
964             {
965                 /* No wait for 1st row */
966                 cu_top_right_offset = -(MAX_CTB_SIZE);
967                 {
968                     ihevce_tile_params_t *ps_col_tile_params =
969                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
970                          ps_ctxt->i4_tile_col_idx);
971                     /* No wait for 1st row */
972                     cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
973                 }
974                 cu_top_right_dep_pos = 0;
975             }
976             else
977             {
978                 cu_top_right_offset = sync_wait_stride;
979                 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
980             }
981 
982             if(0 == ps_cu_analyse->b3_cu_pos_y)
983             {
984                 ihevce_dmgr_chk_row_row_sync(
985                     ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
986                     curr_cu_pos_in_row,
987                     cu_top_right_offset,
988                     cu_top_right_dep_pos,
989                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
990                     ps_ctxt->thrd_id);
991             }
992         }
993         ctr = 0;
994 
        /* Zero cbf tool for intra CUs is enabled only when ENABLE_ZERO_CBF_IN_INTRA is set */
996 #if ENABLE_ZERO_CBF_IN_INTRA
997         ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
998 #else
999         ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
1000 #endif
1001 
1002         /* Intra Mode gating based on MPM cand list and encoder quality preset */
1003         if((ps_ctxt->i1_slice_type != ISLICE) && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
1004         {
1005             ihevce_mpm_idx_based_filter_RDOPT_cand(
1006                 ps_ctxt,
1007                 ps_cu_analyse,
1008                 ps_left_nbr_4x4,
1009                 ps_top_nbr_4x4,
1010                 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0],
1011                 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[0]);
1012 
1013             ihevce_mpm_idx_based_filter_RDOPT_cand(
1014                 ps_ctxt,
1015                 ps_cu_analyse,
1016                 ps_left_nbr_4x4,
1017                 ps_top_nbr_4x4,
1018                 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0],
1019                 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[0]);
1020         }
1021 
1022         /* Call Chroma SATD function for curr_func_mode in HIGH QUALITY mode */
1023         if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd)
1024         {
1025             /* For cu_size = 64, there won't be any TU_EQ_CU case */
1026             if(64 != ps_cu_analyse->u1_cu_size)
1027             {
1028                 /* RDOPT copy States : Prev Cu best to current init */
1029                 COPY_CABAC_STATES(
1030                     &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1031                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1032                     IHEVC_CAB_CTXT_END);
1033 
1034                 /* RDOPT related copies and settings */
1035                 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1036 
1037                 /* Calc. best SATD mode for TU_EQ_CU case */
1038                 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1039                     ps_ctxt,
1040                     &s_chrm_cu_buf_prms,
1041                     ps_cu_analyse,
1042                     rd_opt_curr_idx,
1043                     TU_EQ_CU,
1044                     !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1045                                            : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1046                                               (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1047                                                  100.0,
1048                     ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1049 
1050 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1051                 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1052                 {
1053                     ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1054                     ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1055                         ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1056                 }
1057 #endif
1058             }
1059 
1060             /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
1061             TU_EQ_CU_DIV2 case */
1062 
1063             if((ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] !=
1064                 255) &&
1065                (8 != ps_cu_analyse->u1_cu_size))
1066             {
1067                 /* RDOPT copy States : Prev Cu best to current init */
1068                 COPY_CABAC_STATES(
1069                     &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1070                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1071                     IHEVC_CAB_CTXT_END);
1072 
1073                 /* RDOPT related copies and settings */
1074                 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1075 
1076                 /* Calc. best SATD mode for TU_EQ_CU_DIV2 case */
1077                 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1078                     ps_ctxt,
1079                     &s_chrm_cu_buf_prms,
1080                     ps_cu_analyse,
1081                     rd_opt_curr_idx,
1082                     TU_EQ_CU_DIV2,
1083                     !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1084                                            : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1085                                               (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1086                                                  100.0,
1087                     ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1088 
1089 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1090                 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1091                 {
1092                     ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1093                     ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1094                         ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1095                 }
1096 #endif
1097             }
1098         }
1099 
1100         while(0 == end_flag)
1101         {
1102             UWORD8 *pu1_mode = NULL;
1103             WORD32 curr_func_mode = 0;
1104             void *pv_pred;
1105 
1106             ASSERT(ctr < 36);
1107 
1108             /* TU equal to CU size evaluation of different modes */
1109             if(0 == cu_eval_done)
1110             {
                /* check if all the modes have been evaluated */
1112                 if(255 == ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr])
1113                 {
1114                     cu_eval_done = 1;
1115                     ctr = 0;
1116                 }
1117                 else if(
1118                     (1 == ctr) &&
1119                     ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1120                      (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1121                     (ps_ctxt->i1_slice_type != ISLICE))
1122                 {
1123                     ctr = 0;
1124                     cu_eval_done = 1;
1125                     subcu_eval_done = 1;
1126                     subpu_eval_done = 1;
1127                 }
1128                 else
1129                 {
1130                     if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr])
1131                     {
1132                         ctr++;
1133                         continue;
1134                     }
1135 
1136                     pu1_mode =
1137                         &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr];
1138                     ctr++;
1139                     curr_func_mode = TU_EQ_CU;
1140                 }
1141             }
1142             /* Sub CU (NXN) mode evaluation of different pred modes */
1143             if((0 == subpu_eval_done) && (1 == cu_eval_done))
1144             {
1145                 /*For NxN modes evaluation all candidates for all PU parts are evaluated */
1146                 /*inside the ihevce_intra_rdopt_cu_ntu function, so the subpu_eval_done is set to 1 */
1147                 {
1148                     pu1_mode = &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][ctr];
1149 
1150                     curr_func_mode = TU_EQ_SUBCU;
                    /* check if any modes have to be evaluated */
1152                     if(255 == *pu1_mode)
1153                     {
1154                         subpu_eval_done = 1;
1155                         ctr = 0;
1156                     }
1157                     else if(ctr != 0) /* If the modes have to be evaluated, then terminate, as all modes are already evaluated */
1158                     {
1159                         subpu_eval_done = 1;
1160                         ctr = 0;
1161                     }
1162                     else
1163                     {
1164                         ctr++;
1165                     }
1166                 }
1167             }
1168 
1169             /* TU size equal to CU div2 mode evaluation of different pred modes */
1170             if((0 == subcu_eval_done) && (1 == subpu_eval_done) && (1 == cu_eval_done))
1171             {
                /* check if all the modes have been evaluated */
1173                 if(255 ==
1174                    ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr])
1175                 {
1176                     subcu_eval_done = 1;
1177                 }
1178                 else if(
1179                     (1 == ctr) &&
1180                     ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1181                      (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1182                     (ps_ctxt->i1_slice_type != ISLICE) && (ps_cu_analyse->u1_cu_size == 64))
1183                 {
1184                     subcu_eval_done = 1;
1185                 }
1186                 else
1187                 {
1188                     if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr])
1189                     {
1190                         ctr++;
1191                         continue;
1192                     }
1193 
1194                     pu1_mode = &ps_cu_analyse->s_cu_intra_cand
1195                                     .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr];
1196 
1197                     ctr++;
1198                     curr_func_mode = TU_EQ_CU_DIV2;
1199                 }
1200             }
1201 
            /* check if all CU options have been evaluated */
1203             if((1 == cu_eval_done) && (1 == subcu_eval_done) && (1 == subpu_eval_done))
1204             {
1205                 break;
1206             }
1207 
1208             /* RDOPT related copies and settings */
1209             ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1210 
            /* Assign ME/Intra pred buf. to the current intra cand. since we
            are storing pred data for final_recon function */
1213             {
1214                 pv_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_curr_idx];
1215             }
1216 
1217             /* RDOPT copy States : Prev Cu best to current init */
1218             COPY_CABAC_STATES(
1219                 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1220                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1221                 IHEVC_CAB_CTXT_END);
1222 
1223             /* call the function which performs the normative Intra encode */
1224             rd_opt_cost = ((pf_intra_rdopt_cu_ntu)ps_ctxt->pv_intra_rdopt_cu_ntu)(
1225                 ps_ctxt,
1226                 ps_cu_prms,
1227                 pv_pred,
1228                 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_curr_idx],
1229                 &s_chrm_cu_buf_prms,
1230                 pu1_mode,
1231                 ps_cu_analyse,
1232                 pv_curr_src,
1233                 pv_cu_left,
1234                 pv_cu_top,
1235                 pv_cu_top_left,
1236                 ps_left_nbr_4x4,
1237                 ps_top_nbr_4x4,
1238                 nbr_4x4_left_strd,
1239                 cu_left_stride,
1240                 rd_opt_curr_idx,
1241                 curr_func_mode,
1242                 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1243                                        : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1244                                           (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1245                                              100.0);
1246 
1247 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1248             if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1249             {
1250                 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1251                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1252                     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1253             }
1254 #endif
1255 
1256             /* based on the rd opt cost choose the best and current index */
1257             if(rd_opt_cost < rd_opt_least_cost)
1258             {
1259                 /* swap the best and current indx */
1260                 rd_opt_best_idx = !rd_opt_best_idx;
1261                 rd_opt_curr_idx = !rd_opt_curr_idx;
1262                 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
1263 
1264                 rd_opt_least_cost = rd_opt_cost;
1265                 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
1266             }
1267 
1268             if((TU_EQ_SUBCU == curr_func_mode) &&
1269                (ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_intra_flag) &&
1270                (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P2) && !FORCE_INTRA_TU_DEPTH_TO_0)
1271             {
1272                 UWORD8 au1_tu_eq_cu_div2_modes[4];
1273                 UWORD8 au1_freq_of_mode[4];
1274 
1275                 if(ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_part_mode == SIZE_2Nx2N)
1276                 {
1277                     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1278                         255;  //ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode[0];
1279                     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1280                         255;
1281                 }
1282                 else
1283                 {
1284                     WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
1285                         ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode,
1286                         au1_tu_eq_cu_div2_modes,
1287                         au1_freq_of_mode,
1288                         4);
1289 
1290                     if(2 == i4_num_clusters)
1291                     {
1292                         if(au1_freq_of_mode[0] == 3)
1293                         {
1294                             ps_cu_analyse->s_cu_intra_cand
1295                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1296                                 au1_tu_eq_cu_div2_modes[0];
1297                             ps_cu_analyse->s_cu_intra_cand
1298                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1299                         }
1300                         else if(au1_freq_of_mode[1] == 3)
1301                         {
1302                             ps_cu_analyse->s_cu_intra_cand
1303                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1304                                 au1_tu_eq_cu_div2_modes[1];
1305                             ps_cu_analyse->s_cu_intra_cand
1306                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1307                         }
1308                         else
1309                         {
1310                             ps_cu_analyse->s_cu_intra_cand
1311                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1312                                 au1_tu_eq_cu_div2_modes[0];
1313                             ps_cu_analyse->s_cu_intra_cand
1314                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1315                                 au1_tu_eq_cu_div2_modes[1];
1316                             ps_cu_analyse->s_cu_intra_cand
1317                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[2] = 255;
1318                         }
1319                     }
1320                 }
1321             }
1322 
1323             /* set the neighbour map to 0 */
1324             ihevce_set_nbr_map(
1325                 ps_ctxt->pu1_ctb_nbr_map,
1326                 ps_ctxt->i4_nbr_map_strd,
1327                 (ps_cu_analyse->b3_cu_pos_x << 1),
1328                 (ps_cu_analyse->b3_cu_pos_y << 1),
1329                 (ps_cu_analyse->u1_cu_size >> 2),
1330                 0);
1331         }
1332 
1333     } /* end of Intra RD OPT cand evaluation */
1334 
1335     ASSERT(i4_best_cu_qp > (ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1));
1336     ps_ctxt->i4_cu_qp = i4_best_cu_qp;
1337     ps_cu_analyse->i1_cu_qp = i4_best_cu_qp;
1338 
1339     /* --------------------------------------- */
1340     /* --------Final mode Recon ---------- */
1341     /* --------------------------------------- */
1342     {
1343         enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1344         void *pv_final_pred = NULL;
1345         WORD32 final_pred_strd = 0;
1346         void *pv_final_pred_chrm = NULL;
1347         WORD32 final_pred_strd_chrm = 0;
1348         WORD32 packed_pred_mode;
1349 
1350 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1351         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1352         {
1353             pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1354         }
1355 #else
1356         pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1357 #endif
1358 
1359         ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1360         packed_pred_mode =
1361             ps_enc_loop_bestprms->u1_intra_flag + (ps_enc_loop_bestprms->u1_skip_flag) * 2;
1362 
1363         if(!ps_ctxt->u1_is_input_data_hbd)
1364         {
1365             if(ps_enc_loop_bestprms->u1_intra_flag)
1366             {
1367                 pv_final_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_best_idx];
1368                 final_pred_strd =
1369                     ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_best_idx];
1370             }
1371             else
1372             {
1373                 pv_final_pred = ps_best_inter_cand->pu1_pred_data;
1374                 final_pred_strd = ps_best_inter_cand->i4_pred_data_stride;
1375             }
1376 
1377             pv_final_pred_chrm =
1378                 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
1379                 rd_opt_best_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
1380                                    (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
1381             final_pred_strd_chrm =
1382                 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
1383         }
1384 
1385         ihevce_set_eval_flags(ps_ctxt, ps_enc_loop_bestprms);
1386 
1387         {
1388             final_mode_process_prms_t s_prms;
1389 
1390             void *pv_cu_luma_recon;
1391             void *pv_cu_chroma_recon;
1392             WORD32 luma_stride, chroma_stride;
1393 
1394             if(!ps_ctxt->u1_is_input_data_hbd)
1395             {
1396 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1397                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1398                 {
1399                     pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1400                     pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1401                     luma_stride = ps_cu_analyse->u1_cu_size;
1402                     chroma_stride = ps_cu_analyse->u1_cu_size;
1403                 }
1404                 else
1405                 {
1406                     /* based on CU position derive the luma pointers */
1407                     pv_cu_luma_recon = pu1_final_recon;
1408 
1409                     /* based on CU position derive the chroma pointers */
1410                     pv_cu_chroma_recon = s_chrm_cu_buf_prms.pu1_final_recon;
1411 
1412                     luma_stride = ps_cu_prms->i4_luma_recon_stride;
1413 
1414                     chroma_stride = ps_cu_prms->i4_chrm_recon_stride;
1415                 }
1416 #else
1417                 pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1418                 pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1419                 luma_stride = ps_cu_analyse->u1_cu_size;
1420                 chroma_stride = ps_cu_analyse->u1_cu_size;
1421 #endif
1422 
1423                 s_prms.ps_cu_nbr_prms = &s_cu_nbr_prms;
1424                 s_prms.ps_best_inter_cand = ps_best_inter_cand;
1425                 s_prms.ps_chrm_cu_buf_prms = &s_chrm_cu_buf_prms;
1426                 s_prms.packed_pred_mode = packed_pred_mode;
1427                 s_prms.rd_opt_best_idx = rd_opt_best_idx;
1428                 s_prms.pv_src = pu1_curr_src;
1429                 s_prms.src_strd = ps_cu_prms->i4_luma_src_stride;
1430                 s_prms.pv_pred = pv_final_pred;
1431                 s_prms.pred_strd = final_pred_strd;
1432                 s_prms.pv_pred_chrm = pv_final_pred_chrm;
1433                 s_prms.pred_chrm_strd = final_pred_strd_chrm;
1434                 s_prms.pu1_final_ecd_data = pu1_ecd_data;
1435                 s_prms.pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
1436                 s_prms.csbf_strd = ps_ctxt->i4_cu_csbf_strd;
1437                 s_prms.pv_luma_recon = pv_cu_luma_recon;
1438                 s_prms.recon_luma_strd = luma_stride;
1439                 s_prms.pv_chrm_recon = pv_cu_chroma_recon;
1440                 s_prms.recon_chrma_strd = chroma_stride;
1441                 s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
1442                 s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
1443                 s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
1444                 s_prms.i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1445                 s_prms.u1_will_cabac_state_change = 1;
1446                 s_prms.u1_recompute_sbh_and_rdoq = 0;
1447                 s_prms.u1_is_first_pass = 1;
1448             }
1449 
1450 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
1451             s_prms.u1_is_cu_noisy = !ps_enc_loop_bestprms->u1_intra_flag
1452                                         ? ps_cu_prms->u1_is_cu_noisy
1453                                         : ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
1454 #endif
1455 
1456             ((pf_final_rdopt_mode_prcs)ps_ctxt->pv_final_rdopt_mode_prcs)(ps_ctxt, &s_prms);
1457 
1458 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1459             if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1460             {
1461                 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1462                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1463                     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1464             }
1465 #endif
1466         }
1467     }
1468 
1469     /* --------------------------------------- */
1470     /* --------Populate CU out prms ---------- */
1471     /* --------------------------------------- */
1472     {
1473         enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1474         UWORD8 *pu1_pu_map;
1475         ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1476 
1477         /* Corner case : If Part is 2Nx2N and Merge has all TU with zero cbf */
1478         /* then it has to be coded as skip CU */
1479         if((SIZE_2Nx2N == ps_enc_loop_bestprms->u1_part_mode) &&
1480            (1 == ps_enc_loop_bestprms->as_pu_enc_loop[0].b1_merge_flag) &&
1481            (0 == ps_enc_loop_bestprms->u1_skip_flag) && (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
1482         {
1483             ps_enc_loop_bestprms->u1_skip_flag = 1;
1484         }
1485 
1486         /* update number PUs in CU */
1487         ps_cu_prms->i4_num_pus_in_cu = ps_enc_loop_bestprms->u2_num_pus_in_cu;
1488 
1489         /* ---- populate the colocated pu map index --- */
1490         for(ctr = 0; ctr < ps_enc_loop_bestprms->u2_num_pus_in_cu; ctr++)
1491         {
1492             WORD32 i;
1493             WORD32 vert_ht;
1494             WORD32 horz_wd;
1495 
1496             if(ps_enc_loop_bestprms->u1_intra_flag)
1497             {
1498                 ps_enc_loop_bestprms->as_col_pu_enc_loop[ctr].b1_intra_flag = 1;
1499                 vert_ht = ps_cu_analyse->u1_cu_size >> 2;
1500                 horz_wd = ps_cu_analyse->u1_cu_size >> 2;
1501             }
1502             else
1503             {
1504                 vert_ht = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_ht + 1) << 2) >> 2);
1505                 horz_wd = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_wd + 1) << 2) >> 2);
1506             }
1507 
1508             pu1_pu_map = pu1_col_pu_map + ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_x;
1509             pu1_pu_map += (ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_y * num_4x4_in_ctb);
1510 
1511             for(i = 0; i < vert_ht; i++)
1512             {
1513                 memset(pu1_pu_map, col_start_pu_idx, horz_wd);
1514                 pu1_pu_map += num_4x4_in_ctb;
1515             }
1516             /* increment the index */
1517             col_start_pu_idx++;
1518         }
1519         /* ---- copy the colocated PUs to frm pu ----- */
1520         memcpy(
1521             ps_col_pu,
1522             &ps_enc_loop_bestprms->as_col_pu_enc_loop[0],
1523             ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_col_mv_t));
1524 
1525         /*---populate qp for 4x4 nbr array based on skip and cbf zero flag---*/
1526         {
1527             entropy_context_t *ps_entropy_ctxt;
1528 
1529             WORD32 diff_cu_qp_delta_depth, log2_ctb_size;
1530 
1531             WORD32 log2_min_cu_qp_delta_size;
1532             UWORD32 block_addr_align;
1533             ps_entropy_ctxt = ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt;
1534 
1535             log2_ctb_size = ps_entropy_ctxt->i1_log2_ctb_size;
1536             diff_cu_qp_delta_depth = ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
1537 
1538             log2_min_cu_qp_delta_size = log2_ctb_size - diff_cu_qp_delta_depth;
1539             block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
1540 
1541             ps_entropy_ctxt->i4_qg_pos_x = ps_cu_analyse->b3_cu_pos_x & block_addr_align;
1542             ps_entropy_ctxt->i4_qg_pos_y = ps_cu_analyse->b3_cu_pos_y & block_addr_align;
1543             /*Update the Qp value used. It will not have a valid value iff
1544             current CU is (skipped/no_cbf). In that case the Qp needed for
1545             deblocking is calculated from top/left/previous coded CU*/
1546 
1547             ps_ctxt->ps_enc_out_ctxt->i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1548 
1549             if(ps_entropy_ctxt->i4_qg_pos_x == ps_cu_analyse->b3_cu_pos_x &&
1550                ps_entropy_ctxt->i4_qg_pos_y == ps_cu_analyse->b3_cu_pos_y)
1551             {
1552                 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 1;
1553             }
1554             else
1555             {
1556                 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 0;
1557             }
1558         }
1559 
1560         /* -- at the end of CU set the neighbour map to 1 -- */
1561         ihevce_set_nbr_map(
1562             ps_ctxt->pu1_ctb_nbr_map,
1563             ps_ctxt->i4_nbr_map_strd,
1564             (ps_cu_analyse->b3_cu_pos_x << 1),
1565             (ps_cu_analyse->b3_cu_pos_y << 1),
1566             (ps_cu_analyse->u1_cu_size >> 2),
1567             1);
1568 
1569         /* -- at the end of CU update best cabac rdopt states -- */
1570         /* -- and also set the top row skip flags  ------------- */
1571         ihevce_entropy_update_best_cu_states(
1572             &ps_ctxt->s_rdopt_entropy_ctxt,
1573             ps_cu_analyse->b3_cu_pos_x,
1574             ps_cu_analyse->b3_cu_pos_y,
1575             ps_cu_analyse->u1_cu_size,
1576             0,
1577             rd_opt_best_idx);
1578     }
1579 
1580     /* Store Output struct */
1581 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1582     {
1583         {
1584             memcpy(
1585                 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1586                 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1587                 sizeof(enc_loop_cu_final_prms_t));
1588         }
1589 
1590         memcpy(
1591             &ps_ctxt->as_cu_recur_nbr[0],
1592             &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1593             sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1594                 (ps_cu_analyse->u1_cu_size >> 2));
1595 
1596         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1597 
1598         ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1599     }
1600 #else
1601     if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
1602     {
1603         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1604 
1605         ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
1606 
1607         if(ps_ctxt->u1_disable_intra_eval && ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1608         {
1609             /* Wait till top data is ready          */
1610             /* Currently checking till top right CU */
1611             curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1612 
1613             if(i4_ctb_y_off == 0)
1614             {
1615                 /* No wait for 1st row */
1616                 cu_top_right_offset = -(MAX_CTB_SIZE);
1617                 {
1618                     ihevce_tile_params_t *ps_col_tile_params =
1619                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
1620                          ps_ctxt->i4_tile_col_idx);
1621 
1622                     /* No wait for 1st row */
1623                     cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
1624                 }
1625                 cu_top_right_dep_pos = 0;
1626             }
1627             else
1628             {
1629                 cu_top_right_offset = (ps_cu_analyse->u1_cu_size);
1630                 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
1631             }
1632 
1633             if(0 == ps_cu_analyse->b3_cu_pos_y)
1634             {
1635                 ihevce_dmgr_chk_row_row_sync(
1636                     ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1637                     curr_cu_pos_in_row,
1638                     cu_top_right_offset,
1639                     cu_top_right_dep_pos,
1640                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1641                     ps_ctxt->thrd_id);
1642             }
1643         }
1644     }
1645     else
1646     {
1647         {
1648             memcpy(
1649                 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1650                 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1651                 sizeof(enc_loop_cu_final_prms_t));
1652         }
1653 
1654         memcpy(
1655             &ps_ctxt->as_cu_recur_nbr[0],
1656             &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1657             sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1658                 (ps_cu_analyse->u1_cu_size >> 2));
1659 
1660         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1661 
1662         ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1663     }
1664 #endif
1665 
1666     ps_ctxt->s_pred_buf_data.u4_is_buf_in_use &=
1667         ~((1 << (ps_ctxt->i4_max_num_inter_rdopt_cands + 4)) - 1);
1668 
1669     return rd_opt_least_cost;
1670 }
1671 
1672 /*!
1673 ******************************************************************************
1674 * \if Function name : ihevce_enc_loop_process_row \endif
1675 *
1676 * \brief
1677 *    Row level enc_loop pass function
1678 *
1679 * \param[in] ps_ctxt : pointer to enc_loop module context
1680 * \param[in] ps_curr_src_bufs  : pointer to input yuv buffer (row buffer)
1681 * \param[out] ps_curr_recon_bufs : pointer to recon picture structure (row buffer)
1682 * \param[in] ps_ctb_in : pointer to CTB structure (output of ME/IPE) (row buffer)
1683 * \param[out] ps_ctb_out : pointer to CTB output structure (row buffer)
1684 * \param[out] ps_row_cu : pointer to CU output structure (row buffer)
1685 * \param[out] ps_row_tu : pointer to TU output structure (row buffer)
1686 * \param[out] pi2_frm_coeffs : pointer coeff output (row buffer)
1687 * \param[in] i4_poc : current poc. Needed to send recon in dist-client mode
1688 *
1689 * \return
1690 *    None
1691 *
1692 * Note : Currently the frame level calculations assume that the
1693 *        frame width of the input / recon is an exact multiple of ctb size
1694 *
1695 * \author
1696 *  Ittiam
1697 *
1698 *****************************************************************************
1699 */
1700 void ihevce_enc_loop_process_row(
1701     ihevce_enc_loop_ctxt_t *ps_ctxt,
1702     iv_enc_yuv_buf_t *ps_curr_src_bufs,
1703     iv_enc_yuv_buf_t *ps_curr_recon_bufs,
1704     iv_enc_yuv_buf_src_t *ps_curr_recon_bufs_src,
1705     UWORD8 **ppu1_y_subpel_planes,
1706     ctb_analyse_t *ps_ctb_in,
1707     ctb_enc_loop_out_t *ps_ctb_out,
1708     ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse,
1709     cur_ctb_cu_tree_t *ps_row_cu_tree,
1710     cu_enc_loop_out_t *ps_row_cu,
1711     tu_enc_loop_out_t *ps_row_tu,
1712     pu_t *ps_row_pu,
1713     pu_col_mv_t *ps_row_col_pu,
1714     UWORD16 *pu2_num_pu_map,
1715     UWORD8 *pu1_row_pu_map,
1716     UWORD8 *pu1_row_ecd_data,
1717     UWORD32 *pu4_pu_offsets,
1718     frm_ctb_ctxt_t *ps_frm_ctb_prms,
1719     WORD32 vert_ctr,
1720     recon_pic_buf_t *ps_frm_recon,
1721     void *pv_dep_mngr_encloop_dep_me,
1722     pad_interp_recon_frm_t *ps_pad_interp_recon,
1723     WORD32 i4_pass,
1724     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
1725     ihevce_tile_params_t *ps_tile_params)
1726 {
1727     enc_loop_cu_prms_t s_cu_prms;
1728     ctb_enc_loop_out_t *ps_ctb_out_dblk;
1729 
1730     WORD32 ctb_ctr, ctb_start, ctb_end;
1731     WORD32 col_pu_map_idx;
1732     WORD32 num_ctbs_horz_pic;
1733     WORD32 ctb_size;
1734     WORD32 last_ctb_row_flag;
1735     WORD32 last_ctb_col_flag;
1736     WORD32 last_hz_ctb_wd;
1737     WORD32 last_vt_ctb_ht;
1738     void *pv_dep_mngr_enc_loop_dblk = ps_ctxt->pv_dep_mngr_enc_loop_dblk;
1739     void *pv_dep_mngr_enc_loop_sao = ps_ctxt->pv_dep_mngr_enc_loop_sao;
1740     void *pv_dep_mngr_enc_loop_cu_top_right = ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right;
1741     WORD32 dblk_offset, dblk_check_dep_pos;
1742     WORD32 sao_offset, sao_check_dep_pos;
1743     WORD32 aux_offset, aux_check_dep_pos;
1744     void *pv_dep_mngr_me_dep_encloop;
1745     ctb_enc_loop_out_t *ps_ctb_out_sao;
1746     /*Structure to store deblocking parameters at CTB-row level*/
1747     deblk_ctbrow_prms_t s_deblk_ctb_row_params;
1748     UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
1749 
1750     pv_dep_mngr_me_dep_encloop = (void *)ps_frm_recon->pv_dep_mngr_recon;
1751     num_ctbs_horz_pic = ps_frm_ctb_prms->i4_num_ctbs_horz;
1752     ctb_size = ps_frm_ctb_prms->i4_ctb_size;
1753 
1754     /* Store the num_ctb_horz in sao context*/
1755     ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_horz = ps_frm_ctb_prms->i4_num_ctbs_horz;
1756     ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_vert = ps_frm_ctb_prms->i4_num_ctbs_vert;
1757 
1758     /* Set Variables for Dep. Checking and Setting */
1759     aux_check_dep_pos = vert_ctr;
1760     aux_offset = 2; /* Should be there for 0th row also */
1761     if(vert_ctr > 0)
1762     {
1763         dblk_check_dep_pos = vert_ctr - 1;
1764         dblk_offset = 2;
1765     }
1766     else
1767     {
1768         /* First row should run without waiting */
1769         dblk_check_dep_pos = 0;
1770         dblk_offset = -(ps_tile_params->i4_first_sample_x + 1);
1771     }
1772 
1773     /* Set sao_offset and sao_check_dep_pos */
1774     if(vert_ctr > 1)
1775     {
1776         sao_check_dep_pos = vert_ctr - 2;
1777         sao_offset = 2;
1778     }
1779     else
1780     {
1781         /* First row should run without waiting */
1782         sao_check_dep_pos = 0;
1783         sao_offset = -(ps_tile_params->i4_first_sample_x + 1);
1784     }
1785 
1786     /* check if the current row being processed is the last CTB row */
1787     last_ctb_row_flag = (vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1));
1788 
1789     /* Valid Width (pixels) in the last CTB in every row (padding cases) */
1790     last_hz_ctb_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd - ((num_ctbs_horz_pic - 1) * ctb_size);
1791 
1792     /* Valid Height (pixels) in the last CTB row (padding cases) */
1793     last_vt_ctb_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht -
1794                      ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ctb_size);
1795     /* reset the states copied flag */
1796     ps_ctxt->u1_cabac_states_next_row_copied_flag = 0;
1797     ps_ctxt->u1_cabac_states_first_cu_copied_flag = 0;
1798 
1799     /* populate the cu prms which are common for entire ctb row */
1800     s_cu_prms.i4_luma_src_stride = ps_curr_src_bufs->i4_y_strd;
1801     s_cu_prms.i4_chrm_src_stride = ps_curr_src_bufs->i4_uv_strd;
1802     s_cu_prms.i4_luma_recon_stride = ps_curr_recon_bufs->i4_y_strd;
1803     s_cu_prms.i4_chrm_recon_stride = ps_curr_recon_bufs->i4_uv_strd;
1804     s_cu_prms.i4_ctb_size = ctb_size;
1805 
1806     ps_ctxt->i4_is_first_cu_qg_coded = 0;
1807 
1808     /* Initialize the number of PUs for the first CTB to 0 */
1809     *pu2_num_pu_map = 0;
1810 
1811     /*Getting the address of BS and Qp arrays and other info*/
1812     memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
1813     {
1814         WORD32 num_ctbs_horz_tile;
1815         /* Update the pointers which are accessed not by using ctb_ctr
1816         to the tile start here! */
1817         ps_ctb_in += ps_tile_params->i4_first_ctb_x;
1818         ps_ctb_out += ps_tile_params->i4_first_ctb_x;
1819 
1820         ps_row_cu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_cus_in_ctb);
1821         ps_row_tu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_tus_in_ctb);
1822         ps_row_pu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1823         pu1_row_pu_map += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1824         pu1_row_ecd_data +=
1825             (ps_tile_params->i4_first_ctb_x *
1826              ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_num_tus_in_ctb << 1)
1827                                 : ((ps_frm_ctb_prms->i4_num_tus_in_ctb * 3) >> 1)) *
1828              MAX_SCAN_COEFFS_BYTES_4x4);
1829 
1830         /* Update the pointers to the tile start */
1831         s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
1832             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one vertical edge per 8x8 block
1833         s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
1834             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one horizontal edge per 8x8 block
1835         s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
1836 
1837         num_ctbs_horz_tile = ps_tile_params->i4_curr_tile_wd_in_ctb_unit;
1838 
1839         ctb_start = ps_tile_params->i4_first_ctb_x;
1840         ctb_end = ps_tile_params->i4_first_ctb_x + num_ctbs_horz_tile;
1841     }
1842     ps_ctb_out_dblk = ps_ctb_out;
1843 
1844     ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_frame_qp;
1845 
1846     /* --------- Loop over all the CTBs in a row --------------- */
1847     for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
1848     {
1849         cu_final_update_prms s_cu_update_prms;
1850 
1851         cur_ctb_cu_tree_t *ps_cu_tree_analyse;
1852         me_ctb_data_t *ps_cu_me_data;
1853         ipe_l0_ctb_analyse_for_me_t *ps_ctb_ipe_analyse;
1854         cu_enc_loop_out_t *ps_cu_final;
1855         pu_col_mv_t *ps_ctb_col_pu;
1856 
1857         WORD32 cur_ctb_ht, cur_ctb_wd;
1858         WORD32 last_cu_pos_in_ctb;
1859         WORD32 last_cu_size;
1860         WORD32 num_pus_in_ctb;
1861         UWORD8 u1_is_ctb_noisy;
1862         ps_ctb_col_pu = ps_row_col_pu + ctb_ctr * ps_frm_ctb_prms->i4_num_pus_in_ctb;
1863 
1864         if(ctb_ctr)
1865         {
1866             ps_ctxt->i4_prev_QP = ps_ctxt->i4_last_cu_qp_from_prev_ctb;
1867         }
1868         /* If sub pic level rc is enabled */
1869         if(ps_ctxt->i4_sub_pic_level_rc)
1870         {
1871             ihevce_sub_pic_rc_scale_query((void *)ps_multi_thrd_ctxt, (void *)ps_ctxt);
1872         }
1873         /* check if the current CTB is in the last CTB column of the picture */
1874         last_ctb_col_flag = (ctb_ctr == (num_ctbs_horz_pic - 1));
1875         if(1 == last_ctb_col_flag)
1876         {
1877             cur_ctb_wd = last_hz_ctb_wd;
1878         }
1879         else
1880         {
1881             cur_ctb_wd = ctb_size;
1882         }
1883 
1884         /* If it's the last CTB row, get the actual ht of CTB */
1885         if(1 == last_ctb_row_flag)
1886         {
1887             cur_ctb_ht = last_vt_ctb_ht;
1888         }
1889         else
1890         {
1891             cur_ctb_ht = ctb_size;
1892         }
1893 
1894         ps_ctxt->u4_cur_ctb_ht = cur_ctb_ht;
1895         ps_ctxt->u4_cur_ctb_wd = cur_ctb_wd;
1896 
1897         /* Wait till reference frame recon is available */
1898 
1899         /* ------------ Wait till current data is ready from ME -------------- */
1900 
1901         /*only for ref instance and Non I pics */
1902         if((ps_ctxt->i4_bitrate_instance_num == 0) &&
1903            ((ISLICE != ps_ctxt->i1_slice_type) || L0ME_IN_OPENLOOP_MODE))
1904         {
1905             if(ctb_ctr < (num_ctbs_horz_pic))
1906             {
1907                 ihevce_dmgr_chk_row_row_sync(
1908                     pv_dep_mngr_encloop_dep_me,
1909                     ctb_ctr,
1910                     1,
1911                     vert_ctr,
1912                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1913                     ps_ctxt->thrd_id);
1914             }
1915         }
1916 
1917         /* store the cu pointer for current ctb out */
1918         ps_ctb_out->ps_enc_cu = ps_row_cu;
1919         ps_cu_final = ps_row_cu;
1920 
1921         /* Get the base point of CU recursion tree */
1922         if(ISLICE != ps_ctxt->i1_slice_type)
1923         {
1924             ps_cu_tree_analyse = ps_ctb_in->ps_cu_tree;
1925             ASSERT(ps_ctb_in->ps_cu_tree == (ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE)));
1926         }
1927         else
1928         {
1929             /* Initialize ptr to current CTB */
1930             ps_cu_tree_analyse = ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE);
1931         }
1932 
1933         /* Get the ME data pointer for 16x16 block data in ctb */
1934         ps_cu_me_data = ps_ctb_in->ps_me_ctb_data;
1935         u1_is_ctb_noisy = ps_ctb_in->s_ctb_noise_params.i4_noise_present;
1936         s_cu_prms.u1_is_cu_noisy = u1_is_ctb_noisy;
1937         s_cu_prms.pu1_is_8x8Blk_noisy = ps_ctb_in->s_ctb_noise_params.au1_is_8x8Blk_noisy;
1938 
1939         /* store the ctb level prms in cu prms */
1940         s_cu_prms.i4_ctb_pos = ctb_ctr;
1941 
1942         s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
1943         s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
1944 
1945         {
1946             s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
1947             s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
1948         }
1949 
1950         s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
1951 
1952         s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
1953 
1954         s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
1955 
1956         /* Initialize ptr to current CTB */
1957         ps_ctb_ipe_analyse = ps_row_ipe_analyse + ctb_ctr;  // * ctb_size;
1958 
1959         /* reset the map idx for current ctb */
1960         col_pu_map_idx = 0;
1961         num_pus_in_ctb = 0;
1962 
1963         /* reset the map buffer to 0*/
1964 
1965         memset(
1966             &ps_ctxt->au1_nbr_ctb_map[0][0],
1967             0,
1968             (MAX_PU_IN_CTB_ROW + 1 + 8) * (MAX_PU_IN_CTB_ROW + 1 + 8));
1969 
1970         /* set the CTB neighbour availability flags */
1971         ihevce_set_ctb_nbr(
1972             &ps_ctb_out->s_ctb_nbr_avail_flags,
1973             ps_ctxt->pu1_ctb_nbr_map,
1974             ps_ctxt->i4_nbr_map_strd,
1975             ctb_ctr,
1976             vert_ctr,
1977             ps_frm_ctb_prms);
1978 
1979         /* -------- update the cur CTB offsets for inter prediction-------- */
1980         ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = ctb_ctr * ctb_size;
1981         ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = vert_ctr * ctb_size;
1982 
1983         /* -------- update the cur CTB offsets for MV prediction-------- */
1984         ps_ctxt->s_mv_pred_ctxt.i4_ctb_x = ctb_ctr;
1985         ps_ctxt->s_mv_pred_ctxt.i4_ctb_y = vert_ctr;
1986 
1987         /* -------------- Boundary Strength Initialization ----------- */
1988         if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1989         {
1990             ihevce_bs_init_ctb(&ps_ctxt->s_deblk_bs_prms, ps_frm_ctb_prms, ctb_ctr, vert_ctr);
1991         }
1992 
1993         /* -------- update cur CTB offsets for entropy rdopt context------- */
1994         ihevce_entropy_rdo_ctb_init(&ps_ctxt->s_rdopt_entropy_ctxt, ctb_ctr, vert_ctr);
1995 
1996         /* --------- CU Recursion --------------- */
1997 
1998         {
1999 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2000             WORD32 i4_max_tree_depth = 4;
2001 #endif
2002             WORD32 i4_tree_depth = 0;
2003             /* Init no. of CU in CTB to 0*/
2004             ps_ctb_out->u1_num_cus_in_ctb = 0;
2005 
2006 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2007             if(ps_ctxt->i4_bitrate_instance_num == 0)
2008             {
2009                 WORD32 i4_max_tree_depth = 4;
2010                 WORD32 i;
2011                 for(i = 0; i < i4_max_tree_depth; i++)
2012                 {
2013                     COPY_CABAC_STATES(
2014                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2015                         &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2016                         IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2017                 }
2018             }
2019 #else
2020             if(ps_ctxt->i4_bitrate_instance_num == 0)
2021             {
2022                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2023                 {
2024                     WORD32 i4_max_tree_depth = 4;
2025                     WORD32 i;
2026                     for(i = 0; i < i4_max_tree_depth; i++)
2027                     {
2028                         COPY_CABAC_STATES(
2029                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2030                             &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2031                             IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2032                     }
2033                 }
2034             }
2035 
2036 #endif
2037             if(ps_ctxt->i4_bitrate_instance_num == 0)
2038             {
2039                 /* FOR I- PIC populate the curr_ctb accordingly */
2040                 if(ISLICE == ps_ctxt->i1_slice_type)
2041                 {
2042                     ps_ctb_ipe_analyse->ps_cu_tree_root = ps_cu_tree_analyse;
2043                     ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2044 
2045                     ihevce_populate_cu_tree(
2046                         ps_ctb_ipe_analyse,
2047                         ps_cu_tree_analyse,
2048                         0,
2049                         (IHEVCE_QUALITY_CONFIG_T)ps_ctxt->i4_quality_preset,
2050                         POS_NA,
2051                         POS_NA,
2052                         POS_NA);
2053                 }
2054             }
2055             ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2056             ps_ctxt->ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2057             ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2058 
2059             s_cu_update_prms.ppu1_row_ecd_data = &pu1_row_ecd_data;
2060             s_cu_update_prms.pi4_last_cu_pos_in_ctb = &last_cu_pos_in_ctb;
2061             s_cu_update_prms.pi4_last_cu_size = &last_cu_size;
2062             s_cu_update_prms.pi4_num_pus_in_ctb = &num_pus_in_ctb;
2063             s_cu_update_prms.pps_cu_final = &ps_cu_final;
2064             s_cu_update_prms.pps_row_pu = &ps_row_pu;
2065             s_cu_update_prms.pps_row_tu = &ps_row_tu;
2066             s_cu_update_prms.pu1_num_cus_in_ctb_out = &ps_ctb_out->u1_num_cus_in_ctb;
2067 
2068             // source satd computation
2069             /* compute the source 8x8 SATD for the current CTB */
2070             /* populate  pui4_source_satd in some structure and pass it inside */
2071             if(ps_ctxt->u1_enable_psyRDOPT)
2072             {
2073                 /* declare local variables */
2074                 WORD32 i;
2075                 WORD32 ctb_size;
2076                 WORD32 num_comp_had_blocks;
2077                 UWORD8 *pu1_l0_block;
2078                 WORD32 block_ht;
2079                 WORD32 block_wd;
2080                 WORD32 ht_offset;
2081                 WORD32 wd_offset;
2082 
2083                 WORD32 num_horz_blocks;
2084                 WORD32 had_block_size;
2085                 WORD32 total_had_block_size;
2086                 WORD16 pi2_residue_had_zscan[64];
2087                 UWORD8 ai1_zeros_buffer[64];
2088 
2089                 WORD32 index_satd;
2090                 WORD32 is_hbd;
2091                 /* initialize the variables */
2092                 block_ht = cur_ctb_ht;
2093                 block_wd = cur_ctb_wd;
2094 
2095                 is_hbd = ps_ctxt->u1_is_input_data_hbd;
2096 
2097                 had_block_size = 8;
2098                 total_had_block_size = had_block_size * had_block_size;
2099 
2100                 for(i = 0; i < total_had_block_size; i++)
2101                 {
2102                     ai1_zeros_buffer[i] = 0;
2103                 }
2104 
2105                 ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
2106                 num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
2107 
2108                 num_horz_blocks = block_wd / had_block_size;  //ctb_width / had_block_size;
2109                 ht_offset = -had_block_size;
2110                 wd_offset = -had_block_size;
2111 
2112                 index_satd = 0;
2113                 /* Loop over all 8x8 blocks in the CTB */
2114                 for(i = 0; i < num_comp_had_blocks; i++)
2115                 {
2116                     if(i % num_horz_blocks == 0)
2117                     {
2118                         wd_offset = -had_block_size;
2119                         ht_offset += had_block_size;
2120                     }
2121                     wd_offset += had_block_size;
2122 
2123                     if(!is_hbd)
2124                     {
2125                         /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2126                         pu1_l0_block = s_cu_prms.pu1_luma_src +
2127                                        ps_curr_src_bufs->i4_y_strd * ht_offset + wd_offset;
2128 
2129                         ps_ctxt->ai4_source_satd_8x8[index_satd] =
2130 
2131                             ps_ctxt->s_cmn_opt_func.pf_AC_HAD_8x8_8bit(
2132                                 pu1_l0_block,
2133                                 ps_curr_src_bufs->i4_y_strd,
2134                                 ai1_zeros_buffer,
2135                                 had_block_size,
2136                                 pi2_residue_had_zscan,
2137                                 had_block_size);
2138                     }
2139                     index_satd++;
2140                 }
2141             }
2142 
2143             if(ps_ctxt->u1_enable_psyRDOPT)
2144             {
2145                 /* declare local variables */
2146                 WORD32 i;
2147                 WORD32 ctb_size;
2148                 WORD32 num_comp_had_blocks;
2149                 UWORD8 *pu1_l0_block;
2150                 UWORD8 *pu1_l0_block_prev = NULL;
2151                 WORD32 block_ht;
2152                 WORD32 block_wd;
2153                 WORD32 ht_offset;
2154                 WORD32 wd_offset;
2155 
2156                 WORD32 num_horz_blocks;
2157                 WORD32 had_block_size;
2158                 WORD16 pi2_residue_had[64];
2159                 UWORD8 ai1_zeros_buffer[64];
2160                 WORD32 index_satd = 0;
2161 
2162                 WORD32 is_hbd;
2163                 is_hbd = ps_ctxt->u1_is_input_data_hbd;  // 8 bit
2164 
2165                 /* initialize the variables */
2166                 /* change this based on the bit depth */
2167                 // ps_ctxt->u1_chroma_array_type
2168                 if(ps_ctxt->u1_chroma_array_type == 1)
2169                 {
2170                     block_ht = cur_ctb_ht / 2;
2171                     block_wd = cur_ctb_wd / 2;
2172                 }
2173                 else
2174                 {
2175                     block_ht = cur_ctb_ht;
2176                     block_wd = cur_ctb_wd / 2;
2177                 }
2178 
2179                 had_block_size = 4;
2180                 memset(ai1_zeros_buffer, 0, 64 * sizeof(UWORD8));
2181 
2182                 ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
2183                 num_comp_had_blocks = 2 * ctb_size / (had_block_size * had_block_size);
2184 
2185                 num_horz_blocks = 2 * block_wd / had_block_size;  //ctb_width / had_block_size;
2186                 ht_offset = -had_block_size;
2187                 wd_offset = -had_block_size;
2188 
2189                 if(!is_hbd)
2190                 {
2191                     /* loop over for every 4x4 blocks in the CU for Cb */
2192                     for(i = 0; i < num_comp_had_blocks; i++)
2193                     {
2194                         if(i % num_horz_blocks == 0)
2195                         {
2196                             wd_offset = -had_block_size;
2197                             ht_offset += had_block_size;
2198                         }
2199                         wd_offset += had_block_size;
2200 
2201                         /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2202                         if(i % 2 != 0)
2203                         {
2204                             if(!is_hbd)
2205                             {
2206                                 pu1_l0_block = pu1_l0_block_prev + 1;
2207                             }
2208                         }
2209                         else
2210                         {
2211                             if(!is_hbd)
2212                             {
2213                                 pu1_l0_block = s_cu_prms.pu1_chrm_src +
2214                                                s_cu_prms.i4_chrm_src_stride * ht_offset + wd_offset;
2215                                 pu1_l0_block_prev = pu1_l0_block;
2216                             }
2217                         }
2218 
2219                         if(had_block_size == 4)
2220                         {
2221                             if(!is_hbd)
2222                             {
2223                                 ps_ctxt->ai4_source_chroma_satd[index_satd] =
2224                                     ps_ctxt->s_cmn_opt_func.pf_chroma_AC_HAD_4x4_8bit(
2225                                         pu1_l0_block,
2226                                         s_cu_prms.i4_chrm_src_stride,
2227                                         ai1_zeros_buffer,
2228                                         had_block_size,
2229                                         pi2_residue_had,
2230                                         had_block_size);
2231                             }
2232 
2233                             index_satd++;
2234 
2235                         }  // block size of 4x4
2236 
2237                     }  // for all blocks
2238 
2239                 }  // is hbd check
2240             }
2241 
2242             ihevce_cu_recurse_decide(
2243                 ps_ctxt,
2244                 &s_cu_prms,
2245                 ps_cu_tree_analyse,
2246                 ps_cu_tree_analyse,
2247                 ps_ctb_ipe_analyse,
2248                 ps_cu_me_data,
2249                 &ps_ctb_col_pu,
2250                 &s_cu_update_prms,
2251                 pu1_row_pu_map,
2252                 &col_pu_map_idx,
2253                 i4_tree_depth,
2254                 ctb_ctr << 6,
2255                 vert_ctr << 6,
2256                 cur_ctb_ht);
2257 
2258             if(ps_ctxt->i1_slice_type != ISLICE)
2259             {
2260                 ASSERT(
2261                     (cur_ctb_wd * cur_ctb_ht) <=
2262                     ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree_analyse));
2263             }
2264             /*If Sub pic rc is enabled*/
2265             if(1 == ps_ctxt->i4_sub_pic_level_rc)
2266             {
2267                 /*In a row, after the required CTB is reached, send data and query scale from Bit Control thread */
2268                 ihevce_sub_pic_rc_in_data(
2269                     (void *)ps_multi_thrd_ctxt,
2270                     (void *)ps_ctxt,
2271                     (void *)ps_ctb_ipe_analyse,
2272                     (void *)ps_frm_ctb_prms);
2273             }
2274 
2275             ps_ctxt->ps_enc_out_ctxt->u1_cu_size = 128;
2276 
2277         } /* End of CU recursion block */
2278 
2279 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2280         {
2281             ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2282             enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2283             ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2284 
2285             do
2286             {
2287                 ihevce_update_final_cu_results(
2288                     ps_ctxt,
2289                     ps_enc_out_ctxt,
2290                     ps_cu_prms,
2291                     NULL, /* &ps_ctb_col_pu */
2292                     NULL, /* &col_pu_map_idx */
2293                     &s_cu_update_prms,
2294                     ctb_ctr,
2295                     vert_ctr);
2296 
2297                 ps_enc_out_ctxt++;
2298 
2299                 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2300 
2301             } while(ps_enc_out_ctxt->u1_cu_size != 128);
2302         }
2303 #else
2304         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2305         {
2306             ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2307             enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2308             ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2309 
2310             do
2311             {
2312                 ihevce_update_final_cu_results(
2313                     ps_ctxt,
2314                     ps_enc_out_ctxt,
2315                     ps_cu_prms,
2316                     NULL, /* &ps_ctb_col_pu */
2317                     NULL, /* &col_pu_map_idx */
2318                     &s_cu_update_prms,
2319                     ctb_ctr,
2320                     vert_ctr);
2321 
2322                 ps_enc_out_ctxt++;
2323 
2324                 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2325 
2326             } while(ps_enc_out_ctxt->u1_cu_size != 128);
2327         }
2328 #endif
2329 
2330         /* --- ctb level copy of data to left buffers--*/
2331         ((pf_enc_loop_ctb_left_copy)ps_ctxt->pv_enc_loop_ctb_left_copy)(ps_ctxt, &s_cu_prms);
2332 
2333         if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2334         {
2335             /* For the Unaligned CTB, make the invalid edge boundary strength 0 */
2336             ihevce_bs_clear_invalid(
2337                 &ps_ctxt->s_deblk_bs_prms,
2338                 last_ctb_row_flag,
2339                 (ctb_ctr == (num_ctbs_horz_pic - 1)),
2340                 last_hz_ctb_wd,
2341                 last_vt_ctb_ht);
2342 
2343             /* -----------------Read boundary strengths for current CTB------------- */
2344 
2345             if((0 == ps_ctxt->i4_deblock_type) && (ps_ctxt->i4_deblk_pad_hpel_cur_pic))
2346             {
2347                 /*Storing boundary strengths of current CTB*/
2348                 UWORD32 *pu4_bs_horz = &ps_ctxt->s_deblk_bs_prms.au4_horz_bs[0];
2349                 UWORD32 *pu4_bs_vert = &ps_ctxt->s_deblk_bs_prms.au4_vert_bs[0];
2350 
2351                 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_vert, pu4_bs_vert, (ctb_size * 4) / 8);
2352                 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_horz, pu4_bs_horz, (ctb_size * 4) / 8);
2353             }
2354             //Increment for storing next CTB info
2355             s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2356                 (ctb_size >> 3);  //one vertical edge per 8x8 block
2357             s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2358                 (ctb_size >> 3);  //one horizontal edge per 8x8 block
2359         }
2360 
2361         /* -------------- ctb level updates ----------------- */
2362         ps_row_cu += ps_ctb_out->u1_num_cus_in_ctb;
2363 
2364         pu1_row_pu_map += (ctb_size >> 2) * (ctb_size >> 2);
2365 
2366         /* first ctb offset will be populated by the caller */
2367         if(0 != ctb_ctr)
2368         {
2369             pu4_pu_offsets[ctb_ctr] = pu4_pu_offsets[ctb_ctr - 1] + num_pus_in_ctb;
2370         }
2371         pu2_num_pu_map[ctb_ctr] = num_pus_in_ctb;
2372         ASSERT(ps_ctb_out->u1_num_cus_in_ctb != 0);
2373 
2374         ps_ctb_in++;
2375         ps_ctb_out++;
2376     }
2377 
2378     /* ---------- Encloop end of row updates ----------------- */
2379 
2380     /* at the end of row processing cu pixel counter is set to */
2381     /* (num ctb * ctbsize) + ctb size                          */
2382     /* this is to set the dependency for right most cu of last */
2383     /* ctb's top right data dependency                         */
2384     /* this even takes care of entropy dependency for          */
2385     /* incomplete ctb as well                                  */
2386     ihevce_dmgr_set_row_row_sync(
2387         pv_dep_mngr_enc_loop_cu_top_right,
2388         (ctb_ctr * ctb_size + ctb_size),
2389         vert_ctr,
2390         ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2391 
2392     ps_ctxt->s_sao_ctxt_t.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
2393 
2394     /* Restore structure.
2395     Getting the address of stored-BS and Qp-map and other info */
2396     memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
2397     {
2398         /* Update the pointers to the tile start */
2399         s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2400             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one vertical edge per 8x8 block
2401         s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2402             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one horizontal edge per 8x8 block
2403         s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
2404     }
2405 
2406 #if PROFILE_ENC_REG_DATA
2407     s_profile.u8_enc_reg_data[vert_ctr] = 0;
2408 #endif
2409 
2410     /* -- Loop over all the CTBs in a row for Deblocking and Subpel gen --- */
2411     if(!ps_ctxt->u1_is_input_data_hbd)
2412     {
2413         WORD32 last_col_pic, last_col_tile;
2414 
2415         for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2416         {
2417             /* store the ctb level prms in cu prms */
2418             s_cu_prms.i4_ctb_pos = ctb_ctr;
2419             s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
2420             s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
2421 
2422             s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
2423             s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
2424             s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
2425 
2426             s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
2427 
2428             s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
2429 
2430             /* If last ctb in the horizontal row */
2431             if(ctb_ctr == (num_ctbs_horz_pic - 1))
2432             {
2433                 last_col_pic = 1;
2434             }
2435             else
2436             {
2437                 last_col_pic = 0;
2438             }
2439 
2440             /* If last ctb in the tile row */
2441             if(ctb_ctr == (ctb_end - 1))
2442             {
2443                 last_col_tile = 1;
2444             }
2445             else
2446             {
2447                 last_col_tile = 0;
2448             }
2449 
2450             if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2451             {
2452                 /* for last ctb of a row check top instead of top right */
2453                 if(((ctb_ctr + 1) == ctb_end) && (vert_ctr > 0))
2454                 {
2455                     dblk_offset = 1;
2456                 }
2457                 /* Wait till top neighbour CTB has done its deblocking*/
2458                 ihevce_dmgr_chk_row_row_sync(
2459                     pv_dep_mngr_enc_loop_dblk,
2460                     ctb_ctr,
2461                     dblk_offset,
2462                     dblk_check_dep_pos,
2463                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2464                     ps_ctxt->thrd_id);
2465 
2466                 if((0 == ps_ctxt->i4_deblock_type))
2467                 {
2468                     /* Populate Qp-map */
2469                     if(ctb_start == ctb_ctr)
2470                     {
2471                         ihevce_deblk_populate_qp_map(
2472                             ps_ctxt,
2473                             &s_deblk_ctb_row_params,
2474                             ps_ctb_out_dblk,
2475                             vert_ctr,
2476                             ps_frm_ctb_prms,
2477                             ps_tile_params);
2478                     }
2479                     ps_ctxt->s_deblk_prms.i4_ctb_size = ctb_size;
2480 
2481                     /* recon pointers and stride */
2482                     ps_ctxt->s_deblk_prms.pu1_ctb_y = s_cu_prms.pu1_luma_recon;
2483                     ps_ctxt->s_deblk_prms.pu1_ctb_uv = s_cu_prms.pu1_chrm_recon;
2484                     ps_ctxt->s_deblk_prms.i4_luma_pic_stride = s_cu_prms.i4_luma_recon_stride;
2485                     ps_ctxt->s_deblk_prms.i4_chroma_pic_stride = s_cu_prms.i4_chrm_recon_stride;
2486 
2487                     ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge = (0 == vert_ctr) ? 0 : 1;
2488                     {
2489                         ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge =
2490                             (ps_tile_params->i4_first_ctb_y == vert_ctr) ? 0 : 1;
2491                     }
2492                     ps_ctxt->s_deblk_prms.i4_deblock_left_ctb_edge = (ctb_start == ctb_ctr) ? 0 : 1;
2493                     //or according to slice boundary. Support yet to be added !!!!
2494 
2495                     ihevce_deblk_ctb(
2496                         &ps_ctxt->s_deblk_prms, last_col_tile, &s_deblk_ctb_row_params);
2497 
2498                     //Increment for storing next CTB info
2499                     s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2500                         (ctb_size >> 3);  //one vertical edge per 8x8 block
2501                     s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2502                         (ctb_size >> 3);  //one horizontal edge per 8x8 block
2503                     s_deblk_ctb_row_params.pi1_ctb_row_qp +=
2504                         (ctb_size >> 2);  //one qp per 4x4 block.
2505                 }
2506             }  // end of if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2507 
2508             /* update the number of ctbs deblocked for this row */
2509             ihevce_dmgr_set_row_row_sync(
2510                 pv_dep_mngr_enc_loop_dblk,
2511                 (ctb_ctr + 1),
2512                 vert_ctr,
2513                 ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2514 
2515         }  //end of loop over CTBs in current CTB-row
2516 
2517         /* Apply SAO over the previous CTB-row */
2518         for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2519         {
2520             if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2521                ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2522             {
2523                 sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2524 
2525                 if(vert_ctr > ps_tile_params->i4_first_ctb_y)
2526                 {
2527                     /*For last ctb check top dep only*/
2528                     if((vert_ctr > 1) && ((ctb_ctr + 1) == ctb_end))
2529                     {
2530                         sao_offset = 1;
2531                     }
2532 
2533                     ihevce_dmgr_chk_row_row_sync(
2534                         pv_dep_mngr_enc_loop_sao,
2535                         ctb_ctr,
2536                         sao_offset,
2537                         sao_check_dep_pos,
2538                         ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2539                         ps_ctxt->thrd_id);
2540 
2541                     /* Call the sao function to do sao for the current ctb*/
2542 
2543                     /* Register the curr ctb's x pos in sao context*/
2544                     ps_sao_ctxt->i4_ctb_x = ctb_ctr;
2545 
2546                     /* Register the curr ctb's y pos in sao context*/
2547                     ps_sao_ctxt->i4_ctb_y = vert_ctr - 1;
2548 
2549                     ps_ctb_out_sao = ps_sao_ctxt->ps_ctb_out +
2550                                      (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz + ctb_ctr;
2551                     ps_sao_ctxt->ps_sao = &ps_ctb_out_sao->s_sao;
2552                     ps_sao_ctxt->i4_sao_blk_wd = ctb_size;
2553                     ps_sao_ctxt->i4_sao_blk_ht = ctb_size;
2554 
2555                     ps_sao_ctxt->i4_is_last_ctb_row = 0;
2556                     ps_sao_ctxt->i4_is_last_ctb_col = 0;
2557 
2558                     if((ctb_ctr + 1) == ctb_end)
2559                     {
2560                         ps_sao_ctxt->i4_is_last_ctb_col = 1;
2561                         ps_sao_ctxt->i4_sao_blk_wd =
2562                             ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2563                                         ps_tile_params->i4_curr_tile_width);
2564                     }
2565 
2566                     /* Calculate the recon buf pointer and stride for the current ctb */
2567                     ps_sao_ctxt->pu1_cur_luma_recon_buf =
2568                         ps_sao_ctxt->pu1_frm_luma_recon_buf +
2569                         (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2570                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2571 
2572                     ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2573 
2574                     ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2575                         ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2576                         (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2577                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2578                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2579 
2580                     ps_sao_ctxt->i4_cur_chroma_recon_stride =
2581                         ps_sao_ctxt->i4_frm_chroma_recon_stride;
2582 
2583                     ps_sao_ctxt->pu1_cur_luma_src_buf =
2584                         ps_sao_ctxt->pu1_frm_luma_src_buf +
2585                         (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2586                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2587 
2588                     ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2589 
2590                     ps_sao_ctxt->pu1_cur_chroma_src_buf =
2591                         ps_sao_ctxt->pu1_frm_chroma_src_buf +
2592                         (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2593                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2594                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2595 
2596                     ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2597 
2598                     /* Calculate the pointer to buff to store the (x,y)th sao
2599                     * for the top merge of (x,y+1)th ctb
2600                     */
2601                     ps_sao_ctxt->ps_top_ctb_sao =
2602                         &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2603                                                          [ps_sao_ctxt->i4_ctb_x +
2604                                                           (ps_sao_ctxt->i4_ctb_y) *
2605                                                               ps_frm_ctb_prms->i4_num_ctbs_horz +
2606                                                           (ps_ctxt->i4_bitrate_instance_num *
2607                                                            ps_sao_ctxt->i4_num_ctb_units)];
2608 
2609                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2610                     ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2611                         ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2612                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2613                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2614                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2615                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2616 
2617                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2618                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2619                         ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2620                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2621                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2622                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2623                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2624 
2625                     {
2626                         UWORD32 u4_ctb_sao_bits;
2627 
2628                         ihevce_sao_analyse(
2629                             &ps_ctxt->s_sao_ctxt_t,
2630                             ps_ctb_out_sao,
2631                             &u4_ctb_sao_bits,
2632                             ps_tile_params);
2633                         ps_ctxt
2634                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2635                                                      [ps_ctxt->i4_bitrate_instance_num]
2636                             ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2637                         ps_ctxt
2638                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2639                                                      [ps_ctxt->i4_bitrate_instance_num]
2640                             ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2641                     }
2642                     /** Subpel generation not done for non-ref picture **/
2643                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2644                     {
2645                         /* Recon Padding */
2646                         ihevce_recon_padding(
2647                             ps_pad_interp_recon,
2648                             ctb_ctr,
2649                             vert_ctr - 1,
2650                             ps_frm_ctb_prms,
2651                             ps_ctxt->ps_func_selector);
2652                     }
2653                     /* update the number of SAO ctbs for this row */
2654                     ihevce_dmgr_set_row_row_sync(
2655                         pv_dep_mngr_enc_loop_sao,
2656                         ctb_ctr + 1,
2657                         vert_ctr - 1,
2658                         ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2659                 }
2660             }
2661             else  //SAO Disabled
2662             {
2663                 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2664                 {
2665                     /* Recon Padding */
2666                     ihevce_recon_padding(
2667                         ps_pad_interp_recon,
2668                         ctb_ctr,
2669                         vert_ctr,
2670                         ps_frm_ctb_prms,
2671                         ps_ctxt->ps_func_selector);
2672                 }
2673             }
2674         }  // end of SAO for loop
2675 
2676         /* Call the sao function again for the last ctb row of frame */
2677         if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2678            ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2679         {
2680             sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2681 
2682             if(vert_ctr ==
2683                (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1))
2684             {
2685                 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2686                 {
2687                     /* Register the curr ctb's x pos in sao context*/
2688                     ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
2689 
2690                     /* Register the curr ctb's y pos in sao context*/
2691                     ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
2692 
2693                     ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
2694                                      vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz + ctb_ctr;
2695 
2696                     ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
2697 
2698                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
2699                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 0;
2700 
2701                     if((ctb_ctr + 1) == ctb_end)
2702                     {
2703                         ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
2704                         ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
2705                             ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2706                                         ps_tile_params->i4_curr_tile_width);
2707                     }
2708 
2709                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
2710                         ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
2711                                     ps_tile_params->i4_curr_tile_height);
2712 
2713                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
2714 
2715                     /* Calculate the recon buf pointer and stride for the current ctb */
2716                     ps_sao_ctxt->pu1_cur_luma_recon_buf =
2717                         ps_sao_ctxt->pu1_frm_luma_recon_buf +
2718                         (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2719                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2720 
2721                     ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2722 
2723                     ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2724                         ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2725                         (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2726                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2727                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2728 
2729                     ps_sao_ctxt->i4_cur_chroma_recon_stride =
2730                         ps_sao_ctxt->i4_frm_chroma_recon_stride;
2731 
2732                     ps_sao_ctxt->pu1_cur_luma_src_buf =
2733                         ps_sao_ctxt->pu1_frm_luma_src_buf +
2734                         (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2735                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2736 
2737                     ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2738 
2739                     ps_sao_ctxt->pu1_cur_chroma_src_buf =
2740                         ps_sao_ctxt->pu1_frm_chroma_src_buf +
2741                         (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2742                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2743                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2744 
2745                     ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2746 
2747                     /* Calculate the pointer to buff to store the (x,y)th sao
2748                     * for the top merge of (x,y+1)th ctb
2749                     */
2750                     ps_sao_ctxt->ps_top_ctb_sao =
2751                         &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2752                                                          [ps_sao_ctxt->i4_ctb_x +
2753                                                           (ps_sao_ctxt->i4_ctb_y) *
2754                                                               ps_frm_ctb_prms->i4_num_ctbs_horz +
2755                                                           (ps_ctxt->i4_bitrate_instance_num *
2756                                                            ps_sao_ctxt->i4_num_ctb_units)];
2757 
2758                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2759                     ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2760                         ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2761                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2762                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2763                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2764                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2765 
2766                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2767                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2768                         ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2769                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2770                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2771                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2772                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2773 
2774                     {
2775                         UWORD32 u4_ctb_sao_bits;
2776                         ihevce_sao_analyse(
2777                             &ps_ctxt->s_sao_ctxt_t,
2778                             ps_ctb_out_sao,
2779                             &u4_ctb_sao_bits,
2780                             ps_tile_params);
2781                         ps_ctxt
2782                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2783                                                      [ps_ctxt->i4_bitrate_instance_num]
2784                             ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2785                         ps_ctxt
2786                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2787                                                      [ps_ctxt->i4_bitrate_instance_num]
2788                             ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2789                     }
2790                     /** Subpel generation not done for non-ref picture **/
2791                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2792                     {
2793                         /* Recon Padding */
2794                         ihevce_recon_padding(
2795                             ps_pad_interp_recon,
2796                             ctb_ctr,
2797                             vert_ctr,
2798                             ps_frm_ctb_prms,
2799                             ps_ctxt->ps_func_selector);
2800                     }
2801                 }
2802             }  //end of loop over CTBs in current CTB-row
2803         }
2804 
2805         /* Subpel Plane Generation*/
2806         for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2807         {
2808             if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2809                ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2810             {
2811                 if(0 != vert_ctr)
2812                 {
2813                     /** Subpel generation not done for non-ref picture **/
2814                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2815                     {
2816                         /* Padding and Subpel Plane Generation */
2817                         ihevce_pad_interp_recon_ctb(
2818                             ps_pad_interp_recon,
2819                             ctb_ctr,
2820                             vert_ctr - 1,
2821                             ps_ctxt->i4_quality_preset,
2822                             ps_frm_ctb_prms,
2823                             ps_ctxt->ai2_scratch,
2824                             ps_ctxt->i4_bitrate_instance_num,
2825                             ps_ctxt->ps_func_selector);
2826                     }
2827                 }
2828             }
2829             else
2830             {  // SAO Disabled
2831                 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2832                 {
2833                     /* Padding and Subpel Plane Generation */
2834                     ihevce_pad_interp_recon_ctb(
2835                         ps_pad_interp_recon,
2836                         ctb_ctr,
2837                         vert_ctr,
2838                         ps_ctxt->i4_quality_preset,
2839                         ps_frm_ctb_prms,
2840                         ps_ctxt->ai2_scratch,
2841                         ps_ctxt->i4_bitrate_instance_num,
2842                         ps_ctxt->ps_func_selector);
2843                 }
2844             }
2845         }
2846 
2847         {
2848             if(!ps_ctxt->i4_bitrate_instance_num)
2849             {
2850                 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2851                    ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2852                 {
2853                     /* If SAO is on, then signal completion of previous CTB row */
2854                     if(0 != vert_ctr)
2855                     {
2856                         {
2857                             WORD32 post_ctb_ctr;
2858 
2859                             for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2860                             {
2861                                 ihevce_dmgr_map_set_sync(
2862                                     pv_dep_mngr_me_dep_encloop,
2863                                     post_ctb_ctr,
2864                                     (vert_ctr - 1),
2865                                     MAP_CTB_COMPLETE);
2866                             }
2867                         }
2868                     }
2869                 }
2870                 else
2871                 {
2872                     {
2873                         WORD32 post_ctb_ctr;
2874 
2875                         for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2876                         {
2877                             ihevce_dmgr_map_set_sync(
2878                                 pv_dep_mngr_me_dep_encloop,
2879                                 post_ctb_ctr,
2880                                 vert_ctr,
2881                                 MAP_CTB_COMPLETE);
2882                         }
2883                     }
2884                 }
2885             }
2886         }
2887 
2888         /*process last ctb row*/
2889         if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2890            ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2891         {
2892             sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2893 
2894             if(vert_ctr ==
2895                (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1))
2896             {
2897                 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2898                 {
2899                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2900                     {
2901                         /* Padding and Subpel Plane Generation */
2902                         ihevce_pad_interp_recon_ctb(
2903                             ps_pad_interp_recon,
2904                             ctb_ctr,
2905                             vert_ctr,
2906                             ps_ctxt->i4_quality_preset,
2907                             ps_frm_ctb_prms,
2908                             ps_ctxt->ai2_scratch,
2909                             ps_ctxt->i4_bitrate_instance_num,
2910                             ps_ctxt->ps_func_selector);
2911                     }
2912                 }
2913             }
2914             /* If SAO is on, then signal completion of the last CTB row of frame */
2915             {
2916                 if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
2917                 {
2918                     if(!ps_ctxt->i4_bitrate_instance_num)
2919                     {
2920                         {
2921                             WORD32 post_ctb_ctr;
2922 
2923                             for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2924                             {
2925                                 ihevce_dmgr_map_set_sync(
2926                                     pv_dep_mngr_me_dep_encloop,
2927                                     post_ctb_ctr,
2928                                     vert_ctr,
2929                                     MAP_CTB_COMPLETE);
2930                             }
2931                         }
2932                     }
2933                 }
2934             }
2935         }
2936     }
2937 
2938     return;
2939 }
2940 
2941 /*!
2942 ******************************************************************************
2943 * \if Function name : ihevce_enc_loop_process \endif
2944 * \brief Frame level enc_loop entry point for one thread: fetches CTB-row jobs from the
2945 *    job queue and calls ihevce_enc_loop_process_row() on each until no job is returned
2946 * \param[in] pv_ctxt : enc_loop master context (cast to ihevce_enc_loop_master_ctxt_t *)
2947 * \param[in] ps_curr_inp : current input buffer; source YUV and picture flags come from s_lap_out
2948 * \param[in] ps_ctb_in : frame level CTB analyse buffer (output of ME/IPE), indexed per row here
2949 * \param[in] ps_ipe_analyse : frame level IPE L0 analyse buffer, indexed per CTB row here
2950 * \param[out] ps_frm_recon : recon picture buffer (recon YUV, sub-pel planes, col MV, PU maps)
2951 * \param ps_cu_tree_out : frame level CU tree buffer (MAX_NUM_NODES_CU_TREE nodes per CTB)
2952 * \param[out] ps_ctb_out : frame level CTB output buffer, also registered in the SAO context
2953 * \param[out] ps_cu_out : frame level CU output buffer (i4_max_cus_in_row entries per row)
2954 * \param[out] ps_tu_out : frame level TU output buffer (i4_max_tus_in_row entries per row)
2955 * \param ps_pu_out : frame level PU buffer (i4_max_pus_in_row entries per row)
2956 * \param pu1_frm_ecd_data : frame level coeff (ecd data) buffer
2957 * \param[in] ps_frm_ctb_prms : frame level CTB parameters (CTB size, counts, tile id map)
2958 * \param[in] ps_frm_lamda : frame level lambda params
2959 * \param[in] ps_multi_thrd_ctxt : multi thread context handed to the job queue calls
2960 * \param[in] thrd_id : index of this thread's context in aps_enc_loop_thrd_ctxt
2961 * \param[in] i4_enc_frm_id : frame id handed to the job queue calls
2962 * \param[in] i4_pass : forwarded unchanged to ihevce_enc_loop_process_row()
2963 * \return None
2964 * Note : Currently the frame level calculations assume that the frame width
2965 *        of the input / recon is an exact multiple of the CTB size
2966 * \author Ittiam
2967 *****************************************************************************
2968 */
ihevce_enc_loop_process(void * pv_ctxt,ihevce_lap_enc_buf_t * ps_curr_inp,ctb_analyse_t * ps_ctb_in,ipe_l0_ctb_analyse_for_me_t * ps_ipe_analyse,recon_pic_buf_t * ps_frm_recon,cur_ctb_cu_tree_t * ps_cu_tree_out,ctb_enc_loop_out_t * ps_ctb_out,cu_enc_loop_out_t * ps_cu_out,tu_enc_loop_out_t * ps_tu_out,pu_t * ps_pu_out,UWORD8 * pu1_frm_ecd_data,frm_ctb_ctxt_t * ps_frm_ctb_prms,frm_lambda_ctxt_t * ps_frm_lamda,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,WORD32 thrd_id,WORD32 i4_enc_frm_id,WORD32 i4_pass)2969 void ihevce_enc_loop_process(
2970     void *pv_ctxt,
2971     ihevce_lap_enc_buf_t *ps_curr_inp,
2972     ctb_analyse_t *ps_ctb_in,
2973     ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse,
2974     recon_pic_buf_t *ps_frm_recon,
2975     cur_ctb_cu_tree_t *ps_cu_tree_out,
2976     ctb_enc_loop_out_t *ps_ctb_out,
2977     cu_enc_loop_out_t *ps_cu_out,
2978     tu_enc_loop_out_t *ps_tu_out,
2979     pu_t *ps_pu_out,
2980     UWORD8 *pu1_frm_ecd_data,
2981     frm_ctb_ctxt_t *ps_frm_ctb_prms,
2982     frm_lambda_ctxt_t *ps_frm_lamda,
2983     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
2984     WORD32 thrd_id,
2985     WORD32 i4_enc_frm_id,
2986     WORD32 i4_pass)
2987 {
2988     WORD32 vert_ctr; /* CTB row number of the current job; -1 once no job is left */
2989     WORD32 tile_col_idx; /* tile column index of the current job; -1 once no job is left */
2990     iv_enc_yuv_buf_t s_curr_src_bufs; /* source descriptor; row pointers refreshed per job */
2991     iv_enc_yuv_buf_t s_curr_recon_bufs; /* recon descriptor; row pointers refreshed per job */
2992     iv_enc_yuv_buf_src_t s_curr_recon_bufs_src;
2993     UWORD32 *pu4_pu_offsets;
2994     WORD32 end_of_frame; /* loop exit flag, set when the job queue returns NULL */
2995     UWORD8 *apu1_y_sub_pel_planes[3]; /* current-row pointers into the three sub-pel planes */
2996     pad_interp_recon_frm_t s_pad_interp_recon; /* frame level pointers handed to the row function */
2997     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_ctxt;
2998     /* context of the calling thread, selected by thrd_id */
2999     ihevce_enc_loop_ctxt_t *ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[thrd_id];
3000 
3001     WORD32 i4_bitrate_instance_num = ps_ctxt->i4_bitrate_instance_num;
3002 
3003     /* copy the frame level closed loop lambda params into the thread context */
3004     ps_ctxt->i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3005     ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3006     ps_ctxt->u4_chroma_cost_weighing_factor = ps_frm_lamda->u4_chroma_cost_weighing_factor;
3007     ps_ctxt->i4_satd_lamda = ps_frm_lamda->i4_cl_satd_lambda_qf;
3008     ps_ctxt->i4_sad_lamda = ps_frm_lamda->i4_cl_sad_type2_lambda_qf;
3009     ps_ctxt->thrd_id = thrd_id;
3010     ps_ctxt->u1_is_refPic = ps_curr_inp->s_lap_out.i4_is_ref_pic;
3011 
3012 #if DISABLE_SAO_WHEN_NOISY
3013     ps_ctxt->s_sao_ctxt_t.ps_ctb_data = ps_ctb_in;
3014     ps_ctxt->s_sao_ctxt_t.i4_ctb_data_stride = ps_frm_ctb_prms->i4_num_ctbs_horz;
3015 #endif
3016     /* NOTE(review): ps_func_selector is not declared inside this function - confirm it is visible when the macro below is enabled */
3017 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
3018     ps_ctxt->pv_err_func_selector = ps_func_selector;
3019 #endif
3020 
3021     /* Per the original note: Bit0 = current picture needs to be deblocked, padded and have its
3022     hpel planes generated; Bit1 = SAO is enabled (so that deblocking also runs when SAO is on).
3023     NOTE(review): the operator used below is logical '||', so the stored value is only 0 or 1 - confirm intent */
3024     ps_ctxt->i4_deblk_pad_hpel_cur_pic =
3025         (ps_frm_recon->i4_deblk_pad_hpel_cur_pic) ||
3026         ((ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
3027           ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
3028          << 1);
3029 
3030     /* Share all reference pictures with nbr clients. This flag will be used only
3031     in case of dist-enc mode */
3032     ps_ctxt->i4_share_flag = (ps_frm_recon->i4_is_reference != 0);
3033     ps_ctxt->pv_frm_recon = (void *)ps_frm_recon;
3034 
3035     /* Register the frame level ssd lambda for both luma and chroma in the SAO context */
3036     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3037     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3038 
3039     ihevce_populate_cl_cu_lambda_prms(
3040         ps_ctxt,
3041         ps_frm_lamda,
3042         (WORD32)ps_ctxt->i1_slice_type,
3043         ps_curr_inp->s_lap_out.i4_temporal_lyr_id,
3044         ENC_LOOP_LAMBDA_TYPE);
3045     /* intra evaluation is switched off only when all three conditions below hold */
3046     ps_ctxt->u1_disable_intra_eval = DISABLE_INTRA_IN_BPICS &&
3047                                      (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
3048                                      (ps_ctxt->i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE);
3049 
3050     end_of_frame = 0;
3051 
3052     /* ----------------------------------------------------- */
3053     /* store the stride and dimensions of source and recon   */
3054     /* buffer pointers will be overwritten at every CTB row  */
3055     /* ----------------------------------------------------- */
3056     memcpy(&s_curr_src_bufs, &ps_curr_inp->s_lap_out.s_input_buf, sizeof(iv_enc_yuv_buf_t));
3057 
3058     memcpy(&s_curr_recon_bufs, &ps_frm_recon->s_yuv_buf_desc, sizeof(iv_enc_yuv_buf_t));
3059 
3060     memcpy(&s_curr_recon_bufs_src, &ps_frm_recon->s_yuv_buf_desc_src, sizeof(iv_enc_yuv_buf_src_t));
3061 
3062     /* get the frame level pu offset pointer */
3063     pu4_pu_offsets = ps_frm_recon->pu4_pu_off;
3064 
3065     s_pad_interp_recon.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
3066 
3067     /* ------ Loop until the job queue has no more CTB row jobs ------ */
3068     while(0 == end_of_frame)
3069     {
3070         UWORD8 *pu1_tmp; /* scratch pointer reused for every row pointer derivation below */
3071         UWORD8 *pu1_row_pu_map;
3072         UWORD8 *pu1_row_ecd_data;
3073         ctb_analyse_t *ps_ctb_row_in;
3074         ctb_enc_loop_out_t *ps_ctb_row_out;
3075         cu_enc_loop_out_t *ps_row_cu;
3076         tu_enc_loop_out_t *ps_row_tu;
3077         pu_t *ps_row_pu;
3078         pu_col_mv_t *ps_row_col_pu;
3079         job_queue_t *ps_job;
3080         UWORD32 *pu4_pu_row_offsets;
3081         UWORD16 *pu2_num_pu_row;
3082 
3083         ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse;
3084         cur_ctb_cu_tree_t *ps_row_cu_tree;
3085         UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2); /* 1 when chroma array type is 2 */
3086 
3087         /* Get the next job of type (ENC_LOOP_JOB + bitrate instance) from the job queue */
3088         ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
3089             ps_multi_thrd_ctxt, ENC_LOOP_JOB + i4_bitrate_instance_num, 1, i4_enc_frm_id);
3090 
3091         /* Register the pointer to ctb out of the current frame in the SAO context */
3092         ps_ctxt->s_sao_ctxt_t.ps_ctb_out = ps_ctb_out;
3093 
3094         /* If no job is returned, set the end of frame flag to 1, */
3095         /* and the current row and tile column to -1 */
3096         if(NULL == ps_job)
3097         {
3098             vert_ctr = -1;
3099             tile_col_idx = -1;
3100             end_of_frame = 1;
3101         }
3102         else
3103         {
3104             ihevce_tile_params_t *ps_col_tile_params_temp;
3105             ihevce_tile_params_t *ps_tile_params;
3106             WORD32 i4_tile_id;
3107             /* the job handed out must be of this bitrate instance's enc_loop task type */
3108             ASSERT((ENC_LOOP_JOB + i4_bitrate_instance_num) == ps_job->i4_task_type);
3109             /* set the output dependency */
3110             ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_enc_frm_id);
3111 
3112             /* Obtain the current row's details from the job */
3113             vert_ctr = ps_job->s_job_info.s_enc_loop_job_info.i4_ctb_row_no;
3114             {
3115                 /* Obtain the current column tile index from the job */
3116                 tile_col_idx = ps_job->s_job_info.s_enc_loop_job_info.i4_tile_col_idx;
3117 
3118                 /* The tile parameters for the col. idx. Use only the properties
3119                 that are the same for all the bottom tiles like width, start_x, etc.
3120                 Don't use height, start_y, etc.                                  */
3121                 ps_col_tile_params_temp =
3122                     ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + tile_col_idx);
3123 
3124                 /* Derive actual tile_id from the tile id map at (vert_ctr, first CTB x of the column) */
3125                 i4_tile_id =
3126                     *(ps_frm_ctb_prms->pi4_tile_id_map +
3127                       vert_ctr * ps_frm_ctb_prms->i4_tile_id_ctb_map_stride +
3128                       ps_col_tile_params_temp->i4_first_ctb_x);
3129                 /* Derive pointer to current tile prms */
3130                 ps_tile_params =
3131                     ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + i4_tile_id);
3132             }
3133 
3134             ps_ctxt->i4_tile_col_idx = tile_col_idx;
3135             /* derive the current ctb row pointers */
3136 
3137             /* luma src: frame base plus the (start_offset_y, start_offset_x) offset */
3138             pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3139                       (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3140                        ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3141                       ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3142             /* advance by vert_ctr CTB rows */
3143             pu1_tmp +=
3144                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size *
3145                  ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd);
3146 
3147             s_curr_src_bufs.pv_y_buf = pu1_tmp;
3148             /* the chroma source row pointer is derived only when the input is not flagged as HBD */
3149             if(!ps_ctxt->u1_is_input_data_hbd)
3150             {
3151                 /* cb src: rows per CTB row = ctb_size when is_inp_422, ctb_size / 2 otherwise */
3152                 pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3153                 pu1_tmp +=
3154                     (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3155                      ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd);
3156 
3157                 s_curr_src_bufs.pv_u_buf = pu1_tmp;
3158             }
3159 
3160             /* luma recon */
3161             pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3162             pu1_tmp +=
3163                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3164 
3165             s_curr_recon_bufs.pv_y_buf = pu1_tmp;
3166             s_pad_interp_recon.pu1_luma_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3167             s_pad_interp_recon.i4_luma_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
3168             if(!ps_ctxt->u1_is_input_data_hbd)
3169             {
3170                 /* cb recon. NOTE(review): the chroma fields and i4_ctb_size of s_pad_interp_recon are assigned only inside this branch - confirm the HBD path */
3171                 pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3172                 pu1_tmp +=
3173                     (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3174                      ps_frm_recon->s_yuv_buf_desc.i4_uv_strd);
3175 
3176                 s_curr_recon_bufs.pv_u_buf = pu1_tmp;
3177                 s_pad_interp_recon.pu1_chrm_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3178                 s_pad_interp_recon.i4_chrm_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3179 
3180                 s_pad_interp_recon.i4_ctb_size = ps_frm_ctb_prms->i4_ctb_size;
3181 
3182                 /* Register the frame level source buffer pointers and strides in the SAO context */
3183                 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_src_buf =
3184                     (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3185                     (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3186                      ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3187                     ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3188 
3189                 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_src_stride =
3190                     ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd;
3191 
3192                 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_src_buf =
3193                     (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3194 
3195                 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_src_stride =
3196                     ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd;
3197             }
3198 
3199             /* Subpel planes hxfy, fxhy, hxhy: row pointers use the luma recon stride */
3200             pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[0];
3201             pu1_tmp +=
3202                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3203             apu1_y_sub_pel_planes[0] = pu1_tmp;
3204             s_pad_interp_recon.pu1_sbpel_hxfy = ps_frm_recon->apu1_y_sub_pel_planes[0];
3205 
3206             pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[1];
3207             pu1_tmp +=
3208                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3209             apu1_y_sub_pel_planes[1] = pu1_tmp;
3210             s_pad_interp_recon.pu1_sbpel_fxhy = ps_frm_recon->apu1_y_sub_pel_planes[1];
3211 
3212             pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[2];
3213             pu1_tmp +=
3214                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3215             apu1_y_sub_pel_planes[2] = pu1_tmp;
3216             s_pad_interp_recon.pu1_sbpel_hxhy = ps_frm_recon->apu1_y_sub_pel_planes[2];
3217 
3218             /* row level coeffs buffer: per row, (max TUs * 2) when is_inp_422 else (max TUs * 3 / 2), times MAX_SCAN_COEFFS_BYTES_4x4 */
3219             pu1_row_ecd_data =
3220                 pu1_frm_ecd_data +
3221                 (vert_ctr *
3222                  ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_max_tus_in_row << 1)
3223                                     : ((ps_frm_ctb_prms->i4_max_tus_in_row * 3) >> 1)) *
3224                  MAX_SCAN_COEFFS_BYTES_4x4);
3225 
3226             /* Row level CU buffer */
3227             ps_row_cu = ps_cu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_cus_in_row);
3228 
3229             /* Row level TU buffer */
3230             ps_row_tu = ps_tu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_tus_in_row);
3231 
3232             /* Row level PU buffer */
3233             ps_row_pu = ps_pu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row);
3234 
3235             /* Row level colocated PU buffer */
3236             /* ps_frm_col_mv has (i4_num_ctbs_horz + 1) CTBs for stride */
3237             ps_row_col_pu =
3238                 ps_frm_recon->ps_frm_col_mv + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3239                                                ps_frm_ctb_prms->i4_num_pus_in_ctb);
3240             /* Row level col PU map buffer */
3241             /* pu1_frm_pu_map has (i4_num_ctbs_horz + 1) CTBs for stride */
3242             pu1_row_pu_map =
3243                 ps_frm_recon->pu1_frm_pu_map + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3244                                                 ps_frm_ctb_prms->i4_num_pus_in_ctb);
3245             /* row ctb in pointer  */
3246             ps_ctb_row_in = ps_ctb_in + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3247 
3248             /* row ctb out pointer  */
3249             ps_ctb_row_out = ps_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3250 
3251             /* row number of PUs map pointer */
3252             pu2_num_pu_row =
3253                 ps_frm_recon->pu2_num_pu_map + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3254 
3255             /* row pu offsets pointer  */
3256             pu4_pu_row_offsets = pu4_pu_offsets + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3257             /* store the PU offset of the row's first CTB: vert_ctr * i4_max_pus_in_row */
3258             *pu4_pu_row_offsets = vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row;
3259             /* Initialize ptr to current IPE row */
3260             ps_row_ipe_analyse = ps_ipe_analyse + (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz);
3261 
3262             /* Initialize ptr to current row of the CU tree buffer */
3263             ps_row_cu_tree = ps_cu_tree_out +
3264                              (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE);
3265 
3266             /* Get the EncLoop Top-Right CU Dep Mngr */
3267             ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right =
3268                 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[ps_ctxt->i4_enc_frm_id]
3269                                                                    [i4_bitrate_instance_num];
3270             /* Get the EncLoop Deblock Dep Mngr */
3271             ps_ctxt->pv_dep_mngr_enc_loop_dblk =
3272                 ps_master_ctxt
3273                     ->aapv_dep_mngr_enc_loop_dblk[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
3274             /* Get the EncLoop Sao Dep Mngr */
3275             ps_ctxt->pv_dep_mngr_enc_loop_sao =
3276                 ps_master_ctxt
3277                     ->aapv_dep_mngr_enc_loop_sao[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
3278             /* context models (cabac state) slot of the current row in the master context */
3279             ps_ctxt->pu1_curr_row_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr][0];
3280 
3281             {
3282                 /* derive the pointers of top row buffers: row (vert_ctr - 1) of this bitrate instance's area. NOTE(review): for vert_ctr == 0 the address is one stride before that area - confirm it is never dereferenced */
3283                 ps_ctxt->pv_top_row_luma =
3284                     (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3285                     (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3286                     (vert_ctr - 1) * ps_ctxt->i4_top_row_luma_stride;
3287 
3288                 ps_ctxt->pv_top_row_chroma =
3289                     (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3290                     (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3291                     (vert_ctr - 1) * ps_ctxt->i4_top_row_chroma_stride;
3292 
3293                 /* derive the pointers of bottom row buffers (row vert_ctr of the same frame level buffers) to update current row data */
3294                 ps_ctxt->pv_bot_row_luma =
3295                     (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3296                     (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3297                     (vert_ctr)*ps_ctxt->i4_top_row_luma_stride;
3298 
3299                 ps_ctxt->pv_bot_row_chroma =
3300                     (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3301                     (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3302                     (vert_ctr)*ps_ctxt->i4_top_row_chroma_stride;
3303 
3304                 /* Register the frame level recon buffer pointers and strides in the SAO context */
3305                 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_recon_buf =
3306                     (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3307                 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_recon_stride =
3308                     ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
3309 
3310                 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_recon_buf =
3311                     (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3312                 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_recon_stride =
3313                     ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3314 
3315                 ps_ctxt->s_sao_ctxt_t.ps_rdopt_entropy_ctxt = &ps_ctxt->s_rdopt_entropy_ctxt;
3316                 /* SAO top buffer strides: CTB aligned width + 1 for luma, + 2 for chroma */
3317                 ps_ctxt->s_sao_ctxt_t.i4_frm_top_luma_buf_stride =
3318                     ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 1;
3319 
3320                 ps_ctxt->s_sao_ctxt_t.i4_frm_top_chroma_buf_stride =
3321                     ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 2;
3322             }
3323             /* top / bottom row nbr pointers: rows (vert_ctr - 1) and vert_ctr of the frame level nbr buffer */
3324             ps_ctxt->ps_top_row_nbr =
3325                 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3326                 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3327                 (vert_ctr - 1) * ps_ctxt->i4_top_row_nbr_stride;
3328 
3329             ps_ctxt->ps_bot_row_nbr =
3330                 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3331                 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3332                 (vert_ctr)*ps_ctxt->i4_top_row_nbr_stride;
3333             /* top-right cabac state is the previous row's context models slot; NULL for row 0 */
3334             if(vert_ctr > 0)
3335             {
3336                 ps_ctxt->pu1_top_rt_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr - 1][0];
3337             }
3338             else
3339             {
3340                 ps_ctxt->pu1_top_rt_cabac_state = NULL;
3341             }
3342             /* both CU entropy contexts must carry the same sign data hiding flag */
3343             ASSERT(
3344                 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0]
3345                     .ps_pps->i1_sign_data_hiding_flag ==
3346                 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1]
3347                     .ps_pps->i1_sign_data_hiding_flag);
3348 
3349             /* call the row level processing function with the row pointers derived above */
3350             ihevce_enc_loop_process_row(
3351                 ps_ctxt,
3352                 &s_curr_src_bufs,
3353                 &s_curr_recon_bufs,
3354                 &s_curr_recon_bufs_src,
3355                 &apu1_y_sub_pel_planes[0],
3356                 ps_ctb_row_in,
3357                 ps_ctb_row_out,
3358                 ps_row_ipe_analyse,
3359                 ps_row_cu_tree,
3360                 ps_row_cu,
3361                 ps_row_tu,
3362                 ps_row_pu,
3363                 ps_row_col_pu,
3364                 pu2_num_pu_row,
3365                 pu1_row_pu_map,
3366                 pu1_row_ecd_data,
3367                 pu4_pu_row_offsets,
3368                 ps_frm_ctb_prms,
3369                 vert_ctr,
3370                 ps_frm_recon,
3371                 ps_ctxt->pv_dep_mngr_encloop_dep_me,
3372                 &s_pad_interp_recon,
3373                 i4_pass,
3374                 ps_multi_thrd_ctxt,
3375                 ps_tile_params);
3376         }
3377     }
3378 }
3379 
3380 /*!
3381 ******************************************************************************
3382 * \if Function name : ihevce_enc_loop_dblk_get_prms_dep_mngr \endif
3383 *
3384 * \brief Returns to the caller key attributes relevant for dependency manager,
3385 *        ie, the number of vertical units in l0 layer
3386 *
3387 * \par Description:
3388 *
3389 * \param[in] pai4_ht    : ht
3390 * \param[out] pi4_num_vert_units_in_lyr : Pointer to store num vertical units
3391 *                                         for deblocking
3392 *
3393 * \return
3394 *    None
3395 *
3396 * \author
3397 *  Ittiam
3398 *
3399 *****************************************************************************
3400 */
ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht,WORD32 * pi4_num_vert_units_in_lyr)3401 void ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht, WORD32 *pi4_num_vert_units_in_lyr)
3402 {
3403     /* Blk ht at a given layer*/
3404     WORD32 unit_ht_c;
3405     WORD32 ctb_size = 64;
3406 
3407     /* compute blk ht and unit ht */
3408     unit_ht_c = ctb_size;
3409 
3410     /* set the numebr of vertical units */
3411     *pi4_num_vert_units_in_lyr = (i4_ht + unit_ht_c - 1) / unit_ht_c;
3412 }
3413 
3414 /*!
3415 ******************************************************************************
3416 * \if Function name : ihevce_enc_loop_get_num_mem_recs \endif
3417 *
3418 * \brief
3419 *    Number of memory records are returned for enc_loop module
3420 * Note : Include TOT MEM. req. for ENC.LOOP + TOT MEM. req. for Dep Mngr for Dblk
3421 *
3422 * \return
3423 *    None
3424 *
3425 * \author
3426 *  Ittiam
3427 *
3428 *****************************************************************************
3429 */
3430 WORD32
ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst,WORD32 i4_num_enc_loop_frm_pllel)3431     ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst, WORD32 i4_num_enc_loop_frm_pllel)
3432 {
3433     WORD32 enc_loop_mem_recs = NUM_ENC_LOOP_MEM_RECS;
3434     WORD32 enc_loop_dblk_dep_mngr_mem_recs =
3435         i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3436     WORD32 enc_loop_sao_dep_mngr_mem_recs =
3437         i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3438     WORD32 enc_loop_cu_top_right_dep_mngr_mem_recs =
3439         i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3440     WORD32 enc_loop_aux_br_dep_mngr_mem_recs =
3441         i4_num_enc_loop_frm_pllel * (i4_num_bitrate_inst - 1) * ihevce_dmgr_get_num_mem_recs();
3442 
3443     return (
3444         (enc_loop_mem_recs + enc_loop_dblk_dep_mngr_mem_recs + enc_loop_sao_dep_mngr_mem_recs +
3445          enc_loop_cu_top_right_dep_mngr_mem_recs + enc_loop_aux_br_dep_mngr_mem_recs));
3446 }
3447 /*!
3448 ******************************************************************************
3449 * \if Function name : ihevce_enc_loop_get_mem_recs \endif
3450 *
3451 * \brief
3452 *    Memory requirements are returned for ENC_LOOP.
3453 *
3454 * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
3455 * \param[in] ps_init_prms : Create time static parameters
3456 * \param[in] i4_num_proc_thrds : Number of processing threads for this module
3457 * \param[in] i4_mem_space : memspace in which memory request should be done
3458 *
3459 * \return
3460 *    None
3461 *
3462 * \author
3463 *  Ittiam
3464 *
3465 *****************************************************************************
3466 */
ihevce_enc_loop_get_mem_recs(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,WORD32 i4_num_bitrate_inst,WORD32 i4_num_enc_loop_frm_pllel,WORD32 i4_mem_space,WORD32 i4_resolution_id)3467 WORD32 ihevce_enc_loop_get_mem_recs(
3468     iv_mem_rec_t *ps_mem_tab,
3469     ihevce_static_cfg_params_t *ps_init_prms,
3470     WORD32 i4_num_proc_thrds,
3471     WORD32 i4_num_bitrate_inst,
3472     WORD32 i4_num_enc_loop_frm_pllel,
3473     WORD32 i4_mem_space,
3474     WORD32 i4_resolution_id)
3475 {
3476     UWORD32 u4_width, u4_height, n_tabs;
3477     UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
3478     WORD32 ctr;
3479     WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
3480 
3481     /* derive frame dimensions */
3482     /*width of the input YUV to be encoded */
3483     u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
3484     /*making the width a multiple of CTB size*/
3485     u4_width += SET_CTB_ALIGN(
3486         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
3487 
3488     /*height of the input YUV to be encoded */
3489     u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
3490     /*making the height a multiple of CTB size*/
3491     u4_height += SET_CTB_ALIGN(
3492         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
3493     u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
3494     u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
3495     /* memories should be requested assuming worst case requirements */
3496 
3497     /* Module context structure */
3498     ps_mem_tab[ENC_LOOP_CTXT].i4_mem_size = sizeof(ihevce_enc_loop_master_ctxt_t);
3499 
3500     ps_mem_tab[ENC_LOOP_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3501 
3502     ps_mem_tab[ENC_LOOP_CTXT].i4_mem_alignment = 8;
3503 
3504     /* Thread context structure */
3505     ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_size =
3506         i4_num_proc_thrds * sizeof(ihevce_enc_loop_ctxt_t);
3507 
3508     ps_mem_tab[ENC_LOOP_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3509 
3510     ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_alignment = 16;
3511 
3512     /* Scale matrices */
3513     ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
3514 
3515     ps_mem_tab[ENC_LOOP_SCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3516 
3517     ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_alignment = 8;
3518 
3519     /* Rescale matrices */
3520     ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
3521 
3522     ps_mem_tab[ENC_LOOP_RESCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3523 
3524     ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_alignment = 8;
3525 
3526     /* top row luma one row of pixel data per CTB row */
3527     if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
3528     {
3529         ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
3530                                                     (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD16) *
3531                                                     i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3532     }
3533     else
3534     {
3535         ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
3536                                                     (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD8) *
3537                                                     i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3538     }
3539 
3540     ps_mem_tab[ENC_LOOP_TOP_LUMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3541 
3542     ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_alignment = 8;
3543 
3544     /* top row chroma */
3545     if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
3546     {
3547         ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
3548             (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD16) *
3549             i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3550     }
3551     else
3552     {
3553         ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
3554             (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD8) *
3555             i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3556     }
3557 
3558     ps_mem_tab[ENC_LOOP_TOP_CHROMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3559 
3560     ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_alignment = 8;
3561 
3562     /* top row neighbour 4x4 */
3563     ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_size =
3564         (u4_ctb_rows_in_a_frame + 1) * (((u4_width + MAX_CU_SIZE) >> 2) + 1) * sizeof(nbr_4x4_t) *
3565         i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3566 
3567     ps_mem_tab[ENC_LOOP_TOP_NBR4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3568 
3569     ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_alignment = 8;
3570 
3571     /* memory to dump rate control parameters by each thread for each bit-rate instance */
3572     /* RC params collated by each thread for each bit-rate instance separately */
3573     ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_size = i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel *
3574                                                  i4_num_proc_thrds * sizeof(enc_loop_rc_params_t);
3575 
3576     ps_mem_tab[ENC_LOOP_RC_PARAMS].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3577 
3578     ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_alignment = 8;
3579     /* Memory required for deblocking */
3580     {
3581         /* Memory to store Qp of top4x4 blocks for each CTB row.
3582         This memory is allocated at frame level and shared across
3583         all cores. The Qp values are needed to form Qp-map(described
3584         in the ENC_LOOP_DEBLOCKING section below)*/
3585 
3586         UWORD32 u4_size_bs_memory, u4_size_qp_memory;
3587         UWORD32 u4_size_top_4x4_qp_memory;
3588 
3589         /*Memory required to store Qp of top4x4 blocks for a CTB row for entire frame*/
3590         /*Space required per CTB*/
3591         u4_size_top_4x4_qp_memory = (MAX_CTB_SIZE / 4);
3592         /*Space required for entire CTB row*/
3593         u4_size_top_4x4_qp_memory *= u4_ctb_in_a_row;
3594         /*Space required for entire frame*/
3595         u4_size_top_4x4_qp_memory *= u4_ctb_rows_in_a_frame;
3596         /*Space required for multiple bitrate*/
3597         u4_size_top_4x4_qp_memory *= i4_num_bitrate_inst;
3598         /*Space required for multiple frames in parallel*/
3599         u4_size_top_4x4_qp_memory *= i4_num_enc_loop_frm_pllel;
3600 
3601         ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_size = u4_size_top_4x4_qp_memory;
3602         ps_mem_tab[ENC_LOOP_QP_TOP_4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3603         ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_alignment = 8;
3604 
3605         /* Memory allocation of BS and Qp-map for deblocking at CTB-row level:
3606         ## Boundary Strength(Vertical):
3607         BS is stored per CTB in one stretch, i.e. for a 64x64 CTB the first 8 entries belong to the first CTB
3608         of the row followed by 8 entries of second CTB and so on.
3609         8 entries: Includes left edge of current CTB and excludes right edge.
3610         ## Boundary Strength(Horizontal):
3611         Same as Vertical.
3612         8 entries:  Includes top edge of current CTB and excludes bottom edge.
3613 
3614         ## Qp-map storage:
3615         T0 T1 T2 T3 T4 T5 ..........to the end of the CTB row
3616         00 01 02 03 04 05 ..........to the end of the CTB row
3617         10 11 12 13 14 15 ..........to the end of the CTB row
3618         20 21 22 23 24 25 ..........to the end of the CTB row
3619         30 31 32 33 34 35 ..........to the end of the CTB row
3620         40 41 42 43 44 45 ..........to the end of the CTB row
3621         ............................to the end of the CTB row
3622         up to height_of_CTB.........to the end of the CTB row
3623 
3624         Qp is stored for each "4x4 block" in a proper 2-D array format (One entry for each 4x4).
3625         A 2-D array of height= (height_of_CTB +1), and width = (width_of_CTB).
3626         where,
3627         => height_of_CTB = number of 4x4 blocks in a CTB  vertically,
3628         => +1 is done to store Qp of lowest 4x4-block layer of top-CTB
3629         in order to deblock top edge of current CTB.
3630         => width_of_CTB  = number of 4x4 blocks in a CTB  horizontally,
3631         */
3632 
3633         /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
3634         /*1 vertical edge per 8 pixel*/
3635         u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
3636         /*Vertical edges for entire width of CTB row*/
3637         u4_size_bs_memory *= u4_ctb_in_a_row;
3638         /*Each vertical edge of CTB row is 4 bytes*/
3639         u4_size_bs_memory = u4_size_bs_memory << 2;
3640         /*Adding Memory required for storing horizontal BS by doubling*/
3641         u4_size_bs_memory = u4_size_bs_memory << 1;
3642 
3643         /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
3644         /*Number of 4x4 blocks in the width of a CTB*/
3645         u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
3646         /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
3647         4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
3648         u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
3649         /*Storage for entire CTB row*/
3650         u4_size_qp_memory *= u4_ctb_in_a_row;
3651 
3652         /*Multiplying by i4_num_proc_thrds to assign memory for each core*/
3653         ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_size =
3654             i4_num_proc_thrds * (u4_size_bs_memory + u4_size_qp_memory);
3655 
3656         ps_mem_tab[ENC_LOOP_DEBLOCKING].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3657 
3658         ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_alignment = 8;
3659     }
3660 
3661     /* Memory required to store pred for 422 chroma */
3662     ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_size =
3663         i4_num_proc_thrds * MAX_CTB_SIZE * MAX_CTB_SIZE * 2 *
3664         (i4_chroma_format == IV_YUV_422SP_UV) *
3665         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3666 
3667     ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3668 
3669     ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_alignment = 8;
3670 
3671     /* Memory for inter pred buffers */
3672     {
3673         WORD32 i4_num_bufs_per_thread = 0;
3674 
3675         WORD32 i4_buf_size_per_cand =
3676             (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
3677             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3678         WORD32 i4_quality_preset =
3679             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
3680         switch(i4_quality_preset)
3681         {
3682         case IHEVCE_QUALITY_P0:
3683         {
3684             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_PQ;
3685             break;
3686         }
3687         case IHEVCE_QUALITY_P2:
3688         {
3689             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HQ;
3690             break;
3691         }
3692         case IHEVCE_QUALITY_P3:
3693         {
3694             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_MS;
3695             break;
3696         }
3697         case IHEVCE_QUALITY_P4:
3698         {
3699             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HS;
3700             break;
3701         }
3702         case IHEVCE_QUALITY_P5:
3703         case IHEVCE_QUALITY_P6:
3704         case IHEVCE_QUALITY_P7:
3705         {
3706             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_ES;
3707             break;
3708         }
3709         default:
3710         {
3711             ASSERT(0);
3712         }
3713         }
3714 
3715         i4_num_bufs_per_thread += 4;
3716 
3717         ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size =
3718             i4_num_bufs_per_thread * i4_num_proc_thrds * i4_buf_size_per_cand;
3719 
3720         ps_mem_tab[ENC_LOOP_INTER_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3721 
3722         ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_alignment = 8;
3723     }
3724 
3725     /* Memory required to store chroma intra pred */
3726     ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_size =
3727         i4_num_proc_thrds * (MAX_TU_SIZE) * (MAX_TU_SIZE)*2 * NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD *
3728         ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3729         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3730 
3731     ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3732 
3733     ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_alignment = 8;
3734 
3735     /* Memory required to store pred for reference substitution output */
3736     /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
3737        allocate 16 bytes to the left and 7 bytes to the right to facilitate
3738        SIMD access */
3739     ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_size =
3740         i4_num_proc_thrds * (((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING)
3741         + INTRAPRED_SIMD_LEFT_PADDING)*
3742         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3743 
3744     ps_mem_tab[ENC_LOOP_REF_SUB_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3745 
3746     ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_alignment = 8;
3747 
3748     /* Memory required to store pred for reference filtering output */
3749     /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
3750        allocate 16 bytes to the left and 7 bytes to the right to facilitate
3751        SIMD access */
3752     ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_size =
3753         i4_num_proc_thrds * (((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING)
3754         + INTRAPRED_SIMD_LEFT_PADDING)*
3755         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3756 
3757     ps_mem_tab[ENC_LOOP_REF_FILT_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3758 
3759     ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_alignment = 8;
3760 
3761 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3762     if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
3763 #endif
3764     {
3765         /* Memory assignments for recon storage during CU Recursion */
3766         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size =
3767             i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
3768             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3769 
3770         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3771 
3772         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
3773 
3774         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size =
3775             i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
3776             ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3777             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3778 
3779         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3780 
3781         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
3782     }
3783 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3784     else
3785     {
3786         /* Memory assignments for recon storage during CU Recursion */
3787         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size = 0;
3788 
3789         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3790 
3791         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
3792 
3793         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size = 0;
3794 
3795         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3796 
3797         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
3798     }
3799 #endif
3800 
3801 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3802     if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
3803 #endif
3804     {
3805         /* Memory assignments for pred storage during CU Recursion */
3806         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size =
3807             i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
3808             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3809 
3810         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3811 
3812         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
3813 
3814         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size =
3815             i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
3816             ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3817             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3818 
3819         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3820 
3821         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
3822     }
3823 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3824     else
3825     {
3826         /* Memory assignments for pred storage during CU Recursion */
3827         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size = 0;
3828 
3829         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3830 
3831         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
3832 
3833         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size = 0;
3834 
3835         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3836 
3837         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
3838     }
3839 #endif
3840 
3841     /* Memory assignments for CTB left luma data storage */
3842     ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_size =
3843         i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
3844         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3845 
3846     ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3847 
3848     ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_alignment = 8;
3849 
3850     /* Memory assignments for CTB left chroma data storage */
3851     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size =
3852         i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
3853         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3854     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size <<=
3855         ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0);
3856 
3857     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3858 
3859     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_alignment = 8;
3860 
3861     /* Memory required for SAO */
3862     {
3863         WORD32 num_vert_units;
3864         WORD32 num_horz_units;
3865         WORD32 ctb_aligned_ht, ctb_aligned_wd;
3866         WORD32 luma_buf, chroma_buf;
3867 
3868         num_vert_units = u4_height / MAX_CTB_SIZE;
3869         num_horz_units = u4_width / MAX_CTB_SIZE;
3870 
3871         ctb_aligned_ht = u4_height;
3872         ctb_aligned_wd = u4_width;
3873 
3874         /* Memory for top buffer. 1 extra width is required for top buf ptr for row 0
3875         * and 1 extra location is required for top left buf ptr for row 0
3876         * Also 1 extra byte is required for every row for top left pixel if
3877         * the top left ptr is to be passed to leaf level unconditionally
3878         */
3879         luma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 1) * (num_vert_units + 1)) *
3880                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3881         chroma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 2) * (num_vert_units + 1)) *
3882                      ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3883 
3884         ps_mem_tab[ENC_LOOP_SAO].i4_mem_size =
3885             (luma_buf + chroma_buf) * (i4_num_bitrate_inst) * (i4_num_enc_loop_frm_pllel);
3886 
3887         /* Add the memory required to store the sao information of top ctb for top merge
3888         * This is frame level buffer.
3889         */
3890         ps_mem_tab[ENC_LOOP_SAO].i4_mem_size +=
3891             ((num_horz_units * sizeof(sao_enc_t)) * num_vert_units) * (i4_num_bitrate_inst) *
3892             (i4_num_enc_loop_frm_pllel);
3893 
3894         ps_mem_tab[ENC_LOOP_SAO].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3895 
3896         ps_mem_tab[ENC_LOOP_SAO].i4_mem_alignment = 8;
3897     }
3898 
3899     /* Memory for CU level Coeff data buffer */
3900     {
3901         /* 16 additional bytes are required to ensure alignment */
3902         {
3903             ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_size =
3904                 i4_num_proc_thrds *
3905                 (((MAX_LUMA_COEFFS_CTB +
3906                    (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
3907                   16) *
3908                  (2) * sizeof(UWORD8));
3909         }
3910 
3911         ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3912 
3913         ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_alignment = 16;
3914 
3915         ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_size =
3916             i4_num_proc_thrds *
3917             (MAX_LUMA_COEFFS_CTB +
3918              (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) *
3919             sizeof(UWORD8);
3920 
3921         ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3922 
3923         ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_alignment = 16;
3924     }
3925 
3926     /* Memory for CU dequant data buffer */
3927     {
3928         /* 16 additional bytes are required to ensure alignment */
3929         {
3930             ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_size =
3931                 i4_num_proc_thrds *
3932                 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
3933                                                         : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
3934                  8) *
3935                 (2) * sizeof(WORD16);
3936         }
3937 
3938         ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3939 
3940         ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_alignment = 16;
3941     }
3942 
3943     /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
3944     {
3945         WORD32 i4_memSize_perThread;
3946 
3947         WORD32 i4_chroma_memSize_perThread = 0;
3948         /* 2 bufs each allocated to the two 'enc_loop_cu_final_prms_t' structs */
3949         /* used in RDOPT to store cur and best modes' data */
3950         WORD32 i4_luma_memSize_perThread =
3951             4 * MAX_CU_SIZE * MAX_CU_SIZE *
3952             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3953 
3954         /* 'Glossary' for comments in the following codeBlock */
3955         /* 1 - 2 Bufs for storing recons of the best modes determined in the */
3956         /* function 'ihevce_intra_chroma_pred_mode_selector' */
3957         /* 2 - 1 buf each allocated to the two 'enc_loop_cu_final_prms_t' structs */
3958         /* used in RDOPT to store cur and best modes' data */
3959         if(i4_chroma_format == IV_YUV_422SP_UV)
3960         {
3961             WORD32 i4_quality_preset =
3962                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
3963             switch(i4_quality_preset)
3964             {
3965             case IHEVCE_QUALITY_P0:
3966             {
3967                 /* 1 */
3968                 i4_chroma_memSize_perThread +=
3969                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
3970                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3971 
3972                 /* 2 */
3973                 i4_chroma_memSize_perThread +=
3974                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
3975                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3976 
3977                 break;
3978             }
3979             case IHEVCE_QUALITY_P2:
3980             {
3981                 /* 1 */
3982                 i4_chroma_memSize_perThread +=
3983                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
3984                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3985 
3986                 /* 2 */
3987                 i4_chroma_memSize_perThread +=
3988                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
3989                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3990 
3991                 break;
3992             }
3993             case IHEVCE_QUALITY_P3:
3994             {
3995                 /* 1 */
3996                 i4_chroma_memSize_perThread +=
3997                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
3998                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3999 
4000                 /* 2 */
4001                 i4_chroma_memSize_perThread +=
4002                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
4003                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4004 
4005                 break;
4006             }
4007             case IHEVCE_QUALITY_P4:
4008             {
4009                 /* 1 */
4010                 i4_chroma_memSize_perThread +=
4011                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
4012                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4013 
4014                 /* 2 */
4015                 i4_chroma_memSize_perThread +=
4016                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
4017                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4018 
4019                 break;
4020             }
4021             case IHEVCE_QUALITY_P5:
4022             {
4023                 /* 1 */
4024                 i4_chroma_memSize_perThread +=
4025                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
4026                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4027 
4028                 /* 2 */
4029                 i4_chroma_memSize_perThread +=
4030                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
4031                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4032 
4033                 break;
4034             }
4035             case IHEVCE_QUALITY_P6:
4036             case IHEVCE_QUALITY_P7:
4037             {
4038                 /* 1 */
4039                 i4_chroma_memSize_perThread +=
4040                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
4041                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4042 
4043                 /* 2 */
4044                 i4_chroma_memSize_perThread +=
4045                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
4046                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4047 
4048                 break;
4049             }
4050             }
4051         }
4052         else
4053         {
4054             WORD32 i4_quality_preset =
4055                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4056             switch(i4_quality_preset)
4057             {
4058             case IHEVCE_QUALITY_P0:
4059             {
4060                 /* 1 */
4061                 i4_chroma_memSize_perThread +=
4062                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
4063                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4064 
4065                 /* 2 */
4066                 i4_chroma_memSize_perThread +=
4067                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4068                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
4069                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4070 
4071                 break;
4072             }
4073             case IHEVCE_QUALITY_P2:
4074             {
4075                 /* 1 */
4076                 i4_chroma_memSize_perThread +=
4077                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
4078                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4079 
4080                 /* 2 */
4081                 i4_chroma_memSize_perThread +=
4082                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4083                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
4084                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4085 
4086                 break;
4087             }
4088             case IHEVCE_QUALITY_P3:
4089             {
4090                 /* 1 */
4091                 i4_chroma_memSize_perThread +=
4092                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
4093                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4094 
4095                 /* 2 */
4096                 i4_chroma_memSize_perThread +=
4097                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4098                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
4099                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4100 
4101                 break;
4102             }
4103             case IHEVCE_QUALITY_P4:
4104             {
4105                 /* 1 */
4106                 i4_chroma_memSize_perThread +=
4107                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
4108                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4109 
4110                 /* 2 */
4111                 i4_chroma_memSize_perThread +=
4112                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4113                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
4114                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4115 
4116                 break;
4117             }
4118             case IHEVCE_QUALITY_P5:
4119             {
4120                 /* 1 */
4121                 i4_chroma_memSize_perThread +=
4122                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
4123                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4124 
4125                 /* 2 */
4126                 i4_chroma_memSize_perThread +=
4127                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4128                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
4129                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4130 
4131                 break;
4132             }
4133             case IHEVCE_QUALITY_P6:
4134             case IHEVCE_QUALITY_P7:
4135             {
4136                 /* 1 */
4137                 i4_chroma_memSize_perThread +=
4138                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
4139                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4140 
4141                 /* 2 */
4142                 i4_chroma_memSize_perThread +=
4143                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4144                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
4145                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4146 
4147                 break;
4148             }
4149             }
4150         }
4151 
4152         i4_memSize_perThread = i4_luma_memSize_perThread + i4_chroma_memSize_perThread;
4153 
4154         ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size =
4155             i4_num_proc_thrds * i4_memSize_perThread * sizeof(UWORD8);
4156 
4157         ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
4158 
4159         ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_alignment = 16;
4160     }
4161 
4162     n_tabs = NUM_ENC_LOOP_MEM_RECS;
4163 
4164     /*************************************************************************/
4165     /* --- EncLoop Deblock and SAO sync Dep Mngr Mem requests --                     */
4166     /*************************************************************************/
4167 
4168     /* Fill the memtabs for  EncLoop Deblock Dep Mngr */
4169     {
4170         WORD32 count;
4171         WORD32 num_vert_units;
4172         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4173 
4174         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4175         ASSERT(num_vert_units > 0);
4176         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4177         {
4178             for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4179             {
4180                 n_tabs += ihevce_dmgr_get_mem_recs(
4181                     &ps_mem_tab[n_tabs],
4182                     DEP_MNGR_ROW_ROW_SYNC,
4183                     num_vert_units,
4184                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4185                     i4_num_proc_thrds,
4186                     i4_mem_space);
4187             }
4188         }
4189 
4190         /* Fill the memtabs for  EncLoop SAO Dep Mngr */
4191         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4192         {
4193             for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4194             {
4195                 n_tabs += ihevce_dmgr_get_mem_recs(
4196                     &ps_mem_tab[n_tabs],
4197                     DEP_MNGR_ROW_ROW_SYNC,
4198                     num_vert_units,
4199                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4200                     i4_num_proc_thrds,
4201                     i4_mem_space);
4202             }
4203         }
4204     }
4205 
4206     /*************************************************************************/
4207     /* --- EncLoop Top-Right CU sync Dep Mngr Mem requests --                */
4208     /*************************************************************************/
4209 
4210     /* Fill the memtabs for  Top-Right CU sync Dep Mngr */
4211     {
4212         WORD32 count;
4213         WORD32 num_vert_units;
4214         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4215         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4216         ASSERT(num_vert_units > 0);
4217 
4218         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4219         {
4220             for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4221             {
4222                 n_tabs += ihevce_dmgr_get_mem_recs(
4223                     &ps_mem_tab[n_tabs],
4224                     DEP_MNGR_ROW_ROW_SYNC,
4225                     num_vert_units,
4226                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4227                     i4_num_proc_thrds,
4228                     i4_mem_space);
4229             }
4230         }
4231     }
4232 
4233     /*************************************************************************/
4234     /* --- EncLoop Aux. on Ref. bitrate sync Dep Mngr Mem requests --        */
4235     /*************************************************************************/
4236 
4237     /* Fill the memtabs for  EncLoop Aux. on Ref. bitrate Dep Mngr */
4238     {
4239         WORD32 count;
4240         WORD32 num_vert_units;
4241         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4242 
4243         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4244         ASSERT(num_vert_units > 0);
4245 
4246         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4247         {
4248             for(ctr = 1; ctr < i4_num_bitrate_inst; ctr++)
4249             {
4250                 n_tabs += ihevce_dmgr_get_mem_recs(
4251                     &ps_mem_tab[n_tabs],
4252                     DEP_MNGR_ROW_ROW_SYNC,
4253                     num_vert_units,
4254                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4255                     i4_num_proc_thrds,
4256                     i4_mem_space);
4257             }
4258         }
4259     }
4260 
4261     return (n_tabs);
4262 }
4263 
4264 /*!
4265 ******************************************************************************
4266 * \if Function name : ihevce_enc_loop_init \endif
4267 *
4268 * \brief
4269 *    Initialization for ENC_LOOP context state structure.
4270 *
4271 * \param[in] ps_mem_tab : pointer to memory descriptors table
4272 * \param[in] ps_init_prms : Create time static parameters
4273 * \param[in] pv_osal_handle : Osal handle
4274 *
4275 * \return
4276 *    None
4277 *
4278 * \author
4279 *  Ittiam
4280 *
4281 *****************************************************************************
4282 */
ihevce_enc_loop_init(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,void * pv_osal_handle,func_selector_t * ps_func_selector,rc_quant_t * ps_rc_quant_ctxt,ihevce_tile_params_t * ps_tile_params_base,WORD32 i4_resolution_id,WORD32 i4_num_enc_loop_frm_pllel,UWORD8 u1_is_popcnt_available)4283 void *ihevce_enc_loop_init(
4284     iv_mem_rec_t *ps_mem_tab,
4285     ihevce_static_cfg_params_t *ps_init_prms,
4286     WORD32 i4_num_proc_thrds,
4287     void *pv_osal_handle,
4288     func_selector_t *ps_func_selector,
4289     rc_quant_t *ps_rc_quant_ctxt,
4290     ihevce_tile_params_t *ps_tile_params_base,
4291     WORD32 i4_resolution_id,
4292     WORD32 i4_num_enc_loop_frm_pllel,
4293     UWORD8 u1_is_popcnt_available)
4294 {
4295     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
4296     ihevce_enc_loop_ctxt_t *ps_ctxt;
4297     WORD32 ctr, n_tabs;
4298     UWORD32 u4_width, u4_height;
4299     UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
4300     UWORD32 u4_size_bs_memory, u4_size_qp_memory;
4301     UWORD8 *pu1_deblk_base; /*Store the base address of deblcoking memory*/
4302     WORD32 i;
4303     WORD32 i4_num_bitrate_inst =
4304         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_num_bitrate_instances;
4305     enc_loop_rc_params_t *ps_enc_loop_rc_params;
4306     UWORD8 *pu1_sao_base; /* store the base address of sao*/
4307     UWORD32 u4_ctb_aligned_wd, ctb_size, u4_ctb_aligned_ht, num_vert_units;
4308     WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
4309     WORD32 is_hbd_mode = (ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8);
4310     WORD32 i4_enc_frm_id;
4311     WORD32 num_cu_in_ctb;
4312     WORD32 i4_num_tile_cols = 1;  //Default value is 1
4313 
4314     /* ENC_LOOP state structure */
4315     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)ps_mem_tab[ENC_LOOP_CTXT].pv_base;
4316 
4317     ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds;
4318 
4319     ps_ctxt = (ihevce_enc_loop_ctxt_t *)ps_mem_tab[ENC_LOOP_THRDS_CTXT].pv_base;
4320     ps_enc_loop_rc_params = (enc_loop_rc_params_t *)ps_mem_tab[ENC_LOOP_RC_PARAMS].pv_base;
4321     ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
4322     /*Calculation of memory sizes for deblocking*/
4323     {
4324         /*width of the input YUV to be encoded. */
4325         u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
4326         /*making the width a multiple of CTB size*/
4327         u4_width += SET_CTB_ALIGN(
4328             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
4329 
4330         u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
4331 
4332         /*height of the input YUV to be encoded */
4333         u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4334         /*making the height a multiple of CTB size*/
4335         u4_height += SET_CTB_ALIGN(
4336             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
4337 
4338         u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
4339 
4340         /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
4341         /*1 vertical edge per 8 pixel*/
4342         u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
4343         /*Vertical edges for entire width of CTB row*/
4344         u4_size_bs_memory *= u4_ctb_in_a_row;
4345         /*Each vertical edge of CTB row is 4 bytes*/
4346         u4_size_bs_memory = u4_size_bs_memory << 2;
4347         /*Adding Memory required for storing horizontal BS by doubling*/
4348         u4_size_bs_memory = u4_size_bs_memory << 1;
4349 
4350         /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
4351         /*Number of 4x4 blocks in the width of a CTB*/
4352         u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
4353         /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
4354         4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
4355         u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
4356         /*Storage for entire CTB row*/
4357         u4_size_qp_memory *= u4_ctb_in_a_row;
4358 
4359         pu1_deblk_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_DEBLOCKING].pv_base;
4360     }
4361 
4362     /*Derive the base pointer of sao*/
4363     pu1_sao_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_SAO].pv_base;
4364     ctb_size = (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4365     u4_ctb_aligned_wd = u4_width;
4366     u4_ctb_aligned_ht = u4_height;
4367     num_vert_units = (u4_height) / ctb_size;
4368 
4369     for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
4370     {
4371         ps_master_ctxt->aps_enc_loop_thrd_ctxt[ctr] = ps_ctxt;
4372         /* Store Tile params base into EncLoop context */
4373         ps_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
4374         ihevce_cmn_utils_instr_set_router(
4375             &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
4376         ihevce_sifter_sad_fxn_assigner(
4377             (FT_SAD_EVALUATOR **)(&ps_ctxt->pv_evalsad_pt_npu_mxn_8bit), ps_init_prms->e_arch_type);
4378         ps_ctxt->i4_max_search_range_horizontal =
4379             ps_init_prms->s_config_prms.i4_max_search_range_horz;
4380         ps_ctxt->i4_max_search_range_vertical =
4381             ps_init_prms->s_config_prms.i4_max_search_range_vert;
4382 
4383         ps_ctxt->i4_quality_preset =
4384             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4385 
4386         if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7)
4387         {
4388             ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6;
4389         }
4390 
4391         ps_ctxt->i4_num_proc_thrds = ps_master_ctxt->i4_num_proc_thrds;
4392 
4393         ps_ctxt->i4_rc_pass = ps_init_prms->s_pass_prms.i4_pass;
4394 
4395         ps_ctxt->u1_chroma_array_type = (i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1;
4396 
4397         ps_ctxt->s_deblk_prms.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
4398 
4399         ps_ctxt->pi2_scal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_SCALE_MAT].pv_base;
4400 
4401         ps_ctxt->pi2_rescal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_RESCALE_MAT].pv_base;
4402 
4403         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
4404         {
4405             ps_ctxt->i4_use_ctb_level_lamda = 0;
4406         }
4407         else
4408         {
4409             ps_ctxt->i4_use_ctb_level_lamda = 0;
4410         }
4411 
4412         /** Register the function selector pointer*/
4413         ps_ctxt->ps_func_selector = ps_func_selector;
4414 
4415         ps_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector;
4416 
4417         /* Initiallization for non-distributed mode */
4418         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[0] = 0;
4419         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[1] = 0;
4420         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[2] = 0;
4421         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[3] = 0;
4422 
4423         ps_ctxt->s_deblk_prms.ps_func_selector = ps_func_selector;
4424         ps_ctxt->i4_top_row_luma_stride = (u4_width + MAX_CU_SIZE + 1);
4425 
4426         ps_ctxt->i4_frm_top_row_luma_size =
4427             ps_ctxt->i4_top_row_luma_stride * (u4_ctb_rows_in_a_frame + 1);
4428 
4429         ps_ctxt->i4_top_row_chroma_stride = (u4_width + MAX_CU_SIZE + 2);
4430 
4431         ps_ctxt->i4_frm_top_row_chroma_size =
4432             ps_ctxt->i4_top_row_chroma_stride * (u4_ctb_rows_in_a_frame + 1);
4433 
4434         {
4435             for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4436             {
4437                 /* +1 is to provision top left pel */
4438                 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4439                     (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_LUMA].pv_base + 1 +
4440                     (ps_ctxt->i4_frm_top_row_luma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4441 
4442                 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4443                 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4444                     (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] +
4445                     ps_ctxt->i4_top_row_luma_stride;
4446 
4447                 /* +2 is to provision top left pel */
4448                 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4449                     (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_CHROMA].pv_base + 2 +
4450                     (ps_ctxt->i4_frm_top_row_chroma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4451 
4452                 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4453                 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4454                     (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] +
4455                     ps_ctxt->i4_top_row_chroma_stride;
4456             }
4457         }
4458 
4459         /* +1 is to provision top left nbr */
4460         ps_ctxt->i4_top_row_nbr_stride = (((u4_width + MAX_CU_SIZE) >> 2) + 1);
4461         ps_ctxt->i4_frm_top_row_nbr_size =
4462             ps_ctxt->i4_top_row_nbr_stride * (u4_ctb_rows_in_a_frame + 1);
4463         for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4464         {
4465             ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] =
4466                 (nbr_4x4_t *)ps_mem_tab[ENC_LOOP_TOP_NBR4X4].pv_base + 1 +
4467                 (ps_ctxt->i4_frm_top_row_nbr_size * i4_enc_frm_id * i4_num_bitrate_inst);
4468             ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] += ps_ctxt->i4_top_row_nbr_stride;
4469         }
4470 
4471         num_cu_in_ctb = ctb_size / MIN_CU_SIZE;
4472         num_cu_in_ctb *= num_cu_in_ctb;
4473 
4474         /* pointer incremented by 1 row to avoid OOB access in 0th row */
4475 
4476         /* Memory for CU level Coeff data buffer */
4477         {
4478             WORD32 i4_16byte_boundary_overshoot;
4479             WORD32 buf_size_per_cu;
4480             WORD32 buf_size_per_thread_wo_alignment_req;
4481             WORD32 buf_size_per_thread;
4482 
4483             buf_size_per_cu =
4484                 ((MAX_LUMA_COEFFS_CTB +
4485                   (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
4486                  16) *
4487                 sizeof(UWORD8);
4488             buf_size_per_thread_wo_alignment_req = buf_size_per_cu - 16 * sizeof(UWORD8);
4489 
4490             {
4491                 buf_size_per_thread = buf_size_per_cu * (2);
4492 
4493                 for(i = 0; i < 2; i++)
4494                 {
4495                     ps_ctxt->as_cu_prms[i].pu1_cu_coeffs =
4496                         (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].pv_base +
4497                         (ctr * buf_size_per_thread) + (i * buf_size_per_cu);
4498 
4499                     i4_16byte_boundary_overshoot =
4500                         ((LWORD64)ps_ctxt->as_cu_prms[i].pu1_cu_coeffs & 0xf);
4501 
4502                     ps_ctxt->as_cu_prms[i].pu1_cu_coeffs += (16 - i4_16byte_boundary_overshoot);
4503                 }
4504             }
4505 
4506             ps_ctxt->pu1_cu_recur_coeffs =
4507                 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].pv_base +
4508                 (ctr * buf_size_per_thread_wo_alignment_req);
4509         }
4510 
4511         /* Memory for CU dequant data buffer */
4512         {
4513             WORD32 buf_size_per_thread;
4514             WORD32 i4_16byte_boundary_overshoot;
4515 
4516             WORD32 buf_size_per_cu =
4517                 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
4518                                                         : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
4519                  8) *
4520                 sizeof(WORD16);
4521 
4522             {
4523                 buf_size_per_thread = buf_size_per_cu * 2;
4524 
4525                 for(i = 0; i < 2; i++)
4526                 {
4527                     ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4528                         (WORD16
4529                              *)((UWORD8 *)ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].pv_base + (ctr * buf_size_per_thread) + (i * buf_size_per_cu));
4530 
4531                     i4_16byte_boundary_overshoot =
4532                         ((LWORD64)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs & 0xf);
4533 
4534                     ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4535                         (WORD16
4536                              *)((UWORD8 *)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs + (16 - i4_16byte_boundary_overshoot));
4537                 }
4538             }
4539         }
4540 
4541         /*------ Deblocking memory's pointers assignements starts ------*/
4542 
4543         /*Assign stride = 4x4 blocks in horizontal edge*/
4544         ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4545 
4546         ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size =
4547             ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd * u4_ctb_rows_in_a_frame;
4548 
4549         /*Assign frame level memory to store the Qp of
4550         top 4x4 neighbours of each CTB row*/
4551         for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4552         {
4553             ps_ctxt->s_deblk_ctbrow_prms.api1_qp_top_4x4_ctb_row[i4_enc_frm_id] =
4554                 (WORD8 *)ps_mem_tab[ENC_LOOP_QP_TOP_4X4].pv_base +
4555                 (ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size * i4_num_bitrate_inst *
4556                  i4_enc_frm_id);
4557         }
4558 
4559         ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_vert = (UWORD32 *)pu1_deblk_base;
4560 
4561         ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_horz =
4562             (UWORD32 *)(pu1_deblk_base + (u4_size_bs_memory >> 1));
4563 
4564         ps_ctxt->s_deblk_ctbrow_prms.pi1_ctb_row_qp = (WORD8 *)pu1_deblk_base + u4_size_bs_memory;
4565 
4566         /*Assign stride = 4x4 blocks in horizontal edge*/
4567         ps_ctxt->s_deblk_ctbrow_prms.u4_qp_buffer_stride = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4568 
4569         pu1_deblk_base += (u4_size_bs_memory + u4_size_qp_memory);
4570 
4571         /*------Deblocking memory's pointers assignements ends ------*/
4572 
4573         /*------SAO memory's pointer assignment starts------------*/
4574         if(!is_hbd_mode)
4575         {
4576             /* 2 is added to allocate top left pixel */
4577             ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size =
4578                 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1);
4579             ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size =
4580                 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 2) * (num_vert_units + 1);
4581             ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units =
4582                 num_vert_units * (u4_ctb_aligned_wd / MAX_CTB_SIZE);
4583 
4584             for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4585             {
4586                 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_luma[i4_enc_frm_id] =
4587                     pu1_sao_base +
4588                     ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4589                       ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4590                      i4_num_bitrate_inst * i4_enc_frm_id) +  // move to the next frame_id
4591                     u4_ctb_aligned_wd +
4592                     2;
4593 
4594                 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_chroma[i4_enc_frm_id] =
4595                     pu1_sao_base +
4596                     ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4597                       ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4598                      i4_num_bitrate_inst * i4_enc_frm_id) +
4599                     +u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1) +
4600                     u4_ctb_aligned_wd + 4;
4601 
4602                 ps_ctxt->s_sao_ctxt_t.aps_frm_top_ctb_sao[i4_enc_frm_id] = (sao_enc_t *) (pu1_sao_base +
4603                     ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size)
4604                     *i4_num_bitrate_inst*i4_num_enc_loop_frm_pllel) +
4605                     (ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units * sizeof(sao_enc_t) *i4_num_bitrate_inst * i4_enc_frm_id));
4606             }
4607             ps_ctxt->s_sao_ctxt_t.i4_ctb_size =
4608                 (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4609             ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd = u4_ctb_aligned_wd;
4610         }
4611 
4612         /*------SAO memory's pointer assignment ends------------*/
4613 
4614         /* perform all one time initialisation here */
4615         ps_ctxt->i4_nbr_map_strd = MAX_PU_IN_CTB_ROW + 1 + 8;
4616 
4617         ps_ctxt->pu1_ctb_nbr_map = ps_ctxt->au1_nbr_ctb_map[0];
4618 
4619         ps_ctxt->i4_deblock_type = ps_init_prms->s_coding_tools_prms.i4_deblocking_type;
4620 
4621         /* move the pointer to 1,2 location */
4622         ps_ctxt->pu1_ctb_nbr_map += ps_ctxt->i4_nbr_map_strd;
4623         ps_ctxt->pu1_ctb_nbr_map++;
4624 
4625         ps_ctxt->i4_cu_csbf_strd = MAX_TU_IN_CTB_ROW;
4626 
4627         CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map4x4TU, 1, 4, ps_ctxt->i4_cu_csbf_strd);
4628 
4629         CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map8x8TU, 4, 8, ps_ctxt->i4_cu_csbf_strd);
4630 
4631         CREATE_SUBBLOCK2CSBFID_MAP(
4632             gai4_subBlock2csbfId_map16x16TU, 16, 16, ps_ctxt->i4_cu_csbf_strd);
4633 
4634         CREATE_SUBBLOCK2CSBFID_MAP(
4635             gai4_subBlock2csbfId_map32x32TU, 64, 32, ps_ctxt->i4_cu_csbf_strd);
4636 
4637         /* For both instance initialise the chroma dequant start idx */
4638         ps_ctxt->as_cu_prms[0].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4639         ps_ctxt->as_cu_prms[1].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4640 
4641         /* initialise all the function pointer tables */
4642         {
4643             ps_ctxt->pv_inter_rdopt_cu_mc_mvp =
4644                 (pf_inter_rdopt_cu_mc_mvp)ihevce_inter_rdopt_cu_mc_mvp;
4645 
4646             ps_ctxt->pv_inter_rdopt_cu_ntu = (pf_inter_rdopt_cu_ntu)ihevce_inter_rdopt_cu_ntu;
4647 
4648 #if ENABLE_RDO_BASED_TU_RECURSION
4649             if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4650             {
4651                 ps_ctxt->pv_inter_rdopt_cu_ntu =
4652                     (pf_inter_rdopt_cu_ntu)ihevce_inter_tu_tree_selector_and_rdopt_cost_computer;
4653             }
4654 #endif
4655             ps_ctxt->pv_intra_chroma_pred_mode_selector =
4656                 (pf_intra_chroma_pred_mode_selector)ihevce_intra_chroma_pred_mode_selector;
4657             ps_ctxt->pv_intra_rdopt_cu_ntu = (pf_intra_rdopt_cu_ntu)ihevce_intra_rdopt_cu_ntu;
4658             ps_ctxt->pv_final_rdopt_mode_prcs =
4659                 (pf_final_rdopt_mode_prcs)ihevce_final_rdopt_mode_prcs;
4660             ps_ctxt->pv_store_cu_results = (pf_store_cu_results)ihevce_store_cu_results;
4661             ps_ctxt->pv_enc_loop_cu_bot_copy = (pf_enc_loop_cu_bot_copy)ihevce_enc_loop_cu_bot_copy;
4662             ps_ctxt->pv_enc_loop_ctb_left_copy =
4663                 (pf_enc_loop_ctb_left_copy)ihevce_enc_loop_ctb_left_copy;
4664 
4665             /* Memory assignments for chroma intra pred buffer */
4666             {
4667                 WORD32 pred_buf_size =
4668                     MAX_TU_SIZE * MAX_TU_SIZE * 2 * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4669                 WORD32 pred_buf_size_per_thread =
4670                     NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD * pred_buf_size;
4671                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].pv_base +
4672                                    (ctr * pred_buf_size_per_thread);
4673 
4674                 for(i = 0; i < NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD; i++)
4675                 {
4676                     ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[i].pv_pred_data = pu1_base;
4677                     pu1_base += pred_buf_size;
4678                 }
4679             }
4680 
4681             /* Memory assignments for reference substitution output */
4682             {
4683                 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING
4684                                        + INTRAPRED_SIMD_LEFT_PADDING);
4685                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4686                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_SUB_OUT].pv_base +
4687                                    (ctr * pred_buf_size_per_thread);
4688 
4689                 ps_ctxt->pv_ref_sub_out = pu1_base + INTRAPRED_SIMD_LEFT_PADDING;
4690             }
4691 
4692             /* Memory assignments for reference filtering output */
4693             {
4694                 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING
4695                                        + INTRAPRED_SIMD_LEFT_PADDING);
4696                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4697                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_FILT_OUT].pv_base +
4698                                    (ctr * pred_buf_size_per_thread);
4699 
4700                 ps_ctxt->pv_ref_filt_out = pu1_base + INTRAPRED_SIMD_LEFT_PADDING;
4701             }
4702 
4703             /* Memory assignments for recon storage during CU Recursion */
4704 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4705             if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4706 #endif
4707             {
4708                 {
4709                     WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4710                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4711                     UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].pv_base +
4712                                        (ctr * pred_buf_size_per_thread);
4713 
4714                     ps_ctxt->pv_cu_luma_recon = pu1_base;
4715                 }
4716 
4717                 {
4718                     WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4719                                            ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4720                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4721                     UWORD8 *pu1_base =
4722                         (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].pv_base +
4723                         (ctr * pred_buf_size_per_thread);
4724 
4725                     ps_ctxt->pv_cu_chrma_recon = pu1_base;
4726                 }
4727             }
4728 
4729             /* Memory assignments for pred storage during CU Recursion */
4730 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4731             if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4732 #endif
4733             {
4734                 {
4735                     WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4736                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4737                     UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].pv_base +
4738                                        (ctr * pred_buf_size_per_thread);
4739 
4740                     ps_ctxt->pv_CTB_pred_luma = pu1_base;
4741                 }
4742 
4743                 {
4744                     WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4745                                            ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4746                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4747                     UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].pv_base +
4748                                        (ctr * pred_buf_size_per_thread);
4749 
4750                     ps_ctxt->pv_CTB_pred_chroma = pu1_base;
4751                 }
4752             }
4753 
4754             /* Memory assignments for CTB left luma data storage */
4755             {
4756                 WORD32 pred_buf_size = (MAX_CTB_SIZE + MAX_TU_SIZE);
4757                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4758                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].pv_base +
4759                                    (ctr * pred_buf_size_per_thread);
4760 
4761                 ps_ctxt->pv_left_luma_data = pu1_base;
4762             }
4763 
4764             /* Memory assignments for CTB left chroma data storage */
4765             {
4766                 WORD32 pred_buf_size =
4767                     (MAX_CTB_SIZE + MAX_TU_SIZE) * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4768                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4769                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].pv_base +
4770                                    (ctr * pred_buf_size_per_thread);
4771 
4772                 ps_ctxt->pv_left_chrm_data = pu1_base;
4773             }
4774         }
4775 
4776         /* Memory for inter pred buffers */
4777         {
4778             WORD32 i4_num_bufs_per_thread;
4779 
4780             WORD32 i4_buf_size_per_cand =
4781                 (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
4782                 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
4783 
4784             i4_num_bufs_per_thread =
4785                 (ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size / i4_num_proc_thrds) /
4786                 i4_buf_size_per_cand;
4787 
4788             ps_ctxt->i4_max_num_inter_rdopt_cands = i4_num_bufs_per_thread - 4;
4789 
4790             ps_ctxt->s_pred_buf_data.u4_is_buf_in_use = UINT_MAX;
4791 
4792             {
4793                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_INTER_PRED].pv_base +
4794                                    +(ctr * i4_buf_size_per_cand * i4_num_bufs_per_thread);
4795 
4796                 for(i = 0; i < i4_num_bufs_per_thread; i++)
4797                 {
4798                     ps_ctxt->s_pred_buf_data.apv_inter_pred_data[i] =
4799                         pu1_base + i * i4_buf_size_per_cand;
4800                     ps_ctxt->s_pred_buf_data.u4_is_buf_in_use ^= (1 << i);
4801                 }
4802             }
4803         }
4804 
4805         /* Memory required to store pred for 422 chroma */
4806         if(i4_chroma_format == IV_YUV_422SP_UV)
4807         {
4808             WORD32 pred_buf_size = MAX_CTB_SIZE * MAX_CTB_SIZE * 2;
4809             WORD32 pred_buf_size_per_thread =
4810                 pred_buf_size * ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) *
4811                 sizeof(UWORD8);
4812             void *pv_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].pv_base +
4813                             (ctr * pred_buf_size_per_thread);
4814 
4815             ps_ctxt->pv_422_chroma_intra_pred_buf = pv_base;
4816         }
4817         else
4818         {
4819             ps_ctxt->pv_422_chroma_intra_pred_buf = NULL;
4820         }
4821 
4822         /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
4823         {
4824             WORD32 i4_lumaBufSize = MAX_CU_SIZE * MAX_CU_SIZE;
4825             WORD32 i4_chromaBufSize =
4826                 MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ((i4_chroma_format == IV_YUV_422SP_UV) + 1);
4827             WORD32 i4_memSize_perThread = ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size /
4828                                           (i4_num_proc_thrds * sizeof(UWORD8) * (is_hbd_mode + 1));
4829             WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset;
4830             {
4831                 UWORD8 *pu1_mem_base =
4832                     (((UWORD8 *)ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].pv_base) +
4833                      ctr * i4_memSize_perThread);
4834 
4835                 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[0] =
4836                     pu1_mem_base + i4_lumaBufSize * 0;
4837                 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[1] =
4838                     pu1_mem_base + i4_lumaBufSize * 1;
4839                 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[0] =
4840                     pu1_mem_base + i4_lumaBufSize * 2;
4841                 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[1] =
4842                     pu1_mem_base + i4_lumaBufSize * 3;
4843 
4844                 pu1_mem_base += i4_lumaBufSize * 4;
4845 
4846                 switch(i4_quality_preset)
4847                 {
4848                 case IHEVCE_QUALITY_P0:
4849                 {
4850 #if ENABLE_CHROMA_RDOPT_EVAL_IN_PQ
4851                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4852                         pu1_mem_base + i4_chromaBufSize * 0;
4853                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4854                         pu1_mem_base + i4_chromaBufSize * 1;
4855 #else
4856                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4857                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4858 #endif
4859 
4860 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ
4861                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4862                         pu1_mem_base + i4_chromaBufSize * 2;
4863                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4864                         pu1_mem_base + i4_chromaBufSize * 3;
4865                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4866                         pu1_mem_base + i4_chromaBufSize * 2;
4867                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4868                         pu1_mem_base + i4_chromaBufSize * 3;
4869 #else
4870                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4871                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4872                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4873                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4874 #endif
4875 
4876                     break;
4877                 }
4878                 case IHEVCE_QUALITY_P2:
4879                 {
4880 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HQ
4881                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4882                         pu1_mem_base + i4_chromaBufSize * 0;
4883                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4884                         pu1_mem_base + i4_chromaBufSize * 1;
4885 #else
4886                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4887                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4888 #endif
4889 
4890 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ
4891                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4892                         pu1_mem_base + i4_chromaBufSize * 2;
4893                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4894                         pu1_mem_base + i4_chromaBufSize * 3;
4895                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4896                         pu1_mem_base + i4_chromaBufSize * 2;
4897                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4898                         pu1_mem_base + i4_chromaBufSize * 3;
4899 #else
4900                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4901                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4902                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4903                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4904 #endif
4905 
4906                     break;
4907                 }
4908                 case IHEVCE_QUALITY_P3:
4909                 {
4910 #if ENABLE_CHROMA_RDOPT_EVAL_IN_MS
4911                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4912                         pu1_mem_base + i4_chromaBufSize * 0;
4913                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4914                         pu1_mem_base + i4_chromaBufSize * 1;
4915 #else
4916                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4917                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4918 #endif
4919 
4920 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS
4921                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4922                         pu1_mem_base + i4_chromaBufSize * 2;
4923                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4924                         pu1_mem_base + i4_chromaBufSize * 3;
4925                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4926                         pu1_mem_base + i4_chromaBufSize * 2;
4927                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4928                         pu1_mem_base + i4_chromaBufSize * 3;
4929 #else
4930                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4931                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4932                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4933                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4934 #endif
4935 
4936                     break;
4937                 }
4938                 case IHEVCE_QUALITY_P4:
4939                 {
4940 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HS
4941                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4942                         pu1_mem_base + i4_chromaBufSize * 0;
4943                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4944                         pu1_mem_base + i4_chromaBufSize * 1;
4945 #else
4946                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4947                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4948 #endif
4949 
4950 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS
4951                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4952                         pu1_mem_base + i4_chromaBufSize * 2;
4953                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4954                         pu1_mem_base + i4_chromaBufSize * 3;
4955                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4956                         pu1_mem_base + i4_chromaBufSize * 2;
4957                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4958                         pu1_mem_base + i4_chromaBufSize * 3;
4959 #else
4960                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4961                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4962                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4963                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4964 #endif
4965 
4966                     break;
4967                 }
4968                 case IHEVCE_QUALITY_P5:
4969                 {
4970 #if ENABLE_CHROMA_RDOPT_EVAL_IN_XS
4971                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4972                         pu1_mem_base + i4_chromaBufSize * 0;
4973                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4974                         pu1_mem_base + i4_chromaBufSize * 1;
4975 #else
4976                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4977                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4978 #endif
4979 
4980 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS
4981                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4982                         pu1_mem_base + i4_chromaBufSize * 2;
4983                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4984                         pu1_mem_base + i4_chromaBufSize * 3;
4985                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4986                         pu1_mem_base + i4_chromaBufSize * 2;
4987                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4988                         pu1_mem_base + i4_chromaBufSize * 3;
4989 #else
4990                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4991                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4992                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4993                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4994 #endif
4995 
4996                     break;
4997                 }
4998                 }
4999             }
5000 
5001             ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
5002             ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
5003             ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5004             ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5005 
5006         } /* Recon Datastore */
5007 
5008         /****************************************************/
5009         /****************************************************/
5010         /* ps_pps->i1_sign_data_hiding_flag  == UNHIDDEN    */
5011         /* when NO_SBH. else HIDDEN                         */
5012         /****************************************************/
5013         /****************************************************/
5014         /* Zero cbf tool is enabled by default for all presets */
5015         ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
5016 
5017         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3)
5018         {
5019             ps_ctxt->i4_quant_rounding_level = CU_LEVEL_QUANT_ROUNDING;
5020             ps_ctxt->i4_chroma_quant_rounding_level = CHROMA_QUANT_ROUNDING;
5021             ps_ctxt->i4_rdoq_level = ALL_CAND_RDOQ;
5022             ps_ctxt->i4_sbh_level = ALL_CAND_SBH;
5023         }
5024         else if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P3)
5025         {
5026             ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5027             ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5028             ps_ctxt->i4_rdoq_level = NO_RDOQ;
5029             ps_ctxt->i4_sbh_level = NO_SBH;
5030         }
5031         else
5032         {
5033             ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5034             ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5035             ps_ctxt->i4_rdoq_level = NO_RDOQ;
5036             ps_ctxt->i4_sbh_level = NO_SBH;
5037         }
5038 
5039 #if DISABLE_QUANT_ROUNDING
5040         ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5041         ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5042 #endif
5043         /*Disabling RDOQ only when spatial modulation is enabled
5044                 as RDOQ degrades visual quality*/
5045         if(ps_init_prms->s_config_prms.i4_cu_level_rc & 1)
5046         {
5047             ps_ctxt->i4_rdoq_level = NO_RDOQ;
5048         }
5049 
5050 #if DISABLE_RDOQ
5051         ps_ctxt->i4_rdoq_level = NO_RDOQ;
5052 #endif
5053 
5054 #if DISABLE_SBH
5055         ps_ctxt->i4_sbh_level = NO_SBH;
5056 #endif
5057 
5058         /*Rounding factor calc based on previous cabac states */
5059 
5060         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_4x4[0][0];
5061         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_8x8[0][0];
5062         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_16x16[0][0];
5063         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[4] = &ps_ctxt->i4_quant_round_32x32[0][0];
5064 
5065         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_4x4[1][0];
5066         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_8x8[1][0];
5067         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_16x16[1][0];
5068         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[4] = &ps_ctxt->i4_quant_round_32x32[1][0];
5069 
5070         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_cr_4x4[0][0];
5071         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_cr_8x8[0][0];
5072         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_cr_16x16[0][0];
5073 
5074         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_cr_4x4[1][0];
5075         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_cr_8x8[1][0];
5076         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_cr_16x16[1][0];
5077 
5078         /****************************************************************************************/
5079         /* Setting the perform rdoq and sbh flags appropriately                                 */
5080         /****************************************************************************************/
5081         {
5082             /******************************************/
5083             /* For best cand rdoq and/or sbh          */
5084             /******************************************/
5085             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5086                 (ps_ctxt->i4_rdoq_level == BEST_CAND_RDOQ);
5087             /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
5088             we would have to do RDOQ again.*/
5089             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5090                 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq ||
5091                 ((BEST_CAND_SBH == ps_ctxt->i4_sbh_level) &&
5092                  (ALL_CAND_RDOQ == ps_ctxt->i4_rdoq_level));
5093 
5094             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5095                 (ps_ctxt->i4_sbh_level == BEST_CAND_SBH);
5096 
5097             /* SBH should be performed if
5098             a) i4_sbh_level is BEST_CAND_SBH.
5099             b) For all quality presets above medium speed(i.e. high speed and extreme speed) and
5100             if SBH has to be done because for these presets the quant, iquant and scan coeff
5101             data are calculated in this function and not during the RDOPT stage*/
5102 
5103             /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
5104             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5105                 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh ||
5106                 ((BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level) &&
5107                  (ALL_CAND_SBH == ps_ctxt->i4_sbh_level));
5108 
5109             /******************************************/
5110             /* For all cand rdoq and/or sbh          */
5111             /******************************************/
5112             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq =
5113                 (ps_ctxt->i4_rdoq_level == ALL_CAND_RDOQ);
5114             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh =
5115                 (ps_ctxt->i4_sbh_level == ALL_CAND_SBH);
5116             ps_ctxt->s_rdoq_sbh_ctxt.i4_bit_depth =
5117                 ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5118         }
5119 
5120         if(!is_hbd_mode)
5121         {
5122             if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5123             {
5124                 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5125                 {
5126                     ps_ctxt->apf_quant_iquant_ssd[0] =
5127                         ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5128                     ps_ctxt->apf_quant_iquant_ssd[2] = ps_func_selector->ihevc_quant_iquant_fptr;
5129                 }
5130                 else
5131                 {
5132                     ps_ctxt->apf_quant_iquant_ssd[0] =
5133                         ps_func_selector->ihevc_quant_iquant_ssd_rdoq_fptr;
5134                     ps_ctxt->apf_quant_iquant_ssd[2] =
5135                         ps_func_selector->ihevc_quant_iquant_rdoq_fptr;
5136                 }
5137 
5138                 /* If quant rounding is not fixed, use the var rounding factor quant functions */
5139                 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5140                 {
5141                     ps_ctxt->apf_quant_iquant_ssd[1] =
5142                         ps_func_selector->ihevc_q_iq_ssd_var_rnd_fact_fptr;
5143                     ps_ctxt->apf_quant_iquant_ssd[3] =
5144                         ps_func_selector->ihevc_q_iq_var_rnd_fact_fptr;
5145                 }
5146                 else
5147                 {
5148                     ps_ctxt->apf_quant_iquant_ssd[1] =
5149                         ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5150                     ps_ctxt->apf_quant_iquant_ssd[3] = ps_func_selector->ihevc_quant_iquant_fptr;
5151                 }
5152             }
5153             else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5154             {
5155                 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5156                 {
5157                     ps_ctxt->apf_quant_iquant_ssd[0] =
5158                         ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5159                     ps_ctxt->apf_quant_iquant_ssd[2] =
5160                         ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5161                 }
5162                 else
5163                 {
5164                     ps_ctxt->apf_quant_iquant_ssd[0] =
5165                         ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_fptr;
5166                     ps_ctxt->apf_quant_iquant_ssd[2] =
5167                         ps_func_selector->ihevc_quant_iquant_flat_scale_mat_rdoq_fptr;
5168                 }
5169 
5170                 /* If quant rounding is not fixed, use the var rounding factor quant functions */
5171                 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5172                 {
5173                     ps_ctxt->apf_quant_iquant_ssd[1] =
5174                         ps_func_selector->ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr;
5175                     ps_ctxt->apf_quant_iquant_ssd[3] =
5176                         ps_func_selector->ihevc_q_iq_flat_scale_mat_var_rnd_fact_fptr;
5177                 }
5178                 else
5179                 {
5180                     ps_ctxt->apf_quant_iquant_ssd[1] =
5181                         ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5182                     ps_ctxt->apf_quant_iquant_ssd[3] =
5183                         ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5184                 }
5185             }
5186 
5187             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[0] =
5188                 ps_func_selector->ihevc_sao_edge_offset_class0_fptr;
5189             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[1] =
5190                 ps_func_selector->ihevc_sao_edge_offset_class1_fptr;
5191             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[2] =
5192                 ps_func_selector->ihevc_sao_edge_offset_class2_fptr;
5193             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[3] =
5194                 ps_func_selector->ihevc_sao_edge_offset_class3_fptr;
5195 
5196             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[0] =
5197                 ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr;
5198             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[1] =
5199                 ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr;
5200             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[2] =
5201                 ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr;
5202             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[3] =
5203                 ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr;
5204 
5205             ps_ctxt->apf_it_recon[0] = ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr;
5206             ps_ctxt->apf_it_recon[1] = ps_func_selector->ihevc_itrans_recon_4x4_fptr;
5207             ps_ctxt->apf_it_recon[2] = ps_func_selector->ihevc_itrans_recon_8x8_fptr;
5208             ps_ctxt->apf_it_recon[3] = ps_func_selector->ihevc_itrans_recon_16x16_fptr;
5209             ps_ctxt->apf_it_recon[4] = ps_func_selector->ihevc_itrans_recon_32x32_fptr;
5210 
5211             ps_ctxt->apf_chrm_it_recon[0] = ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr;
5212             ps_ctxt->apf_chrm_it_recon[1] = ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr;
5213             ps_ctxt->apf_chrm_it_recon[2] = ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr;
5214 
5215             ps_ctxt->apf_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr;
5216             ps_ctxt->apf_resd_trns[1] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5217             ps_ctxt->apf_resd_trns[2] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5218             ps_ctxt->apf_resd_trns[3] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5219             ps_ctxt->apf_resd_trns[4] = ps_func_selector->ihevc_resi_trans_32x32_fptr;
5220 
5221             ps_ctxt->apf_chrm_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5222             ps_ctxt->apf_chrm_resd_trns[1] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5223             ps_ctxt->apf_chrm_resd_trns[2] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5224 
5225             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_0] =
5226                 ps_func_selector->ihevc_intra_pred_luma_planar_fptr;
5227             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_1] = ps_func_selector->ihevc_intra_pred_luma_dc_fptr;
5228             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_2] =
5229                 ps_func_selector->ihevc_intra_pred_luma_mode2_fptr;
5230             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_3TO9] =
5231                 ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr;
5232             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_10] =
5233                 ps_func_selector->ihevc_intra_pred_luma_horz_fptr;
5234             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_11TO17] =
5235                 ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr;
5236             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_18_34] =
5237                 ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr;
5238             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_19TO25] =
5239                 ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr;
5240             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_26] = ps_func_selector->ihevc_intra_pred_luma_ver_fptr;
5241             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_27TO33] =
5242                 ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr;
5243 
5244             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_0] =
5245                 ps_func_selector->ihevc_intra_pred_chroma_planar_fptr;
5246             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_1] =
5247                 ps_func_selector->ihevc_intra_pred_chroma_dc_fptr;
5248             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_2] =
5249                 ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr;
5250             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_3TO9] =
5251                 ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr;
5252             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_10] =
5253                 ps_func_selector->ihevc_intra_pred_chroma_horz_fptr;
5254             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_11TO17] =
5255                 ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr;
5256             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_18_34] =
5257                 ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr;
5258             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_19TO25] =
5259                 ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr;
5260             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_26] =
5261                 ps_func_selector->ihevc_intra_pred_chroma_ver_fptr;
5262             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_27TO33] =
5263                 ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr;
5264 
5265             ps_ctxt->apf_chrm_resd_trns_had[0] =
5266                 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_4x4_8bit;
5267             ps_ctxt->apf_chrm_resd_trns_had[1] =
5268                 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_8x8_8bit;
5269             ps_ctxt->apf_chrm_resd_trns_had[2] =
5270                 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_16x16_8bit;
5271         }
5272 
5273         if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5274         {
5275             /* initialise the scale & rescale matrices */
5276             ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5277             ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5278             ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5279             ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5280             ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5281             /*init for inter matrix*/
5282             ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5283             ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5284             ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5285             ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5286             ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5287 
5288             /*init for rescale matrix*/
5289             ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5290             ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5291             ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5292             ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5293             ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5294             /*init for rescale inter matrix*/
5295             ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5296             ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5297             ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5298             ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5299             ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5300         }
5301         else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5302         {
5303             /* initialise the scale & rescale matrices */
5304             ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5305             ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5306             ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_intra_default_scale_mat_8x8[0];
5307             ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_intra_default_scale_mat_16x16[0];
5308             ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_intra_default_scale_mat_32x32[0];
5309             /*init for inter matrix*/
5310             ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5311             ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5312             ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_inter_default_scale_mat_8x8[0];
5313             ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_inter_default_scale_mat_16x16[0];
5314             ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_inter_default_scale_mat_32x32[0];
5315 
5316             /*init for rescale matrix*/
5317             ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5318             ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5319             ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_intra_default_rescale_mat_8x8[0];
5320             ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_intra_default_rescale_mat_16x16[0];
5321             ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_intra_default_rescale_mat_32x32[0];
5322             /*init for rescale inter matrix*/
5323             ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5324             ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5325             ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_inter_default_rescale_mat_8x8[0];
5326             ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_inter_default_rescale_mat_16x16[0];
5327             ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_inter_default_rescale_mat_32x32[0];
5328         }
5329         else
5330         {
5331             ASSERT(0);
5332         }
5333 
5334         /* Not recomputing Luma pred-data and header data for any preset now */
5335         ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
5336         ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
5337         ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
5338 
5339         switch(ps_ctxt->i4_quality_preset)
5340         {
5341         case IHEVCE_QUALITY_P0:
5342         {
5343             ps_ctxt->i4_max_merge_candidates = 5;
5344             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5345             ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5346             ps_ctxt->u1_use_early_cbf_data = 0;
5347             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_PQ;
5348             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5349                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ;
5350 
5351             break;
5352         }
5353         case IHEVCE_QUALITY_P2:
5354         {
5355             ps_ctxt->i4_max_merge_candidates = 5;
5356             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5357             ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5358             ps_ctxt->u1_use_early_cbf_data = 0;
5359 
5360             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HQ;
5361             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5362                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ;
5363 
5364             break;
5365         }
5366         case IHEVCE_QUALITY_P3:
5367         {
5368             ps_ctxt->i4_max_merge_candidates = 3;
5369             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5370             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5371 
5372             ps_ctxt->u1_use_early_cbf_data = 0;
5373             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_MS;
5374             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5375                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS;
5376 
5377             break;
5378         }
5379         case IHEVCE_QUALITY_P4:
5380         {
5381             ps_ctxt->i4_max_merge_candidates = 2;
5382             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5383             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5384             ps_ctxt->u1_use_early_cbf_data = 0;
5385             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HS;
5386             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5387                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS;
5388 
5389             break;
5390         }
5391         case IHEVCE_QUALITY_P5:
5392         {
5393             ps_ctxt->i4_max_merge_candidates = 2;
5394             ps_ctxt->i4_use_satd_for_merge_eval = 0;
5395             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5396             ps_ctxt->u1_use_early_cbf_data = 0;
5397             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_XS;
5398             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5399                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS;
5400 
5401             break;
5402         }
5403         case IHEVCE_QUALITY_P6:
5404         {
5405             ps_ctxt->i4_max_merge_candidates = 2;
5406             ps_ctxt->i4_use_satd_for_merge_eval = 0;
5407             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5408             ps_ctxt->u1_use_early_cbf_data = EARLY_CBF_ON;
5409             break;
5410         }
5411         default:
5412         {
5413             ASSERT(0);
5414         }
5415         }
5416 
5417 #if DISABLE_SKIP_AND_MERGE_EVAL
5418         ps_ctxt->i4_max_merge_candidates = 0;
5419 #endif
5420 
5421         ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
5422             !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
5423 
5424         /*initialize memory for RC related parameters required/populated by enc_loop */
        /* the allocated memory is distributed as follows assuming encoder is running for 3 bit-rate instances
5426         |-------|-> Thread 0, instance 0
5427         |       |
5428         |       |
5429         |       |
5430         |-------|-> thread 0, instance 1
5431         |       |
5432         |       |
5433         |       |
        |-------|-> thread 0, instance 2
5435         |       |
5436         |       |
5437         |       |
5438         |-------|-> thread 1, instance 0
5439         |       |
5440         |       |
5441         |       |
5442         |-------|-> thread 1, instance 1
5443         |       |
5444         |       |
5445         |       |
5446         |-------|-> thread 1, instance 2
5447         ...         ...
5448 
        Each thread will collate the data corresponding to the bit-rate instance it's running at the appropriate place.
        Finally, one thread will become master and collate the data from all the threads */
5451         for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
5452         {
5453             for(i = 0; i < i4_num_bitrate_inst; i++)
5454             {
5455                 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i] = ps_enc_loop_rc_params;
5456                 ps_enc_loop_rc_params++;
5457             }
5458         }
5459         /* Non-Luma modes for Chroma are evaluated only in HIGH QUALITY preset */
5460 
5461 #if !ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE
5462         ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 0;
5463 #endif
5464 
5465         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_iq_buff_stride =
5466             MAX_TU_SIZE;
5467         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_iq_buff_stride =
5468             MAX_TU_SIZE;
5469         /*Multiplying by two to account for interleaving of cb and cr*/
5470         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_pred_stride = MAX_TU_SIZE
5471                                                                                        << 1;
5472         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_pred_stride =
5473             MAX_TU_SIZE << 1;
5474 
5475         /*     Memory for a frame level memory to store tile-id                  */
5476         /*              corresponding to each CTB of frame                       */
5477         ps_ctxt->pi4_offset_for_last_cu_qp = &ps_master_ctxt->ai4_offset_for_last_cu_qp[0];
5478 
5479         ps_ctxt->i4_qp_mod = ps_init_prms->s_config_prms.i4_cu_level_rc & 1;
5480         /* psy rd strength is a run time parametr control by bit field 5-7 in the VQET field.*/
5481         /* we disable psyrd if the the psy strength is zero or the BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER field is not set */
5482         if(ps_init_prms->s_coding_tools_prms.i4_vqet &
5483            (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER))
5484         {
5485             UWORD32 psy_strength;
5486             UWORD32 psy_strength_mask =
5487                 224;  // only bits 5,6,7 are ones. These three bits represent the psy strength
5488             psy_strength = ps_init_prms->s_coding_tools_prms.i4_vqet & psy_strength_mask;
5489             ps_ctxt->u1_enable_psyRDOPT = 1;
5490             ps_ctxt->u4_psy_strength = psy_strength >> BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1;
5491             if(psy_strength == 0)
5492             {
5493                 ps_ctxt->u1_enable_psyRDOPT = 0;
5494                 ps_ctxt->u4_psy_strength = 0;
5495             }
5496         }
5497 
5498         ps_ctxt->u1_is_stasino_enabled =
5499             ((ps_init_prms->s_coding_tools_prms.i4_vqet &
5500               (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
5501              (ps_init_prms->s_coding_tools_prms.i4_vqet &
5502               (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)));
5503 
5504         ps_ctxt->u1_max_inter_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI;
5505         ps_ctxt->u1_max_intra_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_I;
5506         ps_ctxt++;
5507     }
5508     /* Store Tile params base into EncLoop Master context */
5509     ps_master_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
5510 
5511     if(1 == ps_tile_params_base->i4_tiles_enabled_flag)
5512     {
5513         i4_num_tile_cols = ps_tile_params_base->i4_num_tile_cols;
5514     }
5515 
5516     /* Updating  ai4_offset_for_last_cu_qp[] array for all tile-colums of frame */
5517     /* Loop over all tile-cols in frame */
5518     for(ctr = 0; ctr < i4_num_tile_cols; ctr++)
5519     {
5520         WORD32 i4_tile_col_wd_in_ctb_unit =
5521             (ps_tile_params_base + ctr)->i4_curr_tile_wd_in_ctb_unit;
5522         WORD32 offset_x;
5523 
5524         if(ctr == (i4_num_tile_cols - 1))
5525         { /* Last tile-row of frame */
5526             WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
5527 
5528             WORD32 cu_aligned_pic_wd =
5529                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
5530                 SET_CTB_ALIGN(
5531                     ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width,
5532                     min_cu_size);
5533 
5534             WORD32 last_hz_ctb_wd = MAX_CTB_SIZE - (u4_width - cu_aligned_pic_wd);
5535 
5536             offset_x = (i4_tile_col_wd_in_ctb_unit - 1) * MAX_CTB_SIZE;
5537             offset_x += last_hz_ctb_wd;
5538         }
5539         else
5540         { /* Not the last tile-row of frame */
5541             offset_x = (i4_tile_col_wd_in_ctb_unit)*MAX_CTB_SIZE;
5542         }
5543 
5544         offset_x /= 4;
5545         offset_x -= 1;
5546 
5547         ps_master_ctxt->ai4_offset_for_last_cu_qp[ctr] = offset_x;
5548     }
5549 
5550     n_tabs = NUM_ENC_LOOP_MEM_RECS;
5551 
5552     /*store num bit-rate instances in the master context */
5553     ps_master_ctxt->i4_num_bitrates = i4_num_bitrate_inst;
5554     ps_master_ctxt->i4_num_enc_loop_frm_pllel = i4_num_enc_loop_frm_pllel;
5555     /*************************************************************************/
5556     /* --- EncLoop Deblock and SAO sync Dep Mngr Mem init --                         */
5557     /*************************************************************************/
5558     {
5559         WORD32 count;
5560         WORD32 num_vert_units, num_blks_in_row;
5561         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5562         WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5563 
5564         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5565         ihevce_enc_loop_dblk_get_prms_dep_mngr(wd, &num_blks_in_row);
5566         ASSERT(num_vert_units > 0);
5567         ASSERT(num_blks_in_row > 0);
5568 
5569         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5570         {
5571             for(i = 0; i < i4_num_bitrate_inst; i++)
5572             {
5573                 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[count][i] = ihevce_dmgr_init(
5574                     &ps_mem_tab[n_tabs],
5575                     pv_osal_handle,
5576                     DEP_MNGR_ROW_ROW_SYNC,
5577                     num_vert_units,
5578                     num_blks_in_row,
5579                     i4_num_tile_cols, /* Number of Col Tiles */
5580                     i4_num_proc_thrds,
5581                     0 /*Sem Disabled*/
5582                 );
5583 
5584                 n_tabs += ihevce_dmgr_get_num_mem_recs();
5585             }
5586         }
5587 
5588         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5589         {
5590             for(i = 0; i < i4_num_bitrate_inst; i++)
5591             {
5592                 ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[count][i] = ihevce_dmgr_init(
5593                     &ps_mem_tab[n_tabs],
5594                     pv_osal_handle,
5595                     DEP_MNGR_ROW_ROW_SYNC,
5596                     num_vert_units,
5597                     num_blks_in_row,
5598                     i4_num_tile_cols, /* Number of Col Tiles */
5599                     i4_num_proc_thrds,
5600                     0 /*Sem Disabled*/
5601                 );
5602 
5603                 n_tabs += ihevce_dmgr_get_num_mem_recs();
5604             }
5605         }
5606     }
5607     /*************************************************************************/
5608     /* --- EncLoop Top-Right CU synnc Dep Mngr Mem init --                   */
5609     /*************************************************************************/
5610     {
5611         WORD32 count;
5612         WORD32 num_vert_units, num_blks_in_row;
5613         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5614         WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5615 
5616         WORD32 i4_sem = 0;
5617 
5618         if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset >=
5619            IHEVCE_QUALITY_P4)
5620             i4_sem = 0;
5621         else
5622             i4_sem = 1;
5623         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5624         /* For Top-Right CU sync, adding one more CTB since value updation */
5625         /* happens in that way for the last CTB in the row                 */
5626         num_blks_in_row = wd + SET_CTB_ALIGN(wd, MAX_CU_SIZE);
5627         num_blks_in_row += MAX_CTB_SIZE;
5628 
5629         ASSERT(num_vert_units > 0);
5630         ASSERT(num_blks_in_row > 0);
5631 
5632         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5633         {
5634             for(i = 0; i < i4_num_bitrate_inst; i++)
5635             {
5636                 /* For ES/HS, CU level updates uses spin-locks than semaphore */
5637                 {
5638                     ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[count][i] =
5639                         ihevce_dmgr_init(
5640                             &ps_mem_tab[n_tabs],
5641                             pv_osal_handle,
5642                             DEP_MNGR_ROW_ROW_SYNC,
5643                             num_vert_units,
5644                             num_blks_in_row,
5645                             i4_num_tile_cols, /* Number of Col Tiles */
5646                             i4_num_proc_thrds,
5647                             i4_sem /*Sem Disabled*/
5648                         );
5649                 }
5650                 n_tabs += ihevce_dmgr_get_num_mem_recs();
5651             }
5652         }
5653     }
5654 
5655     for(i = 1; i < 5; i++)
5656     {
5657         WORD32 i4_log2_trans_size = i + 1;
5658         WORD32 i4_bit_depth = ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5659 
5660         ga_trans_shift[i] = (MAX_TR_DYNAMIC_RANGE - i4_bit_depth - i4_log2_trans_size) << 1;
5661     }
5662 
5663     ga_trans_shift[0] = ga_trans_shift[1];
5664 
5665     /* return the handle to caller */
5666     return ((void *)ps_master_ctxt);
5667 }
5668 
5669 /*!
5670 ******************************************************************************
5671 * \if Function name : ihevce_enc_loop_reg_sem_hdls \endif
5672 *
5673 * \brief
5674 *    Intialization for ENC_LOOP context state structure .
5675 *
5676 * \param[in] ps_mem_tab : pointer to memory descriptors table
5677 * \param[in] ppv_sem_hdls : Array of semaphore handles
5678 * \param[in] i4_num_proc_thrds : Number of processing threads
5679 *
5680 * \return
5681 *    None
5682 *
5683 * \author
5684 *  Ittiam
5685 *
5686 *****************************************************************************
5687 */
ihevce_enc_loop_reg_sem_hdls(void * pv_enc_loop_ctxt,void ** ppv_sem_hdls,WORD32 i4_num_proc_thrds)5688 void ihevce_enc_loop_reg_sem_hdls(
5689     void *pv_enc_loop_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
5690 {
5691     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5692     WORD32 i, enc_frm_id;
5693 
5694     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5695 
5696     /*************************************************************************/
5697     /* --- EncLoop Deblock and SAO sync Dep Mngr reg Semaphores --                   */
5698     /*************************************************************************/
5699     for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5700     {
5701         for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5702         {
5703             ihevce_dmgr_reg_sem_hdls(
5704                 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][i],
5705                 ppv_sem_hdls,
5706                 i4_num_proc_thrds);
5707         }
5708     }
5709 
5710     for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5711     {
5712         for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5713         {
5714             ihevce_dmgr_reg_sem_hdls(
5715                 ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[enc_frm_id][i],
5716                 ppv_sem_hdls,
5717                 i4_num_proc_thrds);
5718         }
5719     }
5720 
5721     /*************************************************************************/
5722     /* --- EncLoop Top-Right CU synnc Dep Mngr reg Semaphores --             */
5723     /*************************************************************************/
5724     for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5725     {
5726         for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5727         {
5728             ihevce_dmgr_reg_sem_hdls(
5729                 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][i],
5730                 ppv_sem_hdls,
5731                 i4_num_proc_thrds);
5732         }
5733     }
5734 
5735     return;
5736 }
5737 
5738 /*!
5739 ******************************************************************************
5740 * \if Function name : ihevce_enc_loop_delete \endif
5741 *
5742 * \brief
5743 *    Destroy EncLoop module
5744 * Note : Only Destroys the resources allocated in the module like
5745 *   semaphore,etc. Memory free is done Separately using memtabs
5746 *
5747 * \param[in] pv_me_ctxt : pointer to EncLoop ctxt
5748 *
5749 * \return
5750 *    None
5751 *
5752 * \author
5753 *  Ittiam
5754 *
5755 *****************************************************************************
5756 */
ihevce_enc_loop_delete(void * pv_enc_loop_ctxt)5757 void ihevce_enc_loop_delete(void *pv_enc_loop_ctxt)
5758 {
5759     ihevce_enc_loop_master_ctxt_t *ps_enc_loop_ctxt;
5760     WORD32 ctr, enc_frm_id;
5761 
5762     ps_enc_loop_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5763 
5764     for(enc_frm_id = 0; enc_frm_id < ps_enc_loop_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5765     {
5766         for(ctr = 0; ctr < ps_enc_loop_ctxt->i4_num_bitrates; ctr++)
5767         {
5768             /* --- EncLoop Deblock sync Dep Mngr Delete --*/
5769             ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][ctr]);
5770             /* --- EncLoop Sao sync Dep Mngr Delete --*/
5771             ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_sao[enc_frm_id][ctr]);
5772             /* --- EncLoop Top-Right CU sync Dep Mngr Delete --*/
5773             ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][ctr]);
5774         }
5775     }
5776 }
5777 
5778 /*!
5779 ******************************************************************************
5780 * \if Function name : ihevce_enc_loop_dep_mngr_frame_reset \endif
5781 *
5782 * \brief
5783 *    Frame level Reset for the Dependency Mngrs local to EncLoop.,
5784 *    ie CU_TopRight and Dblk
5785 *
5786 * \param[in] pv_enc_loop_ctxt       : Enc_loop context pointer
5787 *
5788 * \return
5789 *    None
5790 *
5791 * \author
5792 *  Ittiam
5793 *
5794 *****************************************************************************
5795 */
ihevce_enc_loop_dep_mngr_frame_reset(void * pv_enc_loop_ctxt,WORD32 enc_frm_id)5796 void ihevce_enc_loop_dep_mngr_frame_reset(void *pv_enc_loop_ctxt, WORD32 enc_frm_id)
5797 {
5798     WORD32 ctr, frame_id;
5799     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5800 
5801     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5802 
5803     if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
5804     {
5805         frame_id = 0;
5806     }
5807     else
5808     {
5809         frame_id = enc_frm_id;
5810     }
5811 
5812     for(ctr = 0; ctr < ps_master_ctxt->i4_num_bitrates; ctr++)
5813     {
5814         /* Dep. Mngr : Reset the num ctb Deblocked in every row  for ENC sync */
5815         ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[frame_id][ctr]);
5816 
5817         /* Dep. Mngr : Reset the num SAO ctb in every row  for ENC sync */
5818         ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[frame_id][ctr]);
5819 
5820         /* Dep. Mngr : Reset the TopRight CU Processed in every row  for ENC sync */
5821         ihevce_dmgr_rst_row_row_sync(
5822             ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[frame_id][ctr]);
5823     }
5824 }
5825 
5826 /*!
5827 ******************************************************************************
5828 * \if Function name : ihevce_enc_loop_frame_init \endif
5829 *
5830 * \brief
*    Frame level init of encode loop function.
5832 *
5833 * \param[in] pv_enc_loop_ctxt           : Enc_loop context pointer
5834 * \param[in] pi4_cu_processed           : ptr to cur frame cu process in pix.
5835 * \param[in] aps_ref_list               : ref pic list for the current frame
5836 * \param[in] ps_slice_hdr               : ptr to current slice header params
5837 * \param[in] ps_pps                     : ptr to active pps params
5838 * \param[in] ps_sps                     : ptr to active sps params
5839 * \param[in] ps_vps                     : ptr to active vps params
5840 
5841 
5842 * \param[in] i1_weighted_pred_flag      : weighted pred enable flag (unidir)
5843 * \param[in] i1_weighted_bipred_flag    : weighted pred enable flag (bidir)
5844 * \param[in] log2_luma_wght_denom       : down shift factor for weighted pred of luma
5845 * \param[in] log2_chroma_wght_denom       : down shift factor for weighted pred of chroma
* \param[in] cur_poc                    : current frame poc
5847 * \param[in] i4_bitrate_instance_num    : number indicating the instance of bit-rate for multi-rate encoder
5848 *
5849 * \return
5850 *    None
5851 *
5852 * \author
5853 *  Ittiam
5854 *
5855 *****************************************************************************
5856 */
ihevce_enc_loop_frame_init(void * pv_enc_loop_ctxt,WORD32 i4_frm_qp,recon_pic_buf_t * (* aps_ref_list)[HEVCE_MAX_REF_PICS * 2],recon_pic_buf_t * ps_frm_recon,slice_header_t * ps_slice_hdr,pps_t * ps_pps,sps_t * ps_sps,vps_t * ps_vps,WORD8 i1_weighted_pred_flag,WORD8 i1_weighted_bipred_flag,WORD32 log2_luma_wght_denom,WORD32 log2_chroma_wght_denom,WORD32 cur_poc,WORD32 i4_display_num,enc_ctxt_t * ps_enc_ctxt,me_enc_rdopt_ctxt_t * ps_curr_inp_prms,WORD32 i4_bitrate_instance_num,WORD32 i4_thrd_id,WORD32 i4_enc_frm_id,WORD32 i4_num_bitrates,WORD32 i4_quality_preset,void * pv_dep_mngr_encloop_dep_me)5857 void ihevce_enc_loop_frame_init(
5858     void *pv_enc_loop_ctxt,
5859     WORD32 i4_frm_qp,
5860     recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
5861     recon_pic_buf_t *ps_frm_recon,
5862     slice_header_t *ps_slice_hdr,
5863     pps_t *ps_pps,
5864     sps_t *ps_sps,
5865     vps_t *ps_vps,
5866     WORD8 i1_weighted_pred_flag,
5867     WORD8 i1_weighted_bipred_flag,
5868     WORD32 log2_luma_wght_denom,
5869     WORD32 log2_chroma_wght_denom,
5870     WORD32 cur_poc,
5871     WORD32 i4_display_num,
5872     enc_ctxt_t *ps_enc_ctxt,
5873     me_enc_rdopt_ctxt_t *ps_curr_inp_prms,
5874     WORD32 i4_bitrate_instance_num,
5875     WORD32 i4_thrd_id,
5876     WORD32 i4_enc_frm_id,
5877     WORD32 i4_num_bitrates,
5878     WORD32 i4_quality_preset,
5879     void *pv_dep_mngr_encloop_dep_me)
5880 {
5881     /* local variables */
5882     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5883     ihevce_enc_loop_ctxt_t *ps_ctxt;
5884     WORD32 chroma_qp_offset, i4_div_factor;
5885     WORD8 i1_slice_type = ps_slice_hdr->i1_slice_type;
5886     WORD8 i1_strong_intra_smoothing_enable_flag = ps_sps->i1_strong_intra_smoothing_enable_flag;
5887 
5888     /* ENC_LOOP master state structure */
5889     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5890 
5891     /* Nithya: Store the current POC in the slice header */
5892     ps_slice_hdr->i4_abs_pic_order_cnt = cur_poc;
5893 
5894     /* Update the POC list of the current frame to the recon buffer */
5895     if(ps_slice_hdr->i1_num_ref_idx_l0_active != 0)
5896     {
5897         int i4_i;
5898         for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l0_active; i4_i++)
5899         {
5900             ps_frm_recon->ai4_col_l0_poc[i4_i] = aps_ref_list[0][i4_i]->i4_poc;
5901         }
5902     }
5903     if(ps_slice_hdr->i1_num_ref_idx_l1_active != 0)
5904     {
5905         int i4_i;
5906         for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l1_active; i4_i++)
5907         {
5908             ps_frm_recon->ai4_col_l1_poc[i4_i] = aps_ref_list[1][i4_i]->i4_poc;
5909         }
5910     }
5911 
5912     /* loop over all the threads */
5913     // for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
5914     {
5915         /* ENC_LOOP state structure */
5916         ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i4_thrd_id];
5917 
5918         /* SAO ctxt structure initialization*/
5919         ps_ctxt->s_sao_ctxt_t.ps_pps = ps_pps;
5920         ps_ctxt->s_sao_ctxt_t.ps_sps = ps_sps;
5921         ps_ctxt->s_sao_ctxt_t.ps_slice_hdr = ps_slice_hdr;
5922 
5923         /*bit-rate instance number for Multi-bitrate (MBR) encode */
5924         ps_ctxt->i4_bitrate_instance_num = i4_bitrate_instance_num;
5925         ps_ctxt->i4_num_bitrates = i4_num_bitrates;
5926         ps_ctxt->i4_chroma_format = ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format;
5927         ps_ctxt->i4_is_first_query = 1;
5928         ps_ctxt->i4_is_ctb_qp_modified = 0;
5929 
5930         /* enc_frm_id for multiframe encode */
5931 
5932         if(1 == ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel)
5933         {
5934             ps_ctxt->i4_enc_frm_id = 0;
5935             i4_enc_frm_id = 0;
5936         }
5937         else
5938         {
5939             ps_ctxt->i4_enc_frm_id = i4_enc_frm_id;
5940         }
5941 
5942         /*Initialize the sub pic rc buf appropriately */
5943 
5944         /*Set the thrd id flag */
5945         ps_enc_ctxt->s_multi_thrd
5946             .ai4_thrd_id_valid_flag[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 1;
5947 
5948         ps_enc_ctxt->s_multi_thrd
5949             .ai8_nctb_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5950         ps_enc_ctxt->s_multi_thrd
5951             .ai8_nctb_me_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5952 
5953         ps_enc_ctxt->s_multi_thrd
5954             .ai8_nctb_l0_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5955         ps_enc_ctxt->s_multi_thrd
5956             .ai8_nctb_act_factor[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5957 
5958         ps_enc_ctxt->s_multi_thrd
5959             .ai8_nctb_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5960         ps_enc_ctxt->s_multi_thrd
5961             .ai8_acc_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5962         ps_enc_ctxt->s_multi_thrd
5963             .ai8_acc_bits_mul_qs_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5964         ps_enc_ctxt->s_multi_thrd
5965             .ai8_nctb_hdr_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5966         ps_enc_ctxt->s_multi_thrd
5967             .ai8_nctb_mpm_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5968         ps_enc_ctxt->s_multi_thrd.ai4_prev_chunk_qp[i4_enc_frm_id][i4_bitrate_instance_num] =
5969             i4_frm_qp;
5970 
5971         /*Frame level data for Sub Pic rc is initalized here */
5972         /*Can be sent once per frame*/
5973         {
5974             WORD32 i4_tot_frame_ctb = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert *
5975                                       ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz;
5976 
5977             /*Accumalated bits of all cu for required CTBS estimated during RDO evaluation*/
5978             ps_ctxt->u4_total_cu_bits = 0;
5979             ps_ctxt->u4_total_cu_hdr_bits = 0;
5980 
5981             ps_ctxt->u4_cu_tot_bits_into_qscale = 0;
5982             ps_ctxt->u4_cu_tot_bits = 0;
5983             ps_ctxt->u4_total_cu_bits_mul_qs = 0;
5984             ps_ctxt->i4_display_num = i4_display_num;
5985             ps_ctxt->i4_sub_pic_level_rc = ps_enc_ctxt->s_multi_thrd.i4_in_frame_rc_enabled;
5986             /*The Qscale is to be generated every 10th of total frame ctb is completed */
5987             //ps_ctxt->i4_num_ctb_for_out_scale = (10 * i4_tot_frame_ctb)/100 ;
5988             ps_ctxt->i4_num_ctb_for_out_scale = (UPDATE_QP_AT_CTB * i4_tot_frame_ctb) / 100;
5989 
5990             ps_ctxt->i4_cu_qp_sub_pic_rc = (1 << QP_LEVEL_MOD_ACT_FACTOR);
5991             /*Sub Pic RC frame level params */
5992             ps_ctxt->i8_frame_l1_ipe_sad =
5993                 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_pre_intra_sad;
5994             ps_ctxt->i8_frame_l0_ipe_satd =
5995                 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_l0_acc_satd;
5996             ps_ctxt->i8_frame_l1_me_sad =
5997                 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_l1_coarse_me_sad;
5998             ps_ctxt->i8_frame_l1_activity_fact =
5999                 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_level_activity_fact;
6000             if(ps_ctxt->i4_sub_pic_level_rc)
6001             {
6002                 ASSERT(
6003                     ps_curr_inp_prms->ps_curr_inp->s_lap_out
6004                         .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num] != 0);
6005 
6006                 ps_ctxt->ai4_frame_bits_estimated[ps_ctxt->i4_enc_frm_id]
6007                                                  [ps_ctxt->i4_bitrate_instance_num] =
6008                     ps_curr_inp_prms->ps_curr_inp->s_lap_out
6009                         .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num];
6010             }
6011             //ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type = 1;
6012 
6013             ps_ctxt->i4_is_I_scenecut =
6014                 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6015                  (ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_IDR_FRAME ||
6016                   ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_I_FRAME));
6017 
6018             ps_ctxt->i4_is_non_I_scenecut =
6019                 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6020                  (ps_ctxt->i4_is_I_scenecut == 0));
6021 
6022             /*ps_ctxt->i4_is_I_only_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_I_only_scd;
6023             ps_ctxt->i4_is_non_I_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_non_I_scd;*/
6024             ps_ctxt->i4_is_model_valid =
6025                 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i4_is_model_valid;
6026         }
6027         /* cb and cr offsets are assumed to be same */
6028         chroma_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset + ps_pps->i1_pic_cb_qp_offset;
6029 
6030         /* assumption of cb = cr qp */
6031         ASSERT(ps_slice_hdr->i1_slice_cb_qp_offset == ps_slice_hdr->i1_slice_cr_qp_offset);
6032         ASSERT(ps_pps->i1_pic_cb_qp_offset == ps_pps->i1_pic_cr_qp_offset);
6033 
6034         ps_ctxt->u1_is_input_data_hbd = (ps_sps->i1_bit_depth_luma_minus8 > 0);
6035 
6036         ps_ctxt->u1_bit_depth = ps_sps->i1_bit_depth_luma_minus8 + 8;
6037 
6038         ps_ctxt->s_mc_ctxt.i4_bit_depth = ps_ctxt->u1_bit_depth;
6039         ps_ctxt->s_mc_ctxt.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
6040 
6041         /*remember chroma qp offset as qp related parameters are calculated at CU level*/
6042         ps_ctxt->i4_chroma_qp_offset = chroma_qp_offset;
6043         ps_ctxt->i1_cu_qp_delta_enable = ps_pps->i1_cu_qp_delta_enabled_flag;
6044         ps_ctxt->i1_entropy_coding_sync_enabled_flag = ps_pps->i1_entropy_coding_sync_enabled_flag;
6045 
6046         ps_ctxt->i4_is_ref_pic = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_ref_pic;
6047         ps_ctxt->i4_temporal_layer = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_temporal_lyr_id;
6048         ps_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER;
6049         ps_ctxt->i4_use_const_lamda_modifier =
6050             ps_ctxt->i4_use_const_lamda_modifier ||
6051             ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6052               (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
6053              ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6054                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
6055               (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6056                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
6057               (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6058                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
6059               (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6060                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
6061 
6062         {
6063             ps_ctxt->f_i_pic_lamda_modifier =
6064                 ps_curr_inp_prms->ps_curr_inp->s_lap_out.f_i_pic_lamda_modifier;
6065         }
6066 
6067         ps_ctxt->i4_frame_qp = i4_frm_qp;
6068         ps_ctxt->i4_frame_mod_qp = i4_frm_qp;
6069         ps_ctxt->i4_cu_qp = i4_frm_qp;
6070         ps_ctxt->i4_prev_cu_qp = i4_frm_qp;
6071         ps_ctxt->i4_chrm_cu_qp =
6072             (ps_ctxt->u1_chroma_array_type == 2)
6073                 ? MIN(i4_frm_qp + chroma_qp_offset, 51)
6074                 : gai1_ihevc_chroma_qp_scale[i4_frm_qp + chroma_qp_offset + MAX_QP_BD_OFFSET];
6075 
6076         ps_ctxt->i4_cu_qp_div6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6077         i4_div_factor = (i4_frm_qp + 3) / 6;
6078         i4_div_factor = CLIP3(i4_div_factor, 3, 6);
6079         ps_ctxt->i4_cu_qp_mod6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6080 
6081         ps_ctxt->i4_chrm_cu_qp_div6 =
6082             (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6083         ps_ctxt->i4_chrm_cu_qp_mod6 =
6084             (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6085 
6086 #define INTER_RND_QP_BY_6
6087 #ifdef INTER_RND_QP_BY_6
6088 
6089         { /*1/6 rounding for 8 bit b frames*/
6090             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = 85
6091                 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
6092         }
6093 #else
6094         /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
6095         ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
6096 #endif
6097 
6098         if(ISLICE == i1_slice_type)
6099         {
6100             /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
6101             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 171
6102                 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
6103         }
6104         else
6105         {
6106             /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
6107             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
6108                 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
6109             /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
6110         }
6111 
6112         ps_ctxt->i1_strong_intra_smoothing_enable_flag = i1_strong_intra_smoothing_enable_flag;
6113 
6114         ps_ctxt->i1_slice_type = i1_slice_type;
6115 
6116         /* intialize the inter pred (MC) context at frame level */
6117         ps_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list;
6118         ps_ctxt->s_mc_ctxt.i1_weighted_pred_flag = i1_weighted_pred_flag;
6119         ps_ctxt->s_mc_ctxt.i1_weighted_bipred_flag = i1_weighted_bipred_flag;
6120         ps_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom = log2_luma_wght_denom;
6121         ps_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom = log2_chroma_wght_denom;
6122 
6123         /* intialize the MV pred context at frame level */
6124         ps_ctxt->s_mv_pred_ctxt.ps_ref_list = aps_ref_list;
6125         ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr = ps_slice_hdr;
6126         ps_ctxt->s_mv_pred_ctxt.ps_sps = ps_sps;
6127         ps_ctxt->s_mv_pred_ctxt.i4_log2_parallel_merge_level_minus2 =
6128             ps_pps->i1_log2_parallel_merge_level - 2;
6129 
6130 #if ADAPT_COLOCATED_FROM_L0_FLAG
6131         if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_temporal_mvp_enable_flag)
6132         {
6133             if((ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_num_ref_idx_l1_active > 0) &&
6134                (ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][0]->i4_frame_qp <
6135                 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][0]->i4_frame_qp))
6136             {
6137                 ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_collocated_from_l0_flag = 1;
6138             }
6139         }
6140 #endif
6141         /* Initialization of deblocking params */
6142         ps_ctxt->s_deblk_prms.i4_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
6143         ps_ctxt->s_deblk_prms.i4_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
6144 
6145         ps_ctxt->s_deblk_prms.i4_cb_qp_indx_offset = ps_pps->i1_pic_cb_qp_offset;
6146 
6147         ps_ctxt->s_deblk_prms.i4_cr_qp_indx_offset = ps_pps->i1_pic_cr_qp_offset;
6148         /*init frame level stat accumualtion parameters */
6149         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6150             ->u4_frame_sad_acc = 0;
6151         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6152             ->u4_frame_intra_sad_acc = 0;
6153         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6154             ->u4_frame_open_loop_intra_sad = 0;
6155         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6156             ->i8_frame_open_loop_ssd = 0;
6157         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6158             ->u4_frame_inter_sad_acc = 0;
6159 
6160         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6161             ->i8_frame_cost_acc = 0;
6162         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6163             ->i8_frame_intra_cost_acc = 0;
6164         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6165             ->i8_frame_inter_cost_acc = 0;
6166 
6167         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6168             ->u4_frame_intra_sad = 0;
6169         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6170             ->u4_frame_rdopt_bits = 0;
6171         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6172             ->u4_frame_rdopt_header_bits = 0;
6173         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6174             ->i4_qp_normalized_8x8_cu_sum[0] = 0;
6175         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6176             ->i4_qp_normalized_8x8_cu_sum[1] = 0;
6177         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6178             ->i4_8x8_cu_sum[0] = 0;
6179         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6180             ->i4_8x8_cu_sum[1] = 0;
6181         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6182             ->i8_sad_by_qscale[0] = 0;
6183         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6184             ->i8_sad_by_qscale[1] = 0;
6185         /* Compute the frame_qstep */
6186         GET_FRAME_QSTEP_FROM_QP(ps_ctxt->i4_frame_qp, ps_ctxt->i4_frame_qstep);
6187 
6188         ps_ctxt->u1_max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter;
6189 
6190         ps_ctxt->ps_rc_quant_ctxt = &ps_enc_ctxt->s_rc_quant;
6191         /* intialize the cabac rdopt context at frame level */
6192         ihevce_entropy_rdo_frame_init(
6193             &ps_ctxt->s_rdopt_entropy_ctxt,
6194             ps_slice_hdr,
6195             ps_pps,
6196             ps_sps,
6197             ps_vps,
6198             ps_master_ctxt->au1_cu_skip_top_row,
6199             &ps_enc_ctxt->s_rc_quant);
6200 
6201         /* register the dep mngr instance for forward ME sync */
6202         ps_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me;
6203     }
6204 }
6205 /*
6206 ******************************************************************************
6207 * \if Function name : ihevce_enc_loop_get_frame_rc_prms \endif
6208 *
6209 * \brief
6210 *    returns Nil
6211 *
6212 * \param[in] pv_enc_loop_ctxt : pointer to encode loop context
6213 * \param[out]ps_rc_prms       : ptr to frame level info structure
6214 *
6215 * \return
6216 *    None
6217 *
6218 * \author
6219 *  Ittiam
6220 *
6221 *****************************************************************************
6222 */
ihevce_enc_loop_get_frame_rc_prms(void * pv_enc_loop_ctxt,rc_bits_sad_t * ps_rc_prms,WORD32 i4_br_id,WORD32 i4_enc_frm_id)6223 void ihevce_enc_loop_get_frame_rc_prms(
6224     void *pv_enc_loop_ctxt,
6225     rc_bits_sad_t *ps_rc_prms,
6226     WORD32 i4_br_id,  //bitrate instance id
6227     WORD32 i4_enc_frm_id)  // frame id
6228 {
6229     /*Get the master thread pointer*/
6230     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
6231     ihevce_enc_loop_ctxt_t *ps_ctxt;
6232     UWORD32 total_frame_intra_sad = 0, total_frame_open_loop_intra_sad = 0;
6233     LWORD64 i8_total_ssd_frame = 0;
6234     UWORD32 total_frame_sad = 0;
6235     UWORD32 total_frame_rdopt_bits = 0;
6236     UWORD32 total_frame_rdopt_header_bits = 0;
6237     WORD32 i4_qp_normalized_8x8_cu_sum[2] = { 0, 0 };
6238     WORD32 i4_8x8_cu_sum[2] = { 0, 0 };
6239     LWORD64 i8_sad_by_qscale[2] = { 0, 0 };
6240     WORD32 i4_curr_qp_acc = 0;
6241     WORD32 i;
6242 
6243     /* ENC_LOOP master state structure */
6244     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
6245 
6246     if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
6247     {
6248         i4_enc_frm_id = 0;
6249     }
6250     /*loop through all threads and accumulate intra sad across all threads*/
6251     for(i = 0; i < ps_master_ctxt->i4_num_proc_thrds; i++)
6252     {
6253         /* ENC_LOOP state structure */
6254         ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i];
6255         total_frame_open_loop_intra_sad +=
6256             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad;
6257         i8_total_ssd_frame +=
6258             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd;
6259         total_frame_intra_sad +=
6260             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad;
6261         total_frame_sad +=
6262             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc;
6263         total_frame_rdopt_bits +=
6264             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits;
6265         total_frame_rdopt_header_bits +=
6266             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits;
6267         i4_qp_normalized_8x8_cu_sum[0] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
6268                                               ->i4_qp_normalized_8x8_cu_sum[0];
6269         i4_qp_normalized_8x8_cu_sum[1] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
6270                                               ->i4_qp_normalized_8x8_cu_sum[1];
6271         i4_8x8_cu_sum[0] +=
6272             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[0];
6273         i4_8x8_cu_sum[1] +=
6274             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[1];
6275         i8_sad_by_qscale[0] +=
6276             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[0];
6277         i8_sad_by_qscale[1] +=
6278             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[1];
6279     }
6280 
6281     ps_rc_prms->u4_open_loop_intra_sad = total_frame_open_loop_intra_sad;
6282     ps_rc_prms->i8_total_ssd_frame = i8_total_ssd_frame;
6283     ps_rc_prms->u4_total_sad = total_frame_sad;
6284     ps_rc_prms->u4_total_texture_bits = total_frame_rdopt_bits - total_frame_rdopt_header_bits;
6285     ps_rc_prms->u4_total_header_bits = total_frame_rdopt_header_bits;
6286     /*This accumulation of intra frame sad is not intact. This can only be a temp change*/
6287     ps_rc_prms->u4_total_intra_sad = total_frame_intra_sad;
6288     ps_rc_prms->i4_qp_normalized_8x8_cu_sum[0] = i4_qp_normalized_8x8_cu_sum[0];
6289     ps_rc_prms->i4_qp_normalized_8x8_cu_sum[1] = i4_qp_normalized_8x8_cu_sum[1];
6290     ps_rc_prms->i4_8x8_cu_sum[0] = i4_8x8_cu_sum[0];
6291     ps_rc_prms->i4_8x8_cu_sum[1] = i4_8x8_cu_sum[1];
6292     ps_rc_prms->i8_sad_by_qscale[0] = i8_sad_by_qscale[0];
6293     ps_rc_prms->i8_sad_by_qscale[1] = i8_sad_by_qscale[1];
6294 }
6295