1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22 ******************************************************************************
23 * \file ihevce_enc_cu_recursion.c
24 *
25 * \brief
26 *    This file contains Encoder normative loop pass related functions
27 *
28 * \date
29 *    18/09/2012
30 *
31 * \author
32 *    Ittiam
33 *
34 *
35 * List of Functions
36 *
37 *
38 ******************************************************************************
39 */
40 
41 /*****************************************************************************/
42 /* File Includes                                                             */
43 /*****************************************************************************/
44 /* System include files */
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <assert.h>
49 #include <stdarg.h>
50 #include <math.h>
51 
52 /* User include files */
53 #include "ihevc_typedefs.h"
54 #include "itt_video_api.h"
55 #include "ihevce_api.h"
56 
57 #include "rc_cntrl_param.h"
58 #include "rc_frame_info_collector.h"
59 #include "rc_look_ahead_params.h"
60 
61 #include "ihevc_defs.h"
62 #include "ihevc_macros.h"
63 #include "ihevc_debug.h"
64 #include "ihevc_structs.h"
65 #include "ihevc_platform_macros.h"
66 #include "ihevc_deblk.h"
67 #include "ihevc_itrans_recon.h"
68 #include "ihevc_chroma_itrans_recon.h"
69 #include "ihevc_chroma_intra_pred.h"
70 #include "ihevc_intra_pred.h"
71 #include "ihevc_inter_pred.h"
72 #include "ihevc_mem_fns.h"
73 #include "ihevc_padding.h"
74 #include "ihevc_weighted_pred.h"
75 #include "ihevc_sao.h"
76 #include "ihevc_resi_trans.h"
77 #include "ihevc_quant_iquant_ssd.h"
78 #include "ihevc_cabac_tables.h"
79 
80 #include "ihevce_defs.h"
81 #include "ihevce_hle_interface.h"
82 #include "ihevce_lap_enc_structs.h"
83 #include "ihevce_multi_thrd_structs.h"
84 #include "ihevce_multi_thrd_funcs.h"
85 #include "ihevce_me_common_defs.h"
86 #include "ihevce_had_satd.h"
87 #include "ihevce_error_codes.h"
88 #include "ihevce_bitstream.h"
89 #include "ihevce_cabac.h"
90 #include "ihevce_rdoq_macros.h"
91 #include "ihevce_function_selector.h"
92 #include "ihevce_enc_structs.h"
93 #include "ihevce_entropy_structs.h"
94 #include "ihevce_cmn_utils_instr_set_router.h"
95 #include "ihevce_ipe_instr_set_router.h"
96 #include "ihevce_decomp_pre_intra_structs.h"
97 #include "ihevce_decomp_pre_intra_pass.h"
98 #include "ihevce_enc_loop_structs.h"
99 #include "ihevce_global_tables.h"
100 #include "ihevce_nbr_avail.h"
101 #include "ihevce_enc_loop_utils.h"
102 #include "ihevce_bs_compute_ctb.h"
103 #include "ihevce_cabac_rdo.h"
104 #include "ihevce_dep_mngr_interface.h"
105 #include "ihevce_enc_loop_pass.h"
106 #include "ihevce_rc_enc_structs.h"
107 #include "ihevce_enc_cu_recursion.h"
108 #include "ihevce_stasino_helpers.h"
109 
110 #include "cast_types.h"
111 #include "osal.h"
112 #include "osal_defaults.h"
113 
114 /*****************************************************************************/
115 /* Macros                                                                    */
116 /*****************************************************************************/
117 #define NUM_CTB_QUANT_ROUNDING 6
118 
119 /*****************************************************************************/
120 /* Function Definitions                                                      */
121 /*****************************************************************************/
122 
123 /**
124 *********************************************************************************
125 * Function name : ihevce_store_cu_final
126 *
127 * \brief
128 *    This function store cu info to the enc loop cu context
129 *
130 * \param[in] ps_ctxt : pointer to enc loop context structure
131 * \param[in] ps_cu_final  : pointer to enc loop output CU structure
132 * \param[in] pu1_ecd_data : ecd data pointer
133 * \param[in] ps_enc_out_ctxt : pointer to CU information structure
134 * \param[in] ps_cu_prms : pointer to  cu level parameters for SATD / RDOPT
135 *
136 * \return
137 *    None
138 *
139 **********************************************************************************/
ihevce_store_cu_final(ihevce_enc_loop_ctxt_t * ps_ctxt,cu_enc_loop_out_t * ps_cu_final,UWORD8 * pu1_ecd_data,ihevce_enc_cu_node_ctxt_t * ps_enc_out_ctxt,enc_loop_cu_prms_t * ps_cu_prms)140 void ihevce_store_cu_final(
141     ihevce_enc_loop_ctxt_t *ps_ctxt,
142     cu_enc_loop_out_t *ps_cu_final,
143     UWORD8 *pu1_ecd_data,
144     ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
145     enc_loop_cu_prms_t *ps_cu_prms)
146 {
147     enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
148     WORD32 i4_8x8_blks_in_cu;
149     WORD32 i4_br_id, i4_enc_frm_id;
150 
151     WORD32 u4_tex_bits, u4_hdr_bits;
152     WORD32 i4_qscale, i4_qscale_ctb;
153     ps_enc_loop_bestprms = ps_enc_out_ctxt->ps_cu_prms;
154     i4_qscale = ((ps_ctxt->ps_rc_quant_ctxt->pi4_qp_to_qscale
155                       [ps_enc_out_ctxt->i1_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]));
156     i4_qscale_ctb = ((
157         ps_ctxt->ps_rc_quant_ctxt
158             ->pi4_qp_to_qscale[ps_ctxt->i4_frame_mod_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]));
159 
160     /* All texture bits accumulated */
161     u4_tex_bits = ps_enc_loop_bestprms->u4_cu_luma_res_bits +
162                   ps_enc_loop_bestprms->u4_cu_chroma_res_bits +
163                   ps_enc_loop_bestprms->u4_cu_cbf_bits;
164 
165     u4_hdr_bits = ps_enc_loop_bestprms->u4_cu_hdr_bits;
166 
167     i4_br_id = ps_ctxt->i4_bitrate_instance_num;
168     i4_enc_frm_id = ps_ctxt->i4_enc_frm_id;
169 
170     i4_8x8_blks_in_cu = ((ps_enc_out_ctxt->u1_cu_size >> 3) * (ps_enc_out_ctxt->u1_cu_size >> 3));
171 
172     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd +=
173         ps_enc_loop_bestprms
174             ->i8_cu_ssd;  // + (((float)(ps_ctxt->i8_cl_ssd_lambda_qf/ (1<< LAMBDA_Q_SHIFT))) * ps_enc_loop_bestprms->u4_cu_hdr_bits);
175 
176     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad +=
177         (UWORD32)(
178             ps_enc_loop_bestprms->u4_cu_open_intra_sad +
179             (((float)(ps_ctxt->i4_sad_lamda) / (1 << LAMBDA_Q_SHIFT)) *
180              ps_enc_loop_bestprms->u4_cu_hdr_bits));
181 
182     if(1 == ps_enc_loop_bestprms->u1_intra_flag)
183     {
184         ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad_acc +=
185             ps_enc_loop_bestprms->u4_cu_sad;
186         ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_intra_cost_acc +=
187             ps_enc_loop_bestprms->i8_best_rdopt_cost;
188     }
189     else
190     {
191         ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_inter_sad_acc +=
192             ps_enc_loop_bestprms->u4_cu_sad;
193         ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_inter_cost_acc +=
194             ps_enc_loop_bestprms->i8_best_rdopt_cost;
195     }
196     /*accumulating the frame level stats across frame*/
197     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc +=
198         ps_enc_loop_bestprms->u4_cu_sad;
199 
200     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_cost_acc +=
201         ps_enc_loop_bestprms->i8_best_rdopt_cost;
202 
203     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits +=
204         (u4_tex_bits + u4_hdr_bits);
205 
206     /*Total bits and header bits accumalted here for CTB*/
207     ps_ctxt->u4_total_cu_bits += (u4_tex_bits + u4_hdr_bits);
208     ps_ctxt->u4_total_cu_bits_mul_qs +=
209         ((ULWORD64)((u4_tex_bits + u4_hdr_bits) * (i4_qscale_ctb)) + (1 << (QSCALE_Q_FAC_3 - 1))) >>
210         QSCALE_Q_FAC_3;
211     ps_ctxt->u4_total_cu_hdr_bits += u4_hdr_bits;
212     ps_ctxt->u4_cu_tot_bits_into_qscale +=
213         ((ULWORD64)((u4_tex_bits + u4_hdr_bits) * (i4_qscale)) + (1 << (QSCALE_Q_FAC_3 - 1))) >>
214         QSCALE_Q_FAC_3;
215     ps_ctxt->u4_cu_tot_bits += (u4_tex_bits + u4_hdr_bits);
216 
217     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits +=
218         u4_hdr_bits;
219 
220     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
221         ->i8_sad_by_qscale[ps_enc_loop_bestprms->u1_intra_flag] +=
222         ((((LWORD64)ps_enc_loop_bestprms->u4_cu_sad) << SAD_BY_QSCALE_Q) / i4_qscale);
223 
224     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
225         ->i4_qp_normalized_8x8_cu_sum[ps_enc_loop_bestprms->u1_intra_flag] +=
226         (i4_8x8_blks_in_cu * i4_qscale);
227 
228     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
229         ->i4_8x8_cu_sum[ps_enc_loop_bestprms->u1_intra_flag] += i4_8x8_blks_in_cu;
230 
231     /* PCM not supported */
232     ps_cu_final->b1_pcm_flag = 0;
233     ps_cu_final->b1_pred_mode_flag = ps_enc_loop_bestprms->u1_intra_flag;
234 
235     ps_cu_final->b1_skip_flag = ps_enc_loop_bestprms->u1_skip_flag;
236     ps_cu_final->b1_tq_bypass_flag = 0;
237     ps_cu_final->b3_part_mode = ps_enc_loop_bestprms->u1_part_mode;
238 
239     ps_cu_final->pv_coeff = pu1_ecd_data;
240 
241     ps_cu_final->i1_cu_qp = ps_enc_out_ctxt->i1_cu_qp;
242     if(ps_enc_loop_bestprms->u1_is_cu_coded)
243     {
244         ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_enc_out_ctxt->i1_cu_qp;
245     }
246     else
247     {
248         ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_pred_qp;
249     }
250     ps_cu_final->b1_first_cu_in_qg = ps_enc_out_ctxt->b1_first_cu_in_qg;
251 
252     /* Update the no residue flag. Needed for inter cu. */
253     /* Needed for deblocking inter/intra both           */
254     //if(ps_cu_final->b1_pred_mode_flag == PRED_MODE_INTER)
255     {
256         ps_cu_final->b1_no_residual_syntax_flag = !ps_enc_loop_bestprms->u1_is_cu_coded;
257     }
258 
259     /* store the number of TUs */
260     ps_cu_final->u2_num_tus_in_cu = ps_enc_loop_bestprms->u2_num_tus_in_cu;
261 
262     /* ---- copy the TUs to final structure ----- */
263     memcpy(
264         ps_cu_final->ps_enc_tu,
265         &ps_enc_loop_bestprms->as_tu_enc_loop[0],
266         ps_enc_loop_bestprms->u2_num_tus_in_cu * sizeof(tu_enc_loop_out_t));
267 
268     /* ---- copy the PUs to final structure ----- */
269     memcpy(
270         ps_cu_final->ps_pu,
271         &ps_enc_loop_bestprms->as_pu_enc_loop[0],
272         ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_t));
273 
274     /* --- copy reminder and prev_flags ----- */
275     /* only required for intra */
276     if(PRED_MODE_INTRA == ps_cu_final->b1_pred_mode_flag)
277     {
278         memcpy(
279             &ps_cu_final->as_prev_rem[0],
280             &ps_enc_loop_bestprms->as_intra_prev_rem[0],
281             ps_enc_loop_bestprms->u2_num_tus_in_cu * sizeof(intra_prev_rem_flags_t));
282 
283         ps_cu_final->b3_chroma_intra_pred_mode = ps_enc_loop_bestprms->u1_chroma_intra_pred_mode;
284     }
285 
286     /* --------------------------------------------------- */
287     /* ---- Boundary Strength Calculation at CU level ---- */
288     /* --------------------------------------------------- */
289     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
290     {
291         WORD32 num_4x4_in_ctb;
292         nbr_4x4_t *ps_left_nbr_4x4;
293         nbr_4x4_t *ps_top_nbr_4x4;
294         nbr_4x4_t *ps_curr_nbr_4x4;
295         WORD32 nbr_4x4_left_strd;
296 
297         num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
298 
299         ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
300         ps_curr_nbr_4x4 += (ps_enc_out_ctxt->b3_cu_pos_x << 1);
301         ps_curr_nbr_4x4 += ((ps_enc_out_ctxt->b3_cu_pos_y << 1) * num_4x4_in_ctb);
302 
303         /* CU left */
304         if(0 == ps_enc_out_ctxt->b3_cu_pos_x)
305         {
306             ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
307             ps_left_nbr_4x4 += ps_enc_out_ctxt->b3_cu_pos_y << 1;
308             nbr_4x4_left_strd = 1;
309         }
310         else
311         {
312             /* inside CTB */
313             ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
314             nbr_4x4_left_strd = num_4x4_in_ctb;
315         }
316 
317         /* CU top */
318         if(0 == ps_enc_out_ctxt->b3_cu_pos_y)
319         {
320             /* CTB boundary */
321             ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
322             ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
323             ps_top_nbr_4x4 += (ps_enc_out_ctxt->b3_cu_pos_x << 1);
324         }
325         else
326         {
327             /* inside CTB */
328             ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
329         }
330 
331         ihevce_bs_compute_cu(
332             ps_cu_final,
333             ps_top_nbr_4x4,
334             ps_left_nbr_4x4,
335             ps_curr_nbr_4x4,
336             nbr_4x4_left_strd,
337             num_4x4_in_ctb,
338             &ps_ctxt->s_deblk_bs_prms);
339     }
340 }
341 
342 /**
343 *********************************************************************************
344 * Function name : ihevce_store_cu_results
345 *
346 * \brief
347 *    This function store cu result to cu info context
348 *
349 * \param[in] ps_ctxt : pointer to enc loop context structure
350 * \param[out] ps_cu_prms : pointer to  cu level parameters for SATD / RDOPT
351 *
352 * \return
353 *    None
354 *
355 **********************************************************************************/
ihevce_store_cu_results(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,final_mode_state_t * ps_final_state)356 void ihevce_store_cu_results(
357     ihevce_enc_loop_ctxt_t *ps_ctxt,
358     enc_loop_cu_prms_t *ps_cu_prms,
359     final_mode_state_t *ps_final_state)
360 {
361     ihevce_enc_cu_node_ctxt_t *ps_enc_tmp_out_ctxt;
362     nbr_4x4_t *ps_nbr_4x4, *ps_tmp_nbr_4x4, *ps_curr_nbr_4x4;
363 
364     UWORD8 *pu1_recon, *pu1_final_recon;
365     WORD32 num_4x4_in_ctb, ctr;
366     WORD32 num_4x4_in_cu;
367     UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
368     WORD32 cu_depth, log2_ctb_size, log2_cu_size;
369 
370     ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt;
371     (void)ps_final_state;
372 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
373     {
374         /* ---- copy the child luma recon back to curr. recon -------- */
375         pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_luma_recon;
376 
377         /* based on CU position derive the luma pointers */
378         pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
379 
380         pu1_final_recon +=
381             ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
382 
383         ps_ctxt->s_cmn_opt_func.pf_copy_2d(
384             pu1_final_recon,
385             ps_cu_prms->i4_luma_recon_stride,
386             pu1_recon,
387             ps_enc_tmp_out_ctxt->u1_cu_size,
388             ps_enc_tmp_out_ctxt->u1_cu_size,
389             ps_enc_tmp_out_ctxt->u1_cu_size);
390 
391         /* ---- copy the child chroma recon back to curr. recon -------- */
392         pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_chrma_recon;
393 
394         /* based on CU position derive the chroma pointers */
395         pu1_final_recon = ps_cu_prms->pu1_chrm_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
396 
397         pu1_final_recon +=
398             ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << (u1_is_422 + 2)) *
399              ps_cu_prms->i4_chrm_recon_stride);
400 
401         /* Cb and Cr pixel interleaved */
402         ps_ctxt->s_cmn_opt_func.pf_copy_2d(
403             pu1_final_recon,
404             ps_cu_prms->i4_chrm_recon_stride,
405             pu1_recon,
406             ps_enc_tmp_out_ctxt->u1_cu_size,
407             ps_enc_tmp_out_ctxt->u1_cu_size,
408             (ps_enc_tmp_out_ctxt->u1_cu_size >> (0 == u1_is_422)));
409     }
410 #else
411     if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
412     {
413         /* ---- copy the child luma recon back to curr. recon -------- */
414         pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_luma_recon;
415 
416         /* based on CU position derive the luma pointers */
417         pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
418 
419         pu1_final_recon +=
420             ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
421 
422         ps_ctxt->s_cmn_opt_func.pf_copy_2d(
423             pu1_final_recon,
424             ps_cu_prms->i4_luma_recon_stride,
425             pu1_recon,
426             ps_enc_tmp_out_ctxt->u1_cu_size,
427             ps_enc_tmp_out_ctxt->u1_cu_size,
428             ps_enc_tmp_out_ctxt->u1_cu_size);
429 
430         /* ---- copy the child chroma recon back to curr. recon -------- */
431         pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_chrma_recon;
432 
433         /* based on CU position derive the chroma pointers */
434         pu1_final_recon = ps_cu_prms->pu1_chrm_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
435 
436         pu1_final_recon +=
437             ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << (u1_is_422 + 2)) *
438              ps_cu_prms->i4_chrm_recon_stride);
439 
440         ps_ctxt->s_cmn_opt_func.pf_copy_2d(
441             pu1_final_recon,
442             ps_cu_prms->i4_chrm_recon_stride,
443             pu1_recon,
444             ps_enc_tmp_out_ctxt->u1_cu_size,
445             ps_enc_tmp_out_ctxt->u1_cu_size,
446             (ps_enc_tmp_out_ctxt->u1_cu_size >> (0 == u1_is_422)));
447     }
448 #endif
449     /*copy qp for qg*/
450     {
451         WORD32 i4_num_8x8, i4_x, i4_y;
452         WORD32 i4_cu_pos_x, i4_cu_pox_y;
453         i4_num_8x8 = ps_enc_tmp_out_ctxt->u1_cu_size >> 3;
454         i4_cu_pos_x = ps_enc_tmp_out_ctxt->b3_cu_pos_x;
455         i4_cu_pox_y = ps_enc_tmp_out_ctxt->b3_cu_pos_y;
456         for(i4_y = 0; i4_y < i4_num_8x8; i4_y++)
457         {
458             for(i4_x = 0; i4_x < i4_num_8x8; i4_x++)
459             {
460                 if(ps_enc_tmp_out_ctxt->ps_cu_prms->u1_is_cu_coded)
461                 {
462                     ps_ctxt->ai4_qp_qg[((i4_cu_pox_y + i4_y) * 8) + (i4_cu_pos_x + i4_x)] =
463                         ps_ctxt->i4_cu_qp;
464                 }
465                 else
466                 {
467                     ps_ctxt->ai4_qp_qg[((i4_cu_pox_y + i4_y) * 8) + (i4_cu_pos_x + i4_x)] =
468                         ps_ctxt->i4_pred_qp;
469                 }
470             }
471         }
472     }
473 
474     /* ------ copy the nbr 4x4 to final output ------ */
475     num_4x4_in_cu = ps_enc_tmp_out_ctxt->u1_cu_size >> 2;
476     num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
477 
478     ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
479     ps_curr_nbr_4x4 += (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 1);
480     ps_curr_nbr_4x4 += ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << 1) * num_4x4_in_ctb);
481     ps_tmp_nbr_4x4 = ps_curr_nbr_4x4;
482 
483     ps_nbr_4x4 = ps_ctxt->ps_cu_recur_nbr;
484 
485     GETRANGE(log2_ctb_size, ps_cu_prms->i4_ctb_size);
486     GETRANGE(log2_cu_size, ps_enc_tmp_out_ctxt->u1_cu_size);
487     cu_depth = log2_ctb_size - log2_cu_size;
488 
489     ASSERT(cu_depth <= 3);
490     ASSERT(cu_depth >= 0);
491 
492     /*assign qp for all 4x4 nbr blocks*/
493     for(ctr = 0; ctr < num_4x4_in_cu * num_4x4_in_cu; ctr++, ps_nbr_4x4++)
494     {
495         ps_nbr_4x4->b1_skip_flag = ps_enc_tmp_out_ctxt->s_cu_prms.u1_skip_flag;
496         ps_nbr_4x4->b2_cu_depth = cu_depth;
497         ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
498     }
499 
500     ps_nbr_4x4 = ps_ctxt->ps_cu_recur_nbr;
501 
502     for(ctr = 0; ctr < num_4x4_in_cu; ctr++)
503     {
504         memcpy(ps_tmp_nbr_4x4, ps_nbr_4x4, num_4x4_in_cu * sizeof(nbr_4x4_t));
505 
506         ps_tmp_nbr_4x4 += num_4x4_in_ctb;
507         ps_nbr_4x4 += num_4x4_in_cu;
508     }
509 }
510 
511 /**
512 *********************************************************************************
513 * Function name : ihevce_populate_cu_struct
514 *
515 * \brief
516 *    This function populate cu struct
517 *
518 * \param[in] ps_ctxt : pointer to enc loop context structure
519 * \param[in] ps_cur_ipe_ctb : pointer to  IPE L0 analyze structure
520 * \param[in] ps_cu_tree_analyse : pointer to  Structure for CU recursion
521 * \param[in] ps_best_results : pointer to  strcuture  contain result for partition type of CU
522 * \param[in] ps_cu_out : pointer to  structre contain  mode analysis info
523 * \param[in] i4_32x32_id : noise estimation id
524 * \param[in] u1_num_best_results : num best result value
525 *
526 * \return
527 *    None
528 *
529 **********************************************************************************/
ihevce_populate_cu_struct(ihevce_enc_loop_ctxt_t * ps_ctxt,ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb,cur_ctb_cu_tree_t * ps_cu_tree_analyse,part_type_results_t * ps_best_results,cu_analyse_t * ps_cu_out,WORD32 i4_32x32_id,UWORD8 u1_is_cu_noisy,UWORD8 u1_num_best_results)530 void ihevce_populate_cu_struct(
531     ihevce_enc_loop_ctxt_t *ps_ctxt,
532     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
533     cur_ctb_cu_tree_t *ps_cu_tree_analyse,
534     part_type_results_t *ps_best_results,
535     cu_analyse_t *ps_cu_out,
536     WORD32 i4_32x32_id,
537 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
538     UWORD8 u1_is_cu_noisy,
539 #endif
540     UWORD8 u1_num_best_results)
541 {
542     cu_inter_cand_t *ps_cu_candt;
543 
544     WORD32 j;
545     /* open loop intra cost by IPE */
546     WORD32 intra_cost_ol;
547     /* closed loop intra cost based on empirical coding noise estimate */
548     WORD32 intra_cost_cl_est = 0;
549     /* closed loop intra coding noise estimate */
550     WORD32 intra_noise_cl_est;
551     WORD32 num_results_to_copy = 0;
552 
553     WORD32 found_intra = 0;
554     WORD32 quality_preset = ps_ctxt->i4_quality_preset;
555     WORD32 frm_qp = ps_ctxt->i4_frame_qp;
556     WORD32 frm_qstep_multiplier = gau4_frame_qstep_multiplier[frm_qp - 1];
557     WORD32 frm_qstep = ps_ctxt->i4_frame_qstep;
558     UWORD8 u1_cu_size = ps_cu_tree_analyse->u1_cu_size;
559     UWORD8 u1_x_off = ps_cu_tree_analyse->b3_cu_pos_x << 3;
560     UWORD8 u1_y_off = ps_cu_tree_analyse->b3_cu_pos_y << 3;
561     UWORD8 u1_threshold_multi;
562     switch(quality_preset)
563     {
564     case IHEVCE_QUALITY_P0:
565     case IHEVCE_QUALITY_P2:
566     {
567         num_results_to_copy =
568             MIN(MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_PQ_AND_HQ, u1_num_best_results);
569         break;
570     }
571     case IHEVCE_QUALITY_P3:
572     {
573         num_results_to_copy = MIN(MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_MS, u1_num_best_results);
574         break;
575     }
576     case IHEVCE_QUALITY_P4:
577     case IHEVCE_QUALITY_P5:
578     case IHEVCE_QUALITY_P6:
579     {
580         num_results_to_copy =
581             MIN(MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_HS_AND_XS, u1_num_best_results);
582         break;
583     }
584     }
585 
586     ps_cu_out->u1_num_inter_cands = 0;
587 
588     /***************************************************************/
589     /* Depending CU size that has won in ME,                       */
590     /*     Estimate the closed loop intra cost for enabling intra  */
591     /*     evaluation in rdopt stage based on preset               */
592     /***************************************************************/
593     switch(u1_cu_size)
594     {
595     case 64:
596     {
597         /* coding noise estimate for intra closed loop cost */
598         intra_cost_ol = ps_cur_ipe_ctb->i4_best64x64_intra_cost - frm_qstep * 256;
599 
600         intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
601 
602         intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16)) * 16;
603 
604         intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
605         break;
606     }
607     case 32:
608     {
609         /* coding noise estimate for intra closed loop cost */
610         intra_cost_ol = ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id] - frm_qstep * 64;
611 
612         intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
613 
614         intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16)) * 4;
615 
616         intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
617         break;
618     }
619     case 16:
620     {
621         /* coding noise estimate for intra closed loop cost */
622         intra_cost_ol =
623             ps_cur_ipe_ctb->ai4_best16x16_intra_cost[(u1_x_off >> 4) + ((u1_y_off >> 4) << 2)] -
624             frm_qstep * 16;
625 
626         intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
627 
628         intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16));
629 
630         intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
631         break;
632     }
633     case 8:
634     {
635         /* coding noise estimate for intra closed loop cost */
636         intra_cost_ol =
637             ps_cur_ipe_ctb->ai4_best8x8_intra_cost[(u1_x_off >> 3) + u1_y_off] - frm_qstep * 4;
638 
639         intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
640 
641         intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16)) >> 2;
642 
643         intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
644         break;
645     }
646     }
647 #if DISABLE_INTER_CANDIDATES
648     return;
649 #endif
650 
651     u1_threshold_multi = 1;
652 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
653     if(u1_is_cu_noisy)
654     {
655         intra_cost_cl_est = INT_MAX;
656     }
657 #endif
658 
659     ps_cu_candt = ps_cu_out->as_cu_inter_cand;
660 
661     /* Check if the first best candidate is inter or intra */
662     if(ps_best_results[0].as_pu_results[0].pu.b1_intra_flag)
663     {
664         ps_cu_out->u1_best_is_intra = 1;
665     }
666     else
667     {
668         ps_cu_out->u1_best_is_intra = 0;
669     }
670 
671     for(j = 0; j < u1_num_best_results; j++)
672     {
673         part_type_results_t *ps_best = &ps_best_results[j];
674 
675         if(ps_best->as_pu_results[0].pu.b1_intra_flag)
676         {
677             found_intra = 1;
678         }
679         else
680         {
681             /* populate the TU split flags, 4 flags copied as max cu can be 64 */
682             memcpy(ps_cu_candt->ai4_tu_split_flag, ps_best->ai4_tu_split_flag, 4 * sizeof(WORD32));
683 
684             /* populate the TU early CBF flags, 4 flags copied as max cu can be 64 */
685             memcpy(ps_cu_candt->ai4_tu_early_cbf, ps_best->ai4_tu_early_cbf, 4 * sizeof(WORD32));
686 
687             /* Note: the enums of part size and me part types shall match */
688             ps_cu_candt->b3_part_size = ps_best->u1_part_type;
689 
690             /* ME will always set the skip flag to 0            */
691             /* in closed loop skip will be added as a candidate */
692             ps_cu_candt->b1_skip_flag = 0;
693 
694             /* copy the inter pus : Note: assuming NxN part type is not supported */
695             ps_cu_candt->as_inter_pu[0] = ps_best->as_pu_results[0].pu;
696 
697             ps_cu_candt->as_inter_pu[0].b1_merge_flag = 0;
698 
699             /* Copy the total cost of the CU candt */
700             ps_cu_candt->i4_total_cost = ps_best->i4_tot_cost;
701 
702             ps_cu_out->ai4_mv_cost[ps_cu_out->u1_num_inter_cands][0] =
703                 ps_best->as_pu_results[0].i4_mv_cost;
704 
705 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
706             ps_cu_out->ai4_err_metric[ps_cu_out->u1_num_inter_cands][0] =
707                 ps_best->as_pu_results[0].i4_tot_cost - ps_best->as_pu_results[0].i4_mv_cost;
708 #endif
709 
710             if(ps_best->u1_part_type)
711             {
712                 ps_cu_candt->as_inter_pu[1] = ps_best->as_pu_results[1].pu;
713                 ps_cu_out->ai4_mv_cost[ps_cu_out->u1_num_inter_cands][1] =
714                     ps_best->as_pu_results[1].i4_mv_cost;
715 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
716                 ps_cu_out->ai4_err_metric[ps_cu_out->u1_num_inter_cands][1] =
717                     ps_best->as_pu_results[1].i4_tot_cost - ps_best->as_pu_results[1].i4_mv_cost;
718 #endif
719 
720                 ps_cu_candt->as_inter_pu[1].b1_merge_flag = 0;
721             }
722 
723             ps_cu_candt++;
724             ps_cu_out->u1_num_inter_cands++;
725             if(intra_cost_cl_est < ((ps_best->i4_tot_cost * u1_threshold_multi) >> 0))
726             {
727                 /* The rationale - */
728                 /* Artefacts were being observed in some sequences, */
729                 /* Brooklyn_1080p in particular - where it was readily */
730                 /* apparent. The cause was coding of CU's as inter CU's */
731                 /* when they actually needed to be coded as intra CU's. */
732                 /* This was observed during either fade-outs aor flashes. */
733                 /* After tinkering with the magnitude of the coding noise */
734                 /* factor that was added to the intra cost to see when the */
735                 /* artefacts in Brooklyn vanished, it was observed that the */
736                 /* factor multiplied with the frame_qstep followed a pattern. */
737                 /* When the pattern was subjected to a regression analysis, the */
738                 /* formula seen below emerged. Also note the fact that the coding */
739                 /* noise factor is the product of the frame_qstep and a constant */
740                 /* multiplier */
741 
742                 /*UWORD32 frm_qstep_multiplier =
743                 -3.346 * log((float)frm_qstep) + 15.925;*/
744                 found_intra = 1;
745             }
746 
747             if(ps_cu_out->u1_num_inter_cands >= num_results_to_copy)
748             {
749                 break;
750             }
751         }
752     }
753 
754     if(quality_preset < IHEVCE_QUALITY_P4)
755     {
756         found_intra = 1;
757     }
758 
759     if(!found_intra)
760     {
761         /* rdopt evaluation of intra disabled as inter is clear winner */
762         ps_cu_out->u1_num_intra_rdopt_cands = 0;
763 
764         /* all the modes invalidated */
765         ps_cu_out->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
766         ps_cu_out->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
767         ps_cu_out->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
768         ps_cu_out->u1_chroma_intra_pred_mode = 255;
769 
770         /* no intra candt to verify */
771         ps_cu_out->s_cu_intra_cand.b6_num_intra_cands = 0;
772     }
773 }
774 
775 /**
776 *********************************************************************************
777 * Function name : ihevce_create_child_nodes_cu_tree
778 *
779 * \brief
780 *    This function create child node from cu tree
781 *
782 * \param[in] ps_cu_tree_root : pointer to Structure for CU recursion
783 * \param[out] ps_cu_tree_cur_node : pointer to  Structure for CU recursion
784 * \param[in] ai4_child_node_enable : child node enable flag
785 * \param[in] nodes_already_created : already created node value
786 * \return
787 *    None
788 *
789 **********************************************************************************/
ihevce_create_child_nodes_cu_tree(cur_ctb_cu_tree_t * ps_cu_tree_root,cur_ctb_cu_tree_t * ps_cu_tree_cur_node,WORD32 * ai4_child_node_enable,WORD32 nodes_already_created)790 WORD32 ihevce_create_child_nodes_cu_tree(
791     cur_ctb_cu_tree_t *ps_cu_tree_root,
792     cur_ctb_cu_tree_t *ps_cu_tree_cur_node,
793     WORD32 *ai4_child_node_enable,
794     WORD32 nodes_already_created)
795 {
796     cur_ctb_cu_tree_t *ps_tl;
797     cur_ctb_cu_tree_t *ps_tr;
798     cur_ctb_cu_tree_t *ps_bl;
799     cur_ctb_cu_tree_t *ps_br;
800 
801     ps_tl = ps_cu_tree_root + nodes_already_created;
802     ps_tr = ps_tl + 1;
803     ps_bl = ps_tr + 1;
804     ps_br = ps_bl + 1;
805 
806     if(1 == ps_cu_tree_cur_node->is_node_valid)
807     {
808         ps_tl = (ai4_child_node_enable[0]) ? ps_tl : NULL;
809         ps_tr = (ai4_child_node_enable[1]) ? ps_tr : NULL;
810         ps_bl = (ai4_child_node_enable[2]) ? ps_bl : NULL;
811         ps_br = (ai4_child_node_enable[3]) ? ps_br : NULL;
812 
813         /* In incomplete CTB, if any of the child nodes are assigned to NULL */
814         /* then parent node ceases to be valid */
815         if((ps_tl == NULL) || (ps_tr == NULL) || (ps_br == NULL) || (ps_bl == NULL))
816         {
817             ps_cu_tree_cur_node->is_node_valid = 0;
818         }
819     }
820     ps_cu_tree_cur_node->ps_child_node_tl = ps_tl;
821     ps_cu_tree_cur_node->ps_child_node_tr = ps_tr;
822     ps_cu_tree_cur_node->ps_child_node_bl = ps_bl;
823     ps_cu_tree_cur_node->ps_child_node_br = ps_br;
824 
825     return 4;
826 }
827 
828 /**
829 *********************************************************************************
830 * Function name : ihevce_populate_cu_tree
831 *
832 * \brief
833 *    This function create child node from cu tree
834 *
835 * \param[in] ps_cur_ipe_ctb : pointer to Structure for CU recursion
836 * \param[out] ps_cu_tree : pointer to  Structure for CU recursion
837 * \param[in] tree_depth : child node enable flag
838 * \param[in] e_quality_preset : already created node value
839 * \param[in] e_grandparent_blk_pos : already created node value
840 * \param[in] e_parent_blk_pos : already created node value
841 * \param[in] e_cur_blk_pos : already created node value
842 *
843 * \return
844 *    None
845 *
846 **********************************************************************************/
ihevce_populate_cu_tree(ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb,cur_ctb_cu_tree_t * ps_cu_tree,WORD32 tree_depth,IHEVCE_QUALITY_CONFIG_T e_quality_preset,CU_POS_T e_grandparent_blk_pos,CU_POS_T e_parent_blk_pos,CU_POS_T e_cur_blk_pos)847 void ihevce_populate_cu_tree(
848     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
849     cur_ctb_cu_tree_t *ps_cu_tree,
850     WORD32 tree_depth,
851     IHEVCE_QUALITY_CONFIG_T e_quality_preset,
852     CU_POS_T e_grandparent_blk_pos,
853     CU_POS_T e_parent_blk_pos,
854     CU_POS_T e_cur_blk_pos)
855 {
856     WORD32 ai4_child_enable[4];
857     WORD32 children_nodes_required = 0;
858     WORD32 cu_pos_x = 0;
859     WORD32 cu_pos_y = 0;
860     WORD32 cu_size = 0;
861     WORD32 i;
862     WORD32 node_validity = 0;
863 
864     if(NULL == ps_cu_tree)
865     {
866         return;
867     }
868 
869     switch(tree_depth)
870     {
871     case 0:
872     {
873         /* 64x64 block */
874         intra32_analyse_t *ps_intra32_analyse = ps_cur_ipe_ctb->as_intra32_analyse;
875 
876         children_nodes_required = 1;
877         cu_size = 64;
878         cu_pos_x = 0;
879         cu_pos_y = 0;
880 
881         node_validity = !ps_cur_ipe_ctb->u1_split_flag;
882 
883         if(e_quality_preset >= IHEVCE_QUALITY_P2)
884         {
885             if(node_validity == 1)
886             {
887                 children_nodes_required = 0;
888             }
889         }
890 
891         for(i = 0; i < 4; i++)
892         {
893             ai4_child_enable[i] = ps_intra32_analyse[i].b1_valid_cu;
894         }
895 
896         break;
897     }
898     case 1:
899     {
900         /* 32x32 block */
901         WORD32 valid_flag_32 = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_valid_cu);
902 
903         intra16_analyse_t *ps_intra16_analyse =
904             ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].as_intra16_analyse;
905 
906         cu_size = 32;
907 
908         /* Explanation for logic below - */
909         /* * pos_x and pos_y are in units of 8x8 CU's */
910         /* * pos_x = 0 for TL and BL children */
911         /* * pos_x = 4 for TR and BR children */
912         /* * pos_y = 0 for TL and TR children */
913         /* * pos_y = 4 for BL and BR children */
914         cu_pos_x = (e_cur_blk_pos & 1) << 2;
915         cu_pos_y = (e_cur_blk_pos & 2) << 1;
916 
917         {
918             node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
919 
920             if(e_quality_preset >= IHEVCE_QUALITY_P2)
921             {
922                 node_validity = (!ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_split_flag);
923             }
924 
925             node_validity = node_validity && valid_flag_32;
926             children_nodes_required = !node_validity || ps_cur_ipe_ctb->u1_split_flag;
927         }
928 
929         if(e_quality_preset >= IHEVCE_QUALITY_P2)
930         {
931             if(node_validity == 1)
932             {
933                 children_nodes_required = 0;
934             }
935             else
936             {
937                 children_nodes_required =
938                     (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_split_flag);
939             }
940         }
941 
942         for(i = 0; i < 4; i++)
943         {
944             ai4_child_enable[i] = ps_intra16_analyse[i].b1_valid_cu;
945         }
946 
947         break;
948     }
949     case 2:
950     {
951         /* 16x16 block */
952         WORD32 cu_pos_x_parent;
953         WORD32 cu_pos_y_parent;
954         WORD32 merge_flag_16;
955         WORD32 merge_flag_32;
956 
957         intra8_analyse_t *ps_intra8_analyse = ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
958                                                   .as_intra16_analyse[e_cur_blk_pos]
959                                                   .as_intra8_analyse;
960 
961         WORD32 valid_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
962                                     .as_intra16_analyse[e_cur_blk_pos]
963                                     .b1_valid_cu);
964 
965         cu_size = 16;
966 
967         /* Explanation for logic below - */
968         /* See similar explanation above */
969         cu_pos_x_parent = (e_parent_blk_pos & 1) << 2;
970         cu_pos_y_parent = (e_parent_blk_pos & 2) << 1;
971         cu_pos_x = cu_pos_x_parent + ((e_cur_blk_pos & 1) << 1);
972         cu_pos_y = cu_pos_y_parent + (e_cur_blk_pos & 2);
973 
974         merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
975                              .as_intra16_analyse[e_cur_blk_pos]
976                              .b1_merge_flag);
977         merge_flag_32 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos].b1_merge_flag);
978 
979 #if !ENABLE_UNIFORM_CU_SIZE_8x8
980         node_validity = (merge_flag_16) || ((ps_cur_ipe_ctb->u1_split_flag) && (!merge_flag_32));
981 #else
982         node_validity = 0;
983 #endif
984 
985         node_validity = (merge_flag_16) || ((ps_cur_ipe_ctb->u1_split_flag) && (!merge_flag_32));
986 
987         if(e_quality_preset >= IHEVCE_QUALITY_P2)
988         {
989             node_validity = (!ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
990                                   .as_intra16_analyse[e_cur_blk_pos]
991                                   .b1_split_flag);
992         }
993 
994         node_validity = node_validity && valid_flag_16;
995 
996         children_nodes_required = ((ps_cur_ipe_ctb->u1_split_flag) && (!merge_flag_32)) ||
997                                   !merge_flag_16;
998 
999         if(e_quality_preset >= IHEVCE_QUALITY_P2)
1000         {
1001             children_nodes_required = !node_validity;
1002         }
1003 
1004         for(i = 0; i < 4; i++)
1005         {
1006             ai4_child_enable[i] = ps_intra8_analyse[i].b1_valid_cu;
1007         }
1008         break;
1009     }
1010     case 3:
1011     {
1012         /* 8x8 block */
1013         WORD32 cu_pos_x_grandparent;
1014         WORD32 cu_pos_y_grandparent;
1015 
1016         WORD32 cu_pos_x_parent;
1017         WORD32 cu_pos_y_parent;
1018 
1019         WORD32 valid_flag_8 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
1020                                    .as_intra16_analyse[e_parent_blk_pos]
1021                                    .as_intra8_analyse[e_cur_blk_pos]
1022                                    .b1_valid_cu);
1023 
1024         cu_size = 8;
1025 
1026         cu_pos_x_grandparent = (e_grandparent_blk_pos & 1) << 2;
1027         cu_pos_y_grandparent = (e_grandparent_blk_pos & 2) << 1;
1028         cu_pos_x_parent = cu_pos_x_grandparent + ((e_parent_blk_pos & 1) << 1);
1029         cu_pos_y_parent = cu_pos_y_grandparent + (e_parent_blk_pos & 2);
1030         cu_pos_x = cu_pos_x_parent + (e_cur_blk_pos & 1);
1031         cu_pos_y = cu_pos_y_parent + ((e_cur_blk_pos & 2) >> 1);
1032 
1033         node_validity = 1 && valid_flag_8;
1034 
1035         children_nodes_required = 0;
1036 
1037         break;
1038     }
1039     }
1040 
1041     /* Fill the current cu_tree node */
1042     ps_cu_tree->is_node_valid = node_validity;
1043     ps_cu_tree->u1_cu_size = cu_size;
1044     ps_cu_tree->b3_cu_pos_x = cu_pos_x;
1045     ps_cu_tree->b3_cu_pos_y = cu_pos_y;
1046 
1047     if(children_nodes_required)
1048     {
1049         tree_depth++;
1050 
1051         ps_cur_ipe_ctb->nodes_created_in_cu_tree += ihevce_create_child_nodes_cu_tree(
1052             ps_cur_ipe_ctb->ps_cu_tree_root,
1053             ps_cu_tree,
1054             ai4_child_enable,
1055             ps_cur_ipe_ctb->nodes_created_in_cu_tree);
1056 
1057         ihevce_populate_cu_tree(
1058             ps_cur_ipe_ctb,
1059             ps_cu_tree->ps_child_node_tl,
1060             tree_depth,
1061             e_quality_preset,
1062             e_parent_blk_pos,
1063             e_cur_blk_pos,
1064             POS_TL);
1065 
1066         ihevce_populate_cu_tree(
1067             ps_cur_ipe_ctb,
1068             ps_cu_tree->ps_child_node_tr,
1069             tree_depth,
1070             e_quality_preset,
1071             e_parent_blk_pos,
1072             e_cur_blk_pos,
1073             POS_TR);
1074 
1075         ihevce_populate_cu_tree(
1076             ps_cur_ipe_ctb,
1077             ps_cu_tree->ps_child_node_bl,
1078             tree_depth,
1079             e_quality_preset,
1080             e_parent_blk_pos,
1081             e_cur_blk_pos,
1082             POS_BL);
1083 
1084         ihevce_populate_cu_tree(
1085             ps_cur_ipe_ctb,
1086             ps_cu_tree->ps_child_node_br,
1087             tree_depth,
1088             e_quality_preset,
1089             e_parent_blk_pos,
1090             e_cur_blk_pos,
1091             POS_BR);
1092     }
1093     else
1094     {
1095         ps_cu_tree->ps_child_node_tl = NULL;
1096         ps_cu_tree->ps_child_node_tr = NULL;
1097         ps_cu_tree->ps_child_node_bl = NULL;
1098         ps_cu_tree->ps_child_node_br = NULL;
1099     }
1100 }
1101 
1102 /**
1103 *********************************************************************************
1104 * Function name : ihevce_intra_mode_populator
1105 *
1106 * \brief
1107 *    This function populate intra mode info to strcut
1108 *
1109 * \param[in] ps_cu_intra_cand : pointer to Structure contain cu intra candidate info
1110 * \param[out] ps_ipe_data : pointer to  IPE L0 analyze structure
1111 * \param[in] ps_cu_tree_data : poniter to cu recursive struct
1112 * \param[in] i1_slice_type : contain slice type value
1113 * \param[in] i4_quality_preset : contain quality preset value
1114 *
1115 * \return
1116 *    None
1117 *
1118 **********************************************************************************/
ihevce_intra_mode_populator(cu_intra_cand_t * ps_cu_intra_cand,ipe_l0_ctb_analyse_for_me_t * ps_ipe_data,cur_ctb_cu_tree_t * ps_cu_tree_data,WORD8 i1_slice_type,WORD32 i4_quality_preset)1119 static void ihevce_intra_mode_populator(
1120     cu_intra_cand_t *ps_cu_intra_cand,
1121     ipe_l0_ctb_analyse_for_me_t *ps_ipe_data,
1122     cur_ctb_cu_tree_t *ps_cu_tree_data,
1123     WORD8 i1_slice_type,
1124     WORD32 i4_quality_preset)
1125 {
1126     WORD32 i4_32x32_id, i4_16x16_id, i4_8x8_id;
1127 
1128     UWORD8 u1_cu_pos_x = ps_cu_tree_data->b3_cu_pos_x;
1129     UWORD8 u1_cu_pos_y = ps_cu_tree_data->b3_cu_pos_y;
1130 
1131     i4_32x32_id = ((u1_cu_pos_x & 4) >> 2) + ((u1_cu_pos_y & 4) >> 1);
1132 
1133     i4_16x16_id = ((u1_cu_pos_x & 2) >> 1) + ((u1_cu_pos_y & 2));
1134 
1135     i4_8x8_id = (u1_cu_pos_x & 1) + ((u1_cu_pos_y & 1) << 1);
1136 
1137     if(i4_quality_preset < IHEVCE_QUALITY_P3)
1138     {
1139         switch(ps_cu_tree_data->u1_cu_size)
1140         {
1141         case 64:
1142         {
1143             memcpy(
1144                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1145                 ps_ipe_data->au1_best_modes_32x32_tu,
1146                 MAX_INTRA_CU_CANDIDATES + 1);
1147 
1148             break;
1149         }
1150         case 32:
1151         {
1152             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1153 
1154             memcpy(
1155                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1156                 ps_32x32_ipe_analyze->au1_best_modes_32x32_tu,
1157                 MAX_INTRA_CU_CANDIDATES + 1);
1158 
1159             if((i1_slice_type != ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
1160             {
1161                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1162             }
1163             else if((i1_slice_type == ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
1164             {
1165                 if((ps_cu_tree_data->ps_child_node_bl != NULL) &&
1166                    (ps_cu_tree_data->ps_child_node_bl->is_node_valid))
1167                 {
1168                     ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1169                 }
1170                 else
1171                 {
1172                     memcpy(
1173                         ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1174                         ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
1175                         MAX_INTRA_CU_CANDIDATES + 1);
1176                 }
1177             }
1178             else
1179             {
1180                 memcpy(
1181                     ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1182                     ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
1183                     MAX_INTRA_CU_CANDIDATES + 1);
1184             }
1185 
1186             break;
1187         }
1188         case 16:
1189         {
1190             /* Copy best 16x16 CU modes */
1191             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1192 
1193             intra16_analyse_t *ps_16x16_ipe_analyze =
1194                 &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
1195 
1196             memcpy(
1197                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1198                 ps_16x16_ipe_analyze->au1_best_modes_16x16_tu,
1199                 MAX_INTRA_CU_CANDIDATES + 1);
1200 
1201             if((i1_slice_type != ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
1202             {
1203                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1204             }
1205             else if((i1_slice_type == ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
1206             {
1207                 if((ps_cu_tree_data->ps_child_node_bl != NULL) &&
1208                    (ps_cu_tree_data->ps_child_node_bl->is_node_valid))
1209                 {
1210                     ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1211                 }
1212                 else
1213                 {
1214                     memcpy(
1215                         ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1216                         ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
1217                         MAX_INTRA_CU_CANDIDATES + 1);
1218                 }
1219             }
1220             else
1221             {
1222                 memcpy(
1223                     ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1224                     ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
1225                     MAX_INTRA_CU_CANDIDATES + 1);
1226             }
1227 
1228             break;
1229         }
1230         case 8:
1231         {
1232             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1233 
1234             intra16_analyse_t *ps_16x16_ipe_analyze =
1235                 &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
1236 
1237             intra8_analyse_t *ps_8x8_ipe_analyze =
1238                 &ps_16x16_ipe_analyze->as_intra8_analyse[i4_8x8_id];
1239 
1240             memcpy(
1241                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1242                 ps_8x8_ipe_analyze->au1_best_modes_8x8_tu,
1243                 MAX_INTRA_CU_CANDIDATES + 1);
1244 
1245             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1246 
1247             /* Initialise the hash */
1248             {
1249                 WORD32 i, j;
1250 
1251                 for(i = 0; i < NUM_PU_PARTS; i++)
1252                 {
1253                     ps_cu_intra_cand->au1_num_modes_added[i] = 0;
1254 
1255                     for(j = 0; j < MAX_INTRA_CANDIDATES; j++)
1256                     {
1257                         ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash[i][j] = 0;
1258                     }
1259                 }
1260 
1261                 for(i = 0; i < NUM_PU_PARTS; i++)
1262                 {
1263                     for(j = 0; j < MAX_INTRA_CU_CANDIDATES; j++)
1264                     {
1265                         if(ps_8x8_ipe_analyze->au1_4x4_best_modes[i][j] == 255)
1266                         {
1267                             ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][j] = 255;
1268                             break;
1269                         }
1270 
1271                         ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][j] =
1272                             ps_8x8_ipe_analyze->au1_4x4_best_modes[i][j];
1273 
1274                         ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash
1275                             [i][ps_8x8_ipe_analyze->au1_4x4_best_modes[i][j]] = 1;
1276 
1277                         ps_cu_intra_cand->au1_num_modes_added[i]++;
1278                     }
1279 
1280                     if(ps_cu_intra_cand->au1_num_modes_added[i] == MAX_INTRA_CU_CANDIDATES)
1281                     {
1282                         if(i1_slice_type != BSLICE)
1283                         {
1284                             ps_cu_intra_cand->au1_num_modes_added[i] =
1285                                 ihevce_intra_mode_nxn_hash_updater(
1286                                     ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
1287                                     ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash[i],
1288                                     ps_cu_intra_cand->au1_num_modes_added[i]);
1289                         }
1290                     }
1291                 }
1292             }
1293 
1294             break;
1295         }
1296         }
1297     }
1298     else if(i4_quality_preset == IHEVCE_QUALITY_P6)
1299     {
1300         switch(ps_cu_tree_data->u1_cu_size)
1301         {
1302         case 64:
1303         {
1304             memcpy(
1305                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1306                 ps_ipe_data->au1_best_modes_32x32_tu,
1307                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1308 
1309             ps_cu_intra_cand->b1_eval_tx_cusize = 0;
1310             ps_cu_intra_cand->b1_eval_tx_cusize_by2 = 1;
1311             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1312 
1313 #if ENABLE_INTRA_MODE_FILTERING_IN_XS25
1314             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
1315                 [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1316 #endif
1317 
1318             break;
1319         }
1320         case 32:
1321         {
1322             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1323 
1324             memcpy(
1325                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1326                 ps_32x32_ipe_analyze->au1_best_modes_32x32_tu,
1327                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1328 
1329             memcpy(
1330                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1331                 ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
1332                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1333 
1334 #if ENABLE_INTRA_MODE_FILTERING_IN_XS25
1335             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu
1336                 [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1337             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
1338                 [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1339 #endif
1340 
1341             break;
1342         }
1343         case 16:
1344         {
1345             /* Copy best 16x16 CU modes */
1346             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1347 
1348             intra16_analyse_t *ps_16x16_ipe_analyze =
1349                 &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
1350 
1351             memcpy(
1352                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1353                 ps_16x16_ipe_analyze->au1_best_modes_16x16_tu,
1354                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1355 
1356             memcpy(
1357                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1358                 ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
1359                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1360 
1361 #if ENABLE_INTRA_MODE_FILTERING_IN_XS25
1362             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu
1363                 [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1364             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
1365                 [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1366 #endif
1367 
1368             break;
1369         }
1370         case 8:
1371         {
1372             WORD32 i;
1373 
1374             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1375 
1376             intra16_analyse_t *ps_16x16_ipe_analyze =
1377                 &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
1378 
1379             intra8_analyse_t *ps_8x8_ipe_analyze =
1380                 &ps_16x16_ipe_analyze->as_intra8_analyse[i4_8x8_id];
1381 
1382             memcpy(
1383                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1384                 ps_8x8_ipe_analyze->au1_best_modes_8x8_tu,
1385                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1386 
1387 #if !ENABLE_INTRA_MODE_FILTERING_IN_XS25
1388             memcpy(
1389                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1390                 ps_8x8_ipe_analyze->au1_best_modes_4x4_tu,
1391                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1392 
1393             for(i = 0; i < 4; i++)
1394             {
1395                 memcpy(
1396                     ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
1397                     ps_8x8_ipe_analyze->au1_4x4_best_modes[i],
1398                     (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1399 
1400                 ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][MAX_INTRA_CU_CANDIDATES] = 255;
1401             }
1402 #else
1403             if(255 == ps_8x8_ipe_analyze->au1_4x4_best_modes[0][0])
1404             {
1405                 memcpy(
1406                     ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1407                     ps_8x8_ipe_analyze->au1_best_modes_4x4_tu,
1408                     (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1409 
1410                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
1411                     [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1412             }
1413             else
1414             {
1415                 for(i = 0; i < 4; i++)
1416                 {
1417                     memcpy(
1418                         ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
1419                         ps_8x8_ipe_analyze->au1_4x4_best_modes[i],
1420                         (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1421 
1422                     ps_cu_intra_cand->au1_intra_luma_modes_nxn
1423                         [i][MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1424                 }
1425             }
1426 
1427             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu
1428                 [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1429 #endif
1430 
1431 #if FORCE_NXN_MODE_BASED_ON_OL_IPE
1432             if((i4_quality_preset == IHEVCE_QUALITY_P6) && (i1_slice_type != ISLICE))
1433             {
1434                 /*Evaluate nxn mode for 8x8 if ol ipe wins for nxn over cu=tu and cu=4tu.*/
1435                 /*Disbale CU=TU and CU=4TU modes */
1436                 if(ps_8x8_ipe_analyze->b1_enable_nxn == 1)
1437                 {
1438                     ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1439                     ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1440                     ps_cu_intra_cand->au1_intra_luma_modes_nxn[0][1] = 255;
1441                     ps_cu_intra_cand->au1_intra_luma_modes_nxn[1][1] = 255;
1442                     ps_cu_intra_cand->au1_intra_luma_modes_nxn[2][1] = 255;
1443                     ps_cu_intra_cand->au1_intra_luma_modes_nxn[3][1] = 255;
1444                 }
1445             }
1446 #endif
1447 
1448             break;
1449         }
1450         }
1451     }
1452     else
1453     {
1454         switch(ps_cu_tree_data->u1_cu_size)
1455         {
1456         case 64:
1457         {
1458             memcpy(
1459                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1460                 ps_ipe_data->au1_best_modes_32x32_tu,
1461                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1462 
1463             ps_cu_intra_cand->b1_eval_tx_cusize = 0;
1464             ps_cu_intra_cand->b1_eval_tx_cusize_by2 = 1;
1465             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1466 
1467             break;
1468         }
1469         case 32:
1470         {
1471             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1472 
1473             memcpy(
1474                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1475                 ps_32x32_ipe_analyze->au1_best_modes_32x32_tu,
1476                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1477 
1478             memcpy(
1479                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1480                 ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
1481                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1482 
1483             break;
1484         }
1485         case 16:
1486         {
1487             /* Copy best 16x16 CU modes */
1488             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1489 
1490             intra16_analyse_t *ps_16x16_ipe_analyze =
1491                 &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
1492 
1493             memcpy(
1494                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1495                 ps_16x16_ipe_analyze->au1_best_modes_16x16_tu,
1496                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1497 
1498             memcpy(
1499                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1500                 ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
1501                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1502 
1503             break;
1504         }
1505         case 8:
1506         {
1507             WORD32 i;
1508 
1509             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1510 
1511             intra16_analyse_t *ps_16x16_ipe_analyze =
1512                 &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
1513 
1514             intra8_analyse_t *ps_8x8_ipe_analyze =
1515                 &ps_16x16_ipe_analyze->as_intra8_analyse[i4_8x8_id];
1516 
1517             memcpy(
1518                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1519                 ps_8x8_ipe_analyze->au1_best_modes_8x8_tu,
1520                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1521 
1522             memcpy(
1523                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1524                 ps_8x8_ipe_analyze->au1_best_modes_4x4_tu,
1525                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1526 
1527             for(i = 0; i < 4; i++)
1528             {
1529                 memcpy(
1530                     ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
1531                     ps_8x8_ipe_analyze->au1_4x4_best_modes[i],
1532                     (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1533 
1534                 ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][MAX_INTRA_CU_CANDIDATES] = 255;
1535             }
1536 
1537             break;
1538         }
1539         }
1540     }
1541 }
1542 /**
1543 ******************************************************************************
1544 * \if Function name : ihevce_compute_rdo \endif
1545 *
1546 * \brief
1547 *    Coding Unit mode decide function. Performs RD opt and decides the best mode
1548 *
1549 * \param[in] pv_ctxt : pointer to enc_loop module
1550 * \param[in] ps_cu_prms  : pointer to coding unit params (position, buffer pointers)
1551 * \param[in] ps_cu_analyse : pointer to cu analyse
1552 * \param[out] ps_cu_final : pointer to cu final
1553 * \param[out] pu1_ecd_data :pointer to store coeff data for ECD
1554 * \param[out]ps_row_col_pu; colocated pu buffer pointer
1555 * \param[out]pu1_row_pu_map; colocated pu map buffer pointer
1556 * \param[in]col_start_pu_idx : pu index start value
1557 *
1558 * \return
1559 *    None
1560 *
1561 *
1562 * \author
1563 *  Ittiam
1564 *
1565 *****************************************************************************
1566 */
ihevce_compute_rdo(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,cur_ctb_cu_tree_t * ps_cu_tree_analyse,ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb,me_ctb_data_t * ps_cu_me_data,pu_col_mv_t * ps_col_pu,final_mode_state_t * ps_final_mode_state,UWORD8 * pu1_col_pu_map,UWORD8 * pu1_ecd_data,WORD32 col_start_pu_idx,WORD32 i4_ctb_x_off,WORD32 i4_ctb_y_off)1567 LWORD64 ihevce_compute_rdo(
1568     ihevce_enc_loop_ctxt_t *ps_ctxt,
1569     enc_loop_cu_prms_t *ps_cu_prms,
1570     cur_ctb_cu_tree_t *ps_cu_tree_analyse,
1571     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
1572     me_ctb_data_t *ps_cu_me_data,
1573     pu_col_mv_t *ps_col_pu,
1574     final_mode_state_t *ps_final_mode_state,
1575     UWORD8 *pu1_col_pu_map,
1576     UWORD8 *pu1_ecd_data,
1577     WORD32 col_start_pu_idx,
1578     WORD32 i4_ctb_x_off,
1579     WORD32 i4_ctb_y_off)
1580 {
1581     /* Populate the rdo candiates to the structure */
1582     cu_analyse_t s_cu_analyse;
1583     LWORD64 rdopt_best_cost;
1584     /* Populate candidates of child nodes to CU analyse struct for further evaluation */
1585     cu_analyse_t *ps_cu_analyse;
1586     WORD32 curr_cu_pos_in_row;
1587     WORD32 cu_top_right_offset, cu_top_right_dep_pos;
1588     WORD32 is_first_cu_in_ctb, is_ctb_level_quant_rounding, is_nctb_level_quant_rounding;
1589 
1590     WORD32 cu_pos_x = ps_cu_tree_analyse->b3_cu_pos_x;
1591     WORD32 cu_pos_y = ps_cu_tree_analyse->b3_cu_pos_y;
1592 
1593     /*Derive the indices of 32*32, 16*16 and 8*8 blocks*/
1594     WORD32 i4_32x32_id = ((cu_pos_x & 4) >> 2) + ((cu_pos_y & 4) >> 1);
1595 
1596     WORD32 i4_16x16_id = ((cu_pos_x & 2) >> 1) + ((cu_pos_y & 2));
1597 
1598     WORD32 i4_8x8_id = (cu_pos_x & 1) + ((cu_pos_y & 1) << 1);
1599     if(i4_ctb_y_off == 0)
1600     {
1601         /* No wait for 1st row */
1602         cu_top_right_offset = -(MAX_CTB_SIZE);
1603         {
1604             ihevce_tile_params_t *ps_col_tile_params =
1605                 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base + ps_ctxt->i4_tile_col_idx);
1606 
1607             cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
1608         }
1609 
1610         cu_top_right_dep_pos = 0;
1611     }
1612     else
1613     {
1614         cu_top_right_offset = ps_cu_tree_analyse->u1_cu_size << 1;
1615         cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
1616     }
1617     ps_cu_analyse = &s_cu_analyse;
1618 
1619     ps_cu_analyse->b3_cu_pos_x = cu_pos_x;
1620     ps_cu_analyse->b3_cu_pos_y = cu_pos_y;
1621     ps_cu_analyse->u1_cu_size = ps_cu_tree_analyse->u1_cu_size;
1622 
1623     /* Default initializations */
1624     ps_cu_analyse->u1_num_intra_rdopt_cands = MAX_INTRA_CU_CANDIDATES;
1625     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
1626     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1627     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1628 
1629     ps_cu_analyse->s_cu_intra_cand.b1_eval_tx_cusize = 1;
1630     ps_cu_analyse->s_cu_intra_cand.b1_eval_tx_cusize_by2 = 1;
1631 
1632     switch(ps_cu_tree_analyse->u1_cu_size)
1633     {
1634     case 64:
1635     {
1636         memcpy(
1637             ps_cu_analyse[0].i4_act_factor,
1638             ps_cur_ipe_ctb->i4_64x64_act_factor,
1639             4 * 2 * sizeof(WORD32));
1640 
1641         ps_cu_analyse[0].s_cu_intra_cand.b1_eval_tx_cusize = 0;
1642         ps_cu_analyse[0].s_cu_intra_cand.b1_eval_tx_cusize_by2 = 1;
1643         ps_cu_analyse[0].s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1644 
1645         break;
1646     }
1647     case 32:
1648     {
1649         memcpy(
1650             ps_cu_analyse[0].i4_act_factor,
1651             ps_cur_ipe_ctb->i4_32x32_act_factor[i4_32x32_id],
1652             3 * 2 * sizeof(WORD32));
1653 
1654         break;
1655     }
1656     case 16:
1657     {
1658         memcpy(
1659             ps_cu_analyse[0].i4_act_factor,
1660             ps_cur_ipe_ctb->i4_16x16_act_factor[(i4_32x32_id << 2) + i4_16x16_id],
1661             2 * 2 * sizeof(WORD32));
1662 
1663         break;
1664     }
1665     case 8:
1666     {
1667         memcpy(
1668             ps_cu_analyse[0].i4_act_factor,
1669             ps_cur_ipe_ctb->i4_16x16_act_factor[(i4_32x32_id << 2) + i4_16x16_id],
1670             2 * 2 * sizeof(WORD32));
1671 
1672         break;
1673     }
1674     }
1675 
1676     /* Populate the me data in cu_analyse struct */
1677     /* For CU size 32 and 64, add me data to array of cu analyse struct */
1678     if(ISLICE != ps_ctxt->i1_slice_type)
1679     {
1680         if((ps_cu_tree_analyse->u1_cu_size >= 32) && (ps_cu_tree_analyse->u1_inter_eval_enable))
1681         {
1682             if(32 == ps_cu_tree_analyse->u1_cu_size)
1683             {
1684                 ihevce_populate_cu_struct(
1685                     ps_ctxt,
1686                     ps_cur_ipe_ctb,
1687                     ps_cu_tree_analyse,
1688                     ps_cu_me_data->as_32x32_block_data[i4_32x32_id].as_best_results,
1689                     ps_cu_analyse,
1690                     i4_32x32_id,
1691 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
1692                     ps_cu_prms->u1_is_cu_noisy,
1693 #endif
1694                     ps_cu_me_data->as_32x32_block_data[i4_32x32_id].num_best_results);
1695             }
1696             else
1697             {
1698                 ihevce_populate_cu_struct(
1699                     ps_ctxt,
1700                     ps_cur_ipe_ctb,
1701                     ps_cu_tree_analyse,
1702                     ps_cu_me_data->s_64x64_block_data.as_best_results,
1703                     ps_cu_analyse,
1704                     i4_32x32_id,
1705 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
1706                     ps_cu_prms->u1_is_cu_noisy,
1707 #endif
1708                     ps_cu_me_data->s_64x64_block_data.num_best_results);
1709             }
1710         }
1711         else if(ps_cu_tree_analyse->u1_cu_size < 32)
1712         {
1713             i4_8x8_id += (i4_32x32_id << 4) + (i4_16x16_id << 2);
1714             i4_16x16_id += (i4_32x32_id << 2);
1715 
1716             if(16 == ps_cu_tree_analyse->u1_cu_size)
1717             {
1718                 block_data_16x16_t *ps_data = &ps_cu_me_data->as_block_data[i4_16x16_id];
1719 
1720                 if(ps_cu_tree_analyse->u1_inter_eval_enable)
1721                 {
1722                     ihevce_populate_cu_struct(
1723                         ps_ctxt,
1724                         ps_cur_ipe_ctb,
1725                         ps_cu_tree_analyse,
1726                         ps_data->as_best_results,
1727                         ps_cu_analyse,
1728                         i4_32x32_id,
1729 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
1730                         ps_cu_prms->u1_is_cu_noisy,
1731 #endif
1732                         ps_data->num_best_results);
1733                 }
1734                 else
1735                 {
1736                     ps_cu_analyse->u1_num_inter_cands = 0;
1737                     ps_cu_analyse->u1_best_is_intra = 1;
1738                 }
1739             }
1740             else /* If CU size is 8 */
1741             {
1742                 block_data_8x8_t *ps_data = &ps_cu_me_data->as_8x8_block_data[i4_8x8_id];
1743 
1744                 if(ps_cu_tree_analyse->u1_inter_eval_enable)
1745                 {
1746                     ihevce_populate_cu_struct(
1747                         ps_ctxt,
1748                         ps_cur_ipe_ctb,
1749                         ps_cu_tree_analyse,
1750                         ps_data->as_best_results,
1751                         ps_cu_analyse,
1752                         i4_32x32_id,
1753 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
1754                         ps_cu_prms->u1_is_cu_noisy,
1755 #endif
1756                         ps_data->num_best_results);
1757                 }
1758                 else
1759                 {
1760                     ps_cu_analyse->u1_num_inter_cands = 0;
1761                     ps_cu_analyse->u1_best_is_intra = 1;
1762                 }
1763             }
1764         }
1765         else
1766         {
1767             ps_cu_analyse->u1_num_inter_cands = 0;
1768             ps_cu_analyse->u1_best_is_intra = 1;
1769         }
1770     }
1771     else
1772     {
1773         ps_cu_analyse->u1_num_inter_cands = 0;
1774         ps_cu_analyse->u1_best_is_intra = 1;
1775     }
1776 
1777     if(!ps_ctxt->i1_cu_qp_delta_enable)
1778     {
1779         ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_frame_qp;
1780 
1781         /*cu qp must be populated in cu_analyse_t struct*/
1782         ps_ctxt->i4_cu_qp = ps_cu_analyse->i1_cu_qp;
1783     }
1784     else
1785     {
1786         ASSERT(ps_cu_analyse->i4_act_factor[0] > 0);
1787         ASSERT(
1788             ((ps_cu_analyse->i4_act_factor[1] > 0) && (ps_cu_analyse->u1_cu_size != 8)) ||
1789             ((ps_cu_analyse->u1_cu_size == 8)));
1790         ASSERT(
1791             ((ps_cu_analyse->i4_act_factor[2] > 0) && (ps_cu_analyse->u1_cu_size == 32)) ||
1792             ((ps_cu_analyse->u1_cu_size != 32)));
1793     }
1794 
1795     if(ps_ctxt->u1_disable_intra_eval)
1796     {
1797         /* rdopt evaluation of intra disabled as inter is clear winner */
1798         ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
1799 
1800         /* all the modes invalidated */
1801         ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1802         ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1803         ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
1804         ps_cu_analyse->u1_chroma_intra_pred_mode = 255;
1805 
1806         /* no intra candt to verify */
1807         ps_cu_analyse->s_cu_intra_cand.b6_num_intra_cands = 0;
1808     }
1809 
1810 #if DISABLE_L2_IPE_IN_PB_L1_IN_B
1811     if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) && (ps_cu_analyse->u1_cu_size == 32) &&
1812        (ps_ctxt->i1_slice_type != ISLICE))
1813     {
1814         /* rdopt evaluation of intra disabled as inter is clear winner */
1815         ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
1816 
1817         /* all the modes invalidated */
1818         ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1819         ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1820         ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
1821         ps_cu_analyse->u1_chroma_intra_pred_mode = 255;
1822 
1823         /* no intra candt to verify */
1824         ps_cu_analyse->s_cu_intra_cand.b6_num_intra_cands = 0;
1825     }
1826 #endif
1827 
1828     if(DISABLE_INTRA_WHEN_NOISY && ps_cu_prms->u1_is_cu_noisy)
1829     {
1830         ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
1831     }
1832 
1833     if(ps_cu_analyse->u1_num_intra_rdopt_cands || ps_cu_tree_analyse->u1_intra_eval_enable)
1834     {
1835         ihevce_intra_mode_populator(
1836             &ps_cu_analyse->s_cu_intra_cand,
1837             ps_cur_ipe_ctb,
1838             ps_cu_tree_analyse,
1839             ps_ctxt->i1_slice_type,
1840             ps_ctxt->i4_quality_preset);
1841 
1842         ps_cu_analyse->u1_num_intra_rdopt_cands = 1;
1843     }
1844 
1845     ASSERT(!!ps_cu_analyse->u1_num_intra_rdopt_cands || ps_cu_analyse->u1_num_inter_cands);
1846 
1847     if(ps_ctxt->u1_use_top_at_ctb_boundary)
1848     {
1849         /* Wait till top data is ready          */
1850         /* Currently checking till top right CU */
1851         curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1852 
1853         if(0 == ps_cu_analyse->b3_cu_pos_y)
1854         {
1855             ihevce_dmgr_chk_row_row_sync(
1856                 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1857                 curr_cu_pos_in_row,
1858                 cu_top_right_offset,
1859                 cu_top_right_dep_pos,
1860                 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1861                 ps_ctxt->thrd_id);
1862         }
1863     }
1864 
1865 #if !DISABLE_TOP_SYNC
1866     {
1867         if(0 == ps_cu_analyse->b3_cu_pos_y)
1868         {
1869             if((0 == i4_ctb_x_off) && (i4_ctb_y_off != 0))
1870             {
1871                 if(ps_cu_analyse->b3_cu_pos_x == 0)
1872                 {
1873                     if(!ps_ctxt->u1_use_top_at_ctb_boundary)
1874                     {
1875                         /* Wait till top data is ready          */
1876                         /* Currently checking till top right CU */
1877                         curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1878 
1879                         if(0 == ps_cu_analyse->b3_cu_pos_y)
1880                         {
1881                             ihevce_dmgr_chk_row_row_sync(
1882                                 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1883                                 curr_cu_pos_in_row,
1884                                 cu_top_right_offset,
1885                                 cu_top_right_dep_pos,
1886                                 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1887                                 ps_ctxt->thrd_id);
1888                         }
1889                     }
1890 
1891                     ihevce_entropy_rdo_copy_states(
1892                         &ps_ctxt->s_rdopt_entropy_ctxt,
1893                         ps_ctxt->pu1_top_rt_cabac_state,
1894                         UPDATE_ENT_SYNC_RDO_STATE);
1895                 }
1896             }
1897         }
1898     }
1899 #else
1900     {
1901         if((0 == ps_cu_analyse->b3_cu_pos_y) && (IHEVCE_QUALITY_P6 != ps_ctxt->i4_quality_preset))
1902         {
1903             if((0 == i4_ctb_x_off) && (i4_ctb_y_off != 0))
1904             {
1905                 if(ps_cu_analyse->b3_cu_pos_x == 0)
1906                 {
1907                     if(!ps_ctxt->u1_use_top_at_ctb_boundary)
1908                     {
1909                         /* Wait till top data is ready          */
1910                         /* Currently checking till top right CU */
1911                         curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1912 
1913                         if(0 == ps_cu_analyse->b3_cu_pos_y)
1914                         {
1915                             ihevce_dmgr_chk_row_row_sync(
1916                                 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1917                                 curr_cu_pos_in_row,
1918                                 cu_top_right_offset,
1919                                 cu_top_right_dep_pos,
1920                                 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1921                                 ps_ctxt->thrd_id);
1922                         }
1923                     }
1924 
1925                     ihevce_entropy_rdo_copy_states(
1926                         &ps_ctxt->s_rdopt_entropy_ctxt,
1927                         ps_ctxt->pu1_top_rt_cabac_state,
1928                         UPDATE_ENT_SYNC_RDO_STATE);
1929                 }
1930             }
1931         }
1932         else if((0 == ps_cu_analyse->b3_cu_pos_y) && (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset))
1933         {
1934             UWORD8 u1_cabac_init_idc;
1935             WORD8 i1_cabac_init_flag =
1936                 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt->ps_slice_hdr->i1_cabac_init_flag;
1937 
1938             if(ps_ctxt->i1_slice_type == ISLICE)
1939             {
1940                 u1_cabac_init_idc = 0;
1941             }
1942             else if(ps_ctxt->i1_slice_type == PSLICE)
1943             {
1944                 u1_cabac_init_idc = i1_cabac_init_flag ? 2 : 1;
1945             }
1946             else
1947             {
1948                 u1_cabac_init_idc = i1_cabac_init_flag ? 1 : 2;
1949             }
1950 
1951             ihevce_entropy_rdo_copy_states(
1952                 &ps_ctxt->s_rdopt_entropy_ctxt,
1953                 (UWORD8 *)gau1_ihevc_cab_ctxts[u1_cabac_init_idc][ps_ctxt->i4_frame_qp],
1954                 UPDATE_ENT_SYNC_RDO_STATE);
1955         }
1956     }
1957 #endif
1958 
1959     /*2 Multi- dimensinal array based on trans size  of rounding factor to be added here */
1960     /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
1961     /* Currently the complete array will contain only single value*/
1962     /*The rounding factor is calculated with the formula
1963     Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
1964     rounding factor = (1 - DeadZone Val)
1965 
1966     Assumption: Cabac states of All the sub-blocks in the TU are considered independent
1967     */
1968 
1969     /*As long as coef level rdoq is enabled perform this operation */
1970     is_first_cu_in_ctb = ((0 == ps_cu_analyse->b3_cu_pos_x) && (0 == ps_cu_analyse->b3_cu_pos_y));
1971     is_ctb_level_quant_rounding =
1972         ((ps_ctxt->i4_quant_rounding_level == CTB_LEVEL_QUANT_ROUNDING) &&
1973          (1 == is_first_cu_in_ctb));
1974     is_nctb_level_quant_rounding =
1975         ((ps_ctxt->i4_quant_rounding_level == NCTB_LEVEL_QUANT_ROUNDING) &&
1976          (1 == is_first_cu_in_ctb) && (((i4_ctb_x_off >> 6) % NUM_CTB_QUANT_ROUNDING) == 0));
1977 
1978     if((ps_ctxt->i4_quant_rounding_level == CU_LEVEL_QUANT_ROUNDING) ||
1979        (ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) ||
1980        (1 == is_ctb_level_quant_rounding) || (1 == is_nctb_level_quant_rounding))
1981     {
1982         double i4_lamda_modifier, i4_lamda_modifier_uv;
1983         WORD32 trans_size, trans_size_cr;
1984         trans_size = ps_cu_analyse->u1_cu_size;
1985 
1986         if((1 == is_ctb_level_quant_rounding) || (1 == is_nctb_level_quant_rounding))
1987         {
1988             trans_size = MAX_TU_SIZE;
1989         }
1990         else
1991         {
1992             if(ps_cu_analyse->u1_cu_size == 64)
1993             {
1994                 trans_size >>= 1;
1995             }
1996         }
1997 
1998         /*Chroma trans size = half of luma trans size */
1999         trans_size_cr = trans_size >> 1;
2000 
2001         if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
2002         {
2003             i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
2004                                 CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
2005             i4_lamda_modifier_uv =
2006                 ps_ctxt->i4_uv_lamda_modifier *
2007                 CLIP3((((double)(ps_ctxt->i4_chrm_cu_qp - 12)) / 6.0), 2.00, 4.00);
2008         }
2009         else
2010         {
2011             i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
2012             i4_lamda_modifier_uv = ps_ctxt->i4_uv_lamda_modifier;
2013         }
2014         if(ps_ctxt->i4_use_const_lamda_modifier)
2015         {
2016             if(ISLICE == ps_ctxt->i1_slice_type)
2017             {
2018                 i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
2019                 i4_lamda_modifier_uv = ps_ctxt->f_i_pic_lamda_modifier;
2020             }
2021             else
2022             {
2023                 i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
2024                 i4_lamda_modifier_uv = CONST_LAMDA_MOD_VAL;
2025             }
2026         }
2027 
2028         do
2029         {
2030             memset(
2031                 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3],
2032                 0,
2033                 trans_size * trans_size * sizeof(WORD32));
2034             memset(
2035                 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3],
2036                 0,
2037                 trans_size * trans_size * sizeof(WORD32));
2038 
2039             /*ps_ctxt->i4_quant_rnd_factor[intra_flag], is currently not used */
2040             ihevce_quant_rounding_factor_gen(
2041                 trans_size,
2042                 1,  //is_luma = 1
2043                 &ps_ctxt->s_rdopt_entropy_ctxt,
2044                 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3],
2045                 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3],
2046                 i4_lamda_modifier,
2047                 0);  //is_tu_level_quant rounding = 0
2048 
2049             trans_size = trans_size >> 1;
2050 
2051         } while(trans_size >= 4);
2052 
2053         /*CHROMA Quant Rounding is to be enabled with CU/TU/CTB/NCTB Luma rounding */
2054         /*Please note chroma is calcualted only for 1st TU at TU level Rounding */
2055         if(ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING)
2056         {
2057             do
2058             {
2059                 memset(
2060                     ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size_cr >> 3],
2061                     0,
2062                     trans_size_cr * trans_size_cr * sizeof(WORD32));
2063                 memset(
2064                     ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size_cr >> 3],
2065                     0,
2066                     trans_size_cr * trans_size_cr * sizeof(WORD32));
2067 
2068                 ihevce_quant_rounding_factor_gen(
2069                     trans_size_cr,
2070                     0,  //is_luma = 0
2071                     &ps_ctxt->s_rdopt_entropy_ctxt,
2072                     ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size_cr >> 3],
2073                     ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size_cr >> 3],
2074                     i4_lamda_modifier_uv,
2075                     0);  //is_tu_level_quant rounding = 0
2076 
2077                 trans_size_cr = trans_size_cr >> 1;
2078 
2079             } while(trans_size_cr >= 4);
2080         }
2081     }
2082 
2083 #if DISABLE_INTRAS_IN_BPIC
2084     if((ps_ctxt->i1_slice_type == BSLICE) && (ps_cu_analyse->u1_num_inter_cands))
2085     {
2086         ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
2087     }
2088 #endif
2089 
2090     rdopt_best_cost = ihevce_cu_mode_decide(
2091         ps_ctxt,
2092         ps_cu_prms,
2093         ps_cu_analyse,
2094         ps_final_mode_state,
2095         pu1_ecd_data,
2096         ps_col_pu,
2097         pu1_col_pu_map,
2098         col_start_pu_idx);
2099 
2100     return rdopt_best_cost;
2101 }
2102 
2103 /**
2104 ******************************************************************************
2105 * \if Function name : ihevce_enc_loop_cu_bot_copy \endif
2106 *
2107 * \brief
2108 *    This function copy the bottom data at CU level to row buffers
2109 *
2110 * \date
2111 *    18/09/2012
2112 *
2113 * \author
2114 *    Ittiam
2115 *
2116 * \return
2117 *
2118 * List of Functions
2119 *
2120 *
2121 ******************************************************************************
2122 */
ihevce_enc_loop_cu_bot_copy(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,ihevce_enc_cu_node_ctxt_t * ps_enc_out_ctxt,WORD32 curr_cu_pos_in_row,WORD32 curr_cu_pos_in_ctb)2123 void ihevce_enc_loop_cu_bot_copy(
2124     ihevce_enc_loop_ctxt_t *ps_ctxt,
2125     enc_loop_cu_prms_t *ps_cu_prms,
2126     ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
2127     WORD32 curr_cu_pos_in_row,
2128     WORD32 curr_cu_pos_in_ctb)
2129 {
2130     /* ---------------------------------------------- */
2131     /* copy the bottom row  data to the row buffers   */
2132     /* ---------------------------------------------- */
2133     nbr_4x4_t *ps_top_nbr;
2134     UWORD8 *pu1_buff;
2135     UWORD8 *pu1_luma_top, *pu1_chrm_top;
2136     WORD32 nbr_strd;
2137 
2138     WORD32 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
2139 
2140     /* derive the appropraite pointers */
2141     pu1_luma_top = (UWORD8 *)ps_ctxt->pv_bot_row_luma + curr_cu_pos_in_row;
2142     pu1_chrm_top = (UWORD8 *)ps_ctxt->pv_bot_row_chroma + curr_cu_pos_in_row;
2143     ps_top_nbr = ps_ctxt->ps_bot_row_nbr + (curr_cu_pos_in_row >> 2);
2144     nbr_strd = ps_cu_prms->i4_ctb_size >> 2;
2145 
2146     /* copy bottom luma data */
2147     pu1_buff = ps_cu_prms->pu1_luma_recon +
2148                (ps_cu_prms->i4_luma_recon_stride * (ps_cu_prms->i4_ctb_size - 1));
2149 
2150     pu1_buff += curr_cu_pos_in_ctb;
2151 
2152     memcpy(pu1_luma_top, pu1_buff, ps_enc_out_ctxt->u1_cu_size);
2153 
2154     /* copy bottom chroma data cb and cr pixel interleaved */
2155     pu1_buff = ps_cu_prms->pu1_chrm_recon + (ps_cu_prms->i4_chrm_recon_stride *
2156                                              ((ps_cu_prms->i4_ctb_size >> (0 == u1_is_422)) - 1));
2157 
2158     pu1_buff += curr_cu_pos_in_ctb;
2159 
2160     memcpy(pu1_chrm_top, pu1_buff, ps_enc_out_ctxt->u1_cu_size);
2161 
2162     /* store the nbr 4x4 data at cu level */
2163     {
2164         nbr_4x4_t *ps_nbr;
2165 
2166         /* copy bottom nbr data */
2167         ps_nbr = &ps_ctxt->as_ctb_nbr_arr[0];
2168         ps_nbr += ((ps_cu_prms->i4_ctb_size >> 2) - 1) * nbr_strd;
2169 
2170         ps_nbr += (curr_cu_pos_in_ctb >> 2);
2171 
2172         memcpy(ps_top_nbr, ps_nbr, (ps_enc_out_ctxt->u1_cu_size >> 2) * sizeof(nbr_4x4_t));
2173     }
2174     return;
2175 }
2176 
2177 /**
2178 ******************************************************************************
2179 * \if Function name : ihevce_update_final_cu_results \endif
2180 *
2181 * \brief
2182 *
2183 * \return
2184 *    None
2185 *
2186 * \author
2187 *  Ittiam
2188 *
2189 *****************************************************************************
2190 */
ihevce_update_final_cu_results(ihevce_enc_loop_ctxt_t * ps_ctxt,ihevce_enc_cu_node_ctxt_t * ps_enc_out_ctxt,enc_loop_cu_prms_t * ps_cu_prms,pu_col_mv_t ** pps_row_col_pu,WORD32 * pi4_col_pu_map_idx,cu_final_update_prms * ps_cu_update_prms,WORD32 ctb_ctr,WORD32 vert_ctb_ctr)2191 void ihevce_update_final_cu_results(
2192     ihevce_enc_loop_ctxt_t *ps_ctxt,
2193     ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
2194     enc_loop_cu_prms_t *ps_cu_prms,
2195     pu_col_mv_t **pps_row_col_pu,
2196     WORD32 *pi4_col_pu_map_idx,
2197     cu_final_update_prms *ps_cu_update_prms,
2198     WORD32 ctb_ctr,
2199     WORD32 vert_ctb_ctr)
2200 {
2201     WORD32 curr_cu_pos_in_row;
2202 
2203     cu_enc_loop_out_t *ps_cu_final = *ps_cu_update_prms->pps_cu_final;
2204     pu_t **pps_row_pu = ps_cu_update_prms->pps_row_pu;
2205     tu_enc_loop_out_t **pps_row_tu = ps_cu_update_prms->pps_row_tu;
2206     UWORD8 **ppu1_row_ecd_data = ps_cu_update_prms->ppu1_row_ecd_data;
2207     WORD32 *pi4_num_pus_in_ctb = ps_cu_update_prms->pi4_num_pus_in_ctb;
2208     UWORD32 u4_cu_size = ps_enc_out_ctxt->u1_cu_size;
2209     ps_cu_final->b3_cu_pos_x = ps_enc_out_ctxt->b3_cu_pos_x;
2210     ps_cu_final->b3_cu_pos_y = ps_enc_out_ctxt->b3_cu_pos_y;
2211 
2212     ps_cu_final->b4_cu_size = ps_enc_out_ctxt->u1_cu_size >> 3;
2213 
2214     /* store the current pu and tu pointes */
2215     ps_cu_final->ps_pu = *pps_row_pu;
2216     ps_cu_final->ps_enc_tu = *pps_row_tu;
2217     curr_cu_pos_in_row = ctb_ctr * ps_cu_prms->i4_ctb_size + (ps_cu_final->b3_cu_pos_x << 3);
2218 
2219     ihevce_store_cu_final(ps_ctxt, ps_cu_final, *ppu1_row_ecd_data, ps_enc_out_ctxt, ps_cu_prms);
2220 
2221     if(NULL != pps_row_col_pu)
2222     {
2223         (*pps_row_col_pu) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2224     }
2225     if(NULL != pi4_col_pu_map_idx)
2226     {
2227         (*pi4_col_pu_map_idx) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2228     }
2229     (*pi4_num_pus_in_ctb) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2230     (*pps_row_tu) += ps_cu_final->u2_num_tus_in_cu;
2231     (*pps_row_pu) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2232     (*ppu1_row_ecd_data) += ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
2233 
2234     (*ps_cu_update_prms->pps_cu_final)++;
2235     (*ps_cu_update_prms->pu1_num_cus_in_ctb_out)++;
2236 
2237     /* Updated for each CU in bottom row  of CTB */
2238     if(((ps_cu_final->b3_cu_pos_y << 3) + u4_cu_size) == ps_ctxt->u4_cur_ctb_ht)
2239     {
2240         /* copy the bottom data to row buffers */
2241         ((pf_enc_loop_cu_bot_copy)ps_ctxt->pv_enc_loop_cu_bot_copy)(
2242             ps_ctxt,
2243             ps_cu_prms,
2244             ps_enc_out_ctxt,
2245             curr_cu_pos_in_row,
2246             (ps_enc_out_ctxt->b3_cu_pos_x << 3));
2247 
2248         /* Setting Dependency for CU TopRight */
2249         ihevce_dmgr_set_row_row_sync(
2250             ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
2251             (curr_cu_pos_in_row + ps_enc_out_ctxt->u1_cu_size),
2252             vert_ctb_ctr,
2253             ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2254 
2255         /* Setting Dependency for Entropy to consume is made at CTB level */
2256     }
2257 }
2258 
2259 /**
2260 ******************************************************************************
2261 * \if Function name : ihevce_cu_recurse_decide \endif
2262 *
2263 * \brief
2264 *    Coding Unit mode decide function. Performs RD opt and decides the best mode
2265 *
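* \param[in] ps_ctxt : pointer to enc_loop module context
* \param[in] ps_cu_prms  : pointer to coding unit params (position, buffer pointers)
* \param[in] ps_cu_tree_analyse : CU tree node to be evaluated (its four child nodes are read)
* \param[in] ps_cu_tree_analyse_parent : parent CU tree node (cast to void, i.e. unused)
* \param[in] ps_cur_ipe_ctb : pointer to the IPE CTB analyse data
* \param[in] ps_cu_me_data : pointer to the ME CTB data
* \param[in,out] pps_col_pu : colocated pu buffer pointer
* \param[in,out] ps_cu_update_prms : pointer to the final CU update params
* \param[out] pu1_col_pu_map : colocated pu map buffer pointer
* \param[in,out] pi4_col_start_pu_idx : pointer to the pu index start value
* \param[in] i4_tree_depth : depth of the node in the CU tree (TODO confirm against the function body)
* \param[in] i4_ctb_x_off : horizontal offset of the CTB in the frame
* \param[in] i4_ctb_y_off : vertical offset of the CTB in the frame
* \param[in] cur_ctb_ht : height of the current CTB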
2274 *
2275 * \return
2276 *    None
2277 *
2278 *
2279 * \author
2280 *  Ittiam
2281 *
2282 *****************************************************************************
2283 */
ihevce_cu_recurse_decide(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,cur_ctb_cu_tree_t * ps_cu_tree_analyse,cur_ctb_cu_tree_t * ps_cu_tree_analyse_parent,ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb,me_ctb_data_t * ps_cu_me_data,pu_col_mv_t ** pps_col_pu,cu_final_update_prms * ps_cu_update_prms,UWORD8 * pu1_col_pu_map,WORD32 * pi4_col_start_pu_idx,WORD32 i4_tree_depth,WORD32 i4_ctb_x_off,WORD32 i4_ctb_y_off,WORD32 cur_ctb_ht)2284 WORD32 ihevce_cu_recurse_decide(
2285     ihevce_enc_loop_ctxt_t *ps_ctxt,
2286     enc_loop_cu_prms_t *ps_cu_prms,
2287     cur_ctb_cu_tree_t *ps_cu_tree_analyse,
2288     cur_ctb_cu_tree_t *ps_cu_tree_analyse_parent,
2289     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
2290     me_ctb_data_t *ps_cu_me_data,
2291     pu_col_mv_t **pps_col_pu,
2292     cu_final_update_prms *ps_cu_update_prms,
2293     UWORD8 *pu1_col_pu_map,
2294     WORD32 *pi4_col_start_pu_idx,
2295     WORD32 i4_tree_depth,
2296     WORD32 i4_ctb_x_off,
2297     WORD32 i4_ctb_y_off,
2298     WORD32 cur_ctb_ht)
2299 {
2300     cur_ctb_cu_tree_t *ps_cu_tree_analyse_child[4];
2301     final_mode_state_t s_final_mode_state;
2302 
2303     WORD32 i;
2304     WORD32 child_nodes_null;
2305     LWORD64 i8_least_child_cost;
2306 
2307     WORD32 num_children_encoded = 0;
2308 
2309     /* Take backup of collocated start PU index for parent node rdo for PQ */
2310     WORD32 i4_col_pu_idx_bkup = *pi4_col_start_pu_idx;
2311     pu_col_mv_t *ps_col_mv_bkup = *pps_col_pu;
2312 
2313 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2314     WORD32 x0_frm = i4_ctb_x_off + (ps_cu_tree_analyse->b3_cu_pos_x << 3);
2315     WORD32 y0_frm = i4_ctb_y_off + (ps_cu_tree_analyse->b3_cu_pos_y << 3);
2316     WORD32 pic_wd = ps_ctxt->s_sao_ctxt_t.ps_sps->i2_pic_width_in_luma_samples;
2317     WORD32 pic_ht = ps_ctxt->s_sao_ctxt_t.ps_sps->i2_pic_height_in_luma_samples;
2318     WORD32 log2_min_cb_size = ps_ctxt->s_sao_ctxt_t.ps_sps->i1_log2_min_coding_block_size;
2319     WORD32 cu_size = ps_cu_tree_analyse->u1_cu_size;
2320 
2321     /* bits for coding split_cu_flag = 1 */
2322     WORD32 split_cu1_bits_q12 = 0;
2323 
2324     /* bits for coding split_cu_flag = 0 */
2325     WORD32 split_cu0_bits_q12 = 0;
2326 #endif
2327 
2328     UWORD8 u1_is_cu_noisy = ps_ctxt->u1_is_stasino_enabled
2329                                 ? ihevce_determine_cu_noise_based_on_8x8Blk_data(
2330                                       ps_cu_prms->pu1_is_8x8Blk_noisy,
2331                                       ((ps_cu_tree_analyse->b3_cu_pos_x << 3) >> 4) << 4,
2332                                       ((ps_cu_tree_analyse->b3_cu_pos_y << 3) >> 4) << 4,
2333                                       MAX(16, ps_cu_tree_analyse->u1_cu_size))
2334                                 : 0;
2335 
2336 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2337     LWORD64 i8_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
2338 #endif
2339 
2340     (void)ps_cu_tree_analyse_parent;
2341 
2342 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
2343     if(!ps_ctxt->u1_enable_psyRDOPT && u1_is_cu_noisy)
2344     {
2345         ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
2346         ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
2347     }
2348 #endif
2349 
2350     if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
2351     {
2352         i8_lambda_qf = ((float)i8_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
2353     }
2354 
2355     ps_cu_tree_analyse_child[0] = ps_cu_tree_analyse->ps_child_node_tl;
2356     ps_cu_tree_analyse_child[1] = ps_cu_tree_analyse->ps_child_node_tr;
2357     ps_cu_tree_analyse_child[2] = ps_cu_tree_analyse->ps_child_node_bl;
2358     ps_cu_tree_analyse_child[3] = ps_cu_tree_analyse->ps_child_node_br;
2359 
2360     child_nodes_null =
2361         ((ps_cu_tree_analyse_child[0] == NULL) + (ps_cu_tree_analyse_child[1] == NULL) +
2362          (ps_cu_tree_analyse_child[2] == NULL) + (ps_cu_tree_analyse_child[3] == NULL));
2363 
2364 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2365 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2366     if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2367 #endif
2368     {
2369         /*----------------------------------------------*/
2370         /* ---------- CU Depth Bit Estimation --------- */
2371         /*----------------------------------------------*/
2372 
2373         /* Encode cu split flags based on following conditions; See section 7.3.8*/
2374         if(((x0_frm + cu_size) <= pic_wd) && ((y0_frm + cu_size) <= pic_ht) &&
2375            (cu_size > (1 << log2_min_cb_size))) /* &&(ps_entropy_ctxt->i1_ctb_num_pcm_blks == 0)) */
2376         {
2377             WORD32 left_cu_depth = 0;
2378             WORD32 top_cu_depth = 0;
2379             WORD32 pos_x_4x4 = ps_cu_tree_analyse->b3_cu_pos_x << 1;
2380             WORD32 pos_y_4x4 = ps_cu_tree_analyse->b3_cu_pos_y << 1;
2381             WORD32 num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
2382             WORD32 cur_4x4_in_ctb = pos_x_4x4 + (pos_y_4x4 * num_4x4_in_ctb);
2383             UWORD8 u1_split_cu_flag_cab_model;
2384             WORD32 split_cu_ctxt_inc;
2385 
2386             /* Left and Top CU depth is required for cabac context */
2387 
2388             /* CU left */
2389             if(0 == pos_x_4x4)
2390             {
2391                 /* CTB boundary */
2392                 if(i4_ctb_x_off)
2393                 {
2394                     left_cu_depth = ps_ctxt->as_left_col_nbr[pos_y_4x4].b2_cu_depth;
2395                 }
2396             }
2397             else
2398             {
2399                 /* inside CTB */
2400                 left_cu_depth = ps_ctxt->as_ctb_nbr_arr[cur_4x4_in_ctb - 1].b2_cu_depth;
2401             }
2402 
2403             /* CU top */
2404             if(0 == pos_y_4x4)
2405             {
2406                 /* CTB boundary */
2407                 if(i4_ctb_y_off)
2408                 {
2409                     /* Wait till top cu depth is available */
2410                     ihevce_dmgr_chk_row_row_sync(
2411                         ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
2412                         (i4_ctb_x_off) + (pos_x_4x4 << 2),
2413                         4,
2414                         ((i4_ctb_y_off >> 6) - 1),
2415                         ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2416                         ps_ctxt->thrd_id);
2417 
2418                     top_cu_depth =
2419                         ps_ctxt->ps_top_row_nbr[(i4_ctb_x_off >> 2) + pos_x_4x4].b2_cu_depth;
2420                 }
2421             }
2422             else
2423             {
2424                 /* inside CTB */
2425                 top_cu_depth = ps_ctxt->as_ctb_nbr_arr[cur_4x4_in_ctb - num_4x4_in_ctb].b2_cu_depth;
2426             }
2427 
2428             split_cu_ctxt_inc = IHEVC_CAB_SPLIT_CU_FLAG + (left_cu_depth > i4_tree_depth) +
2429                                 (top_cu_depth > i4_tree_depth);
2430 
2431             u1_split_cu_flag_cab_model =
2432                 ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][split_cu_ctxt_inc];
2433 
2434             /* bits for coding split_cu_flag = 1 */
2435             split_cu1_bits_q12 = gau2_ihevce_cabac_bin_to_bits[u1_split_cu_flag_cab_model ^ 1];
2436 
2437             /* bits for coding split_cu_flag = 0 */
2438             split_cu0_bits_q12 = gau2_ihevce_cabac_bin_to_bits[u1_split_cu_flag_cab_model ^ 0];
2439 
2440             /* update the cu split cabac context of all child nodes before evaluating child */
2441             for(i = (i4_tree_depth + 1); i < 4; i++)
2442             {
2443                 ps_ctxt->au1_rdopt_recur_ctxt_models[i][split_cu_ctxt_inc] =
2444                     gau1_ihevc_next_state[(u1_split_cu_flag_cab_model << 1) | 1];
2445             }
2446 
2447             /* update the cu split cabac context of the parent node with split flag = 0 */
2448             ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][split_cu_ctxt_inc] =
2449                 gau1_ihevc_next_state[(u1_split_cu_flag_cab_model << 1) | 0];
2450         }
2451     }
2452 #endif
2453 
2454     /* If all the child nodes are null, then do rdo for this node and return the cost */
2455     if((1 == ps_cu_tree_analyse->is_node_valid) && (4 == child_nodes_null))
2456     {
2457         WORD32 i4_num_bytes_ecd_data;
2458 
2459 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2460         COPY_CABAC_STATES(
2461             &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2462             &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2463             IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2464 #else
2465         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2466         {
2467             COPY_CABAC_STATES(
2468                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2469                 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2470                 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2471         }
2472 #endif
2473 
2474         ps_cu_prms->u1_is_cu_noisy = u1_is_cu_noisy;
2475         ihevce_update_pred_qp(
2476             ps_ctxt, ps_cu_tree_analyse->b3_cu_pos_x, ps_cu_tree_analyse->b3_cu_pos_y);
2477         /* DO rdo for current node here */
2478         /* return rdo cost for current node*/
2479         ps_cu_tree_analyse->i8_best_rdopt_cost = ihevce_compute_rdo(
2480             ps_ctxt,
2481             ps_cu_prms,
2482             ps_cu_tree_analyse,
2483             ps_cur_ipe_ctb,
2484             ps_cu_me_data,
2485             *pps_col_pu,
2486             &s_final_mode_state,
2487             pu1_col_pu_map,
2488             *ps_cu_update_prms->ppu1_row_ecd_data,
2489             *pi4_col_start_pu_idx,
2490             i4_ctb_x_off,
2491             i4_ctb_y_off);
2492 
2493         if((((ps_cu_tree_analyse->b3_cu_pos_y << 3) + ps_cu_tree_analyse->u1_cu_size) ==
2494             cur_ctb_ht) &&
2495            (ps_cu_tree_analyse->b3_cu_pos_x == 0) && (i4_ctb_x_off == 0))
2496         {
2497             /* copy the state to row level context after 1st Cu, in the Last CU row of CTB */
2498             /* copy current ctb CU states into a entropy sync state */
2499             /* to be used for next row                              */
2500             COPY_CABAC_STATES(
2501                 ps_ctxt->pu1_curr_row_cabac_state,
2502                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2503                 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2504         }
2505 
2506 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2507         {
2508 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2509             /* Add parent split cu = 0 cost signalling */
2510             ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
2511                 split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2512 #endif
2513             for(i = (i4_tree_depth); i < 4; i++)
2514             {
2515                 COPY_CABAC_STATES(
2516                     &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2517                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2518                     IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2519             }
2520         }
2521 #else
2522         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2523         {
2524 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2525             /* Add parent split cu = 0 cost signalling */
2526             ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
2527                 split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2528 #endif
2529 
2530             for(i = (i4_tree_depth); i < 4; i++)
2531             {
2532                 COPY_CABAC_STATES(
2533                     &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2534                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2535                     IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2536             }
2537         }
2538 #endif
2539 
2540         ((pf_store_cu_results)ps_ctxt->pv_store_cu_results)(
2541             ps_ctxt, ps_cu_prms, &s_final_mode_state);
2542 
2543 #if(!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2544         if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
2545         {
2546             ihevce_update_final_cu_results(
2547                 ps_ctxt,
2548                 ps_ctxt->ps_enc_out_ctxt,
2549                 ps_cu_prms,
2550                 pps_col_pu,
2551                 pi4_col_start_pu_idx,
2552                 ps_cu_update_prms,
2553                 i4_ctb_x_off >> 6,
2554                 i4_ctb_y_off >> 6);
2555         }
2556         else
2557         {
2558             /* ---- copy the luma & chroma coeffs to final output -------- */
2559             i4_num_bytes_ecd_data = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
2560 
2561             if(0 != i4_num_bytes_ecd_data)
2562             {
2563                 memcpy(
2564                     ps_ctxt->pu1_ecd_data,
2565                     &ps_ctxt->pu1_cu_recur_coeffs[0],
2566                     i4_num_bytes_ecd_data * sizeof(UWORD8));
2567 
2568                 ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
2569             }
2570 
2571             /* Collocated PU updates */
2572             *pps_col_pu += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2573             *pi4_col_start_pu_idx += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2574         }
2575 #else
2576         /* ---- copy the luma & chroma coeffs to final output -------- */
2577         i4_num_bytes_ecd_data = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
2578         if(0 != i4_num_bytes_ecd_data)
2579         {
2580             memcpy(
2581                 ps_ctxt->pu1_ecd_data,
2582                 &ps_ctxt->pu1_cu_recur_coeffs[0],
2583                 i4_num_bytes_ecd_data * sizeof(UWORD8));
2584 
2585             ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
2586         }
2587 
2588         /* Collocated PU updates */
2589         *pps_col_pu += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2590         *pi4_col_start_pu_idx += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2591 #endif
2592 
2593         ps_ctxt->ps_enc_out_ctxt++;
2594         num_children_encoded++;
2595     }
2596     else
2597     {
2598         i8_least_child_cost = 0;
2599 
2600         for(i = 0; i < 4; i++)
2601         {
2602             if(ps_cu_tree_analyse_child[i] != NULL)
2603             {
2604                 num_children_encoded += ihevce_cu_recurse_decide(
2605                     ps_ctxt,
2606                     ps_cu_prms,
2607                     ps_cu_tree_analyse_child[i],
2608                     ps_cu_tree_analyse,
2609                     ps_cur_ipe_ctb,
2610                     ps_cu_me_data,
2611                     pps_col_pu,
2612                     ps_cu_update_prms,
2613                     pu1_col_pu_map,
2614                     pi4_col_start_pu_idx,
2615                     i4_tree_depth + 1,
2616                     i4_ctb_x_off,
2617                     i4_ctb_y_off,
2618                     cur_ctb_ht);
2619 
2620                 /* In case of incomplete ctb, */
2621                 //if(MAX_COST != ps_cu_tree_analyse_child[i]->i4_best_rdopt_cost)
2622                 if(((ULWORD64)(
2623                        i8_least_child_cost + ps_cu_tree_analyse_child[i]->i8_best_rdopt_cost)) >
2624                    MAX_COST_64)
2625                 {
2626                     i8_least_child_cost = MAX_COST_64;
2627                 }
2628                 else
2629                 {
2630                     i8_least_child_cost += ps_cu_tree_analyse_child[i]->i8_best_rdopt_cost;
2631                 }
2632             }
2633             else
2634             {
2635                 /* If the child node is NULL, return MAX_COST*/
2636                 i8_least_child_cost = MAX_COST_64;
2637             }
2638         }
2639 
2640         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2641         {
2642 #if !ENABLE_4CTB_EVALUATION
2643             if((ps_cu_tree_analyse->u1_cu_size == 64) && (num_children_encoded > 10) &&
2644                (ps_ctxt->i1_slice_type != ISLICE))
2645             {
2646                 ps_cu_tree_analyse->is_node_valid = 0;
2647             }
2648 #endif
2649         }
2650 
2651         /* If current CU node is valid, do rdo for the node and decide btwn child nodes and parent nodes  */
2652         if(ps_cu_tree_analyse->is_node_valid)
2653         {
2654             UWORD8 au1_cu_pu_map[(MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE)];
2655             pu_col_mv_t as_col_mv[2]; /* Max of 2 PUs only per CU */
2656 
2657             WORD32 i4_col_pu_idx_start = i4_col_pu_idx_bkup;
2658 
2659             /* Copy the collocated PU map to the local array */
2660             memcpy(
2661                 au1_cu_pu_map,
2662                 pu1_col_pu_map,
2663                 (MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE));
2664 
2665 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2666             COPY_CABAC_STATES(
2667                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2668                 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2669                 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2670 
2671             /* Reset the nbr maps while computing Parent CU node ()*/
2672             /* set the neighbour map to 0 */
2673             ihevce_set_nbr_map(
2674                 ps_ctxt->pu1_ctb_nbr_map,
2675                 ps_ctxt->i4_nbr_map_strd,
2676                 (ps_cu_tree_analyse->b3_cu_pos_x << 1),
2677                 (ps_cu_tree_analyse->b3_cu_pos_y << 1),
2678                 (ps_cu_tree_analyse->u1_cu_size >> 2),
2679                 0);
2680 #else
2681             if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2682             {
2683                 COPY_CABAC_STATES(
2684                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2685                     &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2686                     IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2687 
2688                 /* Reset the nbr maps while computing Parent CU node ()*/
2689                 /* set the neighbour map to 0 */
2690                 ihevce_set_nbr_map(
2691                     ps_ctxt->pu1_ctb_nbr_map,
2692                     ps_ctxt->i4_nbr_map_strd,
2693                     (ps_cu_tree_analyse->b3_cu_pos_x << 1),
2694                     (ps_cu_tree_analyse->b3_cu_pos_y << 1),
2695                     (ps_cu_tree_analyse->u1_cu_size >> 2),
2696                     0);
2697             }
2698 #endif
2699 
2700             /* Do rdo for the parent node */
2701             /* Compare parent node cost vs child node costs */
2702             ps_ctxt->is_parent_cu_rdopt = 1;
2703 
2704             ps_cu_prms->u1_is_cu_noisy = u1_is_cu_noisy;
2705 
2706             ihevce_update_pred_qp(
2707                 ps_ctxt, ps_cu_tree_analyse->b3_cu_pos_x, ps_cu_tree_analyse->b3_cu_pos_y);
2708 
2709             ps_cu_tree_analyse->i8_best_rdopt_cost = ihevce_compute_rdo(
2710                 ps_ctxt,
2711                 ps_cu_prms,
2712                 ps_cu_tree_analyse,
2713                 ps_cur_ipe_ctb,
2714                 ps_cu_me_data,
2715                 as_col_mv,
2716                 &s_final_mode_state,
2717                 au1_cu_pu_map,
2718                 *ps_cu_update_prms->ppu1_row_ecd_data,
2719                 i4_col_pu_idx_start,
2720                 i4_ctb_x_off,
2721                 i4_ctb_y_off);
2722 
2723             ps_ctxt->is_parent_cu_rdopt = 0;
2724 
2725 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2726             /* Add parent split cu cost signalling */
2727             ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
2728                 split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2729 
2730             COPY_CABAC_STATES(
2731                 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2732                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2733                 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2734 
2735             /* i8_least_child_cost += (num_children_encoded * ps_ctxt->i4_sad_lamda\
2736             + ((1 << (LAMBDA_Q_SHIFT)))) >> (LAMBDA_Q_SHIFT + 1) */
2737             ;
2738             /* bits for coding cu split flag as  1 */
2739             i8_least_child_cost += COMPUTE_RATE_COST_CLIP30(
2740                 split_cu1_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2741 #else
2742 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2743             if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2744             {
2745                 /* Add parent split cu cost signalling */
2746                 ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
2747                     split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2748 
2749                 COPY_CABAC_STATES(
2750                     &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2751                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2752                     IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2753 
2754                 /* i8_least_child_cost += (num_children_encoded * ps_ctxt->i4_sad_lamda\
2755                 + ((1 << (LAMBDA_Q_SHIFT)))) >> (LAMBDA_Q_SHIFT + 1) */
2756                 ;
2757                 /* bits for coding cu split flag as  1 */
2758                 i8_least_child_cost += COMPUTE_RATE_COST_CLIP30(
2759                     split_cu1_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2760             }
2761 #else
2762             i8_least_child_cost +=
2763                 (num_children_encoded * ps_ctxt->i4_sad_lamda + ((1 << (LAMBDA_Q_SHIFT)))) >>
2764                 (LAMBDA_Q_SHIFT + 1);
2765 #endif
2766 #endif
2767 
2768             /* If child modes win over parent, discard parent enc ctxt */
2769             /* else discard child ctxt */
2770             if(ps_cu_tree_analyse->i8_best_rdopt_cost > i8_least_child_cost)
2771             {
2772 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2773                 /* Store child node Models for evalution of next CU */
2774                 for(i = (i4_tree_depth); i < 4; i++)
2775                 {
2776                     COPY_CABAC_STATES(
2777                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2778                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2779                         IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2780                 }
2781                 /* Reset cabac states if child has won */
2782                 COPY_CABAC_STATES(
2783                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2784                     &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2785                     IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2786 #else
2787                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2788                 {
2789                     for(i = i4_tree_depth; i < 4; i++)
2790                     {
2791                         COPY_CABAC_STATES(
2792                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2793                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2794                             IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2795                     }
2796                     /* Reset cabac states if child has won */
2797                     COPY_CABAC_STATES(
2798                         &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2799                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2800                         IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2801                 }
2802 #endif
2803                 ps_cu_tree_analyse->i8_best_rdopt_cost = i8_least_child_cost;
2804                 ps_cu_tree_analyse->is_node_valid = 0;
2805             }
2806             else
2807             {
2808                 /* Parent node wins over child node */
2809                 ihevce_enc_cu_node_ctxt_t *ps_enc_tmp_out_ctxt;
2810                 WORD32 i4_num_bytes_ecd_data;
2811                 WORD32 num_child_nodes = 0;
2812                 WORD32 i4_num_pus_in_cu;
2813 
2814                 if((((ps_cu_tree_analyse->b3_cu_pos_y << 3) + ps_cu_tree_analyse->u1_cu_size) ==
2815                     cur_ctb_ht) &&
2816                    (ps_cu_tree_analyse->b3_cu_pos_x == 0) && (i4_ctb_x_off == 0))
2817                 {
2818                     /* copy the state to row level context after 1st Cu, in the Last CU row of CTB */
2819                     /* copy current ctb CU states into a entropy sync state */
2820                     /* to be used for next row                              */
2821                     COPY_CABAC_STATES(
2822                         ps_ctxt->pu1_curr_row_cabac_state,
2823                         &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2824                         IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2825                 }
2826 
2827 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2828                 /* Store parent node Models for evalution of next CU */
2829                 for(i = (i4_tree_depth + 1); i < 4; i++)
2830                 {
2831                     COPY_CABAC_STATES(
2832                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2833                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2834                         IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2835                 }
2836 #else
2837                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2838                 {
2839                     for(i = (i4_tree_depth + 1); i < 4; i++)
2840                     {
2841                         COPY_CABAC_STATES(
2842                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2843                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2844                             IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2845                     }
2846                 }
2847 #endif
2848                 ((pf_store_cu_results)ps_ctxt->pv_store_cu_results)(
2849                     ps_ctxt, ps_cu_prms, &s_final_mode_state);
2850 
2851 #if(!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2852                 if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
2853                 {
2854                     ihevce_update_final_cu_results(
2855                         ps_ctxt,
2856                         ps_ctxt->ps_enc_out_ctxt,
2857                         ps_cu_prms,
2858                         pps_col_pu,
2859                         pi4_col_start_pu_idx,
2860                         ps_cu_update_prms,
2861                         i4_ctb_x_off >> 6,
2862                         i4_ctb_y_off >> 6);
2863 
2864                     ps_ctxt->ps_enc_out_ctxt++;
2865                 }
2866                 else
2867                 {
2868                     ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt;
2869 
2870                     num_child_nodes = num_children_encoded;
2871 
2872                     /* ---- copy the luma & chroma coeffs to final output -------- */
2873                     for(i = 0; i < num_child_nodes; i++)
2874                     {
2875                         i4_num_bytes_ecd_data =
2876                             (ps_ctxt->ps_enc_out_ctxt - i - 1)->ps_cu_prms->i4_num_bytes_ecd_data;
2877                         ps_ctxt->pu1_ecd_data -= i4_num_bytes_ecd_data;
2878                     }
2879 
2880                     i4_num_bytes_ecd_data =
2881                         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
2882                     if(0 != i4_num_bytes_ecd_data)
2883                     {
2884                         memcpy(
2885                             ps_ctxt->pu1_ecd_data,
2886                             &ps_ctxt->pu1_cu_recur_coeffs[0],
2887                             i4_num_bytes_ecd_data);
2888 
2889                         ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
2890                     }
2891 
2892                     ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt - num_child_nodes;
2893 
2894                     memcpy(
2895                         ps_enc_tmp_out_ctxt,
2896                         ps_ctxt->ps_enc_out_ctxt,
2897                         sizeof(ihevce_enc_cu_node_ctxt_t));
2898                     ps_enc_tmp_out_ctxt->ps_cu_prms = &ps_enc_tmp_out_ctxt->s_cu_prms;
2899 
2900                     /* Collocated PU updates */
2901                     i4_num_pus_in_cu = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2902                     /* Copy the collocated MVs and the PU map to frame buffers */
2903                     memcpy(ps_col_mv_bkup, as_col_mv, sizeof(pu_col_mv_t) * i4_num_pus_in_cu);
2904                     memcpy(
2905                         pu1_col_pu_map,
2906                         au1_cu_pu_map,
2907                         (MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE));
2908                     /* Update the frame buffer pointer and the map index */
2909                     *pps_col_pu = ps_col_mv_bkup + i4_num_pus_in_cu;
2910                     *pi4_col_start_pu_idx = i4_col_pu_idx_bkup + i4_num_pus_in_cu;
2911 
2912                     ps_ctxt->ps_enc_out_ctxt = ps_enc_tmp_out_ctxt + 1;
2913                 }
2914 #else
2915 
2916                 ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt;
2917 
2918                 num_child_nodes = num_children_encoded;
2919 
2920                 /* ---- copy the luma & chroma coeffs to final output -------- */
2921                 for(i = 0; i < num_child_nodes; i++)
2922                 {
2923                     i4_num_bytes_ecd_data =
2924                         (ps_ctxt->ps_enc_out_ctxt - i - 1)->ps_cu_prms->i4_num_bytes_ecd_data;
2925                     ps_ctxt->pu1_ecd_data -= i4_num_bytes_ecd_data;
2926                 }
2927 
2928                 i4_num_bytes_ecd_data = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
2929                 if(0 != i4_num_bytes_ecd_data)
2930                 {
2931                     memcpy(
2932                         ps_ctxt->pu1_ecd_data,
2933                         &ps_ctxt->pu1_cu_recur_coeffs[0],
2934                         i4_num_bytes_ecd_data * sizeof(UWORD8));
2935 
2936                     ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
2937                 }
2938 
2939                 ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt - num_child_nodes;
2940 
2941                 memcpy(
2942                     ps_enc_tmp_out_ctxt,
2943                     ps_ctxt->ps_enc_out_ctxt,
2944                     sizeof(ihevce_enc_cu_node_ctxt_t));
2945 
2946                 ps_enc_tmp_out_ctxt->ps_cu_prms = &ps_enc_tmp_out_ctxt->s_cu_prms;
2947 
2948                 /* Collocated PU updates */
2949                 i4_num_pus_in_cu = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2950                 /* Copy the collocated MVs and the PU map to frame buffers */
2951                 memcpy(ps_col_mv_bkup, as_col_mv, sizeof(pu_col_mv_t) * i4_num_pus_in_cu);
2952                 memcpy(
2953                     pu1_col_pu_map,
2954                     au1_cu_pu_map,
2955                     (MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE));
2956                 /* Update the frame buffer pointer and the map index */
2957                 *pps_col_pu = ps_col_mv_bkup + i4_num_pus_in_cu;
2958                 *pi4_col_start_pu_idx = i4_col_pu_idx_bkup + i4_num_pus_in_cu;
2959 
2960                 ps_ctxt->ps_enc_out_ctxt = ps_enc_tmp_out_ctxt + 1;
2961 #endif
2962 
2963                 num_children_encoded = 1;
2964                 DISABLE_THE_CHILDREN_NODES(ps_cu_tree_analyse);
2965             }
2966         }
2967         else /* if(ps_cu_tree_analyse->is_node_valid) */
2968         {
2969             ps_cu_tree_analyse->i8_best_rdopt_cost = i8_least_child_cost;
2970 
2971             /* Tree depth of four will occur for Incomplete CTB */
2972             if((i8_least_child_cost > 0) && (i4_tree_depth != 3))
2973             {
2974 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2975                 /* Store child node Models for evalution of next CU */
2976                 for(i = i4_tree_depth; i < 4; i++)
2977                 {
2978                     COPY_CABAC_STATES(
2979                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2980                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2981                         IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2982                 }
2983 #else
2984                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2985                 {
2986                     for(i = (i4_tree_depth); i < 4; i++)
2987                     {
2988                         COPY_CABAC_STATES(
2989                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2990                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2991                             IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2992                     }
2993                 }
2994 #endif
2995             }
2996         }
2997     }
2998 
2999     return num_children_encoded;
3000 }
3001 
/**
******************************************************************************
* \brief
*    Scans the 8x8-level indicator array over the area covered by a CU and
*    reports whether any covered entry is zero.
*
* \param[in] pi1_8x8CULevel_intraData_availability_indicator
*    Per-8x8 indicator array, indexed as [x + MAX_CU_IN_CTB_ROW * y] with
*    x and y expressed in 8x8 units. Only read here, never written.
*    NOTE(review): presumably a zero entry marks an 8x8 block whose intra
*    data is available - confirm against the code that fills this array.
* \param[in] u1_cu_size
*    CU size in pixels. The recursion halves this value until it equals 8;
*    a value that never becomes exactly 8 under repeated halving (e.g.
*    anything below 8) never reaches the leaf case.
* \param[in] u1_x_8x8CU_units
*    Horizontal position of the CU's top-left corner, in 8x8 units.
* \param[in] u1_y_8x8CU_units
*    Vertical position of the CU's top-left corner, in 8x8 units.
*
* \return
*    For an 8x8 CU: the logical NOT of its indicator entry (0 or 1).
*    For larger CUs: the bitwise OR of the results of the four half-size
*    quadrants, i.e. 1 if at least one covered 8x8 entry is zero, else 0.
******************************************************************************
*/
static UWORD8 ihevce_intraData_availability_extractor(
    WORD8 *pi1_8x8CULevel_intraData_availability_indicator,
    UWORD8 u1_cu_size,
    UWORD8 u1_x_8x8CU_units,
    UWORD8 u1_y_8x8CU_units)
{
    UWORD8 u1_half_cu_size;
    UWORD8 u1_half_in_8x8_units;
    UWORD8 u1_any_quadrant_set = 0;
    UWORD8 u1_quad;

    /* Leaf case: a single 8x8 block maps to exactly one indicator entry */
    if(8 == u1_cu_size)
    {
        return (!pi1_8x8CULevel_intraData_availability_indicator
                    [u1_x_8x8CU_units + MAX_CU_IN_CTB_ROW * u1_y_8x8CU_units]);
    }

    /* Split into four quadrants of half the size and merge their results */
    u1_half_cu_size = u1_cu_size / 2;
    u1_half_in_8x8_units = u1_half_cu_size / 8;

    /* Quadrant order: top-left, top-right, bottom-left, bottom-right.     */
    /* Bit 0 of the index selects the right half, bit 1 the bottom half.   */
    for(u1_quad = 0; u1_quad < 4; u1_quad++)
    {
        UWORD8 u1_quad_x = u1_x_8x8CU_units + ((u1_quad & 1) ? u1_half_in_8x8_units : 0);
        UWORD8 u1_quad_y = u1_y_8x8CU_units + ((u1_quad & 2) ? u1_half_in_8x8_units : 0);

        u1_any_quadrant_set |= ihevce_intraData_availability_extractor(
            pi1_8x8CULevel_intraData_availability_indicator,
            u1_half_cu_size,
            u1_quad_x,
            u1_quad_y);
    }

    return u1_any_quadrant_set;
}
3045 
/**
******************************************************************************
* \brief
*    Recursively merges an intra CU tree and an inter CU tree into
*    ps_merged_tree.
*
* \par Description
*    For the current node:
*      - if an intra node is present, its is_node_valid flag is first ANDed
*        with the result of ihevce_intraData_availability_extractor() for
*        that node (the intra tree itself is modified);
*      - the merged node's is_node_valid is set to the OR of the validity of
*        the nodes that are present, and u1_intra_eval_enable /
*        u1_inter_eval_enable are set to the validity of the respective node
*        (0 when that node is absent).
*    The function then recurses into each of the four children (TL, TR, BL,
*    BR, in that order) for which at least one of the two input trees has a
*    child, passing NULL for the tree that has none. Merged children for
*    which neither input tree has a child are left untouched.
*
* \param[in,out] ps_merged_tree
*    Node of the output tree. Must be non-NULL, and must have a child node
*    wherever either input tree has one, since that child is dereferenced by
*    the recursive call.
*
* \param[in,out] ps_intra_tree
*    Node of the intra tree at the same position, or NULL if absent.
*
* \param[in] ps_inter_tree
*    Node of the inter tree at the same position, or NULL if absent.
*
* \param[in] pi1_8x8CULevel_intraData_availability_indicator
*    Indicator map forwarded to ihevce_intraData_availability_extractor().
*
* \return
*    None
*
* \note
*    Passing NULL for both input trees is a caller error. It is flagged via
*    ASSERT and, should ASSERT be compiled out, the call returns without
*    touching ps_merged_tree.
******************************************************************************
*/
void ihevce_intra_and_inter_cuTree_merger(
    cur_ctb_cu_tree_t *ps_merged_tree,
    cur_ctb_cu_tree_t *ps_intra_tree,
    cur_ctb_cu_tree_t *ps_inter_tree,
    WORD8 *pi1_8x8CULevel_intraData_availability_indicator)
{
    /* Per-child bookkeeping, indexed in the order TL, TR, BL, BR.        */
    /* A NULL entry in the intra / inter arrays marks an absent child.    */
    /* The merged children are held by address so that a merged child     */
    /* pointer is only read when that child is actually recursed into.    */
    cur_ctb_cu_tree_t **apps_merged_children[4];
    cur_ctb_cu_tree_t *aps_intra_children[4] = { NULL, NULL, NULL, NULL };
    cur_ctb_cu_tree_t *aps_inter_children[4] = { NULL, NULL, NULL, NULL };
    int i;

    if((NULL == ps_intra_tree) && (NULL == ps_inter_tree))
    {
        /* The swamps of Dagobah! */
        ASSERT(0);

        /* Nothing to merge. Bail out so that no child information is     */
        /* derived from, and no access is made through, the NULL trees    */
        return;
    }

    if(NULL != ps_intra_tree)
    {
        /* For 8x8 nodes the least significant bit of the position is     */
        /* cleared before the lookup; other sizes use the position as is. */
        /* NOTE(review): presumably the indicator map is only populated   */
        /* at even 8x8 positions for 8x8 CUs - confirm against the writer */
        UWORD8 u1_pos_mask = (8 == ps_intra_tree->u1_cu_size) ? 0xfe : 0xff;

        ps_intra_tree->is_node_valid =
            ps_intra_tree->is_node_valid &
            ihevce_intraData_availability_extractor(
                pi1_8x8CULevel_intraData_availability_indicator,
                ps_intra_tree->u1_cu_size,
                ps_intra_tree->b3_cu_pos_x & u1_pos_mask,
                ps_intra_tree->b3_cu_pos_y & u1_pos_mask);

        aps_intra_children[0] = ps_intra_tree->ps_child_node_tl;
        aps_intra_children[1] = ps_intra_tree->ps_child_node_tr;
        aps_intra_children[2] = ps_intra_tree->ps_child_node_bl;
        aps_intra_children[3] = ps_intra_tree->ps_child_node_br;
    }

    if(NULL != ps_inter_tree)
    {
        aps_inter_children[0] = ps_inter_tree->ps_child_node_tl;
        aps_inter_children[1] = ps_inter_tree->ps_child_node_tr;
        aps_inter_children[2] = ps_inter_tree->ps_child_node_bl;
        aps_inter_children[3] = ps_inter_tree->ps_child_node_br;
    }

    if(NULL == ps_inter_tree)
    {
        /* Only the intra node is present */
        ps_merged_tree->is_node_valid = ps_intra_tree->is_node_valid;
        ps_merged_tree->u1_inter_eval_enable = 0;
        ps_merged_tree->u1_intra_eval_enable = ps_intra_tree->is_node_valid;
    }
    else if(NULL == ps_intra_tree)
    {
        /* Only the inter node is present */
        ps_merged_tree->is_node_valid = ps_inter_tree->is_node_valid;
        ps_merged_tree->u1_inter_eval_enable = ps_inter_tree->is_node_valid;
        ps_merged_tree->u1_intra_eval_enable = 0;
    }
    else
    {
        /* Both nodes are present */
        ps_merged_tree->is_node_valid = ps_intra_tree->is_node_valid ||
                                        ps_inter_tree->is_node_valid;
        ps_merged_tree->u1_inter_eval_enable = ps_inter_tree->is_node_valid;
        ps_merged_tree->u1_intra_eval_enable = ps_intra_tree->is_node_valid;
    }

    apps_merged_children[0] = &ps_merged_tree->ps_child_node_tl;
    apps_merged_children[1] = &ps_merged_tree->ps_child_node_tr;
    apps_merged_children[2] = &ps_merged_tree->ps_child_node_bl;
    apps_merged_children[3] = &ps_merged_tree->ps_child_node_br;

    for(i = 0; i < 4; i++)
    {
        /* Recurse only where at least one input tree continues; the      */
        /* missing side, if any, is handed down as NULL                   */
        if((NULL != aps_intra_children[i]) || (NULL != aps_inter_children[i]))
        {
            ihevce_intra_and_inter_cuTree_merger(
                *apps_merged_children[i],
                aps_intra_children[i],
                aps_inter_children[i],
                pi1_8x8CULevel_intraData_availability_indicator);
        }
    }
}
3268