1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /*!
21 ******************************************************************************
22 * \file ihevce_enc_loop_structs.h
23 *
24 * \brief
*    This file contains the structures of the enc_loop pass
26 *
27 * \date
28 *    18/09/2012
29 *
30 * \author
31 *    Ittiam
32 *
33 ******************************************************************************
34 */
35 
36 #ifndef _IHEVCE_ENC_LOOP_STRUCTS_H_
37 #define _IHEVCE_ENC_LOOP_STRUCTS_H_
38 
39 #include "ihevc_macros.h"
40 
/* Lookup table of 64 * 2 UWORD16 entries; only declared here, the definition
 * is not in this header. COMPUTE_MERGE_IDX_COST (below) indexes it with
 * (context model ^ bin) and adds the entry to a fractional-bit accumulator. */
extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
42 
43 /*****************************************************************************/
44 /* Constant Macros                                                           */
45 /*****************************************************************************/
/** \brief Number of transform types: 4x4 DST, 4x4, 8x8, 16x16 and 32x32 */
#define NUM_TRANS_TYPES 5

/* Mode numbers of the two non-angular intra prediction modes */
#define INTRA_PLANAR 0
#define INTRA_DC 1

#define NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD 2
#define MAX_TU_IN_TU_EQ_DIV_2 4
#define MAX_MVP_LIST_CAND 2

/* "Maximum" cost values. Note MAX_COST is 0x07FFFFFF (27 bits set), */
/* i.e. smaller than the largest 32-bit signed value.                */
#define MAX_COST 0x7ffffff
#define MAX_COST_64 0x7ffffffffffffff

/* 4 CUs of 32x32 plus 1 CU of 64x64 */
#define NUM_32CU_AND_64CU_IN_CTB 5

#define PING_PONG 2
#define MAX_SAO_RD_CAND 10
#define SCRATCH_BUF_STRIDE 80
59 
60 /*****************************************************************************/
61 /* Function Macros                                                           */
62 /*****************************************************************************/
/** \brief Identity mapping: expands to its (parenthesized) argument unchanged */
#define INTRA_ANGULAR(mode) (mode)
64 
/** @brief largest value representable in 30 bits */
#define MAX30 ((1 << 30) - 1)

/** @brief saturate a value (assumed unsigned) to at most MAX30.
 *  Note: the argument is evaluated twice, so it must not have side effects. */
#define CLIP30(x) ((x) > MAX30 ? MAX30 : (x))

/**
 * @brief compute (rate * lambda) >> qshift and saturate the result to 30 bits
 *
 * @param r       rate term
 * @param l       lambda term
 * @param qshift  right shift applied to the product
 *
 * @return the clipped cost as WORD32
 *
 * r is widened to ULWORD64 BEFORE the multiplication; casting only the
 * finished product would let two 32-bit operands overflow first. The outer
 * cast keeps the shift operand integral whatever the type of l.
 * Because of CLIP30, r and l are each evaluated twice.
 */
#define COMPUTE_RATE_COST_CLIP30(r, l, qshift)                                                     \
    ((WORD32)CLIP30((ULWORD64)((ULWORD64)(r) * (l)) >> (qshift)))
73 
/**
 * @brief inverse weighting of a sample:
 *        ((((inp - off) << shift) * wt) + (1 << 14)) >> 15
 *
 * @param inp    input sample
 * @param wt     multiplier applied after the offset removal and shift
 * @param off    offset subtracted from inp
 * @param shift  left shift applied to (inp - off)
 *
 * (1 << 14) is the half-unit rounding term for the final >> 15.
 * Every argument is parenthesized so that expressions such as `a + b`
 * can be passed for wt without changing the result.
 */
#define IHEVCE_INV_WT_PRED(inp, wt, off, shift)                                                    \
    (((((inp) - (off)) << (shift)) * (wt) + (1 << 14)) >> 15)
76 
/**
 * @brief fill a PU structure for a single-list inter prediction
 *
 * Position fields are stored as (offset >> 2) and size fields as
 * ((dimension >> 2) - 1). The intra flag is cleared. A non-zero pred_lx
 * selects the L1 reference index / motion vector and marks L0 as unused
 * (ref idx -1); a zero pred_lx does the opposite. The motion vector of the
 * unused list is left untouched.
 *
 * ps_pu is evaluated several times and pred_lx twice.
 */
#define POPULATE_PU_STRUCT(ps_pu, mvx, mvy, offset_x, offset_y, wd, ht, ref_idx, pred_lx)          \
    {                                                                                              \
        (ps_pu)->b4_pos_x = ((offset_x) >> 2);                                                     \
        (ps_pu)->b4_pos_y = ((offset_y) >> 2);                                                     \
        (ps_pu)->b4_wd = (((wd) >> 2) - 1);                                                        \
        (ps_pu)->b4_ht = (((ht) >> 2) - 1);                                                        \
        (ps_pu)->b1_intra_flag = 0;                                                                \
        (ps_pu)->b2_pred_mode = (pred_lx);                                                         \
        if(!(pred_lx))                                                                             \
        {                                                                                          \
            /* list 0 in use, list 1 invalid */                                                    \
            (ps_pu)->mv.i1_l0_ref_idx = (ref_idx);                                                 \
            (ps_pu)->mv.i1_l1_ref_idx = -1;                                                        \
            (ps_pu)->mv.s_l0_mv.i2_mvx = (mvx);                                                    \
            (ps_pu)->mv.s_l0_mv.i2_mvy = (mvy);                                                    \
        }                                                                                          \
        else                                                                                       \
        {                                                                                          \
            /* list 1 in use, list 0 invalid */                                                    \
            (ps_pu)->mv.i1_l0_ref_idx = -1;                                                        \
            (ps_pu)->mv.i1_l1_ref_idx = (ref_idx);                                                 \
            (ps_pu)->mv.s_l1_mv.i2_mvx = (mvx);                                                    \
            (ps_pu)->mv.s_l1_mv.i2_mvy = (mvy);                                                    \
        }                                                                                          \
    }
100 
/**
 * @brief derive an integer quantizer step from a QP
 *
 * frame_qstep = (WORD32)(2^(qp / 6) * table[qp % 6]); the product is
 * truncated towards zero by the cast. frame_qp is evaluated twice and is
 * used directly as a table index, so it is expected to be non-negative.
 */
#define GET_FRAME_QSTEP_FROM_QP(frame_qp, frame_qstep)                                             \
    {                                                                                              \
        /* fractional step for each value of (qp % 6) */                                           \
        const double ad_qstep_frac[6] = { 0.625, 0.703, 0.79, 0.889, 1.0, 1.125 };                 \
                                                                                                   \
        (frame_qstep) = (WORD32)(ad_qstep_frac[(frame_qp) % 6] * (1 << ((frame_qp) / 6)));         \
    }
107 
/**
 * @brief reset the merge result bookkeeping before a new evaluation
 *
 * @param ps_merge_data   pointer to the structure holding s_pu_results
 * @param pas_pu_results  result storage, subscripted as [list][part][result]
 *
 * - zeroes the per-partition result counts of both lists
 * - points aps_pu_results[list][part] at pas_pu_results[list][part]
 * - marks every result slot as unused: cost = MAX_COST, both ref idx = -1
 *
 * pas_pu_results is parenthesized before subscripting so that expressions
 * such as `*pp` bind correctly. The macro declares locals i, j and k, so the
 * arguments must not reference caller variables with those names.
 */
#define INITIALISE_MERGE_RESULT_STRUCT(ps_merge_data, pas_pu_results)                              \
    {                                                                                              \
        WORD32 i, j, k;                                                                            \
                                                                                                   \
        for(i = 0; i < TOT_NUM_PARTS; i++)                                                         \
        {                                                                                          \
            (ps_merge_data)->s_pu_results.u1_num_results_per_part_l0[i] = 0;                       \
            (ps_merge_data)->s_pu_results.u1_num_results_per_part_l1[i] = 0;                       \
        }                                                                                          \
        /* i iterates over the two lists */                                                        \
        for(i = 0; i < 2; i++)                                                                     \
        {                                                                                          \
            for(j = 0; j < TOT_NUM_PARTS; j++)                                                     \
            {                                                                                      \
                (ps_merge_data)->s_pu_results.aps_pu_results[i][j] = (pas_pu_results)[i][j];       \
                for(k = 0; k < MAX_NUM_RESULTS_PER_PART_LIST; k++)                                 \
                {                                                                                  \
                    (pas_pu_results)[i][j][k].i4_tot_cost = MAX_COST;                              \
                    (pas_pu_results)[i][j][k].pu.mv.i1_l0_ref_idx = -1;                            \
                    (pas_pu_results)[i][j][k].pu.mv.i1_l1_ref_idx = -1;                            \
                }                                                                                  \
            }                                                                                      \
        }                                                                                          \
    }
131 
/**
 * @brief copy the CTB level parameters into a common frame params structure
 *
 * The opening parenthesis of the parameter list must directly follow the
 * macro name: with whitespace in between the preprocessor treats the macro
 * as object-like and it cannot be invoked with arguments. Parameter names
 * deliberately differ from the structure member names, otherwise the member
 * tokens themselves would be replaced by the arguments.
 *
 * @param ps_common_frm_prms  pointer to the structure being filled
 * @param wt_inp              2-D array subscripted as [2][MAX_NUM_REF]
 * @param ctb_x_off           stored in i4_ctb_x_off
 * @param ctb_y_off           stored in i4_ctb_y_off
 * @param pred                stored in ppu1_pred
 * @param cu_size             stored in both i4_inp_stride and i4_pred_stride
 * @param ref_stride          stored in i4_rec_stride
 * @param bidir_enabled       stored in i4_bidir_enabled
 * @param num_refs            stored in u1_num_ref
 * @param rec_list_l0         stored in pps_rec_list_l0
 * @param rec_list_l1         stored in pps_rec_list_l1
 * @param non_wt_inp          stored in pu1_non_wt_inp
 * @param lambda              stored in i4_lamda
 * @param lambda_q_shift      stored in u1_lamda_qshift
 * @param log_wdc             stored in wpred_log_wdc
 *
 * pu1_wkg_mem is always set to NULL. The macro declares locals i and j, so
 * the arguments must not reference caller variables with those names.
 */
#define POPULATE_CTB_PARAMS(ps_common_frm_prms, wt_inp, ctb_x_off, ctb_y_off, pred, cu_size,       \
                            ref_stride, bidir_enabled, num_refs, rec_list_l0, rec_list_l1,         \
                            non_wt_inp, lambda, lambda_q_shift, log_wdc)                           \
    {                                                                                              \
        WORD32 i, j;                                                                               \
        (ps_common_frm_prms)->i4_bidir_enabled = (bidir_enabled);                                  \
        (ps_common_frm_prms)->i4_ctb_x_off = (ctb_x_off);                                          \
        (ps_common_frm_prms)->i4_ctb_y_off = (ctb_y_off);                                          \
        (ps_common_frm_prms)->i4_inp_stride = (cu_size);                                           \
        (ps_common_frm_prms)->i4_lamda = (lambda);                                                 \
        (ps_common_frm_prms)->i4_pred_stride = (cu_size);                                          \
        (ps_common_frm_prms)->i4_rec_stride = (ref_stride);                                        \
        (ps_common_frm_prms)->pps_rec_list_l0 = (rec_list_l0);                                     \
        (ps_common_frm_prms)->pps_rec_list_l1 = (rec_list_l1);                                     \
        (ps_common_frm_prms)->ppu1_pred = (pred);                                                  \
        (ps_common_frm_prms)->pu1_non_wt_inp = (non_wt_inp);                                       \
        (ps_common_frm_prms)->pu1_wkg_mem = NULL;                                                  \
        (ps_common_frm_prms)->u1_lamda_qshift = (lambda_q_shift);                                  \
        (ps_common_frm_prms)->u1_num_ref = (num_refs);                                             \
        (ps_common_frm_prms)->wpred_log_wdc = (log_wdc);                                           \
        /* NOTE(review): every iteration writes the same member, so only   */                      \
        /* wt_inp[1][MAX_NUM_REF - 1] survives. Kept as written because    */                      \
        /* the member type is not visible here; presumably a per-[i][j]    */                      \
        /* destination was intended - confirm against the structure.       */                      \
        for(i = 0; i < 2; i++)                                                                     \
        {                                                                                          \
            for(j = 0; j < MAX_NUM_REF; j++)                                                       \
            {                                                                                      \
                (ps_common_frm_prms)->apu1_wt_inp = (wt_inp)[i][j];                                \
            }                                                                                      \
        }                                                                                          \
    }
173 
/**
 * @brief estimate the rate cost of signalling a merge index
 *
 * @param merge_idx_0_model  context model of the first merge idx bin; XORed
 *                           with the bin value to index
 *                           gau2_ihevce_cabac_bin_to_bits
 * @param merge_idx          merge index, 0 <= merge_idx < max_merge_cand
 * @param max_merge_cand     number of merge candidates
 * @param lambda             multiplier passed to COMPUTE_RATE_COST_CLIP30
 * @param cost               lvalue receiving the result
 *
 * With a single candidate nothing is coded and cost is 0. Otherwise the
 * first bin (merge_idx > 0) is costed from the table and, when there are
 * more than two candidates and merge_idx > 0, a further
 * MIN(merge_idx, max_merge_cand - 2) bins are counted at exactly one bit
 * each (1 << CABAC_FRAC_BITS_Q). The total is scaled by lambda with a shift
 * of (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q).
 *
 * All arguments are parenthesized so that expressions (e.g. `a | b` for the
 * model) keep their meaning inside the expansion. Arguments are evaluated
 * more than once. The macro declares locals cab_bits_q12 and bin.
 */
#define COMPUTE_MERGE_IDX_COST(merge_idx_0_model, merge_idx, max_merge_cand, lambda, cost)         \
    {                                                                                              \
        WORD32 cab_bits_q12 = 0;                                                                   \
                                                                                                   \
        /* sanity checks */                                                                        \
        ASSERT(((merge_idx) >= 0) && ((merge_idx) < (max_merge_cand)));                            \
                                                                                                   \
        /* encode the merge idx only if required */                                                \
        if((max_merge_cand) > 1)                                                                   \
        {                                                                                          \
            WORD32 bin = ((merge_idx) > 0);                                                        \
                                                                                                   \
            /* bits for the context modelled first bin */                                          \
            cab_bits_q12 += gau2_ihevce_cabac_bin_to_bits[(merge_idx_0_model) ^ bin];              \
                                                                                                   \
            /* bits for larger merge idx, coded as bypass truncated unary */                       \
            if(((max_merge_cand) > 2) && ((merge_idx) > 0))                                        \
            {                                                                                      \
                cab_bits_q12 += (MIN((merge_idx), ((max_merge_cand)-2))) << CABAC_FRAC_BITS_Q;     \
            }                                                                                      \
                                                                                                   \
            (cost) = COMPUTE_RATE_COST_CLIP30(                                                     \
                cab_bits_q12, (lambda), (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));                     \
        }                                                                                          \
        else                                                                                       \
        {                                                                                          \
            (cost) = 0;                                                                            \
        }                                                                                          \
    }
203 
204 /*****************************************************************************/
205 /* Typedefs                                                                  */
206 /*****************************************************************************/
207 
/** \brief pointer to an FT_CALC_HAD_SATD_8BIT (declared outside this header;
presumably a function type - confirm at its declaration) */
typedef FT_CALC_HAD_SATD_8BIT *pf_res_trans_luma_had_chroma;
209 
/** \brief function pointer prototype for residue and transform enc_loop.
Takes source and prediction pointers with their strides, a WORD32 scratch
pointer and a WORD16 destination; returns a UWORD32 whose meaning is not
visible in this header. NOTE(review): dst_strd_chr_flag presumably combines
the destination stride with a chroma flag - confirm against implementations. */
typedef UWORD32 (*pf_res_trans_chroma)(
    UWORD8 *pu1_src,
    UWORD8 *pu1_pred,
    WORD32 *pi4_tmp,
    WORD16 *pi2_dst,
    WORD32 src_strd,
    WORD32 pred_strd,
    WORD32 dst_strd_chr_flag);
219 
/** \brief function pointer prototype for quantization and inv Quant for ssd
calc. for all transform sizes.
Has separate strides for the source, quantized (q) and inverse quantized (iq)
buffers, two quant rounding factor tables, and a 64-bit cost pointer
(pi8_cost). The meaning of the WORD32 return value is not visible here. */
typedef WORD32 (*pf_quant_iquant_ssd)(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div, /* qpscaled / 6 */
    WORD32 qp_rem, /* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost);
242 
/** \brief function pointer prototype for quantization and inv Quant for ssd
calc. for all transform sizes (in case of RDOQ + SBH).
Compared with pf_quant_iquant_ssd this variant has no rounding factor tables,
takes a 32-bit cost pointer (pi4_cost) and two extra arguments: the scan index
and a flag selecting whether RDOQ is performed. */
typedef WORD32 (*pf_quant_iquant_ssd_sbh)(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div, /* qpscaled / 6 */
    WORD32 qp_rem, /* qpscaled % 6 */
    WORD32 q_add,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    WORD32 *pi4_cost,
    WORD32 i4_scan_idx,
    WORD32 i4_perform_rdoq);
265 
/** \brief function pointer prototype for inverse transform and recon
for all transform sizes : Luma.
Takes WORD16 source and scratch pointers, 8-bit prediction and destination
pointers with one stride each, and zero_cols / zero_rows values
(presumably masks of all-zero columns / rows - confirm). */
typedef void (*pf_it_recon)(
    WORD16 *pi2_src,
    WORD16 *pi2_tmp,
    UWORD8 *pu1_pred,
    UWORD8 *pu1_dst,
    WORD32 src_strd,
    WORD32 pred_strd,
    WORD32 dst_strd,
    WORD32 zero_cols,
    WORD32 zero_rows);
278 
/** \brief function pointer prototype for inverse transform and recon
for all transform sizes : Chroma.
The parameter list is identical to that of pf_it_recon. */
typedef void (*pf_it_recon_chroma)(
    WORD16 *pi2_src,
    WORD16 *pi2_tmp,
    UWORD8 *pu1_pred,
    UWORD8 *pu1_dst,
    WORD32 src_strd,
    WORD32 pred_strd,
    WORD32 dst_strd,
    WORD32 zero_cols,
    WORD32 zero_rows);
291 
/** \brief function pointer prototype for luma sao.
Takes the source block with its stride, pointers to left, top, top-left,
top-right and bottom-left neighbour data, an availability array, one SAO
offset table, and the block width and height. */
typedef void (*pf_sao_luma)(
    UWORD8 *pu1_src,
    WORD32 src_strd,
    UWORD8 *pu1_src_left,
    UWORD8 *pu1_src_top,
    UWORD8 *pu1_src_top_left,
    UWORD8 *pu1_src_top_right,
    UWORD8 *pu1_src_bot_left,
    UWORD8 *pu1_avail,
    WORD8 *pi1_sao_offset,
    WORD32 wd,
    WORD32 ht);
305 
/** \brief function pointer prototype for chroma sao.
Same arguments as pf_sao_luma except that separate SAO offset tables are
passed for the U and V components. */
typedef void (*pf_sao_chroma)(
    UWORD8 *pu1_src,
    WORD32 src_strd,
    UWORD8 *pu1_src_left,
    UWORD8 *pu1_src_top,
    UWORD8 *pu1_src_top_left,
    UWORD8 *pu1_src_top_right,
    UWORD8 *pu1_src_bot_left,
    UWORD8 *pu1_avail,
    WORD8 *pi1_sao_offset_u,
    WORD8 *pi1_sao_offset_v,
    WORD32 wd,
    WORD32 ht);
320 
321 /*****************************************************************************/
322 /* Enums                                                                     */
323 /*****************************************************************************/
324 
/**
 * Indices of the intra prediction function variants. Going by the names, each
 * entry stands for one intra mode or a range of modes (e.g. 3TO9, 18_34) -
 * TODO confirm against the function table that uses these indices.
 */
typedef enum
{
    IP_FUNC_MODE_0 = 0,
    IP_FUNC_MODE_1,
    IP_FUNC_MODE_2,
    IP_FUNC_MODE_3TO9,
    IP_FUNC_MODE_10,
    IP_FUNC_MODE_11TO17,
    IP_FUNC_MODE_18_34,
    IP_FUNC_MODE_19TO25,
    IP_FUNC_MODE_26,
    IP_FUNC_MODE_27TO33,

    /* number of entries above; must remain the last enumerator */
    NUM_IP_FUNCS
} IP_FUNCS_T;
341 
/** TU size expressed relative to the CU size */
typedef enum
{
    /* modes supported at present: TU equal to the CU, and half the CU */
    TU_EQ_CU = 0,
    TU_EQ_CU_DIV2,
    /* applies only to NXN mode at the minimum CU size */
    TU_EQ_SUBCU,

    /* not supported yet; support still has to be added */
    TU_EQ_CU_DIV4,
    TU_EQ_CU_DIV8,
    TU_EQ_CU_DIV16,

    /* number of entries above; must remain the last enumerator */
    NUM_TU_WRT_CU
} TU_SIZE_WRT_CU_T;
357 
/** Mode in which a core function is called: RDOPT or RDOPT skip */
typedef enum
{
    RDOPT_MODE = 0,
    RDOPT_SKIP_MODE = 1,

    /* number of call modes; must remain the last enumerator */
    NUM_CORE_CALL_MODES
} CORE_FUNC_CALL_MODE_T;
366 
/**
 * Identifiers of the enc_loop memory records, one enumerator per record.
 * Values are implicit and consecutive from 0, so new records must be
 * inserted before NUM_ENC_LOOP_MEM_RECS.
 */
typedef enum
{
    ENC_LOOP_CTXT = 0,
    ENC_LOOP_THRDS_CTXT,
    ENC_LOOP_SCALE_MAT,
    ENC_LOOP_RESCALE_MAT,
    ENC_LOOP_TOP_LUMA,
    ENC_LOOP_TOP_CHROMA,
    ENC_LOOP_TOP_NBR4X4,
    /* memory where each thread dumps rate control parameters for each bit-rate instance */
    ENC_LOOP_RC_PARAMS,
    ENC_LOOP_QP_TOP_4X4,
    ENC_LOOP_DEBLOCKING,
    ENC_LOOP_422_CHROMA_INTRA_PRED,
    ENC_LOOP_INTER_PRED,
    ENC_LOOP_CHROMA_PRED_INTRA,
    ENC_LOOP_REF_SUB_OUT,
    ENC_LOOP_REF_FILT_OUT,
    ENC_LOOP_CU_RECUR_LUMA_RECON,
    ENC_LOOP_CU_RECUR_CHROMA_RECON,
    ENC_LOOP_CU_RECUR_LUMA_PRED,
    ENC_LOOP_CU_RECUR_CHROMA_PRED,
    ENC_LOOP_LEFT_LUMA_DATA,
    ENC_LOOP_LEFT_CHROMA_DATA,
    ENC_LOOP_SAO,
    ENC_LOOP_CU_COEFF_DATA,
    ENC_LOOP_CU_RECUR_COEFF_DATA,
    ENC_LOOP_CU_DEQUANT_DATA,
    ENC_LOOP_RECON_DATA_STORE,

    /* number of records; must remain the last enumerator */
    NUM_ENC_LOOP_MEM_RECS
} ENC_LOOP_MEM_TABS_T;
399 
/** Indices used for assigning the pred buffers: two for luma (ping-pong)
and one for chroma */
typedef enum
{
    CU_ME_INTRA_PRED_LUMA_IDX0 = 0,
    CU_ME_INTRA_PRED_LUMA_IDX1,
    CU_ME_INTRA_PRED_CHROMA_IDX,

    /* number of pred buffer indices; must remain the last enumerator */
    NUM_CU_ME_INTRA_PRED_IDX
} CU_ME_INTRA_PRED_IDX_T;
412 
413 /*****************************************************************************/
414 /* Structure                                                                 */
415 /*****************************************************************************/
416 
/**
******************************************************************************
*  @brief     Structure to store TU prms req. for enc_loop only
*
*  Holds, per TU, the zero row / zero column info and the number of bytes of
*  ecd data for luma, Cb and Cr, plus four 1-bit evaluation flags.
*  The Cb / Cr members are arrays of 2 (presumably one entry per chroma
*  sub-TU - confirm against the users of this structure).
******************************************************************************
*/
typedef struct
{
    /** Zero_col info. for the current TU Luma */
    UWORD32 u4_luma_zero_col;
    /** Zero_row info. for the current TU Luma */
    UWORD32 u4_luma_zero_row;

    /** Zero_col info. for the current TU Chroma Cb */
    UWORD32 au4_cb_zero_col[2];
    /** Zero_row info. for the current TU Chroma Cb */
    UWORD32 au4_cb_zero_row[2];
    /** Zero_col info. for the current TU Chroma Cr */
    UWORD32 au4_cr_zero_col[2];
    /** Zero_row info. for the current TU Chroma Cr */
    UWORD32 au4_cr_zero_row[2];

    /** bytes consumed by the luma ecd data */
    WORD16 i2_luma_bytes_consumed;
    /** bytes consumed by the Cb ecd data */
    WORD16 ai2_cb_bytes_consumed[2];
    /** bytes consumed by the Cr ecd data */
    WORD16 ai2_cr_bytes_consumed[2];

    /** flag to re-evaluate IQ and Coeff data of luma in the final_recon
    function. If zero, uses the data from RDOPT cand.                   */
    UWORD16 b1_eval_luma_iq_and_coeff_data : 1;
    /** flag to re-evaluate IQ and Coeff data of chroma in the final_recon
    function. If zero, uses the data from RDOPT cand.                   */
    UWORD16 b1_eval_chroma_iq_and_coeff_data : 1;

    /* TO DO : No support now, need to add. Always compare ZERO_CBF cost */
    /** Luma ZERO_CBF cost is compared with residue coding cost only if this
    flag is enabled */
    UWORD16 b1_eval_luma_zero_cbf_cost : 1;
    /** Chroma ZERO_CBF cost is compared with residue coding cost only if this
    flag is enabled */
    UWORD16 b1_eval_chroma_zero_cbf_cost : 1;

    /** Reserved bits: 4 flag bits + 12 reserved bits fill one UWORD16 */
    UWORD16 b12_reserved : 12;

} tu_enc_loop_temp_prms_t;
464 
/**
* Book-keeping for the recon buffers: the luma and chroma recon buffer
* pointers, their strides, availability flags, and per-TU IDs of the buffer
* that holds the winning recon.
*/
typedef struct recon_datastore_t
{
    /* 2 to store current and best */
    void *apv_luma_recon_bufs[2];

    /* 0 to store cur chroma mode recon */
    /* 1 to store winning independent chroma mode with a single TU's recon */
    /* 2 to store winning independent chroma mode with 4 TUs' recon */
    void *apv_chroma_recon_bufs[3];

    /* The following two arrays are used to store the IDs of the buffers */
    /* where the winning recon is being stored */
    /* For Luma buffers, the permissible values are 0, 1 and UCHAR_MAX */
    /* For Chroma buffers, the permissible values are 0, 1, 2 and UCHAR_MAX */
    /* The value 'UCHAR_MAX' indicates the absence of Recon for that particular TU */
    UWORD8 au1_bufId_with_winning_LumaRecon[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /* Dimensions, in order: */
    /* 2 - 2 Chroma planes */
    /* MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW - one entry per TU */
    /* 2 - 2 possible subTU's */
    UWORD8 au1_bufId_with_winning_ChromaRecon[2][MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW][2];

    /* stride of the luma recon buffers */
    WORD32 i4_lumaRecon_stride;

    /* stride of the chroma recon buffers */
    WORD32 i4_chromaRecon_stride;

    /* one availability flag per entry of apv_chroma_recon_bufs (presumably - confirm) */
    UWORD8 au1_is_chromaRecon_available[3];

    /* availability flag for the luma recon */
    UWORD8 u1_is_lumaRecon_available;

} recon_datastore_t;
495 
/**
* Final parameters of a CU as produced by the enc_loop: prediction mode and
* partition info, PU / TU arrays, bit and distortion estimates from the RDO
* evaluation, and pointers to the coefficient / dequantized data.
*/
typedef struct enc_loop_cu_final_prms_t
{
    /** recon buffer book-keeping for this CU */
    recon_datastore_t s_recon_datastore;

    /**
    * Cu size of the current cu being processed
    */
    UWORD8 u1_cu_size;
    /**
    * flags to indicate the final cu prediction mode
    */
    UWORD8 u1_intra_flag;

    /**
    * flags to indicate Skip mode for CU
    */
    UWORD8 u1_skip_flag;

    /**
    * number of tu in current cu for a given mode
    * if skip then this value should be 1
    */
    UWORD16 u2_num_tus_in_cu;

    /**
    * number of pu in current cu for a given mode
    * if skip then this value should be 1
    */
    UWORD16 u2_num_pus_in_cu;

    /**
    * total bytes produced in ECD data buffer
    * if skip then this value should be 0
    */
    WORD32 i4_num_bytes_ecd_data;

    /**
    * Partition mode of the best candidate
    * if skip then this value should be SIZE_2Nx2N
    * @sa PART_SIZE_E
    */
    UWORD8 u1_part_mode;

    /**
    * indicates if inter cu has coded coeffs 1: coded, 0: not coded
    * if skip then this value should be ignored
    */
    UWORD8 u1_is_cu_coded;

    /**
    * Chroma pred mode as signalled in bitstream
    */
    UWORD8 u1_chroma_intra_pred_mode;

    /**
    * To store the best chroma mode for TU. Will be same for NxN case.
    * Actual Chroma pred
    */
    UWORD8 u1_chroma_intra_pred_actual_mode;

    /**
    * sad accumulated over all Tus of given CU
    */
    UWORD32 u4_cu_sad;

    /**
    * ssd accumulated over all Tus of given CU
    * NOTE(review): "ssd" inferred from the member name - confirm
    */
    LWORD64 i8_cu_ssd;

    /**
    * open loop intra sad
    */
    UWORD32 u4_cu_open_intra_sad;

    /**
    * header bits of cu estimated during RDO evaluation.
    * Includes tu splits flags excludes cbf flags
    */
    UWORD32 u4_cu_hdr_bits;
    /**
    * luma residual bits of a cu estimated during RDO evaluation.
    */
    UWORD32 u4_cu_luma_res_bits;

    /**
    * chroma residual bits of a cu estimated during RDO evaluation.
    */
    UWORD32 u4_cu_chroma_res_bits;

    /**
    * cbf bits of a cu estimated during RDO evaluation (considered as part of texture bits later)
    */
    UWORD32 u4_cu_cbf_bits;

    /**
    * array of PU for current CU
    * For Inter PUs this will contain the following
    *   - merge flag
    *   - (MVD and reference indices) or (Merge Index)
    *   - (if Cu is skipped then Merge index for skip
    *      will be in 1st PU entry in array)
    * for intra PU only intra flag will be set to 1
    *
    */
    pu_t as_pu_enc_loop[NUM_PU_PARTS];

    /**
    * array of PU for chroma usage
    * in case of Merge, the MVs and reference idx of the final candidate
    * used by luma need to be stored
    * for intra PU this will not be used
    */
    pu_t as_pu_chrm_proc[NUM_PU_PARTS];

    /**
    * array of colocated PU for current CU
    * MV and Ref pic id should be stored in this
    * for intra PU only intra flag will be set to 1
    */
    pu_col_mv_t as_col_pu_enc_loop[NUM_INTER_PU_PARTS];

    /** array to store the intra mode pred related params
    * if nxn mode then all 4 locations will be used
    */
    intra_prev_rem_flags_t as_intra_prev_rem[NUM_PU_PARTS];

    /**
    * array to store TU properties of each tu in a CU
    */
    tu_enc_loop_out_t as_tu_enc_loop[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /**
    * array to store TU properties (req. for enc_loop only and not for
    * entropy) of each tu in a CU
    */
    tu_enc_loop_temp_prms_t as_tu_enc_loop_temp_prms[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /**
    * Neighbour flags stored for chroma reuse
    */
    UWORD32 au4_nbr_flags[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /**
    * intra pred modes stored for chroma reuse
    */
    UWORD8 au1_intra_pred_mode[4];

    /**
    * pointer to the storage for coeffs during RD opt stage at CU level.
    * Luma and chroma together
    */
    UWORD8 *pu1_cu_coeffs;

    /**
    * Chroma start index for the coeff data.
    * NOTE(review): presumably an offset into pu1_cu_coeffs (inferred from
    * the member name) - confirm
    */
    WORD32 i4_chrm_cu_coeff_strt_idx;

    /**
    * pointer to the storage for dequantized vals. during RD opt stage at
    * CU level. Luma and chroma together.
    * Stride is assumed to be cu_size
    * u-v interleaved storing is at TU level
    */
    WORD16 *pi2_cu_deq_coeffs;

    /**
    * Chroma deq_coeffs start point in the pi2_cu_deq_coeffs buffer.
    */
    WORD32 i4_chrm_deq_coeff_strt_idx;

    /**
    * The total RDOPT cost of the CU for the best mode
    */
    LWORD64 i8_best_rdopt_cost;

    /**
    * The current running RDOPT cost for the current mode
    */
    LWORD64 i8_curr_rdopt_cost;

    /**
    * distortion of the best mode (presumably - inferred from the member
    * name; confirm)
    */
    LWORD64 i8_best_distortion;

} enc_loop_cu_final_prms_t;
681 
/**
* Chroma buffer pointers and strides for the current CU.
* Each UWORD8 pointer has a UWORD16 counterpart (pu2_*); presumably the
* 16-bit versions serve sample depths above 8 bits - confirm.
*/
typedef struct
{
    /** Current Cu chroma recon pointer in pic buffer */
    UWORD8 *pu1_final_recon;

    /** UWORD16 counterpart of pu1_final_recon */
    UWORD16 *pu2_final_recon;

    /** Current Cu chroma source pointer in pic buffer */
    UWORD8 *pu1_curr_src;

    /** UWORD16 counterpart of pu1_curr_src */
    UWORD16 *pu2_curr_src;

    /** Current CU chroma recon buffer stride */
    WORD32 i4_chrm_recon_stride;

    /** Current CU chroma source buffer stride */
    WORD32 i4_chrm_src_stride;

    /** Current Cu chroma Left pointer for intra pred */
    UWORD8 *pu1_cu_left;

    /** UWORD16 counterpart of pu1_cu_left */
    UWORD16 *pu2_cu_left;

    /** Left buffer stride */
    WORD32 i4_cu_left_stride;

    /** Current Cu chroma top pointer for intra pred */
    UWORD8 *pu1_cu_top;

    /** UWORD16 counterpart of pu1_cu_top */
    UWORD16 *pu2_cu_top;

    /** Current Cu chroma top left pointer for intra pred */
    UWORD8 *pu1_cu_top_left;

    /** UWORD16 counterpart of pu1_cu_top_left */
    UWORD16 *pu2_cu_top_left;

} enc_loop_chrm_cu_buf_prms_t;
719 
/** One intra SATD candidate of a CU: its cost, TU size, luma modes,
* partition mode and an evaluation flag */
typedef struct
{
    /** cost of the current satd cand */
    WORD32 i4_cost;

    /** tu size w.r.t to cu of the current satd cand
    * @sa TU_SIZE_WRT_CU_T
    * NOTE(review): the member is a WORD8 although its name carries the
    * i4_ prefix
    */
    WORD8 i4_tu_depth;

    /**
    *  access valid number of entries in this array based on u1_part_mode
    */
    UWORD8 au1_intra_luma_modes[NUM_PU_PARTS];

    /** @remarks partition mode: 2Nx2N or NxN  */
    UWORD8 u1_part_mode; /* @sa: PART_SIZE_E */

    /** Flag to indicate whether current candidate needs to be evaluated */
    UWORD8 u1_eval_flag;

} cu_intra_satd_out_t;
742 
743 /** \brief cu level parameters for SATD / RDOPT function */
744 
745 typedef struct
746 {
747     /** pointer to source luma pointer
748     *  pointer will be pointing to CTB start location
749     *  At CU level based on the CU position this pointer
750     *  has to appropriately incremented
751     */
752     UWORD8 *pu1_luma_src;
753 
754     UWORD16 *pu2_luma_src;
755 
756     /** pointer to source chroma pointer
757     *  pointer will be pointing to CTB start location
758     *  At CU level based on the CU position this pointer
759     *  has to appropriately incremented
760     */
761     UWORD8 *pu1_chrm_src;
762 
763     UWORD16 *pu2_chrm_src;
764 
765     /** pointer to recon luma pointer
766     *  pointer will be pointing to CTB start location
767     *  At CU level based on the CU position this pointer
768     *  has to appropriately incremented
769     */
770     UWORD8 *pu1_luma_recon;
771 
772     UWORD16 *pu2_luma_recon;
773 
774     /** pointer to recon chroma pointer
775     *  pointer will be pointing to CTB start location
776     *  At CU level based on the CU position this pointer
777     *  has to appropriately incremented
778     */
779     UWORD8 *pu1_chrm_recon;
780 
781     UWORD16 *pu2_chrm_recon;
782 
783     /*1st pass parallel dpb buffer pointers aimilar to the above*/
784     UWORD8 *pu1_luma_recon_src;
785 
786     UWORD16 *pu2_luma_recon_src;
787 
788     UWORD8 *pu1_chrm_recon_src;
789 
790     UWORD16 *pu2_chrm_recon_src;
791 
792     /** Pointer to Subpel Plane Buffer */
793     UWORD8 *pu1_sbpel_hxfy;
794 
795     /** Pointer to Subpel Plane Buffer */
796     UWORD8 *pu1_sbpel_fxhy;
797 
798     /** Pointer to Subpel Plane Buffer */
799     UWORD8 *pu1_sbpel_hxhy;
800 
801     /** Luma source stride */
802     WORD32 i4_luma_src_stride;
803 
804     /** chroma soruce stride */
805     WORD32 i4_chrm_src_stride;
806 
807     /** Luma recon stride */
808     WORD32 i4_luma_recon_stride;
809 
810     /** chroma recon stride */
811     WORD32 i4_chrm_recon_stride;
812 
813     /** ctb size */
814     WORD32 i4_ctb_size;
815 
816     /** current ctb postion horz */
817     WORD32 i4_ctb_pos;
818 
819     /** number of PU finalized for curr CU  */
820     WORD32 i4_num_pus_in_cu;
821 
822     /** number of bytes consumed for current in ecd data buf */
823     WORD32 i4_num_bytes_cons;
824 
825     UWORD8 u1_is_cu_noisy;
826 
827     UWORD8 *pu1_is_8x8Blk_noisy;
828 
829 } enc_loop_cu_prms_t;
830 
831 /**
832 ******************************************************************************
833 *  @brief Pad inter pred recon context
834 ******************************************************************************
835 */
836 typedef struct
837 {
838     /** Pointer to Subpel Plane Buffer */
839     UWORD8 *pu1_sbpel_hxfy;
840 
841     /** Pointer to Subpel Plane Buffer */
842     UWORD8 *pu1_sbpel_fxhy;
843 
844     /** Pointer to Subpel Plane Buffer */
845     UWORD8 *pu1_sbpel_hxhy;
846 
847     /** pointer to recon luma pointer
848     *  pointer will be pointing to CTB start location
849     *  At CU level based on the CU position this pointer
850     *  has to appropriately incremented
851     */
852     UWORD8 *pu1_luma_recon;
853 
854     /** pointer to recon chroma pointer
855     *  pointer will be pointing to CTB start location
856     *  At CU level based on the CU position this pointer
857     *  has to appropriately incremented
858     */
859     UWORD8 *pu1_chrm_recon;
860 
861     /*FOr recon source 1st pass starts*/
862 
863     UWORD8 *pu1_luma_recon_src;
864 
865     /** pointer to recon chroma pointer
866     *  pointer will be pointing to CTB start location
867     *  At CU level based on the CU position this pointer
868     *  has to appropriately incremented
869     */
870     UWORD8 *pu1_chrm_recon_src;
871     /*FOr recon source 1st pass ends */
872     /** Luma recon stride */
873     WORD32 i4_luma_recon_stride;
874 
875     /** chroma recon stride */
876     WORD32 i4_chrm_recon_stride;
877 
878     /** ctb size */
879     WORD32 i4_ctb_size;
880 
881     /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
882     UWORD8 u1_chroma_array_type;
883 
884 } pad_interp_recon_frm_t;
885 
886 /**
887 ******************************************************************************
888 *  @brief inter prediction (MC) context for enc loop
889 ******************************************************************************
890 */
891 /*IMPORTANT please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t as identical*/
892 typedef struct
893 {
894     /** pointer to reference lists */
895     recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];
896 
897     /** scratch buffer for horizontal interpolation destination */
898     WORD16 MEM_ALIGN16 ai2_horz_scratch[MAX_CTB_SIZE * (MAX_CTB_SIZE + 8)];
899 
900     /** scratch 16 bit buffer for interpolation in l0 direction */
901     WORD16 MEM_ALIGN16 ai2_scratch_buf_l0[MAX_CTB_SIZE * MAX_CTB_SIZE];
902 
903     /** scratch 16 bit buffer for interpolation in l1 direction */
904     WORD16 MEM_ALIGN16 ai2_scratch_buf_l1[MAX_CTB_SIZE * MAX_CTB_SIZE];
905 
906     /** Pointer to struct containing function pointers to
907     functions in the 'common' library' */
908     func_selector_t *ps_func_selector;
909 
910     /** common denominator used for luma weights */
911     WORD32 i4_log2_luma_wght_denom;
912 
913     /** common denominator used for chroma weights */
914     WORD32 i4_log2_chroma_wght_denom;
915 
916     /**  offset w.r.t frame start in horz direction (pels) */
917     WORD32 i4_ctb_frm_pos_x;
918 
919     /**  offset w.r.t frame start in vert direction (pels) */
920     WORD32 i4_ctb_frm_pos_y;
921 
922     /* Bit Depth of Input */
923     WORD32 i4_bit_depth;
924 
925     /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
926     UWORD8 u1_chroma_array_type;
927 
928     /** weighted_pred_flag      */
929     WORD8 i1_weighted_pred_flag;
930 
931     /** weighted_bipred_flag    */
932     WORD8 i1_weighted_bipred_flag;
933 
934     /** Structure to describe extra CTBs around frame due to search
935     range associated with distributed-mode. Entries are top, left,
936     right and bottom */
937     WORD32 ai4_tile_xtra_pel[4];
938 
939 } inter_pred_ctxt_t;
940 /*IMPORTANT please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t as identical*/
941 
942 typedef IV_API_CALL_STATUS_T (*PF_LUMA_INTER_PRED_PU)(
943     void *pv_inter_pred_ctxt,
944     pu_t *ps_pu,
945     void *pv_dst_buf,
946     WORD32 dst_stride,
947     WORD32 i4_flag_inter_pred_source);
948 
949 /**
950 ******************************************************************************
951 *  @brief  Motion predictor context structure
952 ******************************************************************************
953 */
954 typedef struct
955 {
956     /** pointer to reference lists */
957     recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];
958 
959     /** pointer to the slice header */
960     slice_header_t *ps_slice_hdr;
961 
962     /** pointer to SPS */
963     sps_t *ps_sps;
964 
965     /** CTB x. In CTB unit*/
966     WORD32 i4_ctb_x;
967 
968     /** CTB y. In CTB unit */
969     WORD32 i4_ctb_y;
970 
971     /** Log2 Parallel Merge Level - 2  */
972     WORD32 i4_log2_parallel_merge_level_minus2;
973 
974     /* Number of extra CTBs external to tile due to fetched search-range around Tile */
975     /* TOP, left, right and bottom */
976     WORD32 ai4_tile_xtra_ctb[4];
977 
978 } mv_pred_ctxt_t;
979 
980 /**
981 ******************************************************************************
982 *  @brief  Deblocking and Boundary strength CTB level structure
983 ******************************************************************************
984 */
985 typedef struct
986 {
987     /** Array to store the packed BS values in horizontal direction  */
988     UWORD32 au4_horz_bs[(MAX_CTB_SIZE >> 3) + 1];
989 
990     /** Array to store the packed BS values in vertical direction  */
991     UWORD32 au4_vert_bs[(MAX_CTB_SIZE >> 3) + 1];
992 
993     /** CTB neighbour availability flags for deblocking */
994     UWORD8 u1_not_first_ctb_col_of_frame;
995     UWORD8 u1_not_first_ctb_row_of_frame;
996 
997 } deblk_bs_ctb_ctxt_t;
998 
999 /**
1000 ******************************************************************************
1001 *  @brief  Deblocking and CTB level structure
1002 ******************************************************************************
1003 */
1004 typedef struct
1005 {
1006     /**
1007     * BS of the last vertical 4x4 column of previous CTB
1008     */
1009     UWORD8 au1_prev_bs[MAX_CTB_SIZE >> 3];
1010 
1011     /**
1012     * BS of the last vertical 4x4 column of previous CTB
1013     */
1014     UWORD8 au1_prev_bs_uv[MAX_CTB_SIZE >> 3];
1015 
1016     /** pointer to top 4x4 ctb nbr structure; for accessing qp  */
1017     nbr_4x4_t *ps_top_ctb_nbr_4x4;
1018 
1019     /** pointer to left 4x4 ctb nbr structure; for accessing qp */
1020     nbr_4x4_t *ps_left_ctb_nbr_4x4;
1021 
1022     /** pointer to current 4x4 ctb nbr structure; for accessing qp */
1023     nbr_4x4_t *ps_cur_ctb_4x4;
1024 
1025     /** max of 8 such contiguous bs to be computed for 64x64 ctb */
1026     UWORD32 *pu4_bs_horz;
1027 
1028     /** max of 8 such contiguous bs to be computed for 64x64 ctb */
1029     UWORD32 *pu4_bs_vert;
1030 
1031     /** ptr to current ctb luma pel in frame */
1032     UWORD8 *pu1_ctb_y;
1033 
1034     UWORD16 *pu2_ctb_y;
1035 
1036     /** ptr to current ctb sp interleaved chroma pel in frame */
1037     UWORD8 *pu1_ctb_uv;
1038 
1039     UWORD16 *pu2_ctb_uv;
1040 
1041     func_selector_t *ps_func_selector;
1042 
1043     /** left nbr buffer stride in terms of 4x4 units */
1044     WORD32 i4_left_nbr_4x4_strd;
1045 
1046     /** current  buffer stride in terms of 4x4 units */
1047     WORD32 i4_cur_4x4_strd;
1048 
1049     /** size in pels 16 / 32 /64 */
1050     WORD32 i4_ctb_size;
1051 
1052     /** stride for luma       */
1053     WORD32 i4_luma_pic_stride;
1054 
1055     /** stride for  chroma */
1056     WORD32 i4_chroma_pic_stride;
1057 
1058     /** boolean indicating if left ctb edge is to be deblocked or not */
1059     WORD32 i4_deblock_left_ctb_edge;
1060 
1061     /** boolean indicating if top ctb edge is to be deblocked or not */
1062     WORD32 i4_deblock_top_ctb_edge;
1063 
1064     /** beta offset index */
1065     WORD32 i4_beta_offset_div2;
1066 
1067     /** tc offset index */
1068     WORD32 i4_tc_offset_div2;
1069 
1070     /** chroma cb qp offset index */
1071     WORD32 i4_cb_qp_indx_offset;
1072 
1073     /** chroma cr qp offset index */
1074     WORD32 i4_cr_qp_indx_offset;
1075 
1076     WORD32 i4_bit_depth;
1077 
1078     /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
1079     UWORD8 u1_chroma_array_type;
1080 
1081 } deblk_ctb_params_t;
1082 
1083 /**
1084 ******************************************************************************
1085 *  @brief  Stores the BS and Qp of a CTB row. For CTB-row level deblocking
1086 ******************************************************************************
1087 */
1088 typedef struct deblk_ctbrow_prms
1089 {
1090     /**
1091     * Refer to ihevce_enc_loop_get_mem_recs() and
1092     * ihevce_enc_loop_init()for more info
1093     * regarding memory allocation to each one below.
1094     */
1095 
1096     /**
1097     * Stores the vertical boundary strength of a CTB row.
1098     */
1099     UWORD32 *pu4_ctb_row_bs_vert;
1100 
1101     /**
1102     * Storage is same as above. Contains horizontal BS.
1103     */
1104     UWORD32 *pu4_ctb_row_bs_horz;
1105 
1106     /**
1107     * Pointer to the CTB row's Qp storage
1108     */
1109     WORD8 *pi1_ctb_row_qp;
1110 
1111     /**
1112     * Stride of the pu1_ctb_row_qp_p buffer in WORD32 unit
1113     */
1114     WORD32 u4_qp_buffer_stride;
1115 
1116     /*
1117     *   Pointer to the  memory which contains the Qp of
1118     *   top4x4 neighbour blocks for each CTB row.
1119     *   This memory is at frame level.
1120     */
1121     WORD8 *api1_qp_top_4x4_ctb_row[MAX_NUM_ENC_LOOP_PARALLEL];
1122 
1123     /*
1124     *   Stride of the above memory location.
1125     *   Values in one-stride correspondes to one CTB row.
1126     */
1127     WORD32 u4_qp_top_4x4_buf_strd;
1128 
1129     /*size of frm level qp buffer*/
1130     WORD32 u4_qp_top_4x4_buf_size;
1131 
1132 } deblk_ctbrow_prms_t;
1133 
1134 /**
1135 ******************************************************************************
1136 *  @brief  Entropy rd opt context for cabac bit estimation and RDO
1137 ******************************************************************************
1138 */
1139 typedef struct rdopt_entropy_ctxt
1140 {
1141     /**
1142     * array for entropy contexts during RD opt stage at CU level
1143     * one best and one current is required
1144     */
1145     entropy_context_t as_cu_entropy_ctxt[2];
1146 
1147     /**
1148     * init state of entropy context models during CU RD opt stage,
1149     * required for saving and restoring the cabac states
1150     */
1151     UWORD8 au1_init_cabac_ctxt_states[IHEVC_CAB_CTXT_END];
1152 
1153     /*
1154     * ptr to top row cu skip flags (1 bit per 8x8CU)
1155     */
1156     UWORD8 *pu1_cu_skip_top_row;
1157 
1158     /**
1159     * Current entropy ctxt idx
1160     */
1161     WORD32 i4_curr_buf_idx;
1162 
1163 } rdopt_entropy_ctxt_t;
1164 
1165 /**
1166 ******************************************************************************
1167 *  @brief  structure to save predicted data from Inter SATD stage to Inter RD opt stage
1168 ******************************************************************************
1169 */
1170 typedef struct
1171 {
1172     /*Buffer to store the predicted data after motion compensation for merge and
1173     * skip candidates.
1174     * [2] Because for a given candidate we do motion compensation for 5 merge candidates.
1175     *     store the pred data after mc for the first 2 candidates and from 3rd candidate
1176     *     onwards, overwrite the data which has higher SATD cost.
1177     */
1178     void *apv_pred_data[2];
1179 
1180     /** Stride to store the predicted data
1181     */
1182     WORD32 i4_pred_data_stride;
1183 
1184 } merge_skip_pred_data_t;
1185 /**
1186 ******************************************************************************
1187 *  @brief  Structure to hold Rate control related parameters
1188 *          for each bit-rate instance and each thread
1189 ******************************************************************************
1190 */
1191 typedef struct
1192 {
1193     /**
1194     *frame level open loop intra sad
1195     *
1196     */
1197     LWORD64 i8_frame_open_loop_ssd;
1198 
1199     /**
1200     *frame level open loop intra sad
1201     *
1202     */
1203     UWORD32 u4_frame_open_loop_intra_sad;
1204     /**
1205     * frame level intra sad accumulator
1206     */
1207     UWORD32 u4_frame_intra_sad;
1208 
1209     /**
1210     *  frame level sad accumulator
1211     */
1212     UWORD32 u4_frame_sad_acc;
1213 
1214     /**
1215     *  frame level intra sad accumulator
1216     */
1217     UWORD32 u4_frame_inter_sad_acc;
1218 
1219     /**
1220     *  frame level inter sad accumulator
1221     */
1222     UWORD32 u4_frame_intra_sad_acc;
1223 
1224     /**
1225     *  frame level cost accumulator
1226     */
1227     LWORD64 i8_frame_cost_acc;
1228 
1229     /**
1230     *  frame level intra cost accumulator
1231     */
1232     LWORD64 i8_frame_inter_cost_acc;
1233 
1234     /**
1235     *  frame level inter cost accumulator
1236     */
1237     LWORD64 i8_frame_intra_cost_acc;
1238 
1239     /**
1240     * frame level rdopt bits accumulator
1241     */
1242     UWORD32 u4_frame_rdopt_bits;
1243 
1244     /**
1245     * frame level rdopt header bits accumulator
1246     */
1247     UWORD32 u4_frame_rdopt_header_bits;
1248 
1249     /* Sum the Qps of each 8*8 block in CU
1250     * 8*8 block is considered as Min CU size possible as per standard is 8
1251     * 0 corresponds to INTER and 1 corresponds to INTRA
1252     */
1253     WORD32 i4_qp_normalized_8x8_cu_sum[2];
1254 
1255     /* Count the number of 8x8 blocks in each CU type (INTER/INTRA)
1256     * 0 corresponds to INTER and 1 corresponds to INTRA
1257     */
1258     WORD32 i4_8x8_cu_sum[2];
1259 
1260     /* SAD/Qscale accumulated over all CUs. CU size is inherently
1261     * taken care in SAD
1262     */
1263     LWORD64 i8_sad_by_qscale[2];
1264 
1265 } enc_loop_rc_params_t;
1266 /**
1267 ******************************************************************************
1268 *  @brief  CU information structure. This is to store the
1269 *  CU final out after Recursion
1270 ******************************************************************************
1271 */
1272 typedef struct ihevce_enc_cu_node_ctxt_t
1273 {
1274     /* CU params */
1275     /** CU X position in terms of min CU (8x8) units */
1276     UWORD8 b3_cu_pos_x : 3;
1277 
1278     /** CU Y position in terms of min CU (8x8) units */
1279     UWORD8 b3_cu_pos_y : 3;
1280 
1281     /** reserved bytes */
1282     UWORD8 b2_reserved : 2;
1283 
1284     /** CU size 2N (width or height) in pixels */
1285     UWORD8 u1_cu_size;
1286 
1287     /**
1288     * array for storing cu level final params for a given mode
1289     * one best and one current is required
1290     */
1291     enc_loop_cu_final_prms_t s_cu_prms;
1292 
1293     /**
1294     * array for storing cu level final params for a given mode
1295     * one best and one current is required
1296     */
1297     enc_loop_cu_final_prms_t *ps_cu_prms;
1298 
1299     /* flag to indicate if current CU is the first
1300     CU of the Quantisation group*/
1301     UWORD32 b1_first_cu_in_qg : 1;
1302 
1303     /** qp used during for CU
1304     * @remarks :
1305     */
1306     WORD8 i1_cu_qp;
1307 
1308 } ihevce_enc_cu_node_ctxt_t;
1309 
1310 typedef struct
1311 {
1312     WORD32 i4_sad;
1313 
1314     WORD32 i4_mv_cost;
1315 
1316     WORD32 i4_tot_cost;
1317 
1318     WORD8 i1_ref_idx;
1319 
1320     mv_t s_mv;
1321 
1322 } block_merge_nodes_t;
1323 
1324 /**
1325 ******************************************************************************
1326 *  @brief  This struct is used for storing output of block merge
1327 ******************************************************************************
1328 */
1329 typedef struct
1330 {
1331     block_merge_nodes_t *aps_best_results[MAX_NUM_PARTS];
1332 
1333     /* Contains the best uni dir for each partition type */
1334     WORD32 ai4_best_uni_dir[MAX_NUM_PARTS];
1335 
1336     /* Contains the best pred dir for each partition type */
1337     WORD32 ai4_best_pred_dir[MAX_NUM_PARTS];
1338 
1339     WORD32 i4_tot_cost;
1340 
1341     PART_TYPE_T e_part_type;
1342 } block_merge_results_t;
1343 
1344 /**
1345 ******************************************************************************
1346 *  @brief  This struct is used for storing output of block merge and also
1347 *          all of the intermediate results required
1348 ******************************************************************************
1349 */
1350 typedef struct
1351 {
1352     block_merge_results_t as_best_results[3 + 1][NUM_BEST_ME_OUTPUTS];
1353 
1354     block_merge_nodes_t as_nodes[3][TOT_NUM_PARTS][NUM_BEST_ME_OUTPUTS];
1355 
1356     WORD32 part_mask;
1357 
1358     WORD32 num_results_per_part;
1359 
1360     WORD32 num_best_results;
1361 
1362     /**
1363     * Overall best CU cost, while other entries store CU costs
1364     * in single direction, this is best CU cost, where each
1365     * partition cost is evaluated as best of uni/bi
1366     */
1367     WORD32 best_cu_cost;
1368 
1369 } block_merge_data_t;
1370 /**
1371 ******************************************************************************
1372 *  @brief  CU nbr information structure. This is to store the
1373 *  neighbour information for final reconstruction function
1374 ******************************************************************************
1375 */
1376 typedef struct
1377 {
1378     /* Pointer to top-left nbr */
1379     nbr_4x4_t *ps_topleft_nbr_4x4;
1380     /* Pointer to left nbr */
1381     nbr_4x4_t *ps_left_nbr_4x4;
1382     /* Pointer to top nbr */
1383     nbr_4x4_t *ps_top_nbr_4x4;
1384     /* stride of left_nbr_4x4 */
1385     WORD32 nbr_4x4_left_strd;
1386 
1387     /* Pointer to CU top */
1388     UWORD8 *pu1_cu_top;
1389 
1390     UWORD16 *pu2_cu_top;
1391 
1392     /* Pointer to CU top-left */
1393     UWORD8 *pu1_cu_top_left;
1394 
1395     UWORD16 *pu2_cu_top_left;
1396 
1397     /* Pointer to CU left */
1398     UWORD8 *pu1_cu_left;
1399 
1400     UWORD16 *pu2_cu_left;
1401 
1402     /* stride of left pointer */
1403     WORD32 cu_left_stride;
1404 } cu_nbr_prms_t;
1405 
1406 /** Structure to save the flags required for Final mode Reconstruction
1407 function. These flags are set based on quality presets and
1408 the bit-rate we are working on */
1409 typedef struct
1410 {
1411     /** Flag to indicate whether Luma pred data need to recomputed in the
1412     final_recon function. Now disabled for all modes */
1413     UWORD8 u1_eval_luma_pred_data;
1414 
1415     /** Flag to indicate whether Chroma pred data need to recomputed in the
1416     final_recon function. Now disabled for MedSpeed only */
1417     UWORD8 u1_eval_chroma_pred_data;
1418 
1419     /** Flag to indicate whether header data need to recomputed in the
1420     final_recon function. Now disabled for all modes */
1421     UWORD8 u1_eval_header_data;
1422 
1423     UWORD8 u1_eval_recon_data;
1424 } cu_final_recon_flags_t;
1425 
1426 /**
1427 ******************************************************************************
1428 *  @brief  structure to save pred data of ME cand. 1 ping-pong to store the
1429 *  the best and current luma cand. 1 buffer to store the best chroma pred
1430 ******************************************************************************
1431 */
1432 typedef struct
1433 {
1434     /** Pointers to store luma pred data of me/intra cand.(2) and chroma(1) */
1435     UWORD8 *pu1_pred_data[NUM_CU_ME_INTRA_PRED_IDX];
1436 
1437     UWORD16 *pu2_pred_data[NUM_CU_ME_INTRA_PRED_IDX];
1438 
1439     /** Stride to store the predicted data of me/intra cand.(2) and chroma(1) */
1440     WORD32 ai4_pred_data_stride[NUM_CU_ME_INTRA_PRED_IDX];
1441     /** Counter saying how many pointers are assigned */
1442     WORD32 i4_pointer_count;
1443 
1444 } cu_me_intra_pred_prms_t;
1445 
1446 /**
1447 ******************************************************************************
1448 *  @brief  Chroma RDOPT context structure
1449 ******************************************************************************
1450 */
1451 typedef struct
1452 {
1453     /** Storing the inverse quantized data (cb) for the special modes*/
1454     WORD16 ai2_iq_data_cb[(MAX_TU_SIZE * MAX_TU_SIZE) << 1];
1455 
1456     /** Storing the inverse quantized data (cr) for the special modes*/
1457     WORD16 ai2_iq_data_cr[(MAX_TU_SIZE * MAX_TU_SIZE) << 1];
1458 
1459     /** Storing the scan coeffs (cb) for the special modes*/
1460     UWORD8 au1_scan_coeff_cb[2][(MAX_TU_IN_CTB >> 1) * MAX_SCAN_COEFFS_BYTES_4x4];
1461 
1462     /** Storing the scan coeffs (cb) for the special modes*/
1463     UWORD8 au1_scan_coeff_cr[2][(MAX_TU_IN_CTB >> 1) * MAX_SCAN_COEFFS_BYTES_4x4];
1464 
1465     /** Max number of bytes filled in scan coeff data (cb) per TU*/
1466     WORD32 ai4_num_bytes_scan_coeff_cb_per_tu[2][MAX_TU_IN_TU_EQ_DIV_2];
1467 
1468     /** Max number of bytes filled in scan coeff data (cr) per TU*/
1469     WORD32 ai4_num_bytes_scan_coeff_cr_per_tu[2][MAX_TU_IN_TU_EQ_DIV_2];
1470 
1471     /** Stride of the iq buffer*/
1472     WORD32 i4_iq_buff_stride;
1473 
1474     /** Storing the pred data
1475     The predicted data is always interleaved. Therefore the size of this array will be
1476     ((MAX_TU_SIZE * MAX_TU_SIZE) >> 2) * 2)*/
1477     void *pv_pred_data;
1478 
1479     /** Predicted data stride*/
1480     WORD32 i4_pred_stride;
1481 
1482     /** Storing the cbfs for each tu
1483     For 1 tu case, only the 0th element will be valid*/
1484     UWORD8 au1_cbf_cb[2][MAX_TU_IN_TU_EQ_DIV_2];
1485 
1486     /** Storing the cbfs for each tu
1487     For 1 tu case, only the 0th element will be valid*/
1488     UWORD8 au1_cbf_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
1489 
1490     /** To store the cabac ctxt model updated by the RDOPT of best chroma mode
1491     [0] : for 1 TU case, [1] : for 4 TU case */
1492     UWORD8 au1_chrm_satd_updated_ctxt_models[IHEVC_CAB_CTXT_END];
1493 
1494     /** Best SATD chroma mode, [0] : for 1 TU case (TU_EQ_CU) , [1] : for 4 TU case
1495     Values : 0(PLANAR), 1(VERT), 2(HOR), 3(DC) chroma mode per each TU */
1496     UWORD8 u1_best_cr_mode;
1497 
1498     /** Best SATD chroma mode's RDOPT cost, [0] : for 1 TU case, [1] : for 4 TU case */
1499     LWORD64 i8_chroma_best_rdopt;
1500 
1501     /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
1502     /* This is done by adding the bits for signalling chroma mode (0-3)    */
1503     /* and subtracting the bits for chroma mode same as luma mode (4)      */
1504     LWORD64 i8_cost_to_encode_chroma_mode;
1505 
1506     /** Best SATD chroma mode's tu bits, [0] : for 1 TU case, [1] : for 4 TU case */
1507     WORD32 i4_chrm_tu_bits;
1508 
1509     /** Storing the zero col values for each TU for cb*/
1510     WORD32 ai4_zero_col_cb[2][MAX_TU_IN_TU_EQ_DIV_2];
1511 
1512     /** Storing the zero col values for each TU for cr*/
1513     WORD32 ai4_zero_col_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
1514 
1515     /** Storing the zero row values for each TU for cb*/
1516     WORD32 ai4_zero_row_cb[2][MAX_TU_IN_TU_EQ_DIV_2];
1517 
1518     /** Storing the zero row values for each TU for cr*/
1519     WORD32 ai4_zero_row_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
1520 } chroma_intra_satd_ctxt_t;
1521 
1522 /**
1523 ******************************************************************************
1524 *  @brief  Chroma RDOPT context structure
1525 ******************************************************************************
1526 */
1527 typedef struct
1528 {
1529     /** Chroma SATD context structure. It is an array of two to account for the TU_EQ_CU candidate
1530     and the TU_EQ_CU_DIV2 candidate*/
1531     chroma_intra_satd_ctxt_t as_chr_intra_satd_ctxt[NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD];
1532 
1533     /** Chroma SATD has has to be evaluated only for the HIGH QUALITY */
1534     UWORD8 u1_eval_chrm_satd;
1535 
1536     /** Chroma RDOPT has to be evaluated only for the HIGH QUALITY / MEDIUM SPEED preset */
1537     UWORD8 u1_eval_chrm_rdopt;
1538 
1539 } ihevce_chroma_rdopt_ctxt_t;
1540 
1541 typedef struct
1542 {
1543     inter_cu_results_t s_cu_results;
1544 
1545     inter_pu_results_t s_pu_results;
1546 } block_merge_output_t;
1547 
1548 /**
1549 ******************************************************************************
1550 *  @brief  Structure to store the Merge/Skip Cand. for EncLoop
1551 ******************************************************************************
1552 */
1553 typedef struct
1554 {
1555     /** List of all  merge/skip candidates to be evalauted (SATD/RDOPT) for
1556     * this CU
1557     */
1558     cu_inter_cand_t as_cu_inter_merge_skip_cand[MAX_NUM_CU_MERGE_SKIP_CAND];
1559 
1560     /** number of merge/skip candidates
1561     */
1562     UWORD8 u1_num_merge_cands;
1563 
1564     UWORD8 u1_num_skip_cands;
1565 
1566     UWORD8 u1_num_merge_skip_cands;
1567 
1568 } cu_inter_merge_skip_t;
1569 
1570 /** Structure to store the Mixed mode Cand. for EncLoop */
1571 typedef struct
1572 {
1573     cu_inter_cand_t as_cu_data[MAX_NUM_MIXED_MODE_INTER_RDO_CANDS];
1574 
1575     UWORD8 u1_num_mixed_mode_type0_cands;
1576 
1577     UWORD8 u1_num_mixed_mode_type1_cands;
1578 
1579 } cu_mixed_mode_inter_t;
1580 
1581 typedef struct
1582 {
1583     /* +2 because an additional buffer is required for */
1584     /* storing both cur and best during merge eval */
1585     void *apv_inter_pred_data[MAX_NUM_INTER_RDO_CANDS + 4];
1586 
1587     /* Bit field used to determine the indices of free bufs in 'apv_pred_data' buf array */
1588     UWORD32 u4_is_buf_in_use;
1589 
1590     /* Assumption is that the same stride is used for the */
1591     /* entire set of buffers above and is equal to the */
1592     /* CU size */
1593     WORD32 i4_pred_stride;
1594 
1595 } ihevce_inter_pred_buf_data_t;
1596 /** Structure to store the Inter Cand. info in EncLoop */
1597 typedef struct
1598 {
1599     cu_inter_cand_t *aps_cu_data[MAX_NUM_INTER_RDO_CANDS];
1600 
1601     UWORD32 au4_cost[MAX_NUM_INTER_RDO_CANDS];
1602 
1603     UWORD8 au1_pred_buf_idx[MAX_NUM_INTER_RDO_CANDS];
1604 
1605     UWORD32 u4_src_variance;
1606 
1607     UWORD8 u1_idx_of_worst_cost_in_cost_array;
1608 
1609     UWORD8 u1_idx_of_worst_cost_in_pred_buf_array;
1610 
1611     UWORD8 u1_num_inter_cands;
1612 
1613 } inter_cu_mode_info_t;
1614 typedef struct
1615 {
1616     /*Frame level base pointer of buffers for each ctb row to store the top pixels
1617     *and top left pixel for the next ctb row.These buffers are common accross all threads
1618     */
1619     UWORD8 *apu1_sao_src_frm_top_luma[MAX_NUM_ENC_LOOP_PARALLEL];
1620     /*Ctb level pointer to buffer to store the top pixels
1621     *and top left pixel for the next ctb row.These buffers are common accross all threads
1622     */
1623     UWORD8 *pu1_curr_sao_src_top_luma;
1624     /*Buffer to store the left boundary before
1625     * doing sao on current ctb for the next ctb in the current row
1626     */
1627     UWORD8 au1_sao_src_left_luma[MAX_CTB_SIZE];
1628     /*Frame level base pointer of buffers for each ctb row to store the top pixels
1629     *and top left pixel for the next ctb row.These buffers are common accross all threads
1630     */
1631     UWORD8 *apu1_sao_src_frm_top_chroma[MAX_NUM_ENC_LOOP_PARALLEL];
1632 
1633     WORD32 i4_frm_top_chroma_buf_stride;
1634 
1635     /*Ctb level pointer to buffer to store the top chroma pixels
1636     *and top left pixel for the next ctb row.These buffers are common accross all threads
1637     */
1638     UWORD8 *pu1_curr_sao_src_top_chroma;
1639 
1640     /*Scratch buffer to store the left boundary before
1641     * doing sao on current ctb for the next ctb in the current row
1642     */
1643     UWORD8 au1_sao_src_left_chroma[MAX_CTB_SIZE * 2];
1644 
1645     /**
1646     * Luma recon buffer
1647     */
1648     UWORD8 *pu1_frm_luma_recon_buf;
1649     /**
1650     * Chroma recon buffer
1651     */
1652     UWORD8 *pu1_frm_chroma_recon_buf;
1653     /**
1654     * Luma recon buffer for curr ctb
1655     */
1656     UWORD8 *pu1_cur_luma_recon_buf;
1657     /**
1658     * Chroma recon buffer for curr ctb
1659     */
1660     UWORD8 *pu1_cur_chroma_recon_buf;
1661     /**
1662     * Luma src buffer
1663     */
1664     UWORD8 *pu1_frm_luma_src_buf;
1665     /**
1666     * Chroma src buffer
1667     */
1668     UWORD8 *pu1_frm_chroma_src_buf;
1669     /**
1670     * Luma src(input yuv) buffer for curr ctb
1671     */
1672     UWORD8 *pu1_cur_luma_src_buf;
1673     /**
1674     * Chroma src buffer for curr ctb
1675     */
1676     UWORD8 *pu1_cur_chroma_src_buf;
1677     /* Left luma scratch buffer required for sao RD optimisation*/
1678     UWORD8 au1_left_luma_scratch[MAX_CTB_SIZE];
1679 
1680     /* Left chroma scratch buffer required for sao RD optimisation*/
1681     /* Min size required= MAX_CTB_SIZE/2 * 2
1682     * Multiplied by 2 because size reuired is MAX_CTB_SIZE/2 each for U and V
1683     */
1684     UWORD8 au1_left_chroma_scratch[MAX_CTB_SIZE * 2];
1685 
1686     /* Top luma scratch buffer required for sao RD optimisation*/
1687     UWORD8 au1_top_luma_scratch[MAX_CTB_SIZE + 2];  // +1 for top left pixel and +1 for top right
1688 
1689     /* Top chroma scratch buffer required for sao RD optimisation*/
1690     UWORD8 au1_top_chroma_scratch[MAX_CTB_SIZE + 4];  // +2 for top left pixel and +2 for top right
1691 
1692     /* Scratch buffer to store the sao'ed output during sao RD optimisation*/
1693     /* One extra row(bot pixels) is copied to scratch buf but 2d buf copy func copies multiple of 4 ,hence
1694     MAX_CTB _SIZE + 4*/
1695     UWORD8 au1_sao_luma_scratch[PING_PONG][SCRATCH_BUF_STRIDE * (MAX_CTB_SIZE + 4)];
1696 
1697     /* Scratch buffer to store the sao'ed output during sao RD optimisation*/
1698     /* One extra row(bot pixels) is copied to scratch buf but 2d buf copy func copies multiple of 4 ,hence
1699     MAX_CTB _SIZE + 4*/
1700     UWORD8 au1_sao_chroma_scratch[PING_PONG][SCRATCH_BUF_STRIDE * (MAX_CTB_SIZE + 4)];
1701 
1702     /**
1703     * CTB size
1704     */
1705     WORD32 i4_ctb_size;
1706     /**
1707     * Luma recon buffer stride
1708     */
1709     WORD32 i4_frm_luma_recon_stride;
1710     /**
1711     * Chroma recon buffer stride
1712     */
1713     WORD32 i4_frm_chroma_recon_stride;
1714     /**
1715     * Luma recon buffer stride for curr ctb
1716     */
1717     WORD32 i4_cur_luma_recon_stride;
1718     /**
1719     * Chroma recon buffer stride for curr ctb
1720     */
1721     WORD32 i4_cur_chroma_recon_stride;
1722     /**
1723     * Luma src buffer stride
1724     */
1725     WORD32 i4_frm_luma_src_stride;
1726     /**
1727     * Chroma src buffer stride
1728     */
1729     WORD32 i4_frm_chroma_src_stride;
1730 
1731     WORD32 i4_frm_top_luma_buf_stride;
1732     /**
1733     * Luma src buffer stride for curr ctb
1734     */
1735     WORD32 i4_cur_luma_src_stride;
1736     /**
1737     * Chroma src buffer stride for curr ctb
1738     */
1739     WORD32 i4_cur_chroma_src_stride;
1740 
1741     /* Top luma buffer size */
1742     WORD32 i4_top_luma_buf_size;
1743 
1744     /* Top Chroma buffer size */
1745     WORD32 i4_top_chroma_buf_size;
1746 
1747     /*** Number of CTB units **/
1748     WORD32 i4_num_ctb_units;
1749 
1750     /**
1751     * CTB x pos
1752     */
1753     WORD32 i4_ctb_x;
1754     /**
1755     * CTB y pos
1756     */
1757     WORD32 i4_ctb_y;
1758     /* SAO block width*/
1759     WORD32 i4_sao_blk_wd;
1760 
1761     /* SAO block height*/
1762     WORD32 i4_sao_blk_ht;
1763 
1764     /* Last ctb row flag*/
1765     WORD32 i4_is_last_ctb_row;
1766 
1767     /* Last ctb col flag*/
1768     WORD32 i4_is_last_ctb_col;
1769 
1770     /* CTB aligned width */
1771     UWORD32 u4_ctb_aligned_wd;
1772 
1773     /* Number of ctbs in a row*/
1774     UWORD32 u4_num_ctbs_horz;
1775 
1776     UWORD32 u4_num_ctbs_vert;
1777     /**
1778     * Closed loop SSD Lambda
1779     * This is multiplied with bits for RD cost computations in SSD mode
1780     * This is represented in q format with shift of LAMBDA_Q_SHIFT
1781     */
1782     LWORD64 i8_cl_ssd_lambda_qf;
1783 
1784     /**
1785     * Closed loop SSD Lambda for chroma (chroma qp is different from luma qp)
1786     * This is multiplied with bits for RD cost computations in SSD mode
1787     * This is represented in q format with shift of LAMBDA_Q_SHIFT
1788     */
1789     LWORD64 i8_cl_ssd_lambda_chroma_qf;
1790     /**
1791     * Pointer to current PPS
1792     */
1793     pps_t *ps_pps;  //not used currently
1794     /**
1795     * Pointer to current SPS
1796     */
1797     sps_t *ps_sps;
1798 
1799     /**
1800     * Pointer to current slice header structure
1801     */
1802     slice_header_t *ps_slice_hdr;
1803     /**
1804     * Pointer to current frame ctb out array of structures
1805     */
1806     ctb_enc_loop_out_t *ps_ctb_out;
1807     /**
1808     *  context for cabac bit estimation used during rdopt stage
1809     */
1810     rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt;
1811     /**
1812     * Pointer to sao_enc_t for the current ctb
1813     */
1814     sao_enc_t *ps_sao;
1815     /*
1816     * Pointer to an array to store the sao information of the top ctb
1817     * This is required for to decide top merge
1818     */
1819     sao_enc_t *aps_frm_top_ctb_sao[MAX_NUM_ENC_LOOP_PARALLEL];
1820 
1821     /*
1822     * Pointer to structure to store the sao parameters of (x,y)th ctb
1823     * for top merge of (x,y+1)th ctb
1824     */
1825     sao_enc_t *ps_top_ctb_sao;
1826 
1827     /* structure to store the sao parameters of (x,y)th ctb for
1828     * the left merge of (x+1,y)th ctb
1829     */
1830     sao_enc_t s_left_ctb_sao;
1831 
1832     /* Array of structures for SAO RDO candidates*/
1833     sao_enc_t as_sao_rd_cand[MAX_SAO_RD_CAND];
1834 
1835     /** array of function pointers for luma sao */
1836     pf_sao_luma apf_sao_luma[4];
1837 
1838     /** array of function pointers for chroma sao */
1839     pf_sao_chroma apf_sao_chroma[4];
1840 
1841     /* Flag to do SAO luma and chroma filtering*/
1842     WORD8 i1_slice_sao_luma_flag;
1843 
1844     WORD8 i1_slice_sao_chroma_flag;
1845 
1846 #if DISABLE_SAO_WHEN_NOISY
1847     ctb_analyse_t *ps_ctb_data;
1848 
1849     WORD32 i4_ctb_data_stride;
1850 #endif
1851 
1852     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
1853 
1854 } sao_ctxt_t;
1855 
1856 /**
1857 ******************************************************************************
1858 *  @brief  Encode loop module context structure
1859 ******************************************************************************
1860 */
1861 typedef struct
1862 {
1863 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
1864     void *pv_err_func_selector;
1865 #endif
1866 
1867     /**
1868     * Quality preset for comtrolling numbe of RD opt cand
1869     * @sa : IHEVCE_QUALITY_CONFIG_T
1870     */
1871     WORD32 i4_quality_preset;
1872     /**
1873     *
1874     *
1875     */
1876     WORD32 i4_rc_pass;
1877     /**
1878     * Lamda to be mulitplied with bits for SATD
1879     * should be equal to Lamda*Qp
1880     */
1881     WORD32 i4_satd_lamda;
1882 
1883     /**
1884     * Lamda to be mulitplied with bits for SAD
1885     * should be equal to Lamda*Qp
1886     */
1887     WORD32 i4_sad_lamda;
1888 
1889     /**
1890     * Closed loop SSD Lambda
1891     * This is multiplied with bits for RD cost computations in SSD mode
1892     * This is represented in q format with shift of LAMBDA_Q_SHIFT
1893     */
1894     LWORD64 i8_cl_ssd_lambda_qf;
1895 
1896     /**
1897     * Closed loop SSD Lambda for chroma (chroma qp is different from luma qp)
1898     * This is multiplied with bits for RD cost computations in SSD mode
1899     * This is represented in q format with shift of LAMBDA_Q_SHIFT
1900     */
1901     LWORD64 i8_cl_ssd_lambda_chroma_qf;
1902 
1903     /**
1904     * Ratio of Closed loop SSD Lambda and Closed loop SSD Lambda for chroma
1905     * This is multiplied with (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)
1906     * to keep the precision of the ratio
1907     */
1908     UWORD32 u4_chroma_cost_weighing_factor;
1909     /**
1910     * Frame level QP to be used
1911     */
1912     WORD32 i4_frame_qp;
1913 
1914     WORD32 i4_frame_mod_qp;
1915 
1916     WORD32 i4_frame_qstep;
1917 
1918     UWORD8 u1_max_tr_depth;
1919 
1920     /**
1921     * CU level Qp
1922     */
1923     WORD32 i4_cu_qp;
1924 
1925     /**
1926     * CU level Qp / 6
1927     */
1928     WORD32 i4_cu_qp_div6;
1929 
1930     /**
1931     * CU level Qp % 6
1932     */
1933     WORD32 i4_cu_qp_mod6;
1934 
1935     /**
1936     *  CU level QP to be used
1937     */
1938     WORD32 i4_chrm_cu_qp;
1939 
1940     /**
1941     * CU level Qp / 6
1942     */
1943     WORD32 i4_chrm_cu_qp_div6;
1944 
1945     /**
1946     * CU level Qp % 6
1947     */
1948     WORD32 i4_chrm_cu_qp_mod6;
1949 
1950     /** previous cu qp
1951     * @remarks : This needs to be remembered to handle skip cases in deblocking.
1952     */
1953     WORD32 i4_prev_cu_qp;
1954 
1955     /** chroma qp offset
1956     * @remarks : Used to calculate chroma qp and other qp related parameter at CU level
1957     */
1958     WORD32 i4_chroma_qp_offset;
1959 
1960     /**
1961     * Buffer Pointer to populate the scale matrix for all transform size
1962     */
1963     WORD16 *pi2_scal_mat;
1964 
1965     /**
1966     * Buffer Pointer to populate the rescale matrix for all transform size
1967     */
1968     WORD16 *pi2_rescal_mat;
1969 
1970     /** array of pointer to store the scaling matrices for
1971     *  all transform sizes and qp % 6 (pre computed)
1972     */
1973     WORD16 *api2_scal_mat[NUM_TRANS_TYPES * 2];
1974 
1975     /** array of pointer to store the re-scaling matrices for
1976     *  all transform sizes and qp % 6 (pre computed)
1977     */
1978     WORD16 *api2_rescal_mat[NUM_TRANS_TYPES * 2];
1979 
1980     /** array of function pointers for residual and
1981     *  forward transform for all transform sizes
1982     */
1983     pf_res_trans_luma apf_resd_trns[NUM_TRANS_TYPES];
1984 
1985     /** array of function pointers for residual and
1986     *  forward HAD transform for all transform sizes
1987     */
1988     pf_res_trans_luma_had_chroma apf_chrm_resd_trns_had[NUM_TRANS_TYPES - 2];
1989 
1990     /** array of function pointers for residual and
1991     *  forward transform for all transform sizes
1992     *  for chroma
1993     */
1994     pf_res_trans_chroma apf_chrm_resd_trns[NUM_TRANS_TYPES - 2];
1995 
1996     /** array of function pointers for qunatization and
1997     *  inv Quant for ssd calc. for all transform sizes
1998     */
1999     pf_quant_iquant_ssd apf_quant_iquant_ssd[4];
2000 
2001     /** array of function pointers for inv.transform and
2002     *  recon for all transform sizes
2003     */
2004     pf_it_recon apf_it_recon[NUM_TRANS_TYPES];
2005 
2006     /** array of function pointers for inverse transform
2007     * and recon for all transform sizes for chroma
2008     */
2009     pf_it_recon_chroma apf_chrm_it_recon[NUM_TRANS_TYPES - 2];
2010 
2011     /** array of luma intra prediction function pointers */
2012     pf_intra_pred apf_lum_ip[NUM_IP_FUNCS];
2013 
2014     /** array of chroma intra prediction function pointers */
2015     pf_intra_pred apf_chrm_ip[NUM_IP_FUNCS];
2016 
2017     /* - Function pointer to cu_mode_decide function */
2018     /* - The 'void *' is used since one of the parameters of */
2019     /* this class of functions is the current structure */
2020     /* - This function pointer is used to choose the */
2021     /* appropriate function depending on whether bit_depth is */
2022     /* chosen as 8 bits or greater */
2023     /* - This function pointer's type is defined at the end */
2024     /* of this file */
2025     void *pv_cu_mode_decide;
2026 
2027     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2028     void *pv_inter_rdopt_cu_mc_mvp;
2029 
2030     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2031     void *pv_inter_rdopt_cu_ntu;
2032 
2033     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2034     void *pv_intra_chroma_pred_mode_selector;
2035 
2036     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2037     void *pv_intra_rdopt_cu_ntu;
2038 
2039     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2040     void *pv_final_rdopt_mode_prcs;
2041 
2042     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2043     void *pv_store_cu_results;
2044 
2045     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2046     void *pv_enc_loop_cu_bot_copy;
2047 
2048     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2049     void *pv_final_mode_reevaluation_with_modified_cu_qp;
2050 
2051     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2052     void *pv_enc_loop_ctb_left_copy;
2053 
2054     /** Qunatization rounding factor for inter and intra CUs */
2055     WORD32 i4_quant_rnd_factor[2];
2056 
2057     /**
2058     * Frame Buffer Pointer to store the top row luma data.
2059     * one pixel row in every ctb row
2060     */
2061     void *apv_frm_top_row_luma[MAX_NUM_ENC_LOOP_PARALLEL];
2062 
2063     /**
2064     * One CTB row size of Top row luma data buffer
2065     */
2066     WORD32 i4_top_row_luma_stride;
2067 
2068     /**
2069     * One frm of Top row luma data buffer
2070     */
2071     WORD32 i4_frm_top_row_luma_size;
2072 
2073     /**
2074     * Current luma row bottom data store pointer
2075     */
2076     void *pv_bot_row_luma;
2077 
2078     /**
2079     * Top luma row top data access pointer
2080     */
2081     void *pv_top_row_luma;
2082 
2083     /**
2084     * Frame Buffer Pointer to store the top row chroma data (Cb  Cr pixel interleaved )
2085     * one pixel row in every ctb row
2086     */
2087     void *apv_frm_top_row_chroma[MAX_NUM_ENC_LOOP_PARALLEL];
2088 
2089     /**
2090     * One CTB row size of Top row chroma data buffer (Cb  Cr pixel interleaved )
2091     */
2092     WORD32 i4_top_row_chroma_stride;
2093 
2094     /**
2095     * One frm size of Top row chroma data buffer (Cb  Cr pixel interleaved )
2096     */
2097     WORD32 i4_frm_top_row_chroma_size;
2098 
2099     /**
2100     * Current chroma row bottom data store pointer
2101     */
2102     void *pv_bot_row_chroma;
2103 
2104     /**
2105     * Top chroma row top data access pointer
2106     */
2107     void *pv_top_row_chroma;
2108 
2109     /**
2110     * Frame Buffer Pointer to store the top row neighbour modes stored at 4x4 level
2111     * one 4x4 row in every ctb row
2112     */
2113     nbr_4x4_t *aps_frm_top_row_nbr[MAX_NUM_ENC_LOOP_PARALLEL];
2114 
2115     /**
2116     * One CTB row size of Top row nbr 4x4 params buffer
2117     */
2118     WORD32 i4_top_row_nbr_stride;
2119 
2120     /**
2121     * One frm size of Top row nbr 4x4 params buffer
2122     */
2123     WORD32 i4_frm_top_row_nbr_size;
2124 
2125     /**
2126     * Current row nbr prms bottom data store pointer
2127     */
2128     nbr_4x4_t *ps_bot_row_nbr;
2129 
2130     /**
2131     * Top row nbr prms top data access pointer
2132     */
2133     nbr_4x4_t *ps_top_row_nbr;
2134 
2135     /**
2136     * Pointer to (1,1) location in au1_nbr_ctb_map
2137     */
2138     UWORD8 *pu1_ctb_nbr_map;
2139 
2140     /**
2141     * neigbour map buffer stride;
2142     */
2143     WORD32 i4_nbr_map_strd;
2144 
2145     /**
2146     * Array at ctb level to store the neighour map
2147     * its size is 25x25 for ctb size of 64x64
2148     */
2149     UWORD8 au1_nbr_ctb_map[MAX_PU_IN_CTB_ROW + 1 + 8][MAX_PU_IN_CTB_ROW + 1 + 8];
2150 
2151     /**
2152     * Array to store left ctb data for luma
2153     * some padding is added to take care of unconditional access
2154     */
2155     void *pv_left_luma_data;
2156 
2157     /**
2158     * Array to store left ctb data for chroma (cb abd cr pixel interleaved
2159     * some padding is added to take care of unconditional access
2160     */
2161     void *pv_left_chrm_data;
2162 
2163     /**
2164     * Array to store the left neighbour modes at 4x4 level
2165     */
2166     nbr_4x4_t as_left_col_nbr[MAX_PU_IN_CTB_ROW];
2167 
2168     /**
2169     * Array to store currrent CTb pred modes at a 4x4 level
2170     * used for prediction inside ctb
2171     */
2172     nbr_4x4_t as_ctb_nbr_arr[MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
2173 
2174     /**
2175     * array for storing csbf during RD opt stage at CU level
2176     * one best and one current is required
2177     */
2178     UWORD8 au1_cu_csbf[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];
2179 
2180     /**
2181     * Stride of csbf buffer. will be useful for scanning access
2182     * if stored in a 2D order. right now set to max tx size >> 4;
2183     */
2184     WORD32 i4_cu_csbf_strd;
2185 
2186     /**
2187     * Array to store pred modes  during SATD and RD opt stage at CU level
2188     * one best and one current is required
2189     */
2190     nbr_4x4_t as_cu_nbr[2][MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
2191 
2192     /**
2193     * array to store the output of reference substitution process output
2194     * for intra CUs
2195     * TOP (32 x 2) + Left (32 x 2) + Top left (1) + Alignment (3)
2196     */
2197     void *pv_ref_sub_out;
2198 
2199     /**
2200     * array to store the filtered reference samples for intra CUs
2201     * TOP (32 x 2) + Left (32 x 2) + Top left (1) + Alignment (3)
2202     */
2203     void *pv_ref_filt_out;
2204 
2205     /**
2206     * Used for 3 purposes
2207     *
2208     * 1. MC Intermediate buffer
2209     * array for storing intermediate 16-bit value for hxhy subpel
2210     * generation at CTB level (+ 16) for subpel planes boundary
2211     * +4 is for horizontal 4pels
2212     *
2213     * 2. Temprory scratch buffer for transform and coeffs storage
2214     * MAX_TRANS_SIZE *2 for trans_scratch(32bit) and MAX_TRANS_SIZE *1 for trans_values
2215     * The first part i.e. from 0 to MAX_TRANS_SIZE is then reused for storing the quant coeffs
2216     * Max of both are used
2217     *
2218     * 3. MC Intermediate buffer
2219     * buffer for storing intermediate 16 bit values prior to conversion to 8bit in HBD
2220     *
2221     */
2222     MEM_ALIGN16 WORD16 ai2_scratch[(MAX_CTB_SIZE + 8 + 8) * (MAX_CTB_SIZE + 8 + 8 + 8) * 2];
2223 
2224     /**
2225     * array for storing cu level final params for a given mode
2226     * one best and one current is required
2227     */
2228     enc_loop_cu_final_prms_t as_cu_prms[2];
2229 
2230     /**
2231     * Scan index to be used for any gien transform
2232     * this is a scartch variable used to communicate
2233     * scan idx at every transform level
2234     */
2235     WORD32 i4_scan_idx;
2236 
2237     /**
2238     * Buffer index in ping pong buffers
2239     * to be used SATD mode evaluations
2240     */
2241     WORD32 i4_satd_buf_idx;
2242 
2243     /**
2244     * Motion Compensation module context structre
2245     */
2246     inter_pred_ctxt_t s_mc_ctxt;
2247 
2248     /**
2249     * MV pred module context structre
2250     */
2251     mv_pred_ctxt_t s_mv_pred_ctxt;
2252 
2253     /**
2254     * Deblock BS ctb structure
2255     */
2256     deblk_bs_ctb_ctxt_t s_deblk_bs_prms;
2257 
2258     /**
2259     * Deblocking ctb structure
2260     */
2261     deblk_ctb_params_t s_deblk_prms;
2262 
2263     /**
2264     * Deblocking structure. For ctb-row level
2265     */
2266     deblk_ctbrow_prms_t s_deblk_ctbrow_prms;
2267 
2268     /**
2269     * Deblocking enable flag
2270     */
2271     WORD32 i4_deblock_type;
2272 
2273     /**
2274     *  context for cabac bit estimation used during rdopt stage
2275     */
2276     rdopt_entropy_ctxt_t s_rdopt_entropy_ctxt;
2277 
2278     /**
2279     * Context models stored for RDopt store and restore purpose
2280     */
2281     UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
2282 
2283     /**
2284     * current picture slice type
2285     */
2286     WORD8 i1_slice_type;
2287 
2288     /**
2289     * strong_intra_smoothing_enable_flag
2290     */
2291     WORD8 i1_strong_intra_smoothing_enable_flag;
2292 
2293     /** Pointer to Dep Mngr for controlling Top-Right CU dependency */
2294     void *pv_dep_mngr_enc_loop_cu_top_right;
2295 
2296     /** Pointer to Dep Mngr for controlling Deblocking Top dependency */
2297     void *pv_dep_mngr_enc_loop_dblk;
2298 
2299     /** pointer to store the cabac states at end of second CTB in current row */
2300     UWORD8 *pu1_curr_row_cabac_state;
2301 
2302     /** pointer to copy the cabac states at start of first CTB in current row */
2303     UWORD8 *pu1_top_rt_cabac_state;
2304     /** flag to indicate rate control mode.
2305     * @remarks :  To enable CU level qp modulation only when required.
2306     */
2307     WORD8 i1_cu_qp_delta_enable;
2308 
2309     /** flag to indicate rate control mode.
2310     * @remarks :  Entropy sync enable flag
2311     */
2312     WORD8 i1_entropy_coding_sync_enabled_flag;
2313 
2314     /** Use SATD or SAD for best merge candidate evaluation */
2315     WORD32 i4_use_satd_for_merge_eval;
2316 
2317     UWORD8 u1_use_early_cbf_data;
2318 
2319     /** Use SATD or SAD for best CU merge candidate evaluation */
2320     WORD32 i4_use_satd_for_cu_merge;
2321 
2322     /** Maximum number of merge candidates to be evaluated */
2323     WORD32 i4_max_merge_candidates;
2324 
2325     /** Flag to indicate whether current pictute needs to be deblocked,
2326     padded and hpel planes need to be generated.
2327     These are turned off typically in non referecne pictures when psnr
2328     and recon dump is disabled
2329     */
2330     WORD32 i4_deblk_pad_hpel_cur_pic;
2331 
2332     /* Array of structures for storing mc predicted data for
2333     * merge and skip modes
2334     */
2335     merge_skip_pred_data_t as_merge_skip_pred_data[MAX_NUM_CU_MERGE_SKIP_CAND];
2336 
2337     /* Sum the Qps of each 8*8 block in CU
2338     * 8*8 block is considered as Min CU size possible as per standard is 8
2339     * 0 corresponds to INTER and 1 corresponds to INTRA
2340     */
2341     LWORD64 i8_cl_ssd_lambda_qf_array[MAX_HEVC_QP_12bit + 1];
2342     UWORD32 au4_chroma_cost_weighing_factor_array[MAX_HEVC_QP_12bit + 1];
2343     LWORD64 i8_cl_ssd_lambda_chroma_qf_array[MAX_HEVC_QP_12bit + 1];
2344     WORD32 i4_satd_lamda_array[MAX_HEVC_QP_12bit + 1];
2345     WORD32 i4_sad_lamda_array[MAX_HEVC_QP_12bit + 1];
2346 
2347     /************************************************************************/
2348     /* The fields with the string 'type2' in their names are required */
2349     /* when both 8bit and hbd lambdas are needed. The lambdas corresponding */
2350     /* to the bit_depth != internal_bit_depth are stored in these fields */
2351     /************************************************************************/
2352     LWORD64 i8_cl_ssd_type2_lambda_qf_array[MAX_HEVC_QP_12bit + 1];
2353     LWORD64 i8_cl_ssd_type2_lambda_chroma_qf_array[MAX_HEVC_QP_12bit + 1];
2354     WORD32 i4_satd_type2_lamda_array[MAX_HEVC_QP_12bit + 1];
2355     WORD32 i4_sad_type2_lamda_array[MAX_HEVC_QP_12bit + 1];
2356 
2357     /* Lokesh: Added to find if the CU is the first to be coded in the group */
2358     WORD32 i4_is_first_cu_qg_coded;
2359 
2360     /* Chroma RDOPT related parameters */
2361     ihevce_chroma_rdopt_ctxt_t s_chroma_rdopt_ctxt;
2362 
2363     /* Structure to save pred data of ME/Intra cand */
2364     cu_me_intra_pred_prms_t s_cu_me_intra_pred_prms;
2365 
2366     /* Structure to save the flags required for Final mode Reconstruction
2367     function. These flags are set based on quality presets and bit-rate
2368     we are working on */
2369     cu_final_recon_flags_t s_cu_final_recon_flags;
2370 
2371     /* Parameter to how at which level RDOQ will be implemented:
2372     0 - RDOQ disbaled
2373     1 - RDOQ enabled during RDOPT for all candidates
2374     2 - RDOQ enabled only for the final candidate*/
2375     WORD32 i4_rdoq_level;
2376 
2377     /* Parameter to how at which level Quant rounding factors are computed:
2378     FIXED_QUANT_ROUNDING       : Fixed Quant rounding values are used
2379     NCTB_LEVEL_QUANT_ROUNDING  : NCTB level Cmputed Quant rounding values are used
2380     CTB_LEVEL_QUANT_ROUNDING   : CTB level Cmputed Quant rounding values are used
2381     CU_LEVEL_QUANT_ROUNDING    : CU level Cmputed Quant rounding values are used
2382     TU_LEVEL_QUANT_ROUNDING    : TU level Cmputed Quant rounding values are used*/
2383     WORD32 i4_quant_rounding_level;
2384 
2385     /* Parameter to how at which level Quant rounding factors are computed:
2386     CHROMA_QUANT_ROUNDING    : Chroma Quant rounding values are used for chroma */
2387     WORD32 i4_chroma_quant_rounding_level;
2388 
2389     /* Parameter to how at which level RDOQ will be implemented:
2390     0 - SBH disbaled
2391     1 - SBH enabled during RDOPT for all candidates
2392     2 - SBH enabled only for the final candidate*/
2393     WORD32 i4_sbh_level;
2394 
2395     /* Parameter to how at which level ZERO CBF RDO will be implemented:
2396     0 - ZCBF disbaled
2397     1 - ZCBF enabled during RDOPT for all candidates
2398     2 - ZCBF enabled only for the final candidate
2399     */
2400     WORD32 i4_zcbf_rdo_level;
2401 
2402     /*RDOQ-SBH context structure*/
2403     rdoq_sbh_ctxt_t s_rdoq_sbh_ctxt;
2404 
2405     /** Structure to store the Merge/Skip Cand. for EncLoop */
2406     cu_inter_merge_skip_t s_cu_inter_merge_skip;
2407     /** Structure to store the Mixed mode Cand. for EncLoop */
2408     cu_mixed_mode_inter_t s_mixed_mode_inter_cu;
2409 
2410     ihevce_inter_pred_buf_data_t s_pred_buf_data;
2411 
2412     void *pv_422_chroma_intra_pred_buf;
2413 
2414     WORD32 i4_max_num_inter_rdopt_cands;
2415 
2416     /* Output Struct per each CU during recursions */
2417     ihevce_enc_cu_node_ctxt_t as_enc_cu_ctxt[MAX_CU_IN_CTB + 1];
2418 
2419     /* Used to store best inter candidate. Used only when */
2420     /* 'CU modulated QP override' is enabled */
2421     cu_inter_cand_t as_best_cand[MAX_CU_IN_CTB + 1];
2422 
2423     cu_inter_cand_t *ps_best_cand;
2424 
2425     UWORD8 au1_cu_init_cabac_state_a_priori[MAX_CU_IN_CTB + 1][IHEVC_CAB_CTXT_END];
2426 
2427     UWORD8 (*pau1_curr_cu_a_priori_cabac_state)[IHEVC_CAB_CTXT_END];
2428 
2429     /* Used to store pred data of each CU in the CTB. */
2430     /* Used only when 'CU modulated QP override' is enabled */
2431     void *pv_CTB_pred_luma;
2432 
2433     void *pv_CTB_pred_chroma;
2434 
2435     /**
2436     * array for storing recon during SATD and RD opt stage at CU level
2437     * one best and one current is required.Luma and chroma together
2438     */
2439     void *pv_cu_luma_recon;
2440 
2441     /**
2442     * array for storing recon during SATD and RD opt stage at CU level
2443     * one best and one current is required.Luma and chroma together
2444     */
2445     void *pv_cu_chrma_recon;
2446 
2447     /**
2448     * Array to store pred modes  during SATD and RD opt stage at CU level
2449     * one best and one current is required
2450     */
2451     nbr_4x4_t as_cu_recur_nbr[MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
2452 
2453     /**
2454     * Pointer to Array to store pred modes  during SATD and RD opt stage at CU level
2455     * one best and one current is required
2456     */
2457     nbr_4x4_t *ps_cu_recur_nbr;
2458 
2459     /**
2460     * Context models stored for CU recursion parent evaluation
2461     */
2462     UWORD8 au1_rdopt_recur_ctxt_models[4][IHEVC_CAB_CTXT_END];
2463 
2464     ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt;
2465 
2466     /**
2467     * array for storing coeffs during RD opt stage at CU level
2468     * one best and one current is required. Luma and chroma together
2469     */
2470     /*UWORD8 au1_cu_recur_coeffs[MAX_LUMA_COEFFS_CTB + MAX_CHRM_COEFFS_CTB];*/
2471 
2472     UWORD8 *pu1_cu_recur_coeffs;
2473 
2474     UWORD8 *apu1_cu_level_pingpong_coeff_buf_addr[2];
2475 
2476     WORD16 *api2_cu_level_pingpong_deq_buf_addr[2];
2477 
2478     UWORD8 *pu1_ecd_data;
2479 
2480     /* OPT: flag to skip parent CU=4TU eval during recursion */
2481     UWORD8 is_parent_cu_rdopt;
2482 
2483     /**
2484     *   Array of structs containing block merge data for
2485     *   4 32x32 CU's in indices 1 - 4 and 64x64 CU at 0
2486     */
2487     UWORD8 u1_cabac_states_next_row_copied_flag;
2488 
2489     UWORD8 u1_cabac_states_first_cu_copied_flag;
2490 
2491     UWORD32 u4_cur_ctb_wd;
2492 
2493     UWORD32 u4_cur_ctb_ht;
2494 
2495     /* thread id of the current context */
2496     WORD32 thrd_id;
2497 
2498     /** Number of processing threads created run time */
2499     WORD32 i4_num_proc_thrds;
2500 
2501     /* Instance number of bit-rate for multiple bit-rate encode */
2502     WORD32 i4_bitrate_instance_num;
2503 
2504     WORD32 i4_num_bitrates;
2505 
2506     WORD32 i4_enc_frm_id;
2507 
2508     /* Flag to indicate if chroma needs to be considered for cost calculation */
2509     WORD32 i4_consider_chroma_cost;
2510 
2511     /* Number of modes to be evaluated for intra */
2512     WORD32 i4_num_modes_to_evaluate_intra;
2513 
2514     /* Number of modes to be evaluated for inter */
2515     WORD32 i4_num_modes_to_evaluate_inter;
2516     /*pointers for struct to hold RC parameters for each bit-rate instance */
2517     enc_loop_rc_params_t
2518         *aaps_enc_loop_rc_params[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2519 
2520     /** Pointer to structure containing function pointers of common*/
2521     func_selector_t *ps_func_selector;
2522 
2523     /* Flag to control Top Right Sync for during Merge */
2524     UWORD8 u1_use_top_at_ctb_boundary;
2525 
2526     UWORD8 u1_is_input_data_hbd;
2527 
2528     UWORD8 u1_bit_depth;
2529 
2530     /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
2531     UWORD8 u1_chroma_array_type;
2532 
2533     rc_quant_t *ps_rc_quant_ctxt;
2534 
2535     sao_ctxt_t s_sao_ctxt_t;
2536 
2537     /* Offset to get the Qp for the last CU of upper CTB-row.
2538     This offset is from the current tile top row QP map start.
2539     This will only be consumed by the first CU of current CTB-row
2540     iff [it is skip && entropy sync is off] */
2541     WORD32 *pi4_offset_for_last_cu_qp;
2542 
2543     double i4_lamda_modifier;
2544     double i4_uv_lamda_modifier;
2545     WORD32 i4_temporal_layer_id;
2546 
2547     UWORD8 u1_disable_intra_eval;
2548 
2549     WORD32 i4_quant_round_tu[2][32 * 32];
2550 
2551     WORD32 *pi4_quant_round_factor_tu_0_1[5];
2552     WORD32 *pi4_quant_round_factor_tu_1_2[5];
2553 
2554     WORD32 i4_quant_round_4x4[2][4 * 4];
2555     WORD32 i4_quant_round_8x8[2][8 * 8];
2556     WORD32 i4_quant_round_16x16[2][16 * 16];
2557     WORD32 i4_quant_round_32x32[2][32 * 32];
2558 
2559     WORD32 *pi4_quant_round_factor_cu_ctb_0_1[5];
2560     WORD32 *pi4_quant_round_factor_cu_ctb_1_2[5];
2561 
2562     WORD32 i4_quant_round_cr_4x4[2][4 * 4];
2563     WORD32 i4_quant_round_cr_8x8[2][8 * 8];
2564     WORD32 i4_quant_round_cr_16x16[2][16 * 16];
2565 
2566     WORD32 *pi4_quant_round_factor_cr_cu_ctb_0_1[3];
2567     WORD32 *pi4_quant_round_factor_cr_cu_ctb_1_2[3];
2568     /* cost for not coding cu residue i.e forcing no residue syntax as 1 */
2569     LWORD64 i8_cu_not_coded_cost;
2570 
2571     /* dependency manager for forward ME  sync */
2572     void *pv_dep_mngr_encloop_dep_me;
2573 
2574     LWORD64 ai4_source_satd_8x8[64];
2575 
2576     LWORD64 ai4_source_chroma_satd[256];
2577 
2578     UWORD8 u1_is_refPic;
2579 
2580     WORD32 i4_qp_mod;
2581 
2582     WORD32 i4_is_ref_pic;
2583 
2584     WORD32 i4_chroma_format;
2585 
2586     WORD32 i4_temporal_layer;
2587 
2588     WORD32 i4_use_const_lamda_modifier;
2589 
2590     double f_i_pic_lamda_modifier;
2591 
2592     LWORD64 i8_distortion;
2593 
2594     WORD32 i4_use_ctb_level_lamda;
2595 
2596     float f_str_ratio;
2597 
2598     /* Flag to indicate if current frame is to be shared with other clients.
2599     Used only in distributed-encoding */
2600     WORD32 i4_share_flag;
2601 
2602     /* Pointer to the current recon being processed.
2603     Needed for enabling TMVP in dist-encoding */
2604     void *pv_frm_recon;
2605 
2606     ihevce_cmn_opt_func_t s_cmn_opt_func;
2607 
2608     /* The ME analogue to the struct above was not included since */
2609     /* that would have entailed inclusion of all ME specific */
2610     /* header files */
2611     /*FT_SAD_EVALUATOR **/
2612 
2613     /*FT_SAD_EVALUATOR **/
2614     void *pv_evalsad_pt_npu_mxn_8bit;
2615     UWORD8 u1_enable_psyRDOPT;
2616 
2617     UWORD8 u1_is_stasino_enabled;
2618 
2619     UWORD32 u4_psy_strength;
2620     /*Sub PIC rc context */
2621 
2622     WORD32 i4_sub_pic_level_rc;
2623     WORD32 i4_num_ctb_for_out_scale;
2624 
2625     /**
2626      * Accumalated bits of all cu for required CTBS estimated during RDO evaluation.
2627      * Required for sup pic level RC. Reset when required CU/CTB count is reached.
2628      */
2629     UWORD32 u4_total_cu_bits;
2630 
2631     UWORD32 u4_total_cu_bits_mul_qs;
2632 
2633     UWORD32 u4_total_cu_hdr_bits;
2634 
2635     UWORD32 u4_cu_tot_bits_into_qscale;
2636 
2637     UWORD32 u4_cu_tot_bits;
2638 
2639     /*Scale added to the current qscale, output from sub pic rc*/
2640     WORD32 i4_cu_qp_sub_pic_rc;
2641 
2642     /*Frame level L1 IPE sad*/
2643     LWORD64 i8_frame_l1_ipe_sad;
2644 
2645     /*Frame level L0 IPE satd*/
2646     LWORD64 i8_frame_l0_ipe_satd;
2647 
2648     /*Frame level L1 ME sad*/
2649     LWORD64 i8_frame_l1_me_sad;
2650 
2651     /*Frame level L1 activity factor*/
2652     LWORD64 i8_frame_l1_activity_fact;
    /* Bits estimated for the frame, calculated for sub pic rc bit control */
2654     WORD32 ai4_frame_bits_estimated[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2655     /** I Scene cut */
2656     WORD32 i4_is_I_scenecut;
2657 
2658     /** Non Scene cut */
2659     WORD32 i4_is_non_I_scenecut;
2660 
2661     /** Frames for which online/offline model is not valid */
2662     WORD32 i4_is_model_valid;
2663 
2664     /** Steady State Frame */
2665     //WORD32 i4_is_steady_state;
2666 
2667     WORD32 i4_is_first_query;
2668 
2669     /* Pointer to Tile params base */
2670     void *pv_tile_params_base;
2671 
2672     /** The index of column tile for which it is working */
2673     WORD32 i4_tile_col_idx;
2674 
2675     WORD32 i4_max_search_range_horizontal;
2676 
2677     WORD32 i4_max_search_range_vertical;
2678 
2679     WORD32 i4_is_ctb_qp_modified;
2680 
2681     WORD32 i4_display_num;
2682 
2683     WORD32 i4_pred_qp;
2684 
2685     /*assumption of qg size is 8x8 block size*/
2686     WORD32 ai4_qp_qg[8 * 8];
2687 
2688     WORD32 i4_last_cu_qp_from_prev_ctb;
2689 
2690     WORD32 i4_prev_QP;
2691 
2692     UWORD8 u1_max_inter_tr_depth;
2693 
2694     UWORD8 u1_max_intra_tr_depth;
2695 
2696 } ihevce_enc_loop_ctxt_t;
2697 
2698 /*****************************************************************************/
2699 /* Enums                                                                     */
2700 /*****************************************************************************/
2701 
/** @brief RDOQ_LEVELS_T: selects the RDOQ mode of operation
*
*  NO_RDOQ        : RDOQ is not performed
*  BEST_CAND_RDOQ : RDOQ is performed for the final candidate only
*  ALL_CAND_RDOQ  : RDOQ is performed for all candidates
*
*  Enumerator values are spelled out; they match the implicit numbering.
*/
typedef enum
{
    NO_RDOQ = 0,
    BEST_CAND_RDOQ = 1,
    ALL_CAND_RDOQ = 2
} RDOQ_LEVELS_T;
2714 
/** @brief QUANT_ROUNDING_COEFF_LEVELS_T: selects the level at which the
*         coefficient quant rounding values are obtained
*
*  FIXED_QUANT_ROUNDING       : Fixed quant rounding values are used
*  NCTB_LEVEL_QUANT_ROUNDING  : NCTB level computed quant rounding values are used
*  CTB_LEVEL_QUANT_ROUNDING   : CTB level computed quant rounding values are used
*  CU_LEVEL_QUANT_ROUNDING    : CU level computed quant rounding values are used
*  TU_LEVEL_QUANT_ROUNDING    : TU level computed quant rounding values are used
*  CHROMA_QUANT_ROUNDING      : NOTE(review): not described by the original
*                               comment; presumably chroma specific rounding -
*                               confirm against the users of this value
*
*  Default is for all candidates; based on RDOQ_LEVELS_T it can be limited
*  to the best candidate.
*
*  Enumerator values are spelled out; they match the implicit numbering.
*/
typedef enum
{
    FIXED_QUANT_ROUNDING = 0,
    NCTB_LEVEL_QUANT_ROUNDING = 1,
    CTB_LEVEL_QUANT_ROUNDING = 2,
    CU_LEVEL_QUANT_ROUNDING = 3,
    TU_LEVEL_QUANT_ROUNDING = 4,
    CHROMA_QUANT_ROUNDING = 5
} QUANT_ROUNDING_COEFF_LEVELS_T;
2733 
2734 /*****************************************************************************/
2735 /* Enums                                                                     */
2736 /*****************************************************************************/
2737 
/** @brief SBH_LEVELS_T: selects the SBH mode of operation
*
*  NO_SBH        : SBH is not performed
*  BEST_CAND_SBH : SBH is performed for the final candidate only
*  ALL_CAND_SBH  : SBH is performed for all candidates
*
*  Enumerator values are spelled out; they match the implicit numbering.
*/
typedef enum
{
    NO_SBH = 0,
    BEST_CAND_SBH = 1,
    ALL_CAND_SBH = 2
} SBH_LEVELS_T;
2750 
/** @brief ZCBF_LEVELS_T: selects the ZeroCBF RDO mode of operation
*
*  NO_ZCBF     : ZCBF RDO is not performed
*  ZCBF_ENABLE : ZCBF RDO is performed for all candidates
*
*  Enumerator values are spelled out; they match the implicit numbering.
*/
typedef enum
{
    NO_ZCBF = 0,
    ZCBF_ENABLE = 1
} ZCBF_LEVELS_T;
2761 
2762 /**
2763 ******************************************************************************
2764 *  @brief  Encode loop master context structure
2765 ******************************************************************************
2766 */
2767 typedef struct
2768 {
2769     /** Array of encode loop structure */
2770     ihevce_enc_loop_ctxt_t *aps_enc_loop_thrd_ctxt[MAX_NUM_FRM_PROC_THRDS_ENC];
2771 
2772     /** Number of processing threads created run time */
2773     WORD32 i4_num_proc_thrds;
2774 
2775     /**
2776     *  Array of top row cu skip flags (1 bit per 8x8CU)
2777     */
2778     UWORD8 au1_cu_skip_top_row[HEVCE_MAX_WIDTH >> 6];
2779 
2780     /** Context models stored at the end of second CTB in a row)
2781     *  stored in packed form pState[bits6-1] | MPS[bit0]
2782     *  for each CTB row
2783     *  using entropy sync model in RD opt
2784     */
2785     UWORD8 au1_ctxt_models[MAX_NUM_CTB_ROWS_FRM][IHEVC_CAB_CTXT_END];
2786 
2787     /** Dependency manager for controlling EncLoop Top-Right CU dependency
2788     * One per each bit-rate and one per each frame in parallel
2789     */
2790     void *aapv_dep_mngr_enc_loop_cu_top_right[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2791 
2792     /** Dependency manager for controlling Deblocking Top dependency
2793     * One per each bit-rate and one per each frame in parallel
2794     */
2795     void *aapv_dep_mngr_enc_loop_dblk[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2796 
2797     /** number of bit-rate instances running */
2798     WORD32 i4_num_bitrates;
2799 
2800     /** number of enc frames running in parallel */
2801     WORD32 i4_num_enc_loop_frm_pllel;
2802 
2803     /* Pointer to Tile params base */
2804     void *pv_tile_params_base;
2805     /* Offset to get the Qp for the last CU of upper CTB-row.
2806     This offset is from the current tile top row QP map start.
2807 
2808     This will only be consumed by the first CU of current CTB-row
2809     iff [it is skip && entropy sync is off]
2810     There is one entry of every tile-column bcoz offset remains constant
2811     for all tiles lying in a tile-column */
2812     WORD32 ai4_offset_for_last_cu_qp[MAX_TILE_COLUMNS];
2813 } ihevce_enc_loop_master_ctxt_t;
2814 
2815 /**
2816 ******************************************************************************
2817 *  @brief  This struct is used for storing data required by the block merge
2818 *          function
2819 ******************************************************************************
2820 */
2821 typedef struct
2822 {
2823     block_data_8x8_t *ps_8x8_data;
2824 
2825     block_data_16x16_t *ps_16x16_data;
2826 
2827     block_data_32x32_t *ps_32x32_data;
2828 
2829     block_data_64x64_t *ps_64x64_data;
2830 
2831     part_type_results_t **ps_32x32_results;
2832 
2833     cur_ctb_cu_tree_t *ps_cu_tree;
2834 
2835     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
2836 
2837     mv_pred_ctxt_t *ps_mv_pred_ctxt;
2838 
2839     recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2];
2840 
2841     nbr_4x4_t *ps_top_nbr_4x4;
2842 
2843     nbr_4x4_t *ps_left_nbr_4x4;
2844 
2845     nbr_4x4_t *ps_curr_nbr_4x4;
2846 
2847     UWORD8 *pu1_inp;
2848 
2849     UWORD8 *pu1_ctb_nbr_map;
2850 
2851     WORD32 i4_nbr_map_strd;
2852 
2853     WORD32 inp_stride;
2854 
2855     WORD32 i4_ctb_x_off;
2856 
2857     WORD32 i4_ctb_y_off;
2858 
2859     WORD32 use_satd_for_err_calc;
2860 
2861     WORD32 lambda;
2862 
2863     WORD32 lambda_q_shift;
2864 
2865     WORD32 frm_qstep;
2866 
2867     WORD32 num_4x4_in_ctb;
2868 
2869     UWORD8 *pu1_wkg_mem;
2870 
2871     UWORD8 **ppu1_pred;
2872 
2873     UWORD8 u1_bidir_enabled;
2874 
2875     UWORD8 u1_max_tr_depth;
2876 
2877     WORD32 i4_ctb_pos;
2878 
2879     WORD32 i4_ctb_size;
2880 
2881     UWORD8 *apu1_wt_inp[MAX_REFS_SEARCHABLE + 1];
2882 
2883     /** Pointer of Dep Mngr for EncLoop Top-Right CU dependency */
2884     void *pv_dep_mngr_enc_loop_cu_top_right;
2885     /** The current cu row no. for Dep Manager to Check */
2886     WORD32 i4_dep_mngr_cur_cu_row_no;
2887     /** The Top cu row no. for Dep Manager to Check */
2888     WORD32 i4_dep_mngr_top_cu_row_no;
2889 
2890     WORD8 i1_quality_preset;
2891 
2892     /* Flag to control Top Right Sync for during Merge */
2893     UWORD8 u1_use_top_at_ctb_boundary;
2894 
2895 } block_merge_input_t;
2896 
/* Structure which stores the info regarding the TUs present in the CU. */
/* Field comments marked "presumably" are inferred from names only. */
typedef struct tu_prms_t
{
    /* presumably the TU size - confirm whether in pixels or log2 */
    UWORD8 u1_tu_size;

    /* presumably the x offset of the TU - confirm reference point and units */
    UWORD8 u1_x_off;

    /* presumably the y offset of the TU - confirm reference point and units */
    UWORD8 u1_y_off;

    /* presumably the cost associated with this TU - confirm */
    WORD32 i4_tu_cost;

    /* presumably an early coded-block-flag estimate - confirm */
    WORD32 i4_early_cbf;

} tu_prms_t;
2911 
/* Bundle of pointers passed to a CU final update step. */
/* NOTE(review): the first four members are pointers to pointers, which */
/* suggests the callee advances the caller's buffer cursors - confirm */
/* against the function that consumes this struct. */
typedef struct
{
    /* Pointer to a cu_enc_loop_out_t pointer */
    cu_enc_loop_out_t **pps_cu_final;

    /* Pointer to a pu_t pointer; presumably the row PU buffer - confirm */
    pu_t **pps_row_pu;

    /* Pointer to a tu_enc_loop_out_t pointer; presumably the row TU buffer - confirm */
    tu_enc_loop_out_t **pps_row_tu;

    /* Pointer to a byte pointer; presumably the row ecd data buffer - confirm */
    UWORD8 **ppu1_row_ecd_data;

    /* Pointer to a WORD32; presumably the number of PUs in the CTB - confirm */
    WORD32 *pi4_num_pus_in_ctb;

    /* Pointer to a WORD32; presumably the position of the last CU in the CTB - confirm */
    WORD32 *pi4_last_cu_pos_in_ctb;

    /* Pointer to a WORD32; presumably the size of the last CU - confirm */
    WORD32 *pi4_last_cu_size;

    /* Pointer to a byte; presumably the output count of CUs in the CTB - confirm */
    UWORD8 *pu1_num_cus_in_ctb_out;

} cu_final_update_prms;
2931 
/* Parameter bundle taken by functions of type pf_final_rdopt_mode_prcs. */
/* NOTE: the struct layout depends on */
/* USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS (see the last member), */
/* so every translation unit using this type must see the same value. */
/* Field comments marked "presumably" are inferred from names only. */
typedef struct
{
    /* Pointer to CU neighbour parameters */
    cu_nbr_prms_t *ps_cu_nbr_prms;

    /* Pointer to a cu_inter_cand_t; presumably the best inter candidate - confirm */
    cu_inter_cand_t *ps_best_inter_cand;

    /* Pointer to chroma CU buffer parameters */
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms;

    /* presumably the prediction mode in a packed encoding - confirm format */
    WORD32 packed_pred_mode;

    /* presumably the index of the best RD opt candidate - confirm */
    WORD32 rd_opt_best_idx;

    /* Untyped source pointer and its stride (element type not visible here) */
    void *pv_src;

    WORD32 src_strd;

    /* Untyped prediction pointer and its stride; presumably luma - confirm */
    void *pv_pred;

    WORD32 pred_strd;

    /* Untyped chroma prediction pointer and its stride */
    void *pv_pred_chrm;

    WORD32 pred_chrm_strd;

    /* Byte pointer; presumably the final ecd data buffer - confirm */
    UWORD8 *pu1_final_ecd_data;

    /* Byte pointer and stride; presumably the csbf buffer - confirm */
    UWORD8 *pu1_csbf_buf;

    WORD32 csbf_strd;

    /* Untyped luma recon pointer and its stride */
    void *pv_luma_recon;

    WORD32 recon_luma_strd;

    /* Untyped chroma recon pointer and its stride */
    void *pv_chrm_recon;

    WORD32 recon_chrma_strd;

    /* presumably the CU position (x, y) and size - confirm units */
    UWORD8 u1_cu_pos_x;

    UWORD8 u1_cu_pos_y;

    UWORD8 u1_cu_size;

    /* presumably the QP of the CU (signed 8-bit) - confirm */
    WORD8 i1_cu_qp;

    /* presumably a flag: the cabac state will change - confirm */
    UWORD8 u1_will_cabac_state_change;

    /* presumably a flag: recompute SBH and RDOQ - confirm */
    UWORD8 u1_recompute_sbh_and_rdoq;

    /* presumably a flag: this is the first pass - confirm */
    UWORD8 u1_is_first_pass;

    /* Member exists only when the macro below evaluates to non-zero */
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
    UWORD8 u1_is_cu_noisy;
#endif

} final_mode_process_prms_t;
2989 
/* State buffer holding one inter candidate plus luma and chroma prediction */
/* storage. Passed by pointer to functions of type pf_cu_mode_decide and */
/* pf_store_cu_results. */
typedef struct
{
    /* Inter candidate stored by value; presumably the best one found - confirm */
    cu_inter_cand_t s_best_cand;

    /* The size is twice what is required, to ensure availability */
    /* of adequate space for the 'HBD' case */
    UWORD8 au1_pred_luma[MAX_CU_SIZE * MAX_CU_SIZE * 2];

    /* The size is twice what is required, to ensure availability */
    /* of adequate space for the 422 case */
    UWORD8 au1_pred_chroma[MAX_CU_SIZE * MAX_CU_SIZE * 2];
} final_mode_state_t;
3002 
/* Parameter bundle for the mixed inter modes selector. */
/* Field comments marked "presumably" are inferred from names only and */
/* should be confirmed against the selector implementation. */
typedef struct
{
    /* Pointer to cu_mixed_mode_inter_t storage; presumably where selected mixed modes are kept - confirm */
    cu_mixed_mode_inter_t *ps_mixed_modes_datastore;

    /* Pointer to inter candidates; presumably from ME, count in u1_num_me_cands - confirm */
    cu_inter_cand_t *ps_me_cands;

    /* Pointer to inter candidates; presumably merge ones, count in u1_num_merge_cands - confirm */
    cu_inter_cand_t *ps_merge_cands;

    /* Pointer to the MV prediction context */
    mv_pred_ctxt_t *ps_mv_pred_ctxt;

    /* Pointer to an inter_pred_ctxt_t; presumably the motion compensation context - confirm */
    inter_pred_ctxt_t *ps_mc_ctxt;

    /* Byte pointer; presumably the CTB neighbour map, stride in i4_ctb_nbr_map_stride - confirm */
    UWORD8 *pu1_ctb_nbr_map;

    /* Untyped source pointer; presumably strided by i4_src_strd - confirm */
    void *pv_src;

    /* Pointers to nbr_4x4_t neighbour data; presumably CU buffer, left, top, top-left - confirm */
    nbr_4x4_t *ps_cu_nbr_buf;

    nbr_4x4_t *ps_left_nbr_4x4;

    nbr_4x4_t *ps_top_nbr_4x4;

    nbr_4x4_t *ps_topleft_nbr_4x4;

    WORD32 i4_ctb_nbr_map_stride;

    WORD32 i4_src_strd;

    /* presumably the stride of ps_left_nbr_4x4 - confirm */
    WORD32 i4_nbr_4x4_left_strd;

    /* presumably the CU size and position (x, y) - confirm units */
    UWORD8 u1_cu_size;

    UWORD8 u1_cu_pos_x;

    UWORD8 u1_cu_pos_y;

    UWORD8 u1_num_me_cands;

    UWORD8 u1_num_merge_cands;

    /* presumably an upper bound on the mixed mode candidates to select - confirm */
    UWORD8 u1_max_num_mixed_mode_cands_to_select;

    /* presumably an upper bound on the merge candidates - confirm */
    UWORD8 u1_max_merge_candidates;

    /* presumably a flag: use SATD for merge evaluation - confirm */
    UWORD8 u1_use_satd_for_merge_eval;

} ihevce_mixed_inter_modes_selector_prms_t;
3050 
/* Per-component data stored in a TU tree node (see tu_tree_node_t, which */
/* holds one instance for luma and two each for cb and cr). */
/* NOTE: the struct layout depends on ENABLE_INTER_ZCU_COST, so every */
/* translation unit using this type must see the same value. */
/* Field comments marked "presumably" are inferred from names only. */
typedef struct
{
    /* presumably the sum of squared differences - confirm */
    LWORD64 i8_ssd;

    /* presumably the cost of this node - confirm */
    LWORD64 i8_cost;

    /* Member exists only when the macro below evaluates to non-zero */
#if ENABLE_INTER_ZCU_COST
    LWORD64 i8_not_coded_cost;
#endif

    /* presumably the sum of absolute differences - confirm */
    UWORD32 u4_sad;

    /* presumably the bit estimate - confirm */
    WORD32 i4_bits;

    /* presumably the number of bytes used for ecd data - confirm */
    WORD32 i4_num_bytes_used_for_ecd;

    /* presumably zero column / zero row information - confirm encoding */
    WORD32 i4_zero_col;

    WORD32 i4_zero_row;

    /* presumably the coded block flag - confirm */
    UWORD8 u1_cbf;

    /* presumably the id of the recon buffer used - confirm */
    UWORD8 u1_reconBufId;

    /* presumably a flag: this data is valid - confirm */
    UWORD8 u1_is_valid_node;

    /* presumably the size and position (x, y) of the TU - confirm units */
    UWORD8 u1_size;

    UWORD8 u1_posx;

    UWORD8 u1_posy;
} tu_node_data_t;
3083 
/* Node of a TU tree: four child pointers plus per-component data. */
/* The struct tag is needed because the node refers to itself. */
typedef struct tu_tree_node_t
{
    /* Child nodes; the tl / tr / bl / br suffixes presumably mean top-left, */
    /* top-right, bottom-left and bottom-right - confirm. How a missing */
    /* child is represented is not visible here. */
    struct tu_tree_node_t *ps_child_node_tl;

    struct tu_tree_node_t *ps_child_node_tr;

    struct tu_tree_node_t *ps_child_node_bl;

    struct tu_tree_node_t *ps_child_node_br;

    /* Luma data of this node */
    tu_node_data_t s_luma_data;

    /* 2 because of the 2 subTU's when input is 422 */
    tu_node_data_t as_cb_data[2];

    tu_node_data_t as_cr_data[2];

    /* Node level validity flag; each tu_node_data_t also carries its own */
    /* u1_is_valid_node member */
    UWORD8 u1_is_valid_node;

} tu_tree_node_t;
3104 
3105 /*****************************************************************************/
3106 /* Extern Variable Declarations                                              */
3107 /*****************************************************************************/
3108 
3109 /*****************************************************************************/
3110 /* Extern Function Declarations                                              */
3111 /*****************************************************************************/
3112 
3113 /*****************************************************************************/
3114 /* Typedefs                                                                  */
3115 /*****************************************************************************/
/* Function pointer type for a CU mode decide routine. */
/* Takes the encode loop context, CU parameters, CU analyse data, a final */
/* mode state, an ecd data byte pointer and collocated PU inputs (pointer, */
/* map and start index); returns an LWORD64. */
/* NOTE(review): the return value is presumably a cost - confirm against */
/* the functions assigned to this type. */
typedef LWORD64 (*pf_cu_mode_decide)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_cu_prms_t *ps_cu_prms,
    cu_analyse_t *ps_cu_analyse,
    final_mode_state_t *ps_final_mode_state,
    UWORD8 *pu1_ecd_data,
    pu_col_mv_t *ps_col_pu,
    UWORD8 *pu1_col_pu_map,
    WORD32 col_start_pu_idx);
3125 
/* Function pointer type for an inter RD opt routine; the name suggests it */
/* covers MC and MVP for a CU. */
/* Takes the encode loop context, one inter candidate, the CU size and */
/* position, left / top / top-left 4x4 neighbour pointers with the left */
/* stride, and a current buffer index; returns an LWORD64. */
/* NOTE(review): the return value is presumably a cost - confirm. */
typedef LWORD64 (*pf_inter_rdopt_cu_mc_mvp)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    cu_inter_cand_t *ps_inter_cand,
    WORD32 cu_size,
    WORD32 cu_pos_x,
    WORD32 cu_pos_y,
    nbr_4x4_t *ps_left_nbr_4x4,
    nbr_4x4_t *ps_top_nbr_4x4,
    nbr_4x4_t *ps_topleft_nbr_4x4,
    WORD32 nbr_4x4_left_strd,
    WORD32 curr_buf_idx);
3137 
/* Function pointer type for an inter RD opt routine on a CU ("ntu" in the */
/* name is not explained in this header). */
/* Takes the encode loop context, CU parameters, an untyped source pointer, */
/* the CU size and position, a current buffer index, chroma CU buffer */
/* parameters, one inter candidate, CU analyse data and an alpha stim */
/* multiplier; returns an LWORD64. */
/* NOTE(review): the return value is presumably a cost - confirm. */
typedef LWORD64 (*pf_inter_rdopt_cu_ntu)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_cu_prms_t *ps_cu_prms,
    void *pv_src,
    WORD32 cu_size,
    WORD32 cu_pos_x,
    WORD32 cu_pos_y,
    WORD32 curr_buf_idx,
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
    cu_inter_cand_t *ps_inter_cand,
    cu_analyse_t *ps_cu_analyse,
    WORD32 i4_alpha_stim_multiplier);
3150 
/* Function pointer type for an intra chroma prediction mode selector. */
/* Takes the encode loop context, chroma CU buffer parameters, CU analyse */
/* data, the current RD opt index, a TU mode, an alpha stim multiplier and */
/* a "CU is noisy" byte; returns nothing. */
/* NOTE(review): where the selected mode is written is not visible from */
/* the signature - confirm against the implementation. */
typedef void (*pf_intra_chroma_pred_mode_selector)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
    cu_analyse_t *ps_cu_analyse,
    WORD32 rd_opt_curr_idx,
    WORD32 tu_mode,
    WORD32 i4_alpha_stim_multiplier,
    UWORD8 u1_is_cu_noisy);
3159 
/* Function pointer type for an intra RD opt routine on a CU ("ntu" in the */
/* name is not explained in this header). */
/* Takes the encode loop context, CU parameters, an untyped prediction */
/* pointer with stride, chroma CU buffer parameters, a luma mode byte */
/* pointer, CU analyse data, untyped source / left / top / top-left */
/* pointers, left and top 4x4 neighbour pointers with strides, a current */
/* buffer index, a processing mode and an alpha stim multiplier; returns */
/* an LWORD64. */
/* NOTE(review): the return value is presumably a cost - confirm. */
typedef LWORD64 (*pf_intra_rdopt_cu_ntu)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_cu_prms_t *ps_cu_prms,
    void *pv_pred_org,
    WORD32 pred_strd_org,
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
    UWORD8 *pu1_luma_mode,
    cu_analyse_t *ps_cu_analyse,
    void *pv_curr_src,
    void *pv_cu_left,
    void *pv_cu_top,
    void *pv_cu_top_left,
    nbr_4x4_t *ps_left_nbr_4x4,
    nbr_4x4_t *ps_top_nbr_4x4,
    WORD32 nbr_4x4_left_strd,
    WORD32 cu_left_stride,
    WORD32 curr_buf_idx,
    WORD32 func_proc_mode,
    WORD32 i4_alpha_stim_multiplier);
3179 
/* Function pointer type for the final RD opt mode processing routine. */
/* Takes the encode loop context and a final_mode_process_prms_t bundle; */
/* returns nothing. */
typedef void (*pf_final_rdopt_mode_prcs)(
    ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms);
3182 
/* Function pointer type for a routine that stores CU results. */
/* Takes the encode loop context, CU parameters and a final mode state; */
/* returns nothing. */
typedef void (*pf_store_cu_results)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_cu_prms_t *ps_cu_prms,
    final_mode_state_t *ps_final_state);
3187 
/* Function pointer type for an encode loop "CU bottom copy" routine. */
/* Takes the encode loop context, CU parameters, a CU node context and the */
/* current CU position in the row and in the CTB; returns nothing. */
/* NOTE(review): what is copied and where is not visible from the */
/* signature - confirm against the implementation. */
typedef void (*pf_enc_loop_cu_bot_copy)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_cu_prms_t *ps_cu_prms,
    ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
    WORD32 curr_cu_pos_in_row,
    WORD32 curr_cu_pos_in_ctb);
3194 
/* Function pointer type for an encode loop "CTB left copy" routine. */
/* Takes the encode loop context and CU parameters; returns nothing. */
/* NOTE(review): what is copied and where is not visible from the */
/* signature - confirm against the implementation. */
typedef void (*pf_enc_loop_ctb_left_copy)(
    ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms);
3197 
3198 #endif /* _IHEVCE_ENC_LOOP_STRUCTS_H_ */
3199