1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /*!
21 ******************************************************************************
22 * \file ihevce_enc_loop_structs.h
23 *
24 * \brief
*    This file contains structures of the enc_loop pass
26 *
27 * \date
28 *    18/09/2012
29 *
30 * \author
31 *    Ittiam
32 *
33 ******************************************************************************
34 */
35 
36 #ifndef _IHEVCE_ENC_LOOP_STRUCTS_H_
37 #define _IHEVCE_ENC_LOOP_STRUCTS_H_
38 
39 #include "ihevc_macros.h"
40 
/* Lookup table with 64 * 2 UWORD16 entries; only declared here, the definition
 * lives in another translation unit. COMPUTE_MERGE_IDX_COST in this header
 * indexes it with (context model ^ bin) and adds the entry to a Q12 bit count. */
extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
42 
43 /*****************************************************************************/
44 /* Constant Macros                                                           */
45 /*****************************************************************************/
/** @brief Number of transform types: 4x4 DST, 4x4, 8x8, 16x16, 32x32 */
#define NUM_TRANS_TYPES 5
/** @brief Mode numbers assigned to the planar and DC intra modes */
#define INTRA_PLANAR 0
#define INTRA_DC 1
#define NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD 2
#define MAX_TU_IN_TU_EQ_DIV_2 4
#define MAX_MVP_LIST_CAND 2
/* NOTE(review): 0x7ffffff has seven hex digits (2^27 - 1), one digit fewer than
 * INT32_MAX (0x7fffffff) - confirm that the lower ceiling is intentional */
#define MAX_COST 0x7ffffff
/* NOTE(review): likewise appears to be one hex digit shorter than INT64_MAX -
 * confirm that this is intentional */
#define MAX_COST_64 0x7ffffffffffffff
#define NUM_32CU_AND_64CU_IN_CTB 5 /* 4 - 32x32 + 1 64x64*/
#define PING_PONG 2
#define MAX_SAO_RD_CAND 10
#define SCRATCH_BUF_STRIDE 80
59 
60 /*****************************************************************************/
61 /* Function Macros                                                           */
62 /*****************************************************************************/
/** @brief Identity mapping: INTRA_ANGULAR(x) expands to (x), i.e. an angular
 * intra mode is denoted by its own number */
#define INTRA_ANGULAR(x) (x)
64 
/** @brief max 30bit value */
#define MAX30 ((1 << 30) - 1)

/** @brief macro to clip a data to max of 30bits (assuming unsigned).
 *  Note: the argument x is evaluated twice. */
#define CLIP30(x) ((x) > MAX30 ? MAX30 : (x))

/**
 * @brief compute (lambda * rate) >> qshift and clip the result to 30 bits
 *
 * The rate is widened to ULWORD64 BEFORE the multiplication. Casting only the
 * finished product (as was done previously) lets two 32-bit operands overflow
 * in 32-bit arithmetic before the widening takes place. For operands whose
 * product was already computed in 64 bits the result is unchanged, because the
 * arithmetic is performed modulo 2^64 in both forms.
 *
 * @param r       rate (bits, possibly fixed point)
 * @param l       lambda
 * @param qshift  right shift applied to the product
 * @return the shifted product as WORD32, saturated at MAX30
 */
#define COMPUTE_RATE_COST_CLIP30(r, l, qshift)                                                     \
    ((WORD32)CLIP30((ULWORD64)((ULWORD64)(r) * (l)) >> (qshift)))
73 
/**
 * @brief Evaluates ((((inp - off) << shift) * wt) + (1 << 14)) >> 15
 *
 * Every argument, including wt, is parenthesised so that compound expressions
 * (for example a + b passed as wt) are multiplied as a whole.
 *
 * @param inp    input sample
 * @param wt     weight; the + (1 << 14) and >> 15 imply a Q15 rounding step
 * @param off    offset subtracted from the input
 * @param shift  left shift applied to (inp - off)
 */
#define IHEVCE_INV_WT_PRED(inp, wt, off, shift)                                                    \
    (((((inp) - (off)) << (shift)) * (wt) + (1 << 14)) >> 15)
76 
/**
 * @brief Fills the structure pointed to by ps_pu for a non-intra PU that uses
 *        a single reference list.
 *
 * offset_x / offset_y are stored divided by 4; wd / ht are stored divided by 4
 * minus one. The intra flag is cleared and b2_pred_mode is set to pred_lx.
 * When pred_lx is zero, ref_idx and the MV go to the L0 fields and the L1
 * ref_idx becomes -1; otherwise they go to the L1 fields and the L0 ref_idx
 * becomes -1. The MV of the other list is not written. pred_lx is evaluated
 * twice.
 */
#define POPULATE_PU_STRUCT(ps_pu, mvx, mvy, offset_x, offset_y, wd, ht, ref_idx, pred_lx)          \
    {                                                                                              \
        /* position and size are kept in units of 4 pixels (size minus one) */                     \
        (ps_pu)->b4_pos_x = (offset_x) >> 2;                                                       \
        (ps_pu)->b4_pos_y = (offset_y) >> 2;                                                       \
        (ps_pu)->b4_wd = ((wd) >> 2) - 1;                                                          \
        (ps_pu)->b4_ht = ((ht) >> 2) - 1;                                                          \
        (ps_pu)->b1_intra_flag = 0;                                                                \
        (ps_pu)->b2_pred_mode = pred_lx;                                                           \
        if(!(pred_lx))                                                                             \
        {                                                                                          \
            /* L0 prediction: L1 reference is marked invalid */                                    \
            (ps_pu)->mv.i1_l0_ref_idx = ref_idx;                                                   \
            (ps_pu)->mv.i1_l1_ref_idx = -1;                                                        \
            (ps_pu)->mv.s_l0_mv.i2_mvx = mvx;                                                      \
            (ps_pu)->mv.s_l0_mv.i2_mvy = mvy;                                                      \
        }                                                                                          \
        else                                                                                       \
        {                                                                                          \
            /* L1 prediction: L0 reference is marked invalid */                                    \
            (ps_pu)->mv.i1_l0_ref_idx = -1;                                                        \
            (ps_pu)->mv.i1_l1_ref_idx = ref_idx;                                                   \
            (ps_pu)->mv.s_l1_mv.i2_mvx = mvx;                                                      \
            (ps_pu)->mv.s_l1_mv.i2_mvy = mvy;                                                      \
        }                                                                                          \
    }
100 
/**
 * @brief Derives an integer quantiser step from a frame QP.
 *
 * frame_qstep = (WORD32)(2^(frame_qp / 6) * table[frame_qp % 6]), where the
 * table holds the six fractional steps listed below. The result is truncated
 * towards zero by the cast. frame_qp is evaluated twice and is expected to be
 * non-negative (it is used as an array index and as a shift count).
 */
#define GET_FRAME_QSTEP_FROM_QP(frame_qp, frame_qstep)                                             \
    {                                                                                              \
        /* step multipliers for the six QP values inside one doubling interval */                  \
        const double qstep_frac[6] = { 0.625, 0.703, 0.79, 0.889, 1.0, 1.125 };                    \
                                                                                                   \
        frame_qstep = (WORD32)(qstep_frac[(frame_qp) % 6] * (1 << ((frame_qp) / 6)));              \
    }
107 
/**
 * @brief Resets the merge result bookkeeping in ps_merge_data.
 *
 * - zeroes the L0 and L1 result counts of every partition (TOT_NUM_PARTS)
 * - points aps_pu_results[list][part] at pas_pu_results[list][part]
 * - for each of the MAX_NUM_RESULTS_PER_PART_LIST entries behind that pointer,
 *   sets the total cost to MAX_COST and both reference indices to -1
 *
 * Both arguments are evaluated many times. The macro declares the locals
 * lx, pt and rs, so the arguments must not refer to identifiers of that name.
 */
#define INITIALISE_MERGE_RESULT_STRUCT(ps_merge_data, pas_pu_results)                              \
    {                                                                                              \
        WORD32 lx, pt, rs;                                                                         \
                                                                                                   \
        /* no results are recorded yet for any partition in L0 or L1 */                            \
        for(pt = 0; pt < TOT_NUM_PARTS; pt++)                                                      \
        {                                                                                          \
            (ps_merge_data)->s_pu_results.u1_num_results_per_part_l0[pt] = 0;                      \
            (ps_merge_data)->s_pu_results.u1_num_results_per_part_l1[pt] = 0;                      \
        }                                                                                          \
        for(lx = 0; lx < 2; lx++)                                                                  \
        {                                                                                          \
            for(pt = 0; pt < TOT_NUM_PARTS; pt++)                                                  \
            {                                                                                      \
                (ps_merge_data)->s_pu_results.aps_pu_results[lx][pt] = pas_pu_results[lx][pt];     \
                for(rs = 0; rs < MAX_NUM_RESULTS_PER_PART_LIST; rs++)                              \
                {                                                                                  \
                    /* worst possible cost and no valid reference on either list */                \
                    pas_pu_results[lx][pt][rs].i4_tot_cost = MAX_COST;                             \
                    pas_pu_results[lx][pt][rs].pu.mv.i1_l0_ref_idx = -1;                           \
                    pas_pu_results[lx][pt][rs].pu.mv.i1_l1_ref_idx = -1;                           \
                }                                                                                  \
            }                                                                                      \
        }                                                                                          \
    }
131 
/**
 * @brief Copies per-CTB parameters into the structure pointed to by
 *        ps_common_frm_prms.
 *
 * The opening parenthesis now follows the macro name directly, so this is a
 * function-like macro. Previously white space separated the name from the
 * parenthesis, which made the macro object-like: the intended parameter list
 * was part of the replacement text and the macro could not be invoked.
 * Parameters that would have had the same spelling as a structure member carry
 * an _arg suffix; otherwise the member name itself would be substituted.
 *
 * cu_size is stored as both the input stride and the pred stride and is
 * therefore evaluated twice. pu1_wkg_mem is always set to NULL.
 *
 * The macro declares the locals ctb_i and ctb_j; arguments must not refer to
 * identifiers of that name.
 */
#define POPULATE_CTB_PARAMS(                                                                       \
    ps_common_frm_prms,                                                                            \
    wt_inp_arg,                                                                                    \
    ctb_x_off_arg,                                                                                 \
    ctb_y_off_arg,                                                                                 \
    pred_arg,                                                                                      \
    cu_size,                                                                                       \
    ref_stride,                                                                                    \
    bidir_enabled,                                                                                 \
    num_refs,                                                                                      \
    rec_list_l0_arg,                                                                               \
    rec_list_l1_arg,                                                                               \
    non_wt_inp_arg,                                                                                \
    lambda,                                                                                        \
    lambda_q_shift,                                                                                \
    wpred_log_wdc_arg)                                                                             \
    {                                                                                              \
        WORD32 ctb_i, ctb_j;                                                                       \
        (ps_common_frm_prms)->i4_bidir_enabled = (bidir_enabled);                                  \
        (ps_common_frm_prms)->i4_ctb_x_off = (ctb_x_off_arg);                                      \
        (ps_common_frm_prms)->i4_ctb_y_off = (ctb_y_off_arg);                                      \
        (ps_common_frm_prms)->i4_inp_stride = (cu_size);                                           \
        (ps_common_frm_prms)->i4_lamda = (lambda);                                                 \
        (ps_common_frm_prms)->i4_pred_stride = (cu_size);                                          \
        (ps_common_frm_prms)->i4_rec_stride = (ref_stride);                                        \
        (ps_common_frm_prms)->pps_rec_list_l0 = (rec_list_l0_arg);                                 \
        (ps_common_frm_prms)->pps_rec_list_l1 = (rec_list_l1_arg);                                 \
        (ps_common_frm_prms)->ppu1_pred = (pred_arg);                                              \
        (ps_common_frm_prms)->pu1_non_wt_inp = (non_wt_inp_arg);                                   \
        (ps_common_frm_prms)->pu1_wkg_mem = NULL;                                                  \
        (ps_common_frm_prms)->u1_lamda_qshift = (lambda_q_shift);                                  \
        (ps_common_frm_prms)->u1_num_ref = (num_refs);                                             \
        (ps_common_frm_prms)->wpred_log_wdc = (wpred_log_wdc_arg);                                 \
        /* NOTE(review): every iteration writes the same member, so only the */                    \
        /* element [1][MAX_NUM_REF - 1] survives. This is kept exactly as it */                    \
        /* was written; an indexed destination may have been intended -      */                    \
        /* confirm against the structure definition.                         */                    \
        for(ctb_i = 0; ctb_i < 2; ctb_i++)                                                         \
        {                                                                                          \
            for(ctb_j = 0; ctb_j < MAX_NUM_REF; ctb_j++)                                           \
            {                                                                                      \
                (ps_common_frm_prms)->apu1_wt_inp = (wt_inp_arg)[ctb_i][ctb_j];                    \
            }                                                                                      \
        }                                                                                          \
    }
173 
/**
 * @brief Estimates the rate cost of signalling a merge index.
 *
 * If max_merge_cand is 1 or less nothing is coded and cost is set to 0.
 * Otherwise the Q12 bit count is the table entry
 * gau2_ihevce_cabac_bin_to_bits[merge_idx_0_model ^ (merge_idx > 0)] for the
 * context coded first bin, plus MIN(merge_idx, max_merge_cand - 2) whole bits
 * when max_merge_cand > 2 and merge_idx > 0. The bit count is converted to a
 * cost with COMPUTE_RATE_COST_CLIP30 using a shift of
 * (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q).
 *
 * All arguments are parenthesised inside the expansion so that compound
 * expressions (for example a | b passed as the model) bind as one operand.
 * merge_idx and max_merge_cand are evaluated several times.
 *
 * @param merge_idx_0_model  context model value used to index the bits table
 * @param merge_idx          merge index, asserted to be in [0, max_merge_cand)
 * @param max_merge_cand     number of merge candidates
 * @param lambda             lambda multiplied with the bit count
 * @param cost               lvalue receiving the resulting cost
 */
#define COMPUTE_MERGE_IDX_COST(merge_idx_0_model, merge_idx, max_merge_cand, lambda, cost)         \
    {                                                                                              \
        WORD32 cab_bits_q12 = 0;                                                                   \
                                                                                                   \
        /* sanity checks */                                                                        \
        ASSERT(((merge_idx) >= 0) && ((merge_idx) < (max_merge_cand)));                            \
                                                                                                   \
        /* encode the merge idx only if required */                                                \
        if((max_merge_cand) > 1)                                                                   \
        {                                                                                          \
            WORD32 bin = ((merge_idx) > 0);                                                        \
                                                                                                   \
            /* bits for the context modelled first bin */                                          \
            cab_bits_q12 += gau2_ihevce_cabac_bin_to_bits[(merge_idx_0_model) ^ bin];              \
                                                                                                   \
            /* bits for larger merge idx coded as bypass truncated unary */                        \
            if(((max_merge_cand) > 2) && ((merge_idx) > 0))                                        \
            {                                                                                      \
                cab_bits_q12 += (MIN((merge_idx), ((max_merge_cand) - 2))) << CABAC_FRAC_BITS_Q;   \
            }                                                                                      \
                                                                                                   \
            (cost) = COMPUTE_RATE_COST_CLIP30(                                                     \
                cab_bits_q12, (lambda), (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));                     \
        }                                                                                          \
        else                                                                                       \
        {                                                                                          \
            (cost) = 0;                                                                            \
        }                                                                                          \
    }
203 
204 /*****************************************************************************/
205 /* Typedefs                                                                  */
206 /*****************************************************************************/
207 
/** \brief pointer to FT_CALC_HAD_SATD_8BIT; that type is declared outside this
header (presumably a function type - confirm at its declaration) */
typedef FT_CALC_HAD_SATD_8BIT *pf_res_trans_luma_had_chroma;
209 
/** \brief function pointer prototype for residue and transform enc_loop.
Takes source and prediction pointers with their strides, a WORD32 temporary
buffer, a WORD16 destination with its stride and the chroma plane id.
The meaning of the UWORD32 return value is defined by the implementations. */
typedef UWORD32 (*pf_res_trans_chroma)(
    UWORD8 *pu1_src,
    UWORD8 *pu1_pred,
    WORD32 *pi4_tmp,
    WORD16 *pi2_dst,
    WORD32 src_strd,
    WORD32 pred_strd,
    WORD32 dst_strd,
    CHROMA_PLANE_ID_T e_chroma_plane);
220 
/** \brief function pointer prototype for quantization and inv Quant for ssd
calc. for all transform sizes. Compared with pf_quant_iquant_ssd_sbh this
variant takes two quant rounding factor pointers and reports the cost through
an LWORD64 pointer. */
typedef WORD32 (*pf_quant_iquant_ssd)(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div, /* qpscaled / 6 */
    WORD32 qp_rem, /* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost);
243 
/** \brief function pointer prototype for quantization and inv Quant for ssd
calc. for all transform sizes (in case of RDOQ + SBH). Compared with
pf_quant_iquant_ssd this variant has no rounding factor pointers, reports the
cost through a WORD32 pointer and additionally takes a scan index and a
perform-RDOQ flag. */
typedef WORD32 (*pf_quant_iquant_ssd_sbh)(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div, /* qpscaled / 6 */
    WORD32 qp_rem, /* qpscaled % 6 */
    WORD32 q_add,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    WORD32 *pi4_cost,
    WORD32 i4_scan_idx,
    WORD32 i4_perform_rdoq);
266 
/** \brief function pointer prototype for inverse transform and recon
for all transform sizes : Luma.
Takes a WORD16 source and temporary buffer, prediction and destination pixel
pointers, the three strides and the zero column / zero row information. */
typedef void (*pf_it_recon)(
    WORD16 *pi2_src,
    WORD16 *pi2_tmp,
    UWORD8 *pu1_pred,
    UWORD8 *pu1_dst,
    WORD32 src_strd,
    WORD32 pred_strd,
    WORD32 dst_strd,
    WORD32 zero_cols,
    WORD32 zero_rows);
279 
/** \brief function pointer prototype for inverse transform and recon
for all transform sizes : Chroma.
The parameter list is identical to that of the luma prototype pf_it_recon. */
typedef void (*pf_it_recon_chroma)(
    WORD16 *pi2_src,
    WORD16 *pi2_tmp,
    UWORD8 *pu1_pred,
    UWORD8 *pu1_dst,
    WORD32 src_strd,
    WORD32 pred_strd,
    WORD32 dst_strd,
    WORD32 zero_cols,
    WORD32 zero_rows);
292 
/** \brief function pointer prototype for luma sao.
Takes the source block with its stride, pointers to its left, top, top-left,
top-right and bottom-left neighbours, an availability array, one table of SAO
offsets and the block width and height. */
typedef void (*pf_sao_luma)(
    UWORD8 *pu1_src,
    WORD32 src_strd,
    UWORD8 *pu1_src_left,
    UWORD8 *pu1_src_top,
    UWORD8 *pu1_src_top_left,
    UWORD8 *pu1_src_top_right,
    UWORD8 *pu1_src_bot_left,
    UWORD8 *pu1_avail,
    WORD8 *pi1_sao_offset,
    WORD32 wd,
    WORD32 ht);
306 
/** \brief function pointer prototype for chroma sao.
Same parameters as the luma prototype pf_sao_luma, except that separate offset
tables are passed for U and V. */
typedef void (*pf_sao_chroma)(
    UWORD8 *pu1_src,
    WORD32 src_strd,
    UWORD8 *pu1_src_left,
    UWORD8 *pu1_src_top,
    UWORD8 *pu1_src_top_left,
    UWORD8 *pu1_src_top_right,
    UWORD8 *pu1_src_bot_left,
    UWORD8 *pu1_avail,
    WORD8 *pi1_sao_offset_u,
    WORD8 *pi1_sao_offset_v,
    WORD32 wd,
    WORD32 ht);
321 
322 /*****************************************************************************/
323 /* Enums                                                                     */
324 /*****************************************************************************/
325 
/** Indices of the intra prediction function variants. Each enumerator is named
after a mode number or a range of mode numbers (presumably the intra modes the
variant serves - confirm at the function table); NUM_IP_FUNCS is the count. */
typedef enum
{
    IP_FUNC_MODE_0 = 0,
    IP_FUNC_MODE_1,
    IP_FUNC_MODE_2,
    IP_FUNC_MODE_3TO9,
    IP_FUNC_MODE_10,
    IP_FUNC_MODE_11TO17,
    IP_FUNC_MODE_18_34,
    IP_FUNC_MODE_19TO25,
    IP_FUNC_MODE_26,
    IP_FUNC_MODE_27TO33,

    NUM_IP_FUNCS

} IP_FUNCS_T;
342 
/** TU size expressed relative to the CU size */
typedef enum
{
    /* currently only cu and cu/2 modes are supported */
    TU_EQ_CU = 0,
    TU_EQ_CU_DIV2,
    TU_EQ_SUBCU, /* only applicable for NXN mode at mincusize */

    /* support for below modes needs to be added */
    TU_EQ_CU_DIV4,
    TU_EQ_CU_DIV8,
    TU_EQ_CU_DIV16,

    /* number of enumerators above */
    NUM_TU_WRT_CU,

} TU_SIZE_WRT_CU_T;
358 
/** Call modes of the core function: RDOPT_MODE and RDOPT_SKIP_MODE;
NUM_CORE_CALL_MODES is the number of modes. */
typedef enum
{
    RDOPT_MODE = 0,
    RDOPT_SKIP_MODE = 1,

    NUM_CORE_CALL_MODES,

} CORE_FUNC_CALL_MODE_T;
367 
/** Identifiers of the enc_loop memory records, one enumerator per record.
New entries must be added before NUM_ENC_LOOP_MEM_RECS, which has to stay last
because it provides the number of records. */
typedef enum
{
    ENC_LOOP_CTXT = 0,
    ENC_LOOP_THRDS_CTXT,
    ENC_LOOP_SCALE_MAT,
    ENC_LOOP_RESCALE_MAT,
    ENC_LOOP_TOP_LUMA,
    ENC_LOOP_TOP_CHROMA,
    ENC_LOOP_TOP_NBR4X4,
    ENC_LOOP_RC_PARAMS, /* memory to dump rate control parameters by each thread for each bit-rate instance */
    ENC_LOOP_QP_TOP_4X4,
    ENC_LOOP_DEBLOCKING,
    ENC_LOOP_422_CHROMA_INTRA_PRED,
    ENC_LOOP_INTER_PRED,
    ENC_LOOP_CHROMA_PRED_INTRA,
    ENC_LOOP_REF_SUB_OUT,
    ENC_LOOP_REF_FILT_OUT,
    ENC_LOOP_CU_RECUR_LUMA_RECON,
    ENC_LOOP_CU_RECUR_CHROMA_RECON,
    ENC_LOOP_CU_RECUR_LUMA_PRED,
    ENC_LOOP_CU_RECUR_CHROMA_PRED,
    ENC_LOOP_LEFT_LUMA_DATA,
    ENC_LOOP_LEFT_CHROMA_DATA,
    ENC_LOOP_SAO,
    ENC_LOOP_CU_COEFF_DATA,
    ENC_LOOP_CU_RECUR_COEFF_DATA,
    ENC_LOOP_CU_DEQUANT_DATA,
    ENC_LOOP_RECON_DATA_STORE,
    /* should always be the last entry */
    NUM_ENC_LOOP_MEM_RECS

} ENC_LOOP_MEM_TABS_T;
400 
/** This is for assigning the pred buffers for luma (2, used in ping-pong
fashion) and chroma (1) */
typedef enum
{
    CU_ME_INTRA_PRED_LUMA_IDX0 = 0,
    CU_ME_INTRA_PRED_LUMA_IDX1,
    CU_ME_INTRA_PRED_CHROMA_IDX,

    /* should be always the last entry */
    NUM_CU_ME_INTRA_PRED_IDX

} CU_ME_INTRA_PRED_IDX_T;
413 
414 /*****************************************************************************/
415 /* Structure                                                                 */
416 /*****************************************************************************/
417 
/**
******************************************************************************
*  @brief     Structure to store TU prms req. for enc_loop only.
*             The chroma members are arrays of two entries per plane
*             (presumably one per chroma sub-TU - confirm with the users).
******************************************************************************
*/
typedef struct
{
    /** Zero_col info. for the current TU Luma */
    UWORD32 u4_luma_zero_col;
    /** Zero_row info. for the current TU Luma */
    UWORD32 u4_luma_zero_row;

    /** Zero_col info. for the current TU Chroma Cb */
    UWORD32 au4_cb_zero_col[2];
    /** Zero_row info. for the current TU Chroma Cb */
    UWORD32 au4_cb_zero_row[2];
    /** Zero_col info. for the current TU Chroma Cr */
    UWORD32 au4_cr_zero_col[2];
    /** Zero_row info. for the current TU Chroma Cr */
    UWORD32 au4_cr_zero_row[2];

    /** bytes consumed by the luma ecd data */
    WORD16 i2_luma_bytes_consumed;
    /** bytes consumed by the Cb ecd data */
    WORD16 ai2_cb_bytes_consumed[2];
    /** bytes consumed by the Cr ecd data */
    WORD16 ai2_cr_bytes_consumed[2];

    /** flag to re-evaluate IQ and Coeff data of luma in the final_recon
    function. If zero, uses the data from RDOPT cand.                   */
    UWORD16 b1_eval_luma_iq_and_coeff_data : 1;
    /** flag to re-evaluate IQ and Coeff data of chroma in the final_recon
    function. If zero, uses the data from RDOPT cand.                   */
    UWORD16 b1_eval_chroma_iq_and_coeff_data : 1;

    /* TO DO : No support now, need to add. Always compare ZERO_CBF cost */
    /** Luma ZERO_CBF cost is compared with residue coding cost only if this
    flag is enabled */
    UWORD16 b1_eval_luma_zero_cbf_cost : 1;
    /** Chroma ZERO_CBF cost is compared with residue coding cost only if this
    flag is enabled */
    UWORD16 b1_eval_chroma_zero_cbf_cost : 1;

    /** Reserved to make WORD32 alignment; together with the four 1-bit flags
    above this fills one 16-bit unit */
    UWORD16 b12_reserved : 12;

} tu_enc_loop_temp_prms_t;
465 
/** Book-keeping for the luma and chroma recon buffers: the buffer pointers,
their strides, availability flags and, per TU, the id of the buffer that holds
the winning recon. */
typedef struct recon_datastore_t
{
    /* 2 to store current and best */
    void *apv_luma_recon_bufs[2];

    /* 0 to store cur chroma mode recon */
    /* 1 to store winning independent chroma mode with a single TU's recon */
    /* 2 to store winning independent chroma mode with 4 TUs' recon */
    void *apv_chroma_recon_bufs[3];

    /* The following two arrays are used to store the ID's of the buffers */
    /* where the winning recon is being stored */
    /* For Luma buffers, the permissible values are 0, 1 and UCHAR_MAX */
    /* For Chroma buffers, the permissible values are 0, 1, 2 and UCHAR_MAX */
    /* The value 'UCHAR_MAX' indicates the absence of Recon for that particular TU */
    UWORD8 au1_bufId_with_winning_LumaRecon[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /* 2 - 2 Chroma planes */
    /* 2 - 2 possible subTU's */
    UWORD8 au1_bufId_with_winning_ChromaRecon[2][MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW][2];

    /* stride of the luma recon buffers */
    WORD32 i4_lumaRecon_stride;

    /* stride of the chroma recon buffers */
    WORD32 i4_chromaRecon_stride;

    /* one availability flag per entry of apv_chroma_recon_bufs */
    UWORD8 au1_is_chromaRecon_available[3];

    /* availability flag for the luma recon */
    UWORD8 u1_is_lumaRecon_available;

} recon_datastore_t;
496 
/** Final parameters of a CU produced by the enc_loop: prediction mode flags,
PU / TU arrays, bit and distortion estimates, coefficient buffers and the
recon data store. */
typedef struct enc_loop_cu_final_prms_t
{
    recon_datastore_t s_recon_datastore;

    /**
    * Cu size of the current cu being processed
    */
    UWORD8 u1_cu_size;
    /**
    * flags to indicate the final cu prediction mode
    */
    UWORD8 u1_intra_flag;

    /**
    * flags to indicate Skip mode for CU
    */
    UWORD8 u1_skip_flag;

    /**
    * number of tu in current cu for a given mode
    * if skip then this value should be 1
    */
    UWORD16 u2_num_tus_in_cu;

    /**
    * number of pu in current cu for a given mode
    * if skip then this value should be 1
    */
    UWORD16 u2_num_pus_in_cu;

    /**
    * total bytes produced in ECD data buffer
    * if skip then this value should be 0
    */
    WORD32 i4_num_bytes_ecd_data;

    /**
    * Partition mode of the best candidate
    * if skip then this value should be SIZE_2Nx2N
    * @sa PART_SIZE_E
    */
    UWORD8 u1_part_mode;

    /**
    * indicates if inter cu has coded coeffs 1: coded, 0: not coded
    * if skip then this value should be ignored
    */
    UWORD8 u1_is_cu_coded;

    /**
    * Chroma pred mode as signalled in bitstream
    */
    UWORD8 u1_chroma_intra_pred_mode;

    /**
    * To store the best chroma mode for TU. Will be same for NxN case.
    * Actual Chroma pred
    */
    UWORD8 u1_chroma_intra_pred_actual_mode;

    /**
    * sad accumulated over all Tus of given CU
    */
    UWORD32 u4_cu_sad;

    /**
    * ssd accumulated over all Tus of given CU
    * (the earlier comment repeated "sad"; ssd is presumed from the field
    * name - confirm)
    */
    LWORD64 i8_cu_ssd;

    /**
    * open loop intra sad
    */
    UWORD32 u4_cu_open_intra_sad;

    /**
    * header bits of cu estimated during RDO evaluation.
    * Includes tu splits flags excludes cbf flags
    */
    UWORD32 u4_cu_hdr_bits;
    /**
    * luma residual bits of a cu estimated during RDO evaluation.
    */
    UWORD32 u4_cu_luma_res_bits;

    /**
    * chroma residual bits of a cu estimated during RDO evaluation.
    */
    UWORD32 u4_cu_chroma_res_bits;

    /**
    * cbf bits of a cu estimated during RDO evaluation (considered as part of texture bits later)
    */
    UWORD32 u4_cu_cbf_bits;

    /**
    * array of PU for current CU
    * For Inter PUs this will contain the following
    *   - merge flag
    *   - (MVD and reference indices) or (Merge Index)
    *   - (if Cu is skipped then Merge index for skip
    *      will be in 1st PU entry in array)
    * for intra PU only intra flag will be set to 1
    *
    */
    pu_t as_pu_enc_loop[NUM_PU_PARTS];

    /**
    * array of PU for chroma usage
    * in case of Merge, MVs and reference idx of the final candidate
    * used by luma need to be stored
    * for intra PU this will not be used
    */
    pu_t as_pu_chrm_proc[NUM_PU_PARTS];

    /**
    * array of colocated PU for current CU
    * MV and Ref pic id should be stored in this
    * for intra PU only intra flag will be set to 1
    */
    pu_col_mv_t as_col_pu_enc_loop[NUM_INTER_PU_PARTS];

    /** array to store the intra mode pred related params
    * if nxn mode then all 4 locations will be used
    */
    intra_prev_rem_flags_t as_intra_prev_rem[NUM_PU_PARTS];

    /**
    * array to store TU properties of each tu in a CU
    */
    tu_enc_loop_out_t as_tu_enc_loop[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /**
    * array to store TU properties (req. for enc_loop only and not for
    * entropy) of each tu in a CU
    */
    tu_enc_loop_temp_prms_t as_tu_enc_loop_temp_prms[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /**
    * Neighbour flags stored for chroma reuse
    */
    UWORD32 au4_nbr_flags[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /**
    * intra pred modes stored for chroma reuse
    */
    UWORD8 au1_intra_pred_mode[4];

    /**
    * array for storing coeffs during RD opt stage at CU level.
    * Luma and chroma together
    */
    UWORD8 *pu1_cu_coeffs;

    /**
    * Chroma coeffs start point, presumably within the pu1_cu_coeffs buffer
    * (the earlier comment duplicated the deq_coeffs description - confirm)
    */
    WORD32 i4_chrm_cu_coeff_strt_idx;

    /**
    * array for storing dequantized vals. during RD opt stage at CU level
    * Luma and chroma together.
    * Stride is assumed to be cu_size
    * u-v interleaved storing is at TU level
    */
    WORD16 *pi2_cu_deq_coeffs;

    /**
    * Chroma deq_coeffs start point in the deq coeffs buffer (the earlier
    * comment named it ai2_cu_deq_coeffs; presumably pi2_cu_deq_coeffs above)
    */
    WORD32 i4_chrm_deq_coeff_strt_idx;

    /**
    * The total RDOPT cost of the CU for the best mode
    */
    LWORD64 i8_best_rdopt_cost;

    /**
    * The current running RDOPT cost for the current mode
    */
    LWORD64 i8_curr_rdopt_cost;

    /**
    * Distortion of the best mode (presumed from the name - confirm)
    */
    LWORD64 i8_best_distortion;

} enc_loop_cu_final_prms_t;
682 
/** Chroma buffer pointers and strides for the current CU. Every pixel pointer
exists as a UWORD8 (pu1_) and a UWORD16 (pu2_) variant; presumably only the one
matching the sample size in use is valid - confirm with the users. */
typedef struct
{
    /** Current Cu chroma recon pointer in pic buffer */
    UWORD8 *pu1_final_recon;

    UWORD16 *pu2_final_recon;

    /** Current Cu chroma source pointer in pic buffer */
    UWORD8 *pu1_curr_src;

    UWORD16 *pu2_curr_src;

    /** Current CU chroma recon buffer stride */
    WORD32 i4_chrm_recon_stride;

    /** Current CU chroma source buffer stride */
    WORD32 i4_chrm_src_stride;

    /** Current Cu chroma Left pointer for intra pred */
    UWORD8 *pu1_cu_left;

    UWORD16 *pu2_cu_left;

    /** Left buffer stride */
    WORD32 i4_cu_left_stride;

    /** Current Cu chroma top pointer for intra pred */
    UWORD8 *pu1_cu_top;

    UWORD16 *pu2_cu_top;

    /** Current Cu chroma top left pointer for intra pred */
    UWORD8 *pu1_cu_top_left;

    UWORD16 *pu2_cu_top_left;

} enc_loop_chrm_cu_buf_prms_t;
720 
/** One intra SATD candidate of a CU: its cost, TU depth, luma modes,
partition mode and an evaluation flag. */
typedef struct
{
    /** cost of the current satd cand */
    WORD32 i4_cost;

    /** tu size w.r.t to cu of the current satd cand
    * @sa TU_SIZE_WRT_CU_T
    * (note: the field is a WORD8 although its name carries the i4_ prefix)
    */
    WORD8 i4_tu_depth;

    /**
    *  access valid number of entries in this array based on u1_part_mode
    */
    UWORD8 au1_intra_luma_modes[NUM_PU_PARTS];

    /** @remarks partition mode: 2Nx2N or  NxN  */
    UWORD8 u1_part_mode; /* @sa: PART_SIZE_E */

    /** Flag to indicate whether current candidate needs to be evaluated */
    UWORD8 u1_eval_flag;

} cu_intra_satd_out_t;
743 
744 /** \brief cu level parameters for SATD / RDOPT function */
745 
typedef struct
{
    /** pointer to source luma
    *  pointer will be pointing to CTB start location
    *  At CU level based on the CU position this pointer
    *  has to be appropriately incremented
    */
    UWORD8 *pu1_luma_src;

    /** UWORD16 twin of pu1_luma_src (presumably for high bit depth; TODO confirm) */
    UWORD16 *pu2_luma_src;

    /** pointer to source chroma
    *  pointer will be pointing to CTB start location
    *  At CU level based on the CU position this pointer
    *  has to be appropriately incremented
    */
    UWORD8 *pu1_chrm_src;

    /** UWORD16 twin of pu1_chrm_src (presumably for high bit depth; TODO confirm) */
    UWORD16 *pu2_chrm_src;

    /** pointer to recon luma
    *  pointer will be pointing to CTB start location
    *  At CU level based on the CU position this pointer
    *  has to be appropriately incremented
    */
    UWORD8 *pu1_luma_recon;

    /** UWORD16 twin of pu1_luma_recon (presumably for high bit depth; TODO confirm) */
    UWORD16 *pu2_luma_recon;

    /** pointer to recon chroma
    *  pointer will be pointing to CTB start location
    *  At CU level based on the CU position this pointer
    *  has to be appropriately incremented
    */
    UWORD8 *pu1_chrm_recon;

    /** UWORD16 twin of pu1_chrm_recon (presumably for high bit depth; TODO confirm) */
    UWORD16 *pu2_chrm_recon;

    /* 1st pass parallel dpb buffer pointers, similar to the recon pointers above */
    UWORD8 *pu1_luma_recon_src;

    UWORD16 *pu2_luma_recon_src;

    UWORD8 *pu1_chrm_recon_src;

    UWORD16 *pu2_chrm_recon_src;

    /** Pointer to Subpel Plane Buffer (hxfy) */
    UWORD8 *pu1_sbpel_hxfy;

    /** Pointer to Subpel Plane Buffer (fxhy) */
    UWORD8 *pu1_sbpel_fxhy;

    /** Pointer to Subpel Plane Buffer (hxhy) */
    UWORD8 *pu1_sbpel_hxhy;

    /** Luma source stride */
    WORD32 i4_luma_src_stride;

    /** chroma source stride */
    WORD32 i4_chrm_src_stride;

    /** Luma recon stride */
    WORD32 i4_luma_recon_stride;

    /** chroma recon stride */
    WORD32 i4_chrm_recon_stride;

    /** ctb size */
    WORD32 i4_ctb_size;

    /** current ctb position horz */
    WORD32 i4_ctb_pos;

    /** number of PU finalized for curr CU  */
    WORD32 i4_num_pus_in_cu;

    /** number of bytes consumed for current CU in ecd data buf */
    WORD32 i4_num_bytes_cons;

    /** presumably a flag marking the current CU as noisy; TODO confirm semantics */
    UWORD8 u1_is_cu_noisy;

    /** presumably per-8x8-block noisy flags; layout not visible here, TODO confirm */
    UWORD8 *pu1_is_8x8Blk_noisy;

} enc_loop_cu_prms_t;
831 
832 /**
833 ******************************************************************************
834 *  @brief Pad inter pred recon context
835 ******************************************************************************
836 */
typedef struct
{
    /** Pointer to Subpel Plane Buffer (hxfy) */
    UWORD8 *pu1_sbpel_hxfy;

    /** Pointer to Subpel Plane Buffer (fxhy) */
    UWORD8 *pu1_sbpel_fxhy;

    /** Pointer to Subpel Plane Buffer (hxhy) */
    UWORD8 *pu1_sbpel_hxhy;

    /** pointer to recon luma
    *  pointer will be pointing to CTB start location
    *  At CU level based on the CU position this pointer
    *  has to be appropriately incremented
    */
    UWORD8 *pu1_luma_recon;

    /** pointer to recon chroma
    *  pointer will be pointing to CTB start location
    *  At CU level based on the CU position this pointer
    *  has to be appropriately incremented
    */
    UWORD8 *pu1_chrm_recon;

    /* For recon source, 1st pass: starts */

    /** luma recon source pointer for the 1st pass
    *  (presumably positioned like pu1_luma_recon; TODO confirm)
    */
    UWORD8 *pu1_luma_recon_src;

    /** chroma recon source pointer for the 1st pass
    *  pointer will be pointing to CTB start location
    *  At CU level based on the CU position this pointer
    *  has to be appropriately incremented
    */
    UWORD8 *pu1_chrm_recon_src;
    /* For recon source, 1st pass: ends */
    /** Luma recon stride */
    WORD32 i4_luma_recon_stride;

    /** chroma recon stride */
    WORD32 i4_chrm_recon_stride;

    /** ctb size */
    WORD32 i4_ctb_size;

    /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
    UWORD8 u1_chroma_array_type;

} pad_interp_recon_frm_t;
886 
887 /**
888 ******************************************************************************
889 *  @brief inter prediction (MC) context for enc loop
890 ******************************************************************************
891 */
892 /*IMPORTANT please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t as identical*/
typedef struct
{
    /** pointer to reference lists
    *  (pointer to an array of HEVCE_MAX_REF_PICS * 2 recon_pic_buf_t pointers)
    */
    recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];

    /** scratch buffer for horizontal interpolation destination */
    WORD16 MEM_ALIGN16 ai2_horz_scratch[MAX_CTB_SIZE * (MAX_CTB_SIZE + 8)];

    /** scratch 16 bit buffer for interpolation in l0 direction */
    WORD16 MEM_ALIGN16 ai2_scratch_buf_l0[MAX_CTB_SIZE * MAX_CTB_SIZE];

    /** scratch 16 bit buffer for interpolation in l1 direction */
    WORD16 MEM_ALIGN16 ai2_scratch_buf_l1[MAX_CTB_SIZE * MAX_CTB_SIZE];

    /** Pointer to struct containing function pointers to
    functions in the 'common' library */
    func_selector_t *ps_func_selector;

    /** common denominator used for luma weights */
    WORD32 i4_log2_luma_wght_denom;

    /** common denominator used for chroma weights */
    WORD32 i4_log2_chroma_wght_denom;

    /**  offset w.r.t frame start in horz direction (pels) */
    WORD32 i4_ctb_frm_pos_x;

    /**  offset w.r.t frame start in vert direction (pels) */
    WORD32 i4_ctb_frm_pos_y;

    /* Bit Depth of Input */
    WORD32 i4_bit_depth;

    /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
    UWORD8 u1_chroma_array_type;

    /** weighted_pred_flag      */
    WORD8 i1_weighted_pred_flag;

    /** weighted_bipred_flag    */
    WORD8 i1_weighted_bipred_flag;

    /** Structure to describe extra CTBs around frame due to search
    range associated with distributed-mode. Entries are top, left,
    right and bottom.
    NOTE(review): the field name says "pel" while this comment says
    "CTBs" - confirm the unit against the code that fills it. */
    WORD32 ai4_tile_xtra_pel[4];

} inter_pred_ctxt_t;
941 /*IMPORTANT please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t as identical*/
942 
/**
 * Function pointer type for a luma inter prediction routine that works on
 * one PU. Parameter roles below are read off the parameter names only;
 * TODO confirm against the functions assigned to this type.
 *
 * @param pv_inter_pred_ctxt        opaque inter pred context
 *                                  (presumably an inter_pred_ctxt_t; verify)
 * @param ps_pu                     PU to be predicted
 * @param pv_dst_buf                destination buffer for the prediction
 * @param dst_stride                stride of pv_dst_buf
 * @param i4_flag_inter_pred_source presumably selects the prediction source; verify
 *
 * @return IV_API_CALL_STATUS_T status of the call
 */
typedef IV_API_CALL_STATUS_T (*PF_LUMA_INTER_PRED_PU)(
    void *pv_inter_pred_ctxt,
    pu_t *ps_pu,
    void *pv_dst_buf,
    WORD32 dst_stride,
    WORD32 i4_flag_inter_pred_source);
949 
950 /**
951 ******************************************************************************
952 *  @brief  Motion predictor context structure
953 ******************************************************************************
954 */
typedef struct
{
    /** pointer to reference lists
    *  (pointer to an array of HEVCE_MAX_REF_PICS * 2 recon_pic_buf_t pointers)
    */
    recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];

    /** pointer to the slice header */
    slice_header_t *ps_slice_hdr;

    /** pointer to SPS */
    sps_t *ps_sps;

    /** CTB x. In CTB units */
    WORD32 i4_ctb_x;

    /** CTB y. In CTB units */
    WORD32 i4_ctb_y;

    /** Log2 Parallel Merge Level - 2  */
    WORD32 i4_log2_parallel_merge_level_minus2;

    /* Number of extra CTBs external to tile due to fetched search-range around Tile */
    /* Entries are top, left, right and bottom */
    WORD32 ai4_tile_xtra_ctb[4];

} mv_pred_ctxt_t;
980 
981 /**
982 ******************************************************************************
983 *  @brief  Deblocking and Boundary strength CTB level structure
984 ******************************************************************************
985 */
typedef struct
{
    /** Array to store the packed BS values in horizontal direction;
    *  holds (MAX_CTB_SIZE >> 3) + 1 words
    */
    UWORD32 au4_horz_bs[(MAX_CTB_SIZE >> 3) + 1];

    /** Array to store the packed BS values in vertical direction;
    *  holds (MAX_CTB_SIZE >> 3) + 1 words
    */
    UWORD32 au4_vert_bs[(MAX_CTB_SIZE >> 3) + 1];

    /** CTB neighbour availability flags for deblocking:
    *  per their names, set when the CTB is not in the first CTB column / row of the frame
    */
    UWORD8 u1_not_first_ctb_col_of_frame;
    UWORD8 u1_not_first_ctb_row_of_frame;

} deblk_bs_ctb_ctxt_t;
999 
1000 /**
1001 ******************************************************************************
1002 *  @brief  Deblocking and CTB level structure
1003 ******************************************************************************
1004 */
typedef struct
{
    /**
    * BS of the last vertical 4x4 column of previous CTB
    */
    UWORD8 au1_prev_bs[MAX_CTB_SIZE >> 3];

    /**
    * BS of the last vertical 4x4 column of previous CTB
    * NOTE(review): the original comment was identical to au1_prev_bs;
    * presumably this is the chroma (uv) counterpart - confirm.
    */
    UWORD8 au1_prev_bs_uv[MAX_CTB_SIZE >> 3];

    /** pointer to top 4x4 ctb nbr structure; for accessing qp  */
    nbr_4x4_t *ps_top_ctb_nbr_4x4;

    /** pointer to left 4x4 ctb nbr structure; for accessing qp */
    nbr_4x4_t *ps_left_ctb_nbr_4x4;

    /** pointer to current 4x4 ctb nbr structure; for accessing qp */
    nbr_4x4_t *ps_cur_ctb_4x4;

    /** horizontal bs: max of 8 such contiguous bs to be computed for 64x64 ctb */
    UWORD32 *pu4_bs_horz;

    /** vertical bs: max of 8 such contiguous bs to be computed for 64x64 ctb */
    UWORD32 *pu4_bs_vert;

    /** ptr to current ctb luma pel in frame */
    UWORD8 *pu1_ctb_y;

    /** UWORD16 twin of pu1_ctb_y (presumably for high bit depth; TODO confirm) */
    UWORD16 *pu2_ctb_y;

    /** ptr to current ctb sp interleaved chroma pel in frame */
    UWORD8 *pu1_ctb_uv;

    /** UWORD16 twin of pu1_ctb_uv (presumably for high bit depth; TODO confirm) */
    UWORD16 *pu2_ctb_uv;

    /** function selector; which of its entries deblocking uses is not visible here */
    func_selector_t *ps_func_selector;

    /** left nbr buffer stride in terms of 4x4 units */
    WORD32 i4_left_nbr_4x4_strd;

    /** current  buffer stride in terms of 4x4 units */
    WORD32 i4_cur_4x4_strd;

    /** size in pels 16 / 32 /64 */
    WORD32 i4_ctb_size;

    /** stride for luma       */
    WORD32 i4_luma_pic_stride;

    /** stride for  chroma */
    WORD32 i4_chroma_pic_stride;

    /** boolean indicating if left ctb edge is to be deblocked or not */
    WORD32 i4_deblock_left_ctb_edge;

    /** boolean indicating if top ctb edge is to be deblocked or not */
    WORD32 i4_deblock_top_ctb_edge;

    /** beta offset index */
    WORD32 i4_beta_offset_div2;

    /** tc offset index */
    WORD32 i4_tc_offset_div2;

    /** chroma cb qp offset index */
    WORD32 i4_cb_qp_indx_offset;

    /** chroma cr qp offset index */
    WORD32 i4_cr_qp_indx_offset;

    /** bit depth (presumably of the pels being deblocked; TODO confirm) */
    WORD32 i4_bit_depth;

    /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
    UWORD8 u1_chroma_array_type;

} deblk_ctb_params_t;
1083 
1084 /**
1085 ******************************************************************************
1086 *  @brief  Stores the BS and Qp of a CTB row. For CTB-row level deblocking
1087 ******************************************************************************
1088 */
typedef struct deblk_ctbrow_prms
{
    /**
    * Refer to ihevce_enc_loop_get_mem_recs() and
    * ihevce_enc_loop_init() for more info
    * regarding memory allocation to each one below.
    */

    /**
    * Stores the vertical boundary strength of a CTB row.
    */
    UWORD32 *pu4_ctb_row_bs_vert;

    /**
    * Storage is same as above. Contains horizontal BS.
    */
    UWORD32 *pu4_ctb_row_bs_horz;

    /**
    * Pointer to the CTB row's Qp storage
    */
    WORD8 *pi1_ctb_row_qp;

    /**
    * Stride of the Qp buffer (pi1_ctb_row_qp) in WORD32 unit.
    * NOTE: despite the u4_ prefix, this field is declared as WORD32.
    */
    WORD32 u4_qp_buffer_stride;

    /*
    *   Pointer to the  memory which contains the Qp of
    *   top4x4 neighbour blocks for each CTB row.
    *   This memory is at frame level.
    *   One pointer per entry, MAX_NUM_ENC_LOOP_PARALLEL entries.
    */
    WORD8 *api1_qp_top_4x4_ctb_row[MAX_NUM_ENC_LOOP_PARALLEL];

    /*
    *   Stride of the above memory location.
    *   Values in one stride correspond to one CTB row.
    *   NOTE: despite the u4_ prefix, this field is declared as WORD32.
    */
    WORD32 u4_qp_top_4x4_buf_strd;

    /* size of frm level qp buffer (declared WORD32 despite the u4_ prefix) */
    WORD32 u4_qp_top_4x4_buf_size;

} deblk_ctbrow_prms_t;
1134 
1135 /**
1136 ******************************************************************************
1137 *  @brief  Entropy rd opt context for cabac bit estimation and RDO
1138 ******************************************************************************
1139 */
typedef struct rdopt_entropy_ctxt
{
    /**
    * array for entropy contexts during RD opt stage at CU level
    * one best and one current is required (hence 2 entries)
    */
    entropy_context_t as_cu_entropy_ctxt[2];

    /**
    * init state of entropy context models during CU RD opt stage,
    * required for saving and restoring the cabac states
    */
    UWORD8 au1_init_cabac_ctxt_states[IHEVC_CAB_CTXT_END];

    /*
    * ptr to top row cu skip flags (1 bit per 8x8CU)
    */
    UWORD8 *pu1_cu_skip_top_row;

    /**
    * Current entropy ctxt idx
    * (presumably an index into as_cu_entropy_ctxt; TODO confirm)
    */
    WORD32 i4_curr_buf_idx;

} rdopt_entropy_ctxt_t;
1165 
1166 /**
1167 ******************************************************************************
1168 *  @brief  structure to save predicted data from Inter SATD stage to Inter RD opt stage
1169 ******************************************************************************
1170 */
typedef struct
{
    /* Buffers to store the predicted data after motion compensation for merge and
    * skip candidates.
    * [2] Because for a given candidate we do motion compensation for 5 merge candidates:
    *     store the pred data after mc for the first 2 candidates and, from the 3rd
    *     candidate onwards, overwrite the data which has the higher SATD cost.
    */
    void *apv_pred_data[2];

    /** Stride of the predicted data buffers
    */
    WORD32 i4_pred_data_stride;

} merge_skip_pred_data_t;
1186 /**
1187 ******************************************************************************
1188 *  @brief  Structure to hold Rate control related parameters
1189 *          for each bit-rate instance and each thread
1190 ******************************************************************************
1191 */
typedef struct
{
    /**
    * frame level open loop ssd
    * (the original comment said "intra sad"; the field name says ssd)
    */
    LWORD64 i8_frame_open_loop_ssd;

    /**
    * frame level open loop intra sad
    *
    */
    UWORD32 u4_frame_open_loop_intra_sad;
    /**
    * frame level intra sad accumulator
    */
    UWORD32 u4_frame_intra_sad;

    /**
    *  frame level sad accumulator
    */
    UWORD32 u4_frame_sad_acc;

    /**
    *  frame level inter sad accumulator
    *  (the original comment said "intra", contradicting the field name)
    */
    UWORD32 u4_frame_inter_sad_acc;

    /**
    *  frame level intra sad accumulator
    *  (the original comment said "inter", contradicting the field name)
    */
    UWORD32 u4_frame_intra_sad_acc;

    /**
    *  frame level cost accumulator
    */
    LWORD64 i8_frame_cost_acc;

    /**
    *  frame level inter cost accumulator
    *  (the original comment said "intra", contradicting the field name)
    */
    LWORD64 i8_frame_inter_cost_acc;

    /**
    *  frame level intra cost accumulator
    *  (the original comment said "inter", contradicting the field name)
    */
    LWORD64 i8_frame_intra_cost_acc;

    /**
    * frame level rdopt bits accumulator
    */
    UWORD32 u4_frame_rdopt_bits;

    /**
    * frame level rdopt header bits accumulator
    */
    UWORD32 u4_frame_rdopt_header_bits;

    /* Sum of the Qps of each 8x8 block in CU.
    * An 8x8 block is used because the min CU size possible as per the standard is 8.
    * 0 corresponds to INTER and 1 corresponds to INTRA
    */
    WORD32 i4_qp_normalized_8x8_cu_sum[2];

    /* Count of the number of 8x8 blocks in each CU type (INTER/INTRA)
    * 0 corresponds to INTER and 1 corresponds to INTRA
    */
    WORD32 i4_8x8_cu_sum[2];

    /* SAD/Qscale accumulated over all CUs. CU size is inherently
    * taken care of in SAD.
    * NOTE(review): the two entries presumably follow the same
    * 0 = INTER / 1 = INTRA split as the arrays above - confirm.
    */
    LWORD64 i8_sad_by_qscale[2];

} enc_loop_rc_params_t;
1267 /**
1268 ******************************************************************************
1269 *  @brief  CU information structure. This is to store the
1270 *  CU final out after Recursion
1271 ******************************************************************************
1272 */
typedef struct ihevce_enc_cu_node_ctxt_t
{
    /* CU params */
    /** CU X position in terms of min CU (8x8) units */
    UWORD8 b3_cu_pos_x : 3;

    /** CU Y position in terms of min CU (8x8) units */
    UWORD8 b3_cu_pos_y : 3;

    /** reserved bits (2-bit field) */
    UWORD8 b2_reserved : 2;

    /** CU size 2N (width or height) in pixels */
    UWORD8 u1_cu_size;

    /**
    * cu level final params for a given mode, stored by value.
    * NOTE(review): the original comment described an "array" with
    * "one best and one current"; this member is a single struct.
    */
    enc_loop_cu_final_prms_t s_cu_prms;

    /**
    * pointer to cu level final params for a given mode.
    * NOTE(review): the original comment described an "array" with
    * "one best and one current"; what this points at is not visible here.
    */
    enc_loop_cu_final_prms_t *ps_cu_prms;

    /* flag to indicate if current CU is the first
    CU of the Quantisation group (1-bit field in a UWORD32) */
    UWORD32 b1_first_cu_in_qg : 1;

    /** qp used for the CU
    * @remarks :
    */
    WORD8 i1_cu_qp;

} ihevce_enc_cu_node_ctxt_t;
1310 
/**
 * One result node used by block merge; block_merge_results_t points at
 * these and block_merge_data_t stores arrays of them.
 * Field descriptions are read off the field names - TODO confirm.
 */
typedef struct
{
    /** presumably the SAD of this node */
    WORD32 i4_sad;

    /** presumably the cost of coding the motion vector */
    WORD32 i4_mv_cost;

    /** presumably the total cost (how it combines the above is not visible here) */
    WORD32 i4_tot_cost;

    /** presumably the reference picture index */
    WORD8 i1_ref_idx;

    /** motion vector */
    mv_t s_mv;

} block_merge_nodes_t;
1324 
1325 /**
1326 ******************************************************************************
1327 *  @brief  This struct is used for storing output of block merge
1328 ******************************************************************************
1329 */
typedef struct
{
    /* Pointers to result nodes, one per entry, MAX_NUM_PARTS entries
    *  (presumably the best node per partition; TODO confirm)
    */
    block_merge_nodes_t *aps_best_results[MAX_NUM_PARTS];

    /* Contains the best uni dir for each partition type */
    WORD32 ai4_best_uni_dir[MAX_NUM_PARTS];

    /* Contains the best pred dir for each partition type */
    WORD32 ai4_best_pred_dir[MAX_NUM_PARTS];

    /* presumably the total cost of this result; TODO confirm */
    WORD32 i4_tot_cost;

    /* partition type of this result */
    PART_TYPE_T e_part_type;
} block_merge_results_t;
1344 
1345 /**
1346 ******************************************************************************
1347 *  @brief  This struct is used for storing output of block merge and also
1348 *          all of the intermediate results required
1349 ******************************************************************************
1350 */
typedef struct
{
    /* Best results: (3 + 1) x NUM_BEST_ME_OUTPUTS entries.
    *  NOTE(review): the meaning of the first dimension, and why it has one
    *  more entry than as_nodes, is not visible here - confirm.
    */
    block_merge_results_t as_best_results[3 + 1][NUM_BEST_ME_OUTPUTS];

    /* Result nodes: 3 x TOT_NUM_PARTS x NUM_BEST_ME_OUTPUTS entries */
    block_merge_nodes_t as_nodes[3][TOT_NUM_PARTS][NUM_BEST_ME_OUTPUTS];

    /* presumably a mask of partitions; bit assignment not visible here */
    WORD32 part_mask;

    /* presumably the number of results kept per partition; TODO confirm */
    WORD32 num_results_per_part;

    /* presumably the number of best results; TODO confirm */
    WORD32 num_best_results;

    /**
    * Overall best CU cost. While other entries store CU costs
    * in a single direction, this is the best CU cost where each
    * partition cost is evaluated as best of uni/bi
    */
    WORD32 best_cu_cost;

} block_merge_data_t;
1371 /**
1372 ******************************************************************************
1373 *  @brief  CU nbr information structure. This is to store the
1374 *  neighbour information for final reconstruction function
1375 ******************************************************************************
1376 */
typedef struct
{
    /* Pointer to top-left nbr */
    nbr_4x4_t *ps_topleft_nbr_4x4;
    /* Pointer to left nbr */
    nbr_4x4_t *ps_left_nbr_4x4;
    /* Pointer to top nbr */
    nbr_4x4_t *ps_top_nbr_4x4;
    /* stride of left_nbr_4x4 */
    WORD32 nbr_4x4_left_strd;

    /* Pointer to CU top */
    UWORD8 *pu1_cu_top;

    /* UWORD16 twin of pu1_cu_top (presumably for high bit depth; TODO confirm) */
    UWORD16 *pu2_cu_top;

    /* Pointer to CU top-left */
    UWORD8 *pu1_cu_top_left;

    /* UWORD16 twin of pu1_cu_top_left (presumably for high bit depth; TODO confirm) */
    UWORD16 *pu2_cu_top_left;

    /* Pointer to CU left */
    UWORD8 *pu1_cu_left;

    /* UWORD16 twin of pu1_cu_left (presumably for high bit depth; TODO confirm) */
    UWORD16 *pu2_cu_left;

    /* stride of left pointer */
    WORD32 cu_left_stride;
} cu_nbr_prms_t;
1406 
1407 /** Structure to save the flags required for Final mode Reconstruction
1408 function. These flags are set based on quality presets and
1409 the bit-rate we are working on */
typedef struct
{
    /** Flag to indicate whether Luma pred data needs to be recomputed in the
    final_recon function. Now disabled for all modes */
    UWORD8 u1_eval_luma_pred_data;

    /** Flag to indicate whether Chroma pred data needs to be recomputed in the
    final_recon function. Now disabled for MedSpeed only */
    UWORD8 u1_eval_chroma_pred_data;

    /** Flag to indicate whether header data needs to be recomputed in the
    final_recon function. Now disabled for all modes */
    UWORD8 u1_eval_header_data;

    /** Presumably the matching flag for recon data; undocumented in the
    original, so confirm its meaning and when it is set */
    UWORD8 u1_eval_recon_data;
} cu_final_recon_flags_t;
1426 
1427 /**
1428 ******************************************************************************
*  @brief  Structure to save pred data of ME cand. 1 ping-pong to store
*  the best and current luma cand. 1 buffer to store the best chroma pred
1431 ******************************************************************************
1432 */
typedef struct
{
    /** Pointers to store luma pred data of me/intra cand.(2) and chroma(1) */
    UWORD8 *pu1_pred_data[NUM_CU_ME_INTRA_PRED_IDX];

    /** UWORD16 twins of pu1_pred_data (presumably for high bit depth; TODO confirm) */
    UWORD16 *pu2_pred_data[NUM_CU_ME_INTRA_PRED_IDX];

    /** Strides of the predicted data of me/intra cand.(2) and chroma(1) */
    WORD32 ai4_pred_data_stride[NUM_CU_ME_INTRA_PRED_IDX];
    /** Counter saying how many pointers are assigned */
    WORD32 i4_pointer_count;

} cu_me_intra_pred_prms_t;
1446 
1447 /**
1448 ******************************************************************************
*  @brief  Chroma intra SATD context structure
1450 ******************************************************************************
1451 */
typedef struct
{
    /** Storing the inverse quantized data (cb) for the special modes*/
    WORD16 ai2_iq_data_cb[(MAX_TU_SIZE * MAX_TU_SIZE) << 1];

    /** Storing the inverse quantized data (cr) for the special modes*/
    WORD16 ai2_iq_data_cr[(MAX_TU_SIZE * MAX_TU_SIZE) << 1];

    /** Storing the scan coeffs (cb) for the special modes*/
    UWORD8 au1_scan_coeff_cb[2][(MAX_TU_IN_CTB >> 1) * MAX_SCAN_COEFFS_BYTES_4x4];

    /** Storing the scan coeffs (cr) for the special modes
    (the original comment said "cb" here) */
    UWORD8 au1_scan_coeff_cr[2][(MAX_TU_IN_CTB >> 1) * MAX_SCAN_COEFFS_BYTES_4x4];

    /** Max number of bytes filled in scan coeff data (cb) per TU*/
    WORD32 ai4_num_bytes_scan_coeff_cb_per_tu[2][MAX_TU_IN_TU_EQ_DIV_2];

    /** Max number of bytes filled in scan coeff data (cr) per TU*/
    WORD32 ai4_num_bytes_scan_coeff_cr_per_tu[2][MAX_TU_IN_TU_EQ_DIV_2];

    /** Stride of the iq buffer*/
    WORD32 i4_iq_buff_stride;

    /** Storing the pred data
    The predicted data is always interleaved. Therefore the size of this array will be
    ((MAX_TU_SIZE * MAX_TU_SIZE) >> 2) * 2.
    NOTE: this member is a pointer; the buffer itself is allocated elsewhere. */
    void *pv_pred_data;

    /** Predicted data stride*/
    WORD32 i4_pred_stride;

    /** Storing the cbfs (cb) for each tu
    For 1 tu case, only the 0th element will be valid*/
    UWORD8 au1_cbf_cb[2][MAX_TU_IN_TU_EQ_DIV_2];

    /** Storing the cbfs (cr) for each tu
    For 1 tu case, only the 0th element will be valid*/
    UWORD8 au1_cbf_cr[2][MAX_TU_IN_TU_EQ_DIV_2];

    /** To store the cabac ctxt model updated by the RDOPT of best chroma mode.
    NOTE(review): the original comment described "[0] : for 1 TU case,
    [1] : for 4 TU case", but this array is indexed by cabac context only;
    ihevce_chroma_rdopt_ctxt_t holds one instance of this struct per
    TU-size candidate. */
    UWORD8 au1_chrm_satd_updated_ctxt_models[IHEVC_CAB_CTXT_END];

    /** Best SATD chroma mode.
    Values : 0(PLANAR), 1(VERT), 2(HOR), 3(DC) chroma mode per each TU.
    NOTE(review): scalar field; the original "[0]/[1]" TU-case indexing
    note does not apply to it. */
    UWORD8 u1_best_cr_mode;

    /** Best SATD chroma mode's RDOPT cost (scalar; the original "[0]/[1]"
    TU-case indexing note does not apply) */
    LWORD64 i8_chroma_best_rdopt;

    /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
    /* This is done by adding the bits for signalling chroma mode (0-3)    */
    /* and subtracting the bits for chroma mode same as luma mode (4)      */
    LWORD64 i8_cost_to_encode_chroma_mode;

    /** Best SATD chroma mode's tu bits (scalar; the original "[0]/[1]"
    TU-case indexing note does not apply) */
    WORD32 i4_chrm_tu_bits;

    /** Storing the zero col values for each TU for cb*/
    WORD32 ai4_zero_col_cb[2][MAX_TU_IN_TU_EQ_DIV_2];

    /** Storing the zero col values for each TU for cr*/
    WORD32 ai4_zero_col_cr[2][MAX_TU_IN_TU_EQ_DIV_2];

    /** Storing the zero row values for each TU for cb*/
    WORD32 ai4_zero_row_cb[2][MAX_TU_IN_TU_EQ_DIV_2];

    /** Storing the zero row values for each TU for cr*/
    WORD32 ai4_zero_row_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
} chroma_intra_satd_ctxt_t;
1522 
1523 /**
1524 ******************************************************************************
1525 *  @brief  Chroma RDOPT context structure
1526 ******************************************************************************
1527 */
typedef struct
{
    /** Chroma SATD context structure. It is an array of
    NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD entries to account for the
    TU_EQ_CU candidate and the TU_EQ_CU_DIV2 candidate */
    chroma_intra_satd_ctxt_t as_chr_intra_satd_ctxt[NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD];

    /** Chroma SATD has to be evaluated only for the HIGH QUALITY preset */
    UWORD8 u1_eval_chrm_satd;

    /** Chroma RDOPT has to be evaluated only for the HIGH QUALITY / MEDIUM SPEED preset */
    UWORD8 u1_eval_chrm_rdopt;

} ihevce_chroma_rdopt_ctxt_t;
1541 
/**
 * Bundles one inter CU result set with one inter PU result set.
 * NOTE(review): going by the type name this is the output of block merge;
 * who produces and consumes it is not visible here - confirm.
 */
typedef struct
{
    /** CU level inter results */
    inter_cu_results_t s_cu_results;

    /** PU level inter results */
    inter_pu_results_t s_pu_results;
} block_merge_output_t;
1548 
1549 /**
1550 ******************************************************************************
1551 *  @brief  Structure to store the Merge/Skip Cand. for EncLoop
1552 ******************************************************************************
1553 */
typedef struct
{
    /** List of all merge/skip candidates to be evaluated (SATD/RDOPT) for
    * this CU
    */
    cu_inter_cand_t as_cu_inter_merge_skip_cand[MAX_NUM_CU_MERGE_SKIP_CAND];

    /** number of merge candidates
    */
    UWORD8 u1_num_merge_cands;

    /** number of skip candidates */
    UWORD8 u1_num_skip_cands;

    /** number of merge/skip candidates
    * (presumably the valid entry count of the list above; TODO confirm)
    */
    UWORD8 u1_num_merge_skip_cands;

} cu_inter_merge_skip_t;
1570 
1571 /** Structure to store the Mixed mode Cand. for EncLoop */
typedef struct
{
    /** Mixed mode candidates; holds MAX_NUM_MIXED_MODE_INTER_RDO_CANDS entries */
    cu_inter_cand_t as_cu_data[MAX_NUM_MIXED_MODE_INTER_RDO_CANDS];

    /** Number of mixed mode candidates of type0
    * (what distinguishes type0 from type1 is not visible here)
    */
    UWORD8 u1_num_mixed_mode_type0_cands;

    /** Number of mixed mode candidates of type1 */
    UWORD8 u1_num_mixed_mode_type1_cands;

} cu_mixed_mode_inter_t;
1581 
/**
 * Set of inter pred buffers plus a bit field tracking which ones are in use.
 */
typedef struct
{
    /* Extra entries beyond MAX_NUM_INTER_RDO_CANDS because additional */
    /* buffers are required for storing both cur and best during merge eval. */
    /* NOTE(review): the original comment said "+2" while the array is */
    /* sized "+ 4" - confirm which is intended. */
    void *apv_inter_pred_data[MAX_NUM_INTER_RDO_CANDS + 4];

    /* Bit field used to determine the indices of free bufs in the */
    /* 'apv_inter_pred_data' buf array */
    UWORD32 u4_is_buf_in_use;

    /* Assumption is that the same stride is used for the */
    /* entire set of buffers above and is equal to the */
    /* CU size */
    WORD32 i4_pred_stride;

} ihevce_inter_pred_buf_data_t;
1597 /** Structure to store the Inter Cand. info in EncLoop */
typedef struct
{
    /* Pointers to the inter candidates; MAX_NUM_INTER_RDO_CANDS entries */
    cu_inter_cand_t *aps_cu_data[MAX_NUM_INTER_RDO_CANDS];

    /* Cost per candidate (which cost metric is not visible here) */
    UWORD32 au4_cost[MAX_NUM_INTER_RDO_CANDS];

    /* Pred buffer index per candidate */
    /* (presumably an index into a pred buffer array held elsewhere; TODO confirm) */
    UWORD8 au1_pred_buf_idx[MAX_NUM_INTER_RDO_CANDS];

    /* presumably the variance of the source block; TODO confirm */
    UWORD32 u4_src_variance;

    /* Index of the worst cost entry in the cost array (au4_cost) */
    UWORD8 u1_idx_of_worst_cost_in_cost_array;

    /* Index of the worst cost entry in the pred buf array (au1_pred_buf_idx) */
    UWORD8 u1_idx_of_worst_cost_in_pred_buf_array;

    /* Number of inter candidates */
    UWORD8 u1_num_inter_cands;

} inter_cu_mode_info_t;
1615 typedef struct
1616 {
1617     /*Frame level base pointer of buffers for each ctb row to store the top pixels
1618     *and top left pixel for the next ctb row.These buffers are common accross all threads
1619     */
1620     UWORD8 *apu1_sao_src_frm_top_luma[MAX_NUM_ENC_LOOP_PARALLEL];
1621     /*Ctb level pointer to buffer to store the top pixels
1622     *and top left pixel for the next ctb row.These buffers are common accross all threads
1623     */
1624     UWORD8 *pu1_curr_sao_src_top_luma;
1625     /*Buffer to store the left boundary before
1626     * doing sao on current ctb for the next ctb in the current row
1627     */
1628     UWORD8 au1_sao_src_left_luma[MAX_CTB_SIZE];
1629     /*Frame level base pointer of buffers for each ctb row to store the top pixels
1630     *and top left pixel for the next ctb row.These buffers are common accross all threads
1631     */
1632     UWORD8 *apu1_sao_src_frm_top_chroma[MAX_NUM_ENC_LOOP_PARALLEL];
1633 
1634     WORD32 i4_frm_top_chroma_buf_stride;
1635 
1636     /*Ctb level pointer to buffer to store the top chroma pixels
1637     *and top left pixel for the next ctb row.These buffers are common accross all threads
1638     */
1639     UWORD8 *pu1_curr_sao_src_top_chroma;
1640 
1641     /*Scratch buffer to store the left boundary before
1642     * doing sao on current ctb for the next ctb in the current row
1643     */
1644     UWORD8 au1_sao_src_left_chroma[MAX_CTB_SIZE * 2];
1645 
1646     /**
1647     * Luma recon buffer
1648     */
1649     UWORD8 *pu1_frm_luma_recon_buf;
1650     /**
1651     * Chroma recon buffer
1652     */
1653     UWORD8 *pu1_frm_chroma_recon_buf;
1654     /**
1655     * Luma recon buffer for curr ctb
1656     */
1657     UWORD8 *pu1_cur_luma_recon_buf;
1658     /**
1659     * Chroma recon buffer for curr ctb
1660     */
1661     UWORD8 *pu1_cur_chroma_recon_buf;
1662     /**
1663     * Luma src buffer
1664     */
1665     UWORD8 *pu1_frm_luma_src_buf;
1666     /**
1667     * Chroma src buffer
1668     */
1669     UWORD8 *pu1_frm_chroma_src_buf;
1670     /**
1671     * Luma src(input yuv) buffer for curr ctb
1672     */
1673     UWORD8 *pu1_cur_luma_src_buf;
1674     /**
1675     * Chroma src buffer for curr ctb
1676     */
1677     UWORD8 *pu1_cur_chroma_src_buf;
1678     /* Left luma scratch buffer required for sao RD optimisation*/
1679     UWORD8 au1_left_luma_scratch[MAX_CTB_SIZE];
1680 
1681     /* Left chroma scratch buffer required for sao RD optimisation*/
1682     /* Min size required= MAX_CTB_SIZE/2 * 2
1683     * Multiplied by 2 because size reuired is MAX_CTB_SIZE/2 each for U and V
1684     */
1685     UWORD8 au1_left_chroma_scratch[MAX_CTB_SIZE * 2];
1686 
1687     /* Top luma scratch buffer required for sao RD optimisation*/
1688     UWORD8 au1_top_luma_scratch[MAX_CTB_SIZE + 2];  // +1 for top left pixel and +1 for top right
1689 
1690     /* Top chroma scratch buffer required for sao RD optimisation*/
1691     UWORD8 au1_top_chroma_scratch[MAX_CTB_SIZE + 4];  // +2 for top left pixel and +2 for top right
1692 
1693     /* Scratch buffer to store the sao'ed output during sao RD optimisation*/
1694     /* One extra row (bottom pixels) is copied to the scratch buf, but the 2d buf copy func copies in multiples of 4, hence
1695     MAX_CTB_SIZE + 4 */
1696     UWORD8 au1_sao_luma_scratch[PING_PONG][SCRATCH_BUF_STRIDE * (MAX_CTB_SIZE + 4)];
1697 
1698     /* Scratch buffer to store the sao'ed output during sao RD optimisation*/
1699     /* One extra row (bottom pixels) is copied to the scratch buf, but the 2d buf copy func copies in multiples of 4, hence
1700     MAX_CTB_SIZE + 4 */
1701     UWORD8 au1_sao_chroma_scratch[PING_PONG][SCRATCH_BUF_STRIDE * (MAX_CTB_SIZE + 4)];
1702 
1703     /**
1704     * CTB size
1705     */
1706     WORD32 i4_ctb_size;
1707     /**
1708     * Luma recon buffer stride
1709     */
1710     WORD32 i4_frm_luma_recon_stride;
1711     /**
1712     * Chroma recon buffer stride
1713     */
1714     WORD32 i4_frm_chroma_recon_stride;
1715     /**
1716     * Luma recon buffer stride for curr ctb
1717     */
1718     WORD32 i4_cur_luma_recon_stride;
1719     /**
1720     * Chroma recon buffer stride for curr ctb
1721     */
1722     WORD32 i4_cur_chroma_recon_stride;
1723     /**
1724     * Luma src buffer stride
1725     */
1726     WORD32 i4_frm_luma_src_stride;
1727     /**
1728     * Chroma src buffer stride
1729     */
1730     WORD32 i4_frm_chroma_src_stride;
1731 
1732     WORD32 i4_frm_top_luma_buf_stride;
1733     /**
1734     * Luma src buffer stride for curr ctb
1735     */
1736     WORD32 i4_cur_luma_src_stride;
1737     /**
1738     * Chroma src buffer stride for curr ctb
1739     */
1740     WORD32 i4_cur_chroma_src_stride;
1741 
1742     /* Top luma buffer size */
1743     WORD32 i4_top_luma_buf_size;
1744 
1745     /* Top Chroma buffer size */
1746     WORD32 i4_top_chroma_buf_size;
1747 
1748     /*** Number of CTB units **/
1749     WORD32 i4_num_ctb_units;
1750 
1751     /**
1752     * CTB x pos
1753     */
1754     WORD32 i4_ctb_x;
1755     /**
1756     * CTB y pos
1757     */
1758     WORD32 i4_ctb_y;
1759     /* SAO block width*/
1760     WORD32 i4_sao_blk_wd;
1761 
1762     /* SAO block height*/
1763     WORD32 i4_sao_blk_ht;
1764 
1765     /* Last ctb row flag*/
1766     WORD32 i4_is_last_ctb_row;
1767 
1768     /* Last ctb col flag*/
1769     WORD32 i4_is_last_ctb_col;
1770 
1771     /* CTB aligned width */
1772     UWORD32 u4_ctb_aligned_wd;
1773 
1774     /* Number of ctbs in a row*/
1775     UWORD32 u4_num_ctbs_horz;
1776 
1777     UWORD32 u4_num_ctbs_vert;
1778     /**
1779     * Closed loop SSD Lambda
1780     * This is multiplied with bits for RD cost computations in SSD mode
1781     * This is represented in q format with shift of LAMBDA_Q_SHIFT
1782     */
1783     LWORD64 i8_cl_ssd_lambda_qf;
1784 
1785     /**
1786     * Closed loop SSD Lambda for chroma (chroma qp is different from luma qp)
1787     * This is multiplied with bits for RD cost computations in SSD mode
1788     * This is represented in q format with shift of LAMBDA_Q_SHIFT
1789     */
1790     LWORD64 i8_cl_ssd_lambda_chroma_qf;
1791     /**
1792     * Pointer to current PPS
1793     */
1794     pps_t *ps_pps;  //not used currently
1795     /**
1796     * Pointer to current SPS
1797     */
1798     sps_t *ps_sps;
1799 
1800     /**
1801     * Pointer to current slice header structure
1802     */
1803     slice_header_t *ps_slice_hdr;
1804     /**
1805     * Pointer to current frame ctb out array of structures
1806     */
1807     ctb_enc_loop_out_t *ps_ctb_out;
1808     /**
1809     *  context for cabac bit estimation used during rdopt stage
1810     */
1811     rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt;
1812     /**
1813     * Pointer to sao_enc_t for the current ctb
1814     */
1815     sao_enc_t *ps_sao;
1816     /*
1817     * Pointer to an array to store the sao information of the top ctb
1818     * This is required to decide top merge
1819     */
1820     sao_enc_t *aps_frm_top_ctb_sao[MAX_NUM_ENC_LOOP_PARALLEL];
1821 
1822     /*
1823     * Pointer to structure to store the sao parameters of (x,y)th ctb
1824     * for top merge of (x,y+1)th ctb
1825     */
1826     sao_enc_t *ps_top_ctb_sao;
1827 
1828     /* structure to store the sao parameters of (x,y)th ctb for
1829     * the left merge of (x+1,y)th ctb
1830     */
1831     sao_enc_t s_left_ctb_sao;
1832 
1833     /* Array of structures for SAO RDO candidates*/
1834     sao_enc_t as_sao_rd_cand[MAX_SAO_RD_CAND];
1835 
1836     /** array of function pointers for luma sao */
1837     pf_sao_luma apf_sao_luma[4];
1838 
1839     /** array of function pointers for chroma sao */
1840     pf_sao_chroma apf_sao_chroma[4];
1841 
1842     /* Flag to do SAO luma and chroma filtering*/
1843     WORD8 i1_slice_sao_luma_flag;
1844 
1845     WORD8 i1_slice_sao_chroma_flag;
1846 
1847 #if DISABLE_SAO_WHEN_NOISY
1848     ctb_analyse_t *ps_ctb_data;
1849 
1850     WORD32 i4_ctb_data_stride;
1851 #endif
1852 
1853     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
1854 
1855 } sao_ctxt_t;
1856 
1857 /**
1858 ******************************************************************************
1859 *  @brief  Encode loop module context structure
1860 ******************************************************************************
1861 */
1862 typedef struct
1863 {
1864 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
1865     void *pv_err_func_selector;
1866 #endif
1867 
1868     /**
1869     * Quality preset for controlling number of RD opt cand
1870     * @sa : IHEVCE_QUALITY_CONFIG_T
1871     */
1872     WORD32 i4_quality_preset;
1873     /**
1874     *
1875     *
1876     */
1877     WORD32 i4_rc_pass;
1878     /**
1879     * Lambda to be multiplied with bits for SATD
1880     * should be equal to Lambda*Qp
1881     */
1882     WORD32 i4_satd_lamda;
1883 
1884     /**
1885     * Lambda to be multiplied with bits for SAD
1886     * should be equal to Lambda*Qp
1887     */
1888     WORD32 i4_sad_lamda;
1889 
1890     /**
1891     * Closed loop SSD Lambda
1892     * This is multiplied with bits for RD cost computations in SSD mode
1893     * This is represented in q format with shift of LAMBDA_Q_SHIFT
1894     */
1895     LWORD64 i8_cl_ssd_lambda_qf;
1896 
1897     /**
1898     * Closed loop SSD Lambda for chroma (chroma qp is different from luma qp)
1899     * This is multiplied with bits for RD cost computations in SSD mode
1900     * This is represented in q format with shift of LAMBDA_Q_SHIFT
1901     */
1902     LWORD64 i8_cl_ssd_lambda_chroma_qf;
1903 
1904     /**
1905     * Ratio of Closed loop SSD Lambda and Closed loop SSD Lambda for chroma
1906     * This is multiplied with (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)
1907     * to keep the precision of the ratio
1908     */
1909     UWORD32 u4_chroma_cost_weighing_factor;
1910     /**
1911     * Frame level QP to be used
1912     */
1913     WORD32 i4_frame_qp;
1914 
1915     WORD32 i4_frame_mod_qp;
1916 
1917     WORD32 i4_frame_qstep;
1918 
1919     UWORD8 u1_max_tr_depth;
1920 
1921     /**
1922     * CU level Qp
1923     */
1924     WORD32 i4_cu_qp;
1925 
1926     /**
1927     * CU level Qp / 6
1928     */
1929     WORD32 i4_cu_qp_div6;
1930 
1931     /**
1932     * CU level Qp % 6
1933     */
1934     WORD32 i4_cu_qp_mod6;
1935 
1936     /**
1937     *  Chroma CU level QP to be used
1938     */
1939     WORD32 i4_chrm_cu_qp;
1940 
1941     /**
1942     * Chroma CU level Qp / 6
1943     */
1944     WORD32 i4_chrm_cu_qp_div6;
1945 
1946     /**
1947     * Chroma CU level Qp % 6
1948     */
1949     WORD32 i4_chrm_cu_qp_mod6;
1950 
1951     /** previous cu qp
1952     * @remarks : This needs to be remembered to handle skip cases in deblocking.
1953     */
1954     WORD32 i4_prev_cu_qp;
1955 
1956     /** chroma qp offset
1957     * @remarks : Used to calculate chroma qp and other qp related parameter at CU level
1958     */
1959     WORD32 i4_chroma_qp_offset;
1960 
1961     /**
1962     * Buffer Pointer to populate the scale matrix for all transform size
1963     */
1964     WORD16 *pi2_scal_mat;
1965 
1966     /**
1967     * Buffer Pointer to populate the rescale matrix for all transform size
1968     */
1969     WORD16 *pi2_rescal_mat;
1970 
1971     /** array of pointer to store the scaling matrices for
1972     *  all transform sizes and qp % 6 (pre computed)
1973     */
1974     WORD16 *api2_scal_mat[NUM_TRANS_TYPES * 2];
1975 
1976     /** array of pointer to store the re-scaling matrices for
1977     *  all transform sizes and qp % 6 (pre computed)
1978     */
1979     WORD16 *api2_rescal_mat[NUM_TRANS_TYPES * 2];
1980 
1981     /** array of function pointers for residual and
1982     *  forward transform for all transform sizes
1983     */
1984     pf_res_trans_luma apf_resd_trns[NUM_TRANS_TYPES];
1985 
1986     /** array of function pointers for residual and
1987     *  forward HAD transform for all transform sizes
1988     */
1989     pf_res_trans_luma_had_chroma apf_chrm_resd_trns_had[NUM_TRANS_TYPES - 2];
1990 
1991     /** array of function pointers for residual and
1992     *  forward transform for all transform sizes
1993     *  for chroma
1994     */
1995     pf_res_trans_chroma apf_chrm_resd_trns[NUM_TRANS_TYPES - 2];
1996 
1997     /** array of function pointers for quantization and
1998     *  inv Quant for ssd calc. for all transform sizes
1999     */
2000     pf_quant_iquant_ssd apf_quant_iquant_ssd[4];
2001 
2002     /** array of function pointers for inv.transform and
2003     *  recon for all transform sizes
2004     */
2005     pf_it_recon apf_it_recon[NUM_TRANS_TYPES];
2006 
2007     /** array of function pointers for inverse transform
2008     * and recon for all transform sizes for chroma
2009     */
2010     pf_it_recon_chroma apf_chrm_it_recon[NUM_TRANS_TYPES - 2];
2011 
2012     /** array of luma intra prediction function pointers */
2013     pf_intra_pred apf_lum_ip[NUM_IP_FUNCS];
2014 
2015     /** array of chroma intra prediction function pointers */
2016     pf_intra_pred apf_chrm_ip[NUM_IP_FUNCS];
2017 
2018     /* - Function pointer to cu_mode_decide function */
2019     /* - The 'void *' is used since one of the parameters of */
2020     /* this class of functions is the current structure */
2021     /* - This function pointer is used to choose the */
2022     /* appropriate function depending on whether bit_depth is */
2023     /* chosen as 8 bits or greater */
2024     /* - This function pointer's type is defined at the end */
2025     /* of this file */
2026     void *pv_cu_mode_decide;
2027 
2028     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2029     void *pv_inter_rdopt_cu_mc_mvp;
2030 
2031     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2032     void *pv_inter_rdopt_cu_ntu;
2033 
2034     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2035     void *pv_intra_chroma_pred_mode_selector;
2036 
2037     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2038     void *pv_intra_rdopt_cu_ntu;
2039 
2040     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2041     void *pv_final_rdopt_mode_prcs;
2042 
2043     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2044     void *pv_store_cu_results;
2045 
2046     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2047     void *pv_enc_loop_cu_bot_copy;
2048 
2049     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2050     void *pv_final_mode_reevaluation_with_modified_cu_qp;
2051 
2052     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2053     void *pv_enc_loop_ctb_left_copy;
2054 
2055     /** Quantization rounding factor for inter and intra CUs */
2056     WORD32 i4_quant_rnd_factor[2];
2057 
2058     /**
2059     * Frame Buffer Pointer to store the top row luma data.
2060     * one pixel row in every ctb row
2061     */
2062     void *apv_frm_top_row_luma[MAX_NUM_ENC_LOOP_PARALLEL];
2063 
2064     /**
2065     * One CTB row size of Top row luma data buffer
2066     */
2067     WORD32 i4_top_row_luma_stride;
2068 
2069     /**
2070     * One frm of Top row luma data buffer
2071     */
2072     WORD32 i4_frm_top_row_luma_size;
2073 
2074     /**
2075     * Current luma row bottom data store pointer
2076     */
2077     void *pv_bot_row_luma;
2078 
2079     /**
2080     * Top luma row top data access pointer
2081     */
2082     void *pv_top_row_luma;
2083 
2084     /**
2085     * Frame Buffer Pointer to store the top row chroma data (Cb  Cr pixel interleaved )
2086     * one pixel row in every ctb row
2087     */
2088     void *apv_frm_top_row_chroma[MAX_NUM_ENC_LOOP_PARALLEL];
2089 
2090     /**
2091     * One CTB row size of Top row chroma data buffer (Cb  Cr pixel interleaved )
2092     */
2093     WORD32 i4_top_row_chroma_stride;
2094 
2095     /**
2096     * One frm size of Top row chroma data buffer (Cb  Cr pixel interleaved )
2097     */
2098     WORD32 i4_frm_top_row_chroma_size;
2099 
2100     /**
2101     * Current chroma row bottom data store pointer
2102     */
2103     void *pv_bot_row_chroma;
2104 
2105     /**
2106     * Top chroma row top data access pointer
2107     */
2108     void *pv_top_row_chroma;
2109 
2110     /**
2111     * Frame Buffer Pointer to store the top row neighbour modes stored at 4x4 level
2112     * one 4x4 row in every ctb row
2113     */
2114     nbr_4x4_t *aps_frm_top_row_nbr[MAX_NUM_ENC_LOOP_PARALLEL];
2115 
2116     /**
2117     * One CTB row size of Top row nbr 4x4 params buffer
2118     */
2119     WORD32 i4_top_row_nbr_stride;
2120 
2121     /**
2122     * One frm size of Top row nbr 4x4 params buffer
2123     */
2124     WORD32 i4_frm_top_row_nbr_size;
2125 
2126     /**
2127     * Current row nbr prms bottom data store pointer
2128     */
2129     nbr_4x4_t *ps_bot_row_nbr;
2130 
2131     /**
2132     * Top row nbr prms top data access pointer
2133     */
2134     nbr_4x4_t *ps_top_row_nbr;
2135 
2136     /**
2137     * Pointer to (1,1) location in au1_nbr_ctb_map
2138     */
2139     UWORD8 *pu1_ctb_nbr_map;
2140 
2141     /**
2142     * neighbour map buffer stride
2143     */
2144     WORD32 i4_nbr_map_strd;
2145 
2146     /**
2147     * Array at ctb level to store the neighbour map
2148     * its size is 25x25 for ctb size of 64x64
2149     */
2150     UWORD8 au1_nbr_ctb_map[MAX_PU_IN_CTB_ROW + 1 + 8][MAX_PU_IN_CTB_ROW + 1 + 8];
2151 
2152     /**
2153     * Array to store left ctb data for luma
2154     * some padding is added to take care of unconditional access
2155     */
2156     void *pv_left_luma_data;
2157 
2158     /**
2159     * Array to store left ctb data for chroma (cb and cr pixel interleaved)
2160     * some padding is added to take care of unconditional access
2161     */
2162     void *pv_left_chrm_data;
2163 
2164     /**
2165     * Array to store the left neighbour modes at 4x4 level
2166     */
2167     nbr_4x4_t as_left_col_nbr[MAX_PU_IN_CTB_ROW];
2168 
2169     /**
2170     * Array to store current CTB pred modes at a 4x4 level
2171     * used for prediction inside ctb
2172     */
2173     nbr_4x4_t as_ctb_nbr_arr[MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
2174 
2175     /**
2176     * array for storing csbf during RD opt stage at CU level
2177     * one best and one current is required
2178     */
2179     UWORD8 au1_cu_csbf[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];
2180 
2181     /**
2182     * Stride of csbf buffer. will be useful for scanning access
2183     * if stored in a 2D order. right now set to max tx size >> 4;
2184     */
2185     WORD32 i4_cu_csbf_strd;
2186 
2187     /**
2188     * Array to store pred modes  during SATD and RD opt stage at CU level
2189     * one best and one current is required
2190     */
2191     nbr_4x4_t as_cu_nbr[2][MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
2192 
2193     /**
2194     * array to store the output of reference substitution process output
2195     * for intra CUs
2196     * TOP (32 x 2) + Left (32 x 2) + Top left (1) + Alignment (3)
2197     */
2198     void *pv_ref_sub_out;
2199 
2200     /**
2201     * array to store the filtered reference samples for intra CUs
2202     * TOP (32 x 2) + Left (32 x 2) + Top left (1) + Alignment (3)
2203     */
2204     void *pv_ref_filt_out;
2205 
2206     /**
2207     * Used for 3 purposes
2208     *
2209     * 1. MC Intermediate buffer
2210     * array for storing intermediate 16-bit value for hxhy subpel
2211     * generation at CTB level (+ 16) for subpel planes boundary
2212     * +4 is for horizontal 4pels
2213     *
2214     * 2. Temporary scratch buffer for transform and coeffs storage
2215     * MAX_TRANS_SIZE *2 for trans_scratch(32bit) and MAX_TRANS_SIZE *1 for trans_values
2216     * The first part i.e. from 0 to MAX_TRANS_SIZE is then reused for storing the quant coeffs
2217     * Max of both are used
2218     *
2219     * 3. MC Intermediate buffer
2220     * buffer for storing intermediate 16 bit values prior to conversion to 8bit in HBD
2221     *
2222     */
2223     MEM_ALIGN16 WORD16 ai2_scratch[(MAX_CTB_SIZE + 8 + 8) * (MAX_CTB_SIZE + 8 + 8 + 8) * 2];
2224 
2225     /**
2226     * array for storing cu level final params for a given mode
2227     * one best and one current is required
2228     */
2229     enc_loop_cu_final_prms_t as_cu_prms[2];
2230 
2231     /**
2232     * Scan index to be used for any given transform
2233     * this is a scratch variable used to communicate
2234     * scan idx at every transform level
2235     */
2236     WORD32 i4_scan_idx;
2237 
2238     /**
2239     * Buffer index in ping pong buffers
2240     * to be used SATD mode evaluations
2241     */
2242     WORD32 i4_satd_buf_idx;
2243 
2244     /**
2245     * Motion Compensation module context structure
2246     */
2247     inter_pred_ctxt_t s_mc_ctxt;
2248 
2249     /**
2250     * MV pred module context structure
2251     */
2252     mv_pred_ctxt_t s_mv_pred_ctxt;
2253 
2254     /**
2255     * Deblock BS ctb structure
2256     */
2257     deblk_bs_ctb_ctxt_t s_deblk_bs_prms;
2258 
2259     /**
2260     * Deblocking ctb structure
2261     */
2262     deblk_ctb_params_t s_deblk_prms;
2263 
2264     /**
2265     * Deblocking structure. For ctb-row level
2266     */
2267     deblk_ctbrow_prms_t s_deblk_ctbrow_prms;
2268 
2269     /**
2270     * Deblocking enable flag
2271     */
2272     WORD32 i4_deblock_type;
2273 
2274     /**
2275     *  context for cabac bit estimation used during rdopt stage
2276     */
2277     rdopt_entropy_ctxt_t s_rdopt_entropy_ctxt;
2278 
2279     /**
2280     * Context models stored for RDopt store and restore purpose
2281     */
2282     UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
2283 
2284     /**
2285     * current picture slice type
2286     */
2287     WORD8 i1_slice_type;
2288 
2289     /**
2290     * strong_intra_smoothing_enable_flag
2291     */
2292     WORD8 i1_strong_intra_smoothing_enable_flag;
2293 
2294     /** Pointer to Dep Mngr for controlling Top-Right CU dependency */
2295     void *pv_dep_mngr_enc_loop_cu_top_right;
2296 
2297     /** Pointer to Dep Mngr for controlling Deblocking Top dependency */
2298     void *pv_dep_mngr_enc_loop_dblk;
2299 
2300     /** Pointer to Dep Mngr for controlling SAO dependency */
2301     void *pv_dep_mngr_enc_loop_sao;
2302 
2303     /** pointer to store the cabac states at end of second CTB in current row */
2304     UWORD8 *pu1_curr_row_cabac_state;
2305 
2306     /** pointer to copy the cabac states at start of first CTB in current row */
2307     UWORD8 *pu1_top_rt_cabac_state;
2308     /** flag to indicate rate control mode.
2309     * @remarks :  To enable CU level qp modulation only when required.
2310     */
2311     WORD8 i1_cu_qp_delta_enable;
2312 
2313     /** flag to indicate whether entropy coding sync is enabled.
2314     * @remarks :  Entropy sync enable flag
2315     */
2316     WORD8 i1_entropy_coding_sync_enabled_flag;
2317 
2318     /** Use SATD or SAD for best merge candidate evaluation */
2319     WORD32 i4_use_satd_for_merge_eval;
2320 
2321     UWORD8 u1_use_early_cbf_data;
2322 
2323     /** Use SATD or SAD for best CU merge candidate evaluation */
2324     WORD32 i4_use_satd_for_cu_merge;
2325 
2326     /** Maximum number of merge candidates to be evaluated */
2327     WORD32 i4_max_merge_candidates;
2328 
2329     /** Flag to indicate whether current picture needs to be deblocked,
2330     padded and hpel planes need to be generated.
2331     These are turned off typically in non reference pictures when psnr
2332     and recon dump is disabled
2333     */
2334     WORD32 i4_deblk_pad_hpel_cur_pic;
2335 
2336     /* Array of structures for storing mc predicted data for
2337     * merge and skip modes
2338     */
2339     merge_skip_pred_data_t as_merge_skip_pred_data[MAX_NUM_CU_MERGE_SKIP_CAND];
2340 
2341     /* Sum the Qps of each 8*8 block in CU
2342     * 8*8 block is considered as Min CU size possible as per standard is 8
2343     * 0 corresponds to INTER and 1 corresponds to INTRA
2344     */
2345     LWORD64 i8_cl_ssd_lambda_qf_array[MAX_HEVC_QP_12bit + 1];
2346     UWORD32 au4_chroma_cost_weighing_factor_array[MAX_HEVC_QP_12bit + 1];
2347     LWORD64 i8_cl_ssd_lambda_chroma_qf_array[MAX_HEVC_QP_12bit + 1];
2348     WORD32 i4_satd_lamda_array[MAX_HEVC_QP_12bit + 1];
2349     WORD32 i4_sad_lamda_array[MAX_HEVC_QP_12bit + 1];
2350 
2351     /************************************************************************/
2352     /* The fields with the string 'type2' in their names are required */
2353     /* when both 8bit and hbd lambdas are needed. The lambdas corresponding */
2354     /* to the bit_depth != internal_bit_depth are stored in these fields */
2355     /************************************************************************/
2356     LWORD64 i8_cl_ssd_type2_lambda_qf_array[MAX_HEVC_QP_12bit + 1];
2357     LWORD64 i8_cl_ssd_type2_lambda_chroma_qf_array[MAX_HEVC_QP_12bit + 1];
2358     WORD32 i4_satd_type2_lamda_array[MAX_HEVC_QP_12bit + 1];
2359     WORD32 i4_sad_type2_lamda_array[MAX_HEVC_QP_12bit + 1];
2360 
2361     /* Lokesh: Added to find if the CU is the first to be coded in the group */
2362     WORD32 i4_is_first_cu_qg_coded;
2363 
2364     /* Chroma RDOPT related parameters */
2365     ihevce_chroma_rdopt_ctxt_t s_chroma_rdopt_ctxt;
2366 
2367     /* Structure to save pred data of ME/Intra cand */
2368     cu_me_intra_pred_prms_t s_cu_me_intra_pred_prms;
2369 
2370     /* Structure to save the flags required for Final mode Reconstruction
2371     function. These flags are set based on quality presets and bit-rate
2372     we are working on */
2373     cu_final_recon_flags_t s_cu_final_recon_flags;
2374 
2375     /* Parameter to indicate at which level RDOQ will be implemented:
2376     0 - RDOQ disabled
2377     1 - RDOQ enabled during RDOPT for all candidates
2378     2 - RDOQ enabled only for the final candidate*/
2379     WORD32 i4_rdoq_level;
2380 
2381     /* Parameter to indicate at which level Quant rounding factors are computed:
2382     FIXED_QUANT_ROUNDING       : Fixed Quant rounding values are used
2383     NCTB_LEVEL_QUANT_ROUNDING  : NCTB level Computed Quant rounding values are used
2384     CTB_LEVEL_QUANT_ROUNDING   : CTB level Computed Quant rounding values are used
2385     CU_LEVEL_QUANT_ROUNDING    : CU level Computed Quant rounding values are used
2386     TU_LEVEL_QUANT_ROUNDING    : TU level Computed Quant rounding values are used*/
2387     WORD32 i4_quant_rounding_level;
2388 
2389     /* Parameter to indicate at which level Quant rounding factors are computed:
2390     CHROMA_QUANT_ROUNDING    : Chroma Quant rounding values are used for chroma */
2391     WORD32 i4_chroma_quant_rounding_level;
2392 
2393     /* Parameter to indicate at which level SBH will be implemented:
2394     0 - SBH disabled
2395     1 - SBH enabled during RDOPT for all candidates
2396     2 - SBH enabled only for the final candidate*/
2397     WORD32 i4_sbh_level;
2398 
2399     /* Parameter to indicate at which level ZERO CBF RDO will be implemented:
2400     0 - ZCBF disabled
2401     1 - ZCBF enabled during RDOPT for all candidates
2402     2 - ZCBF enabled only for the final candidate
2403     */
2404     WORD32 i4_zcbf_rdo_level;
2405 
2406     /*RDOQ-SBH context structure*/
2407     rdoq_sbh_ctxt_t s_rdoq_sbh_ctxt;
2408 
2409     /** Structure to store the Merge/Skip Cand. for EncLoop */
2410     cu_inter_merge_skip_t s_cu_inter_merge_skip;
2411     /** Structure to store the Mixed mode Cand. for EncLoop */
2412     cu_mixed_mode_inter_t s_mixed_mode_inter_cu;
2413 
2414     ihevce_inter_pred_buf_data_t s_pred_buf_data;
2415 
2416     void *pv_422_chroma_intra_pred_buf;
2417 
2418     WORD32 i4_max_num_inter_rdopt_cands;
2419 
2420     /* Output Struct per each CU during recursions */
2421     ihevce_enc_cu_node_ctxt_t as_enc_cu_ctxt[MAX_CU_IN_CTB + 1];
2422 
2423     /* Used to store best inter candidate. Used only when */
2424     /* 'CU modulated QP override' is enabled */
2425     cu_inter_cand_t as_best_cand[MAX_CU_IN_CTB + 1];
2426 
2427     cu_inter_cand_t *ps_best_cand;
2428 
2429     UWORD8 au1_cu_init_cabac_state_a_priori[MAX_CU_IN_CTB + 1][IHEVC_CAB_CTXT_END];
2430 
2431     UWORD8 (*pau1_curr_cu_a_priori_cabac_state)[IHEVC_CAB_CTXT_END];
2432 
2433     /* Used to store pred data of each CU in the CTB. */
2434     /* Used only when 'CU modulated QP override' is enabled */
2435     void *pv_CTB_pred_luma;
2436 
2437     void *pv_CTB_pred_chroma;
2438 
2439     /**
2440     * array for storing recon during SATD and RD opt stage at CU level
2441     * one best and one current is required.Luma and chroma together
2442     */
2443     void *pv_cu_luma_recon;
2444 
2445     /**
2446     * array for storing recon during SATD and RD opt stage at CU level
2447     * one best and one current is required.Luma and chroma together
2448     */
2449     void *pv_cu_chrma_recon;
2450 
2451     /**
2452     * Array to store pred modes  during SATD and RD opt stage at CU level
2453     * one best and one current is required
2454     */
2455     nbr_4x4_t as_cu_recur_nbr[MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
2456 
2457     /**
2458     * Pointer to Array to store pred modes  during SATD and RD opt stage at CU level
2459     * one best and one current is required
2460     */
2461     nbr_4x4_t *ps_cu_recur_nbr;
2462 
2463     /**
2464     * Context models stored for CU recursion parent evaluation
2465     */
2466     UWORD8 au1_rdopt_recur_ctxt_models[4][IHEVC_CAB_CTXT_END];
2467 
2468     ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt;
2469 
2470     /**
2471     * array for storing coeffs during RD opt stage at CU level
2472     * one best and one current is required. Luma and chroma together
2473     */
2474     /*UWORD8 au1_cu_recur_coeffs[MAX_LUMA_COEFFS_CTB + MAX_CHRM_COEFFS_CTB];*/
2475 
2476     UWORD8 *pu1_cu_recur_coeffs;
2477 
2478     UWORD8 *apu1_cu_level_pingpong_coeff_buf_addr[2];
2479 
2480     WORD16 *api2_cu_level_pingpong_deq_buf_addr[2];
2481 
2482     UWORD8 *pu1_ecd_data;
2483 
2484     /* OPT: flag to skip parent CU=4TU eval during recursion */
2485     UWORD8 is_parent_cu_rdopt;
2486 
2487     /**
2488     *   Array of structs containing block merge data for
2489     *   4 32x32 CU's in indices 1 - 4 and 64x64 CU at 0
2490     */
2491     UWORD8 u1_cabac_states_next_row_copied_flag;
2492 
2493     UWORD8 u1_cabac_states_first_cu_copied_flag;
2494 
2495     UWORD32 u4_cur_ctb_wd;
2496 
2497     UWORD32 u4_cur_ctb_ht;
2498 
2499     /* thread id of the current context */
2500     WORD32 thrd_id;
2501 
2502     /** Number of processing threads created run time */
2503     WORD32 i4_num_proc_thrds;
2504 
2505     /* Instance number of bit-rate for multiple bit-rate encode */
2506     WORD32 i4_bitrate_instance_num;
2507 
2508     WORD32 i4_num_bitrates;
2509 
2510     WORD32 i4_enc_frm_id;
2511 
2512     /* Flag to indicate if chroma needs to be considered for cost calculation */
2513     WORD32 i4_consider_chroma_cost;
2514 
2515     /* Number of modes to be evaluated for intra */
2516     WORD32 i4_num_modes_to_evaluate_intra;
2517 
2518     /* Number of modes to be evaluated for inter */
2519     WORD32 i4_num_modes_to_evaluate_inter;
2520     /*pointers for struct to hold RC parameters for each bit-rate instance */
2521     enc_loop_rc_params_t
2522         *aaps_enc_loop_rc_params[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2523 
2524     /** Pointer to structure containing function pointers of common*/
2525     func_selector_t *ps_func_selector;
2526 
2527     /* Flag to control Top Right Sync for during Merge */
2528     UWORD8 u1_use_top_at_ctb_boundary;
2529 
2530     UWORD8 u1_is_input_data_hbd;
2531 
2532     UWORD8 u1_bit_depth;
2533 
2534     /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
2535     UWORD8 u1_chroma_array_type;
2536 
2537     rc_quant_t *ps_rc_quant_ctxt;
2538 
2539     sao_ctxt_t s_sao_ctxt_t;
2540 
2541     /* Offset to get the Qp for the last CU of upper CTB-row.
2542     This offset is from the current tile top row QP map start.
2543     This will only be consumed by the first CU of current CTB-row
2544     iff [it is skip && entropy sync is off] */
2545     WORD32 *pi4_offset_for_last_cu_qp;
2546 
2547     double i4_lamda_modifier;
2548     double i4_uv_lamda_modifier;
2549     WORD32 i4_temporal_layer_id;
2550 
2551     UWORD8 u1_disable_intra_eval;
2552 
2553     WORD32 i4_quant_round_tu[2][32 * 32];
2554 
2555     WORD32 *pi4_quant_round_factor_tu_0_1[5];
2556     WORD32 *pi4_quant_round_factor_tu_1_2[5];
2557 
2558     WORD32 i4_quant_round_4x4[2][4 * 4];
2559     WORD32 i4_quant_round_8x8[2][8 * 8];
2560     WORD32 i4_quant_round_16x16[2][16 * 16];
2561     WORD32 i4_quant_round_32x32[2][32 * 32];
2562 
2563     WORD32 *pi4_quant_round_factor_cu_ctb_0_1[5];
2564     WORD32 *pi4_quant_round_factor_cu_ctb_1_2[5];
2565 
2566     WORD32 i4_quant_round_cr_4x4[2][4 * 4];
2567     WORD32 i4_quant_round_cr_8x8[2][8 * 8];
2568     WORD32 i4_quant_round_cr_16x16[2][16 * 16];
2569 
2570     WORD32 *pi4_quant_round_factor_cr_cu_ctb_0_1[3];
2571     WORD32 *pi4_quant_round_factor_cr_cu_ctb_1_2[3];
2572     /* cost for not coding cu residue i.e forcing no residue syntax as 1 */
2573     LWORD64 i8_cu_not_coded_cost;
2574 
2575     /* dependency manager for forward ME  sync */
2576     void *pv_dep_mngr_encloop_dep_me;
2577 
2578     LWORD64 ai4_source_satd_8x8[64];
2579 
2580     LWORD64 ai4_source_chroma_satd[256];
2581 
2582     UWORD8 u1_is_refPic;
2583 
2584     WORD32 i4_qp_mod;
2585 
2586     WORD32 i4_is_ref_pic;
2587 
2588     WORD32 i4_chroma_format;
2589 
2590     WORD32 i4_temporal_layer;
2591 
2592     WORD32 i4_use_const_lamda_modifier;
2593 
2594     double f_i_pic_lamda_modifier;
2595 
2596     LWORD64 i8_distortion;
2597 
2598     WORD32 i4_use_ctb_level_lamda;
2599 
2600     float f_str_ratio;
2601 
2602     /* Flag to indicate if current frame is to be shared with other clients.
2603     Used only in distributed-encoding */
2604     WORD32 i4_share_flag;
2605 
2606     /* Pointer to the current recon being processed.
2607     Needed for enabling TMVP in dist-encoding */
2608     void *pv_frm_recon;
2609 
2610     ihevce_cmn_opt_func_t s_cmn_opt_func;
2611 
2612     /* The ME analogue to the struct above was not included since */
2613     /* that would have entailed inclusion of all ME specific */
2614     /* header files */
2615     /*FT_SAD_EVALUATOR **/
2616 
2617     /*FT_SAD_EVALUATOR **/
2618     void *pv_evalsad_pt_npu_mxn_8bit;
2619     UWORD8 u1_enable_psyRDOPT;
2620 
2621     UWORD8 u1_is_stasino_enabled;
2622 
2623     UWORD32 u4_psy_strength;
2624     /*Sub PIC rc context */
2625 
2626     WORD32 i4_sub_pic_level_rc;
2627     WORD32 i4_num_ctb_for_out_scale;
2628 
    /**
     * Accumulated bits of all CUs for the required CTBs, estimated during RDO evaluation.
     * Required for sub pic level RC. Reset when the required CU/CTB count is reached.
     */
2633     UWORD32 u4_total_cu_bits;
2634 
2635     UWORD32 u4_total_cu_bits_mul_qs;
2636 
2637     UWORD32 u4_total_cu_hdr_bits;
2638 
2639     UWORD32 u4_cu_tot_bits_into_qscale;
2640 
2641     UWORD32 u4_cu_tot_bits;
2642 
2643     /*Scale added to the current qscale, output from sub pic rc*/
2644     WORD32 i4_cu_qp_sub_pic_rc;
2645 
2646     /*Frame level L1 IPE sad*/
2647     LWORD64 i8_frame_l1_ipe_sad;
2648 
2649     /*Frame level L0 IPE satd*/
2650     LWORD64 i8_frame_l0_ipe_satd;
2651 
2652     /*Frame level L1 ME sad*/
2653     LWORD64 i8_frame_l1_me_sad;
2654 
2655     /*Frame level L1 activity factor*/
2656     LWORD64 i8_frame_l1_activity_fact;
    /* Bits estimated for the frame, calculated for sub pic RC bit control */
2658     WORD32 ai4_frame_bits_estimated[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2659     /** I Scene cut */
2660     WORD32 i4_is_I_scenecut;
2661 
2662     /** Non Scene cut */
2663     WORD32 i4_is_non_I_scenecut;
2664 
2665     /** Frames for which online/offline model is not valid */
2666     WORD32 i4_is_model_valid;
2667 
2668     /** Steady State Frame */
2669     //WORD32 i4_is_steady_state;
2670 
2671     WORD32 i4_is_first_query;
2672 
2673     /* Pointer to Tile params base */
2674     void *pv_tile_params_base;
2675 
2676     /** The index of column tile for which it is working */
2677     WORD32 i4_tile_col_idx;
2678 
2679     WORD32 i4_max_search_range_horizontal;
2680 
2681     WORD32 i4_max_search_range_vertical;
2682 
2683     WORD32 i4_is_ctb_qp_modified;
2684 
2685     WORD32 i4_display_num;
2686 
2687     WORD32 i4_pred_qp;
2688 
2689     /*assumption of qg size is 8x8 block size*/
2690     WORD32 ai4_qp_qg[8 * 8];
2691 
2692     WORD32 i4_last_cu_qp_from_prev_ctb;
2693 
2694     WORD32 i4_prev_QP;
2695 
2696     UWORD8 u1_max_inter_tr_depth;
2697 
2698     UWORD8 u1_max_intra_tr_depth;
2699 
2700 } ihevce_enc_loop_ctxt_t;
2701 
2702 /*****************************************************************************/
2703 /* Enums                                                                     */
2704 /*****************************************************************************/
2705 
/**
 * @brief RDOQ_LEVELS_T: selects the RDOQ mode of operation
 *
 *  NO_RDOQ        : RDOQ is not performed
 *  BEST_CAND_RDOQ : RDOQ is performed for the final candidate only
 *  ALL_CAND_RDOQ  : RDOQ is performed for all candidates
 */
typedef enum
{
    NO_RDOQ = 0,
    BEST_CAND_RDOQ = 1,
    ALL_CAND_RDOQ = 2,
} RDOQ_LEVELS_T;
2718 
/**
 * @brief QUANT_ROUNDING_COEFF_LEVELS_T: selects the coefficient-level quant
 *        rounding mode of operation
 *
 *  FIXED_QUANT_ROUNDING      : Fixed quant rounding values are used
 *  NCTB_LEVEL_QUANT_ROUNDING : NCTB level computed quant rounding values are used
 *  CTB_LEVEL_QUANT_ROUNDING  : CTB level computed quant rounding values are used
 *  CU_LEVEL_QUANT_ROUNDING   : CU level computed quant rounding values are used
 *  TU_LEVEL_QUANT_ROUNDING   : TU level computed quant rounding values are used
 *               Default for all candidates; based on RDOQ_LEVELS_T, choose to
 *               apply to the best candidate
 *  CHROMA_QUANT_ROUNDING     : NOTE(review): not described by the original
 *               comment; presumably selects chroma quant rounding - confirm
 */
typedef enum
{
    FIXED_QUANT_ROUNDING = 0,
    NCTB_LEVEL_QUANT_ROUNDING = 1,
    CTB_LEVEL_QUANT_ROUNDING = 2,
    CU_LEVEL_QUANT_ROUNDING = 3,
    TU_LEVEL_QUANT_ROUNDING = 4,
    CHROMA_QUANT_ROUNDING = 5
} QUANT_ROUNDING_COEFF_LEVELS_T;
2737 
2738 /*****************************************************************************/
2739 /* Enums                                                                     */
2740 /*****************************************************************************/
2741 
/**
 * @brief SBH_LEVELS_T: selects the SBH mode of operation
 *
 *  NO_SBH        : SBH is not performed
 *  BEST_CAND_SBH : SBH is performed for the final candidate only
 *  ALL_CAND_SBH  : SBH is performed for all candidates
 */
typedef enum
{
    NO_SBH = 0,
    BEST_CAND_SBH = 1,
    ALL_CAND_SBH = 2,
} SBH_LEVELS_T;
2754 
/**
 * @brief ZCBF_LEVELS_T: selects the ZeroCBF RDO mode of operation
 *
 *  NO_ZCBF     : ZCBF RDO is not performed
 *  ZCBF_ENABLE : ZCBF RDO is performed for all candidates
 */
typedef enum
{
    NO_ZCBF = 0,
    ZCBF_ENABLE = 1,
} ZCBF_LEVELS_T;
2765 
/**
******************************************************************************
*  @brief  Encode loop master context structure
*
*  Holds the array of per-thread encode loop context pointers alongside
*  top-row skip flags, stored context models, dependency manager handles,
*  bit-rate / parallel-frame counts and tile related information.
******************************************************************************
*/
typedef struct
{
    /** Array of encode loop structure pointers, one slot per possible
    *  processing thread (MAX_NUM_FRM_PROC_THRDS_ENC slots)
    */
    ihevce_enc_loop_ctxt_t *aps_enc_loop_thrd_ctxt[MAX_NUM_FRM_PROC_THRDS_ENC];

    /** Number of processing threads created at run time */
    WORD32 i4_num_proc_thrds;

    /**
    *  Array of top row cu skip flags (1 bit per 8x8 CU).
    *  HEVCE_MAX_WIDTH >> 6 bytes: (width / 8) flags packed 8 per byte.
    */
    UWORD8 au1_cu_skip_top_row[HEVCE_MAX_WIDTH >> 6];

    /** Context models stored at the end of the second CTB in a row,
    *  stored in packed form pState[bits6-1] | MPS[bit0]
    *  for each CTB row,
    *  used by the entropy sync model in RD opt
    */
    UWORD8 au1_ctxt_models[MAX_NUM_CTB_ROWS_FRM][IHEVC_CAB_CTXT_END];

    /** Dependency manager for controlling EncLoop Top-Right CU dependency
    * One per each bit-rate and one per each frame in parallel
    */
    void *aapv_dep_mngr_enc_loop_cu_top_right[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];

    /** Dependency manager for controlling Deblocking Top dependency
    * One per each bit-rate and one per each frame in parallel
    */
    void *aapv_dep_mngr_enc_loop_dblk[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];

    /** Dependency manager for controlling Sao Top dependency
    * One per each bit-rate and one per each frame in parallel
    */
    void *aapv_dep_mngr_enc_loop_sao[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];

    /** Number of bit-rate instances running */
    WORD32 i4_num_bitrates;

    /** Number of enc frames running in parallel */
    WORD32 i4_num_enc_loop_frm_pllel;

    /* Pointer to Tile params base (opaque here; the concrete type is not
    visible in this header) */
    void *pv_tile_params_base;
    /* Offset to get the Qp for the last CU of the upper CTB-row.
    This offset is from the current tile top row QP map start.

    This will only be consumed by the first CU of the current CTB-row
    iff [it is skip && entropy sync is off].
    There is one entry for every tile-column because the offset remains
    constant for all tiles lying in a tile-column */
    WORD32 ai4_offset_for_last_cu_qp[MAX_TILE_COLUMNS];
} ihevce_enc_loop_master_ctxt_t;
2823 
/**
******************************************************************************
*  @brief  This struct is used for storing data required by the block merge
*          function
*
*  It bundles pointers to per-block-size data, neighbour information, the
*  input buffer, cost related parameters and dependency manager state.
*  Member meanings below that are not spelled out by the declarations
*  themselves are marked as assumptions to be confirmed.
******************************************************************************
*/
typedef struct
{
    /* Pointers to block data, one per block size (8x8 .. 64x64) */
    block_data_8x8_t *ps_8x8_data;

    block_data_16x16_t *ps_16x16_data;

    block_data_32x32_t *ps_32x32_data;

    block_data_64x64_t *ps_64x64_data;

    /* Pointer to pointer(s) of partition type results for 32x32 blocks */
    part_type_results_t **ps_32x32_results;

    /* Pointer to a CU tree node of the current CTB */
    cur_ctb_cu_tree_t *ps_cu_tree;

    /* NOTE(review): presumably the L0 IPE analysis output for the current */
    /* CTB - confirm */
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;

    /* MV prediction context */
    mv_pred_ctxt_t *ps_mv_pred_ctxt;

    /* Pointer to an array of (HEVCE_MAX_REF_PICS * 2) recon_pic_buf_t */
    /* pointers; being a pointer-to-array it can also index several such */
    /* arrays (e.g. one per reference list - TODO confirm) */
    recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2];

    /* Top, left and current 4x4 neighbour buffers */
    nbr_4x4_t *ps_top_nbr_4x4;

    nbr_4x4_t *ps_left_nbr_4x4;

    nbr_4x4_t *ps_curr_nbr_4x4;

    /* Input (source) sample buffer; its stride is inp_stride below */
    /* (units not visible here) */
    UWORD8 *pu1_inp;

    /* CTB neighbour availability map; its stride is i4_nbr_map_strd */
    UWORD8 *pu1_ctb_nbr_map;

    WORD32 i4_nbr_map_strd;

    WORD32 inp_stride;

    /* NOTE(review): presumably the x / y offset of the current CTB - */
    /* units (pixels vs CTBs) are not visible here, confirm */
    WORD32 i4_ctb_x_off;

    WORD32 i4_ctb_y_off;

    /* Non-zero selects SATD for error calculation (per the name; the */
    /* alternative metric is not visible here) */
    WORD32 use_satd_for_err_calc;

    /* Lambda and its Q-format shift, used for cost computation */
    /* (assumption from names - confirm) */
    WORD32 lambda;

    WORD32 lambda_q_shift;

    /* Frame level quantisation step (assumption from name - confirm) */
    WORD32 frm_qstep;

    /* Number of 4x4 units in a CTB (assumption from name - confirm) */
    WORD32 num_4x4_in_ctb;

    /* Working (scratch) memory */
    UWORD8 *pu1_wkg_mem;

    /* Pointer to prediction buffer pointer(s) */
    UWORD8 **ppu1_pred;

    UWORD8 u1_bidir_enabled;

    /* Maximum transform tree depth (assumption from name - confirm) */
    UWORD8 u1_max_tr_depth;

    WORD32 i4_ctb_pos;

    WORD32 i4_ctb_size;

    /* Array of MAX_REFS_SEARCHABLE + 1 byte pointers; NOTE(review): */
    /* presumably weighted-input planes per reference - confirm */
    UWORD8 *apu1_wt_inp[MAX_REFS_SEARCHABLE + 1];

    /** Pointer of Dep Mngr for EncLoop Top-Right CU dependency */
    void *pv_dep_mngr_enc_loop_cu_top_right;
    /** The current cu row no. for Dep Manager to Check */
    WORD32 i4_dep_mngr_cur_cu_row_no;
    /** The Top cu row no. for Dep Manager to Check */
    WORD32 i4_dep_mngr_top_cu_row_no;

    WORD8 i1_quality_preset;

    /* Flag to control Top Right Sync during Merge */
    UWORD8 u1_use_top_at_ctb_boundary;

} block_merge_input_t;
2905 
/* Structure which stores the info regarding the TUs present in the CU. */
/* One instance describes a single TU: its size, its position and two */
/* per-TU result values (cost and early cbf). */
typedef struct tu_prms_t
{
    /* Size of the TU (units not visible here; presumably pixels - confirm) */
    UWORD8 u1_tu_size;

    /* X and Y offset of the TU (reference origin not visible here; */
    /* presumably relative to the CU - confirm) */
    UWORD8 u1_x_off;

    UWORD8 u1_y_off;

    /* Cost associated with this TU */
    WORD32 i4_tu_cost;

    /* Early coded-block-flag decision for this TU (assumption from name) */
    WORD32 i4_early_cbf;

} tu_prms_t;
2920 
/* Parameter bundle of pointers to output buffers and counters. Every */
/* member is a pointer, so the pointed-to objects can be modified through */
/* this structure. NOTE(review): the name suggests it is consumed by the */
/* final CU update step - confirm against callers. */
typedef struct
{
    /* Pointer to the pointer of final CU output structure(s) */
    cu_enc_loop_out_t **pps_cu_final;

    /* Pointer to the row-level PU buffer pointer */
    pu_t **pps_row_pu;

    /* Pointer to the row-level TU buffer pointer */
    tu_enc_loop_out_t **pps_row_tu;

    /* Pointer to the row-level entropy coded data (ecd) buffer pointer */
    UWORD8 **ppu1_row_ecd_data;

    /* Pointer to the count of PUs in the CTB */
    WORD32 *pi4_num_pus_in_ctb;

    /* Pointer to the position of the last CU in the CTB */
    WORD32 *pi4_last_cu_pos_in_ctb;

    /* Pointer to the size of the last CU */
    WORD32 *pi4_last_cu_size;

    /* Pointer to the number of CUs in the CTB output */
    UWORD8 *pu1_num_cus_in_ctb_out;

} cu_final_update_prms;
2940 
/* Parameter bundle passed (by pointer) to functions of type */
/* pf_final_rdopt_mode_prcs. It groups source / prediction / recon buffer */
/* pointers with their strides, CU geometry and QP, and control flags. */
/* Buffers are void pointers; NOTE(review): presumably so that both 8-bit */
/* and high-bit-depth samples can be carried - confirm. */
typedef struct
{
    cu_nbr_prms_t *ps_cu_nbr_prms;

    /* Best inter candidate for the CU */
    cu_inter_cand_t *ps_best_inter_cand;

    /* Chroma CU buffer parameters */
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms;

    /* Prediction mode in packed form (packing layout not visible here) */
    WORD32 packed_pred_mode;

    /* Index of the best RD-opt candidate */
    WORD32 rd_opt_best_idx;

    /* Source buffer and its stride */
    void *pv_src;

    WORD32 src_strd;

    /* Luma prediction buffer and its stride */
    void *pv_pred;

    WORD32 pred_strd;

    /* Chroma prediction buffer and its stride */
    void *pv_pred_chrm;

    WORD32 pred_chrm_strd;

    /* Final entropy coded data (ecd) buffer */
    UWORD8 *pu1_final_ecd_data;

    /* csbf buffer and its stride */
    UWORD8 *pu1_csbf_buf;

    WORD32 csbf_strd;

    /* Luma recon buffer and its stride */
    void *pv_luma_recon;

    WORD32 recon_luma_strd;

    /* Chroma recon buffer and its stride */
    void *pv_chrm_recon;

    WORD32 recon_chrma_strd;

    /* CU position (x, y), size and QP */
    UWORD8 u1_cu_pos_x;

    UWORD8 u1_cu_pos_y;

    UWORD8 u1_cu_size;

    WORD8 i1_cu_qp;

    /* Control flags (exact semantics are defined by the consumer) */
    UWORD8 u1_will_cabac_state_change;

    UWORD8 u1_recompute_sbh_and_rdoq;

    UWORD8 u1_is_first_pass;

    /* Present only when USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS */
    /* is non-zero, so the structure layout depends on that build option */
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
    UWORD8 u1_is_cu_noisy;
#endif

} final_mode_process_prms_t;
2998 
/* State of the final mode: the best inter candidate held by value, plus */
/* embedded luma and chroma prediction buffers. Both buffers are */
/* MAX_CU_SIZE * MAX_CU_SIZE * 2 bytes, making this a large structure. */
typedef struct
{
    /* Best inter candidate (stored by value, not by pointer) */
    cu_inter_cand_t s_best_cand;

    /* The size is twice of what is required to ensure availability */
    /* of adequate space for 'HBD' case */
    UWORD8 au1_pred_luma[MAX_CU_SIZE * MAX_CU_SIZE * 2];

    /* The size is twice of what is required to ensure availability */
    /* of adequate space for 422 case */
    UWORD8 au1_pred_chroma[MAX_CU_SIZE * MAX_CU_SIZE * 2];
} final_mode_state_t;
3011 
/* Parameter bundle for the mixed inter modes selector (per the type name). */
/* It groups the candidate stores (mixed-mode, ME and merge), the MV */
/* prediction and MC contexts, neighbour buffers with their strides, the CU */
/* geometry and candidate counts / limits. */
typedef struct
{
    /* Datastore of mixed mode inter candidates */
    cu_mixed_mode_inter_t *ps_mixed_modes_datastore;

    /* ME candidates; count is u1_num_me_cands */
    cu_inter_cand_t *ps_me_cands;

    /* Merge candidates; count is u1_num_merge_cands */
    cu_inter_cand_t *ps_merge_cands;

    /* MV prediction context */
    mv_pred_ctxt_t *ps_mv_pred_ctxt;

    /* Inter prediction (MC) context */
    inter_pred_ctxt_t *ps_mc_ctxt;

    /* CTB neighbour map; stride is i4_ctb_nbr_map_stride */
    UWORD8 *pu1_ctb_nbr_map;

    /* Source buffer; stride is i4_src_strd */
    void *pv_src;

    /* 4x4 neighbour buffers: CU, left, top and top-left */
    nbr_4x4_t *ps_cu_nbr_buf;

    nbr_4x4_t *ps_left_nbr_4x4;

    nbr_4x4_t *ps_top_nbr_4x4;

    nbr_4x4_t *ps_topleft_nbr_4x4;

    WORD32 i4_ctb_nbr_map_stride;

    WORD32 i4_src_strd;

    /* Stride of the left 4x4 neighbour buffer */
    WORD32 i4_nbr_4x4_left_strd;

    /* CU size and position (x, y) */
    UWORD8 u1_cu_size;

    UWORD8 u1_cu_pos_x;

    UWORD8 u1_cu_pos_y;

    UWORD8 u1_num_me_cands;

    UWORD8 u1_num_merge_cands;

    /* Upper limit on the number of mixed mode candidates to select */
    UWORD8 u1_max_num_mixed_mode_cands_to_select;

    /* Upper limit on the number of merge candidates */
    UWORD8 u1_max_merge_candidates;

    /* Non-zero selects SATD for merge evaluation (per the name; the */
    /* alternative metric is not visible here) */
    UWORD8 u1_use_satd_for_merge_eval;

} ihevce_mixed_inter_modes_selector_prms_t;
3059 
/* Data recorded for one TU node: distortion / cost metrics, bit and byte */
/* counts, zero row / column information, and the node's cbf, recon buffer */
/* id, validity, size and position. Embedded in tu_tree_node_t for luma and */
/* for each chroma component. */
typedef struct
{
    /* SSD of the node */
    LWORD64 i8_ssd;

    /* Cost of the node */
    LWORD64 i8_cost;

    /* Present only when ENABLE_INTER_ZCU_COST is non-zero, so the structure */
    /* layout depends on that build option */
#if ENABLE_INTER_ZCU_COST
    LWORD64 i8_not_coded_cost;
#endif

    /* SAD of the node */
    UWORD32 u4_sad;

    /* Bits for the node */
    WORD32 i4_bits;

    /* Number of bytes used for entropy coded data (ecd) */
    WORD32 i4_num_bytes_used_for_ecd;

    /* Zero column / zero row information (encoding not visible here) */
    WORD32 i4_zero_col;

    WORD32 i4_zero_row;

    /* Coded block flag */
    UWORD8 u1_cbf;

    /* Id of the recon buffer associated with this node */
    UWORD8 u1_reconBufId;

    /* Non-zero when the data in this node is valid (assumption from name) */
    UWORD8 u1_is_valid_node;

    /* Size and position (x, y) of the TU (units not visible here) */
    UWORD8 u1_size;

    UWORD8 u1_posx;

    UWORD8 u1_posy;
} tu_node_data_t;
3092 
/* Node of a TU tree with four child pointers. The self-referential child */
/* pointers require the struct tag. NOTE(review): the suffixes tl / tr / */
/* bl / br presumably denote the top-left, top-right, bottom-left and */
/* bottom-right quadrants - confirm. */
typedef struct tu_tree_node_t
{
    struct tu_tree_node_t *ps_child_node_tl;

    struct tu_tree_node_t *ps_child_node_tr;

    struct tu_tree_node_t *ps_child_node_bl;

    struct tu_tree_node_t *ps_child_node_br;

    /* Luma data of this node */
    tu_node_data_t s_luma_data;

    /* 2 because of the 2 subTU's when input is 422 */
    tu_node_data_t as_cb_data[2];

    /* Sized 2 like as_cb_data above */
    tu_node_data_t as_cr_data[2];

    /* Node-level validity flag; each embedded tu_node_data_t also carries */
    /* its own u1_is_valid_node */
    UWORD8 u1_is_valid_node;

} tu_tree_node_t;
3113 
3114 /*****************************************************************************/
3115 /* Extern Variable Declarations                                              */
3116 /*****************************************************************************/
3117 
3118 /*****************************************************************************/
3119 /* Extern Function Declarations                                              */
3120 /*****************************************************************************/
3121 
3122 /*****************************************************************************/
3123 /* Typedefs                                                                  */
3124 /*****************************************************************************/
/**
 * Function-pointer type for a CU mode decide routine (per the type name).
 *
 * Parameters: enc-loop context, CU level parameters, CU analysis data,
 * final mode state, an entropy coded data (ecd) byte buffer, and
 * collocated PU information (PU buffer, PU map and starting PU index).
 *
 * Returns an LWORD64. NOTE(review): presumably the cost of the decided
 * mode - confirm against the implementations.
 */
typedef LWORD64 (*pf_cu_mode_decide)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_cu_prms_t *ps_cu_prms,
    cu_analyse_t *ps_cu_analyse,
    final_mode_state_t *ps_final_mode_state,
    UWORD8 *pu1_ecd_data,
    pu_col_mv_t *ps_col_pu,
    UWORD8 *pu1_col_pu_map,
    WORD32 col_start_pu_idx);
3134 
/**
 * Function-pointer type for an inter RD-opt CU routine covering MC and MVP
 * (per the type name).
 *
 * Parameters: enc-loop context, the inter candidate, CU size and position
 * (x, y), the left / top / top-left 4x4 neighbour buffers, the stride of
 * the left neighbour buffer, and the current buffer index.
 *
 * Returns an LWORD64. NOTE(review): meaning of the return value is not
 * visible here - confirm against the implementations.
 */
typedef LWORD64 (*pf_inter_rdopt_cu_mc_mvp)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    cu_inter_cand_t *ps_inter_cand,
    WORD32 cu_size,
    WORD32 cu_pos_x,
    WORD32 cu_pos_y,
    nbr_4x4_t *ps_left_nbr_4x4,
    nbr_4x4_t *ps_top_nbr_4x4,
    nbr_4x4_t *ps_topleft_nbr_4x4,
    WORD32 nbr_4x4_left_strd,
    WORD32 curr_buf_idx);
3146 
/**
 * Function-pointer type for an inter RD-opt CU routine operating on N TUs
 * (per the type name).
 *
 * Parameters: enc-loop context, CU level parameters, source buffer, CU size
 * and position (x, y), current buffer index, chroma CU buffer parameters,
 * the inter candidate, CU analysis data, and an alpha stim multiplier.
 *
 * Returns an LWORD64. NOTE(review): presumably the RD cost of the
 * candidate - confirm against the implementations.
 */
typedef LWORD64 (*pf_inter_rdopt_cu_ntu)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_cu_prms_t *ps_cu_prms,
    void *pv_src,
    WORD32 cu_size,
    WORD32 cu_pos_x,
    WORD32 cu_pos_y,
    WORD32 curr_buf_idx,
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
    cu_inter_cand_t *ps_inter_cand,
    cu_analyse_t *ps_cu_analyse,
    WORD32 i4_alpha_stim_multiplier);
3159 
/**
 * Function-pointer type for an intra chroma prediction mode selector
 * (per the type name). Returns nothing; any result must be delivered
 * through the pointer arguments.
 *
 * Parameters: enc-loop context, chroma CU buffer parameters, CU analysis
 * data, current RD-opt index, TU mode, an alpha stim multiplier, and a
 * flag telling whether the CU is noisy. Note that u1_is_cu_noisy is an
 * unconditional parameter here, unlike the build-option guarded member of
 * the same name in final_mode_process_prms_t.
 */
typedef void (*pf_intra_chroma_pred_mode_selector)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
    cu_analyse_t *ps_cu_analyse,
    WORD32 rd_opt_curr_idx,
    WORD32 tu_mode,
    WORD32 i4_alpha_stim_multiplier,
    UWORD8 u1_is_cu_noisy);
3168 
/**
 * Function-pointer type for an intra RD-opt CU routine operating on N TUs
 * (per the type name).
 *
 * Parameters: enc-loop context, CU level parameters, the original
 * prediction buffer with its stride, chroma CU buffer parameters, luma
 * mode buffer, CU analysis data, current source buffer, left / top /
 * top-left CU neighbour sample buffers, left / top 4x4 neighbour buffers,
 * the stride of the left 4x4 neighbour buffer, the stride of the left CU
 * buffer, current buffer index, a processing mode selector, and an alpha
 * stim multiplier.
 *
 * Returns an LWORD64. NOTE(review): presumably the RD cost - confirm
 * against the implementations.
 */
typedef LWORD64 (*pf_intra_rdopt_cu_ntu)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_cu_prms_t *ps_cu_prms,
    void *pv_pred_org,
    WORD32 pred_strd_org,
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
    UWORD8 *pu1_luma_mode,
    cu_analyse_t *ps_cu_analyse,
    void *pv_curr_src,
    void *pv_cu_left,
    void *pv_cu_top,
    void *pv_cu_top_left,
    nbr_4x4_t *ps_left_nbr_4x4,
    nbr_4x4_t *ps_top_nbr_4x4,
    WORD32 nbr_4x4_left_strd,
    WORD32 cu_left_stride,
    WORD32 curr_buf_idx,
    WORD32 func_proc_mode,
    WORD32 i4_alpha_stim_multiplier);
3188 
/**
 * Function-pointer type for the final RD-opt mode processing routine
 * (per the type name). Takes the enc-loop context and a
 * final_mode_process_prms_t parameter bundle; returns nothing.
 */
typedef void (*pf_final_rdopt_mode_prcs)(
    ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms);
3191 
/**
 * Function-pointer type for a routine that stores CU results (per the
 * type name). Takes the enc-loop context, CU level parameters and the
 * final mode state; returns nothing.
 */
typedef void (*pf_store_cu_results)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_cu_prms_t *ps_cu_prms,
    final_mode_state_t *ps_final_state);
3196 
/**
 * Function-pointer type for an enc-loop CU bottom copy routine (per the
 * type name; what is copied is not visible here). Takes the enc-loop
 * context, CU level parameters, the enc-out CU node context, and the
 * current CU position in the row and in the CTB; returns nothing.
 */
typedef void (*pf_enc_loop_cu_bot_copy)(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_cu_prms_t *ps_cu_prms,
    ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
    WORD32 curr_cu_pos_in_row,
    WORD32 curr_cu_pos_in_ctb);
3203 
/**
 * Function-pointer type for an enc-loop CTB left copy routine (per the
 * type name; what is copied is not visible here). Takes the enc-loop
 * context and CU level parameters; returns nothing.
 */
typedef void (*pf_enc_loop_ctb_left_copy)(
    ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms);
3206 
3207 #endif /* _IHEVCE_ENC_LOOP_STRUCTS_H_ */
3208