1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /*!
21 ******************************************************************************
22 * \file ihevce_enc_structs.h
23 *
24 * \brief
*    This file contains the structure definitions of the Encoder
26 *
27 * \date
28 *    18/09/2012
29 *
30 * \author
31 *    Ittiam
32 *
33 ******************************************************************************
34 */
35 
36 #ifndef _IHEVCE_ENC_STRUCTS_H_
37 #define _IHEVCE_ENC_STRUCTS_H_
38 
39 /*****************************************************************************/
40 /* Constant Macros                                                           */
41 /*****************************************************************************/
/** @brief Maximum frame width handled by the encoder */
#define HEVCE_MAX_WIDTH 1920
/** @brief Maximum frame height handled by the encoder */
#define HEVCE_MAX_HEIGHT 1088

/** @brief Minimum frame width handled by the encoder */
#define HEVCE_MIN_WIDTH 64
/** @brief Minimum frame height handled by the encoder */
#define HEVCE_MIN_HEIGHT 64

/*
 * The derived counts below are wrapped in an outer pair of parentheses so
 * that each expands as a single operand. Without it, an expression such as
 * (x / MAX_NUM_CTB_ROWS_FRM) or (x % MAX_NUM_VERT_UNITS_FRM) would bind to
 * the numerator only and silently compute a different value.
 */

/** @brief CTB count of a maximum-size frame, using MIN_CTB_SIZE as the CTB edge */
#define MAX_CTBS_IN_FRAME                                                                          \
    (((HEVCE_MAX_WIDTH) * (HEVCE_MAX_HEIGHT)) / ((MIN_CTB_SIZE) * (MIN_CTB_SIZE)))
/** @brief CTB row count of a maximum-height frame, using MIN_CTB_SIZE as the CTB edge */
#define MAX_NUM_CTB_ROWS_FRM ((HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE))

/** @brief Height of the smallest vertical processing unit */
#define MIN_VERT_PROC_UNIT (8)
/** @brief Number of MIN_VERT_PROC_UNIT rows in a maximum-height frame */
#define MAX_NUM_VERT_UNITS_FRM ((HEVCE_MAX_HEIGHT) / (MIN_VERT_PROC_UNIT))

/** @brief Maximum number of reference pictures */
#define HEVCE_MAX_REF_PICS 8
/** @brief Maximum number of DPB pictures: one more than HEVCE_MAX_REF_PICS */
#define HEVCE_MAX_DPB_PICS (HEVCE_MAX_REF_PICS + 1)

/** @brief Horizontal padding amount */
#define PAD_HORZ 80
/** @brief Vertical padding amount */
#define PAD_VERT 80

/** @brief Default value for the maximum number of reference pictures */
#define DEFAULT_MAX_REFERENCE_PICS 4

/* NOTE(review): magic identifier value; its meaning is not evident from this header */
#define BLU_RAY_SUPPORT 231457
63 
/** @brief Upper bound on the number of parts in a min CU (NxN yields the maximum of 4) */
#define NUM_PU_PARTS 4

/** @brief Upper bound on the number of parts in an inter CU */
#define NUM_INTER_PU_PARTS (MAX_NUM_INTER_PARTS)

/* SEND_BI_RDOPT is defined right here, so the first branch below is the one that is compiled */
#define SEND_BI_RDOPT

#if defined(SEND_BI_RDOPT)
/** @brief Inter CU candidate count when SEND_BI_RDOPT is defined */
#define MAX_INTER_CU_CANDIDATES 4
#else
/** @brief Inter CU candidate count when SEND_BI_RDOPT is not defined */
#define MAX_INTER_CU_CANDIDATES 3
#endif

/** @brief Intra CU candidate count */
#define MAX_INTRA_CU_CANDIDATES 3

/** @brief Intra candidate count */
#define MAX_INTRA_CANDIDATES 35
80 
/** @brief One entropy thread is created per resolution and bit-rate instance */
#define NUM_ENTROPY_THREADS (IHEVCE_MAX_NUM_RESOLUTIONS * IHEVCE_MAX_NUM_BITRATES)

/** @brief Buffers between the Decomp and HME layers: 1 = sequential mode, >1 = parallel mode */
#define NUM_BUFS_DECOMP_HME 1

/**
 * @brief Pre-ME to L0 IPE stagger inside pre-enc.
 *        The largest usable stagger is MAX_PRE_ENC_STAGGER - 1.
 */
#define MAX_PRE_ENC_STAGGER (NUM_LAP2_LOOK_AHEAD + 1 + MIN_L1_L0_STAGGER_NON_SEQ)

/** @brief Number of ME-to-enc buffers; follows MAX_NUM_ENC_LOOP_PARALLEL */
#define NUM_ME_ENC_BUFS (MAX_NUM_ENC_LOOP_PARALLEL)

/** @brief Smallest stagger between L0 IPE and enc */
#define MIN_L0_IPE_ENC_STAGGER 1

/** @brief Largest stagger between L0 IPE and enc */
#define MAX_L0_IPE_ENC_STAGGER ((NUM_ME_ENC_BUFS) + (MIN_L0_IPE_ENC_STAGGER))

/** @brief Largest pre-enc RC delay: max L0 IPE/enc stagger + 1 + Decomp/HME buffers */
#define MAX_PRE_ENC_RC_DELAY (MAX_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME)

/** @brief Smallest pre-enc RC delay: min L0 IPE/enc stagger + 1 + Decomp/HME buffers */
#define MIN_PRE_ENC_RC_DELAY (MIN_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME)

/** @brief Number of frame-level CTB contexts kept between encode and entropy */
#define NUM_FRMPROC_ENTCOD_BUFS 1

/** @brief Number of extra recon buffers required by the stagger design */
#define NUM_EXTRA_RECON_BUFS 0

/** @brief Extra recon picture buffers needed to support EncLoop parallelism */
#define NUM_EXTRA_RECON_BUFS_FOR_ELP 0
110 
/** @brief Maximum number of bytes produced by a 4x4 block after scanning */
#define MAX_SCAN_COEFFS_BYTES_4x4 (48)

/** @brief Maximum number of scanned luma coefficient bytes at CTB level */
#define MAX_LUMA_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * (MAX_TU_IN_CTB) * 4)

/** @brief Maximum number of scanned chroma coefficient bytes at CTB level */
#define MAX_CHRM_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * (MAX_TU_IN_CTB >> 1) * 4)

/** @brief Maximum number of scanned coefficient bytes at CTB level (luma + chroma) */
#define MAX_SCAN_COEFFS_CTB ((MAX_LUMA_COEFFS_CTB) + (MAX_CHRM_COEFFS_CTB))

/** @brief Bytes of the per-CTB PU map buffer used for neighbour availability */
#define MUN_PU_MAP_BYTES_PER_CTB (MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW)

/** @brief Total number of system memory records */
#define TOTAL_SYSTEM_MEM_RECS 120
128 
/** @brief Number of input asynchronous command buffers */
#define NUM_AYSNC_CMD_BUFS 4

/** @brief Size of a command buffer, in bytes */
#define ENC_COMMAND_BUFF_SIZE 512

/** @brief Number of output buffers */
#define NUM_OUTPUT_BUFS 4

/** @brief Lambda used for SATD cost estimation */
#define LAMDA_SATD 1

/** @brief Maximum number of 1s in u2_sig_coeff_abs_gt1_flags */
#define MAX_GT_ONE 8

/** @brief Maximum number of intra prediction modes */
#define MAX_NUM_IP_MODES 35

/** @brief Number of best intra modes carried into intra mode refinement */
#define NUM_BEST_MODES 3

/** @brief Maximum number of parallel frame processing threads in the pre-encode group */
#define MAX_NUM_FRM_PROC_THRDS_PRE_ENC MAX_NUM_CORES

/** @brief Maximum number of parallel frame processing threads in the encode group */
#define MAX_NUM_FRM_PROC_THRDS_ENC MAX_NUM_CORES

/** @brief Number of ping-pong buffers used for stagger */
#define PING_PONG_BUF 2

/**
 * @brief Maximum number of layers in motion estimation.
 *        Must be greater than or equal to MAX_NUM_LAYERS defined in
 *        hme_interface.h.
 */
#define MAX_NUM_HME_LAYERS 5

/**
 ******************************************************************************
 *  @brief      Maximum number of layers allowed
 ******************************************************************************
 */
#define MAX_NUM_LAYERS 4

/** @brief Number of RC picture types */
#define NUM_RC_PIC_TYPE 9

/* 85 = 1 + 4 + 16 + 64; presumably all nodes of a 4-level CU quadtree -- TODO confirm */
#define MAX_NUM_NODES_CU_TREE (85)

/* Controls for dynamic load balancing */
#define DYN_LOAD_BAL_UPPER_LIMIT 0.80

#define DYN_LOAD_BAL_LOWER_LIMIT 0.20

#define NUM_SUB_GOP_DYN_BAL 1

#define MIN_NUM_FRMS_DYN_BAL 4

#define CORES_SRES_OR_MRES 2

#define HME_HIGH_SAD_BLK_THRESH 35

/* Set to non-zero to compare the final entropy thread's CABAC states with the enc loop states */
#define VERIFY_ENCLOOP_CABAC_STATES 0

/* The maximum CU size is 64x64 */
#define MAX_NUM_BLKS_IN_MAX_CU 64
192 
193 /*****************************************************************************/
194 /* Function Macros                                                           */
195 /*****************************************************************************/
196 
197 /*****************************************************************************/
198 /* Typedefs                                                                  */
199 /*****************************************************************************/
200 typedef void (*pf_iq_it_rec)(
201     WORD16 *pi2_src,
202     WORD16 *pi2_tmp,
203     UWORD8 *pu1_pred,
204     WORD16 *pi2_dequant_coeff,
205     UWORD8 *pu1_dst,
206     WORD32 qp_div, /* qpscaled / 6 */
207     WORD32 qp_rem, /* qpscaled % 6 */
208     WORD32 src_strd,
209     WORD32 pred_strd,
210     WORD32 dst_strd,
211     WORD32 zero_cols,
212     WORD32 zero_rows);
213 
214 typedef void (*pf_intra_pred)(
215     UWORD8 *pu1_ref, WORD32 src_strd, UWORD8 *pu1_dst, WORD32 dst_strd, WORD32 nt, WORD32 mode);
216 
217 typedef UWORD32 (*pf_res_trans_luma)(
218     UWORD8 *pu1_src,
219     UWORD8 *pu1_pred,
220     WORD32 *pi4_tmp,
221     WORD16 *pi2_dst,
222     WORD32 src_strd,
223     WORD32 pred_strd,
224     WORD32 dst_strd,
225     CHROMA_PLANE_ID_T e_chroma_plane);
226 
227 typedef WORD32 (*pf_quant)(
228     WORD16 *pi2_coeffs,
229     WORD16 *pi2_quant_coeff,
230     WORD16 *pi2_dst,
231     WORD32 qp_div, /* qpscaled / 6 */
232     WORD32 qp_rem, /* qpscaled % 6 */
233     WORD32 q_add,
234     WORD32 src_strd,
235     WORD32 dst_strd,
236     UWORD8 *pu1_csbf_buf,
237     WORD32 csbf_strd,
238     WORD32 *zero_cols,
239     WORD32 *zero_row);
240 
241 /*****************************************************************************/
242 /* Enums                                                                     */
243 /*****************************************************************************/
/** @brief Supported partition shapes */
typedef enum
{
    /* symmetric motion partitions */
    SIZE_2Nx2N = 0, /**< 2Nx2N */
    SIZE_2NxN = 1, /**< 2Nx N */
    SIZE_Nx2N = 2, /**<  Nx2N */
    SIZE_NxN = 3, /**<  Nx N */

    /* asymmetric motion partitions */
    SIZE_2NxnU = 4, /**< 2Nx( N/2) + 2Nx(3N/2) */
    SIZE_2NxnD = 5, /**< 2Nx(3N/2) + 2Nx( N/2) */
    SIZE_nLx2N = 6, /**< ( N/2)x2N + (3N/2)x2N */
    SIZE_nRx2N = 7 /**< (3N/2)x2N + ( N/2)x2N */
} PART_SIZE_E;
256 
/** @brief Interface level queues of the encoder */
typedef enum
{
    IHEVCE_INPUT_DATA_CTRL_Q = 0,
    IHEVCE_ENC_INPUT_Q,
    IHEVCE_INPUT_ASYNCH_CTRL_Q,
    IHEVCE_OUTPUT_DATA_Q,
    IHEVCE_OUTPUT_STATUS_Q,

    /** queue holding the recon buffer */
    IHEVCE_RECON_DATA_Q,

    /** queue holding the output buffer of enc_loop, i.e. the input buffer of entropy */
    IHEVCE_FRM_PRS_ENT_COD_Q,

    /** queue holding the output of pre-enc, i.e. the input buffer to ME */
    IHEVCE_PRE_ENC_ME_Q,

    /** queue holding the output buffer of ME, i.e. the input buffer of Enc-RDopt */
    IHEVCE_ME_ENC_RDOPT_Q,

    /** queue holding L0 IPE data for the enc loop */
    IHEVCE_L0_IPE_ENC_Q,

    /** number of queues; must remain the last entry */
    IHEVCE_MAX_NUM_QUEUES

} IHEVCE_Q_DESC_T;
280 
281 /*****************************************************************************/
282 /* Structure                                                                 */
283 /*****************************************************************************/
284 
285 /**
286 RC_QP_QSCALE conversion structures
287 **/
288 typedef struct
289 {
290     WORD16 i2_min_qp;
291 
292     WORD16 i2_max_qp;
293 
294     WORD16 i2_min_qscale;
295 
296     WORD16 i2_max_qscale;
297 
298     WORD32 *pi4_qscale_to_qp;
299 
300     WORD32 *pi4_qp_to_qscale_q_factor;
301 
302     WORD32 *pi4_qp_to_qscale;
303 
304     WORD8 i1_qp_offset;
305 
306 } rc_quant_t;
307 
308 /**
309 ******************************************************************************
310  *  @brief     4x4 level structure which contains all the parameters
311  *             for neighbour prediction puopose
312 ******************************************************************************
313  */
314 typedef struct
315 {
316     /** PU motion vectors */
317     pu_mv_t mv;
318     /** Intra or Inter flag for each partition - 0 or 1  */
319     UWORD16 b1_intra_flag : 1;
320     /** CU skip flag - 0 or 1  */
321     UWORD16 b1_skip_flag : 1;
322     /** CU depth in CTB tree (0-3)  */
323     UWORD16 b2_cu_depth : 2;
324 
325     /** Y Qp  for loop filter */
326     WORD16 b8_qp : 8;
327 
328     /** Luma Intra Mode 0 - 34   */
329     UWORD16 b6_luma_intra_mode : 6;
330 
331     /** Y CBF  for BS compute */
332     UWORD16 b1_y_cbf : 1;
333     /** Pred L0 flag of current 4x4 */
334     UWORD16 b1_pred_l0_flag : 1;
335 
336     /** Pred L0 flag of current 4x4 */
337     UWORD16 b1_pred_l1_flag : 1;
338 } nbr_4x4_t;
339 
340 typedef struct
341 {
342     /** Bottom Left availability flag */
343     UWORD8 u1_bot_lt_avail;
344 
345     /** Left availability flag */
346     UWORD8 u1_left_avail;
347 
348     /** Top availability flag */
349     UWORD8 u1_top_avail;
350 
351     /** Top Right availability flag */
352     UWORD8 u1_top_rt_avail;
353 
354     /** Top Left availability flag */
355     UWORD8 u1_top_lt_avail;
356 
357 } nbr_avail_flags_t;
358 
359 typedef struct
360 {
361     /** prev intra flag*/
362     UWORD8 b1_prev_intra_luma_pred_flag : 1;
363 
364     /** mpm_idx */
365     UWORD8 b2_mpm_idx : 2;
366 
367     /** reminder pred mode */
368     UWORD8 b5_rem_intra_pred_mode : 5;
369 
370 } intra_prev_rem_flags_t;
371 
372 /**
373 ******************************************************************************
374  *  @brief     calc (T+Q+RDOQ) output TU structure; entropy input TU structure
375 ******************************************************************************
376  */
377 typedef struct
378 {
379     /** base tu structure */
380     tu_t s_tu;
381 
382     /** offset of luma data in ecd buffer */
383     WORD32 i4_luma_coeff_offset;
384 
385     /** offset of cb data in ecd buffer */
386     WORD32 ai4_cb_coeff_offset[2];
387 
388     /** offset of cr data in ecd buffer */
389     WORD32 ai4_cr_coeff_offset[2];
390 
391 } tu_enc_loop_out_t;
392 
393 typedef struct
394 {
395     /* L0 Motion Vector */
396     mv_t s_l0_mv;
397 
398     /* L1 Motion Vector */
399     mv_t s_l1_mv;
400 
401     /* L0 Ref index */
402     WORD8 i1_l0_ref_idx;
403 
404     /*  L1 Ref index */
405     WORD8 i1_l1_ref_idx;
406 
407     /* L0 Ref Pic Buf ID */
408     WORD8 i1_l0_pic_buf_id;
409 
410     /* L1 Ref Pic Buf ID */
411     WORD8 i1_l1_pic_buf_id;
412 
413     /** intra flag */
414     UWORD8 b1_intra_flag : 1;
415 
416     /* Pred mode */
417     UWORD8 b2_pred_mode : 2;
418 
419     /* reserved flag can be used for something later */
420     UWORD8 u1_reserved;
421 
422 } pu_col_mv_t;
423 
424 /*****************************************************************************/
425 /* Encoder uses same structure as pu_t for prediction unit                   */
426 /*****************************************************************************/
427 
428 /**
429 ******************************************************************************
430  *  @brief     Encode loop (T+Q+RDOQ) output CU structure; entropy input CU structure
431 ******************************************************************************
432  */
433 typedef struct
434 {
435     /* CU X position in terms of min CU (8x8) units */
436     UWORD32 b3_cu_pos_x : 3;
437 
438     /* CU Y position in terms of min CU (8x8) units */
439     UWORD32 b3_cu_pos_y : 3;
440 
441     /** CU size in terms of min CU (8x8) units */
442     UWORD32 b4_cu_size : 4;
443 
444     /** transquant bypass flag ; 0 for this encoder */
445     UWORD32 b1_tq_bypass_flag : 1;
446 
447     /** cu skip flag */
448     UWORD32 b1_skip_flag : 1;
449 
450     /** intra / inter CU flag */
451     UWORD32 b1_pred_mode_flag : 1;
452 
453     /** indicates partition information for CU
454      *  For intra 0 : for 2Nx2N / 1 for NxN iff CU=minCBsize
455      *  For inter 0 : @sa PART_SIZE_E
456      */
457     UWORD32 b3_part_mode : 3;
458 
459     /** 0 for this encoder */
460     UWORD32 b1_pcm_flag : 1;
461 
462     /** only applicable for intra cu */
463     UWORD32 b3_chroma_intra_pred_mode : 3;
464 
465     /** no residue flag for cu */
466     UWORD32 b1_no_residual_syntax_flag : 1;
467 
468     /* flag to indicate if current CU is the first
469     CU of the Quantisation group*/
470     UWORD32 b1_first_cu_in_qg : 1;
471 
472     /** Intra prev and reminder flags
473      * if part is NxN the tntries 1,2,3 will be valid
474      * other wise only enry 0 will be set.
475      */
476     intra_prev_rem_flags_t as_prev_rem[NUM_PU_PARTS];
477 
478     /**
479      *  Access valid  number of pus in this array based on u1_part_mode
480      *  Moiton vector differentials and reference idx should be
481      *  populated in this structure
482      *  @remarks shall be accessed only for inter pus
483      */
484     pu_t *ps_pu;
485 
486     /**
487      *  pointer to first tu of this cu. Each TU need to be populated
488      *  in TU order by calc. Total TUs in CU is given by u2_num_tus_in_cu
489      */
490     tu_enc_loop_out_t *ps_enc_tu;
491 
492     /** total TUs in this CU; shall be 0 if b1_no_residual_syntax_flag = 1 */
493     UWORD16 u2_num_tus_in_cu;
494 
495     /** Coeff bufer pointer */
496     /* Pointer to transform coeff data */
497     /*************************************************************************/
498     /* Following format is repeated for every coded TU                       */
499     /* Luma Block                                                            */
500     /* num_coeffs      : 16 bits                                             */
501     /* zero_cols       : 8 bits ( 1 bit per 4 columns)                       */
502     /* sig_coeff_map   : ((TU Size * TU Size) + 31) >> 5 number of WORD32s   */
503     /* coeff_data      : Non zero coefficients                               */
504     /* Cb Block (only for last TU in 4x4 case else for every luma TU)        */
505     /* num_coeffs      : 16 bits                                             */
506     /* zero_cols       : 8 bits ( 1 bit per 4 columns)                       */
507     /* sig_coeff_map   : ((TU Size * TU Size) + 31) >> 5 number of WORD32s   */
508     /* coeff_data      : Non zero coefficients                               */
509     /* Cr Block (only for last TU in 4x4 case else for every luma TU)        */
510     /* num_coeffs      : 16 bits                                             */
511     /* zero_cols       : 8 bits ( 1 bit per 4 columns)                       */
512     /* sig_coeff_map   : ((TU Size * TU Size) + 31) >> 5 number of WORD32s   */
513     /* coeff_data      : Non zero coefficients                               */
514     /*************************************************************************/
515     void *pv_coeff;
516 
517     /** qp used during for CU
518       * @remarks :
519       */
520     WORD8 i1_cu_qp;
521 
522 } cu_enc_loop_out_t;
523 
524 /**
525  * SAO
526  */
527 typedef struct
528 {
529     /**
530      * sao_type_idx_luma
531      */
532     UWORD32 b3_y_type_idx : 3;
533 
534     /**
535      * luma sao_band_position
536      */
537     UWORD32 b5_y_band_pos : 5;
538 
539     /**
540      * sao_type_idx_chroma
541      */
542     UWORD32 b3_cb_type_idx : 3;
543 
544     /**
545      * cb sao_band_position
546      */
547     UWORD32 b5_cb_band_pos : 5;
548 
549     /**
550      * sao_type_idx_chroma
551      */
552     UWORD32 b3_cr_type_idx : 3;
553 
554     /**
555      * cb sao_band_position
556      */
557     UWORD32 b5_cr_band_pos : 5;
558 
559     /*SAO Offsets
560      * In all these offsets, 0th element is not used
561      */
562     /**
563      * luma SaoOffsetVal[i]
564      */
565     WORD8 u1_y_offset[5];
566 
567     /**
568      * chroma cb SaoOffsetVal[i]
569      */
570     WORD8 u1_cb_offset[5];
571 
572     /**
573      * chroma cr SaoOffsetVal[i]
574      */
575     WORD8 u1_cr_offset[5];
576 
577     /**
578      * sao_merge_left_flag common for y,cb,cr
579      */
580     UWORD32 b1_sao_merge_left_flag : 1;
581 
582     /**
583      * sao_merge_up_flag common for y,cb,cr
584      */
585     UWORD32 b1_sao_merge_up_flag : 1;
586 
587 } sao_enc_t;
588 
589 /**
590 ******************************************************************************
591  *  @brief       ctb output structure; output of Encode loop, input to entropy
592 ******************************************************************************
593  */
594 typedef struct
595 {
596     /**
597      * bit0     :  depth0 split flag, (64x64 splits)
598      * bits 1-3 :  not used
599      * bits 4-7 :  depth1 split flags; valid iff depth0 split=1 (32x32 splits)
600      * bits 8-23:  depth2 split flags; (if 0 16x16 is cu else 8x8 min cu)
601 
602      * if a split flag of n is set for depth 1, check the following split flags
603      * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2:
604      *
605      */
606     UWORD32 u4_cu_split_flags;
607 
608     /***************************************************************
609      * For any given CU position CU_posx, CU_posy access
610      *  au4_packed_tu_split_flags[(CU_posx >> 5)[(CU_posy >> 5)]
611      * Note : For CTB size smaller than 64x64 only use u4_packed_tu_split_flags[0]
612      ****************************************************************/
613 
614     /**
615      * access bits corresponding to actual CU size till leaf nodes
616      * bit0     :  (32x32 TU split flag)
617      * bits 1-3 :  not used
618      * bits 4-7 :  (16x16 TUsplit flags)
619      * bits 8-23:  (8x8  TU split flags)
620 
621      * if a split flag of n is set for depth 1, check the following split flags
622      * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2:
623      *
624      * @remarks     As tu sizes are relative to CU sizes the producer has to
625      * make sure the correctness of u4_packed_tu_split_flags.
626      *
627      * @remarks     au4_packed_tu_split_flags_cu[1]/[2]/[3] to be used only
628      *              for 64x64 ctb.
629      */
630     UWORD32 au4_packed_tu_split_flags_cu[4];
631 
632     /**
633      *  pointer to first CU of CTB. Each CU need to be populated
634      *  in CU order by calc. Total CUs in CTB is given by u1_num_cus_in_ctb
635      */
636     cu_enc_loop_out_t *ps_enc_cu;
637 
638     /** total TUs in this CU; shall be 0 if b1_no_residual_syntax_flag = 1 */
639     UWORD8 u1_num_cus_in_ctb;
640 
641     /** CTB neighbour availability flags */
642     nbr_avail_flags_t s_ctb_nbr_avail_flags;
643 
644     /* SAO parameters of the CTB */
645     sao_enc_t s_sao;
646 
647 } ctb_enc_loop_out_t;
648 
649 /**
650 ******************************************************************************
651  *  @brief      cu inter candidate for encoder
652 ******************************************************************************
653  */
654 typedef struct
655 {
656     /** base pu structure
657      *  access valid  number of entries in this array based on u1_part_size
658      */
659     pu_t as_inter_pu[NUM_INTER_PU_PARTS];
660 
661     /* TU split flag : tu_split_flag[0] represents the transform splits
662      *  for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds
663      *  to respective 32x32  */
664     /* For a 8x8 TU - 1 bit used to indicate split */
665     /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */
666     /* For a 32x32 TU - See above */
667     WORD32 ai4_tu_split_flag[4];
668 
669     /* TU split flag : tu_split_flag[0] represents the transform splits
670      *  for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds
671      *  to respective 32x32  */
672     /* For a 8x8 TU - 1 bit used to indicate split */
673     /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */
674     /* For a 32x32 TU - See above */
675     WORD32 ai4_tu_early_cbf[4];
676 
677     /**Pointer to the buffer having predicted data after mc in SATD stage
678      * Since we have 2 buffers for each candidate pred data for best merge candidate
679      * can be in one of the 2 buffers.
680      */
681     UWORD8 *pu1_pred_data;
682 
683     UWORD16 *pu2_pred_data;
684 
685     UWORD8 *pu1_pred_data_scr;
686 
687     UWORD16 *pu2_pred_data_src;
688 
689     /* Total cost: SATD cost + MV cost */
690     WORD32 i4_total_cost;
691 
692     /** Stride for predicted data*/
693     WORD32 i4_pred_data_stride;
694 
695     /** @remarks u1_part_size can be non square only for  Inter   */
696     UWORD8 b3_part_size : 3; /* @sa: PART_SIZE_E */
697 
698     /** evaluate transform for cusize iff this flag is 1 */
699     /** this flag should be set 0 if CU is 64x64         */
700     UWORD8 b1_eval_tx_cusize : 1;
701 
702     /** evaluate transform for cusize/2 iff this flag is 1 */
703     UWORD8 b1_eval_tx_cusize_by2 : 1;
704 
705     /** Skip Flag : ME should always set this 0 for the candidates */
706     UWORD8 b1_skip_flag : 1;
707 
708     UWORD8 b1_intra_has_won : 1;
709 
710     /* used to mark if this mode needs to be evaluated in auxiliary mode */
711     /* if 1, this mode will be evaluated otherwise not.*/
712     UWORD8 b1_eval_mark : 1;
713 
714 } cu_inter_cand_t;
715 
716 /**
717 ******************************************************************************
718  *  @brief      cu intra candidate for encoder
719 ******************************************************************************
720  */
721 typedef struct
722 {
723     UWORD8 au1_intra_luma_mode_nxn_hash[NUM_PU_PARTS][MAX_INTRA_CANDIDATES];
724 
725     /**
726      *  List of NxN PU candidates in CU  for each partition
727      *  valid only of if current cusize = mincusize
728      * +1 to signal the last flag invalid value of 255 needs to be stored
729      */
730     UWORD8 au1_intra_luma_modes_nxn[NUM_PU_PARTS][(MAX_INTRA_CU_CANDIDATES * (4)) + 2 + 1];
731 
732     /* used to mark if this mode needs to be evaluated in auxiliary mode */
733     /* if 1, this mode will be evaluated otherwise not.*/
734     UWORD8 au1_nxn_eval_mark[NUM_PU_PARTS][MAX_INTRA_CU_CANDIDATES + 1];
735 
736     /**
737      *  List of 2Nx2N PU candidates in CU
738      * +1 to signal the last flag invalid value of 255 needs to be stored
739      */
740     UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu[MAX_INTRA_CU_CANDIDATES + 1];
741 
742     /**
743      *  List of 2Nx2N PU candidates in CU
744      * +1 to signal the last flag invalid value of 255 needs to be stored
745      */
746     UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[MAX_INTRA_CU_CANDIDATES + 1];
747 
748     /* used to mark if this mode needs to be evaluated in auxiliary mode */
749     /* if 1, this mode will be evaluated otherwise not.*/
750     UWORD8 au1_2nx2n_tu_eq_cu_eval_mark[MAX_INTRA_CU_CANDIDATES + 1];
751 
752     /* used to mark if this mode needs to be evaluated in auxiliary mode */
753     /* if 1, this mode will be evaluated otherwise not.*/
754     UWORD8 au1_2nx2n_tu_eq_cu_by_2_eval_mark[MAX_INTRA_CU_CANDIDATES + 1];
755 
756     UWORD8 au1_num_modes_added[NUM_PU_PARTS];
757 
758     /** evaluate transform for cusize iff this flag is 1 */
759     /** this flag should be set 0 if CU is 64x64         */
760     UWORD8 b1_eval_tx_cusize : 1;
761 
762     /** evaluate transform for cusize/2 iff this flag is 1 */
763     UWORD8 b1_eval_tx_cusize_by2 : 1;
764 
765     /** number of intra candidates for SATD evaluation in */
766     UWORD8 b6_num_intra_cands : 6;
767 
768 } cu_intra_cand_t;
769 
770 /**
771 ******************************************************************************
772  *  @brief      cu structure for mode analysis/evaluation
773 ******************************************************************************
774  */
775 typedef struct
776 {
777     /** CU X position in terms of min CU (8x8) units */
778     UWORD8 b3_cu_pos_x : 3;
779 
780     /** CU Y position in terms of min CU (8x8) units */
781     UWORD8 b3_cu_pos_y : 3;
782 
783     /** reserved bytes */
784     UWORD8 b2_reserved : 2;
785 
786     /** CU size 2N (width or height) in pixels */
787     UWORD8 u1_cu_size;
788 
789     /** Intra CU candidates after FAST CU decision (output of IPE)
790      *  8421 algo along with transform size evalution will
791      *  be done for these modes in Encode loop pass.
792      */
793     cu_intra_cand_t s_cu_intra_cand;
794 
795     /** indicates the angular mode (0 - 34) for chroma,
796      *  Note : No provision currently to take chroma through RDOPT or SATD
797      */
798     UWORD8 u1_chroma_intra_pred_mode;
799 
800     /** number of inter candidates in as_cu_inter_cand[]
801       * shall be 0 for intra frames.
802       * These inters are evaluated for RDOPT apart from merge/skip candidates
803       */
804     UWORD8 u1_num_inter_cands;
805 
806     /** List of candidates to be evalauted (SATD/RDOPT) for this CU
807       * @remarks : all  merge/skip candidates not a part of this list
808       */
809     cu_inter_cand_t as_cu_inter_cand[MAX_INTER_CU_CANDIDATES];
810 
811     WORD32 ai4_mv_cost[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS];
812 
813 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
814     WORD32 ai4_err_metric[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS];
815 #endif
816 
817     /* Flag to convey if Inta or Inter is the best candidate among the
818     candidates populated
819      0: If inter is the winner and 1: if Intra is winner*/
820     UWORD8 u1_best_is_intra;
821 
822     /** number of intra rdopt candidates
823       * @remarks : shall be <= u1_num_intra_cands
824       */
825     UWORD8 u1_num_intra_rdopt_cands;
826     /** qp used during for CU
827       * @remarks :
828       */
829     WORD8 i1_cu_qp;
830     /** Activity factor used in pre enc thread for deriving the Qp
831       * @remarks : This is in Q format
832       */
833     WORD32 i4_act_factor[4][2];
834 
835 } cu_analyse_t;
836 
837 /**
838 ******************************************************************************
839  *  @brief      Structure for CU recursion
840 ******************************************************************************
841  */
842 typedef struct cur_ctb_cu_tree_t
843 {
844     /** CU X position in terms of min CU (8x8) units */
845     UWORD8 b3_cu_pos_x : 3;
846 
847     /** CU X position in terms of min CU (8x8) units */
848     UWORD8 b3_cu_pos_y : 3;
849 
850     /** reserved bytes */
851     UWORD8 b2_reserved : 2;
852 
853     UWORD8 u1_cu_size;
854 
855     UWORD8 u1_intra_eval_enable;
856 
857     UWORD8 u1_inter_eval_enable;
858 
859     /* Flag that indicates whether to evaluate this node */
860     /* during RDOPT evaluation. This does not mean that */
861     /* evaluation of the children need to be abandoned */
862     UWORD8 is_node_valid;
863 
864     LWORD64 i8_best_rdopt_cost;
865 
866     struct cur_ctb_cu_tree_t *ps_child_node_tl;
867 
868     struct cur_ctb_cu_tree_t *ps_child_node_tr;
869 
870     struct cur_ctb_cu_tree_t *ps_child_node_bl;
871 
872     struct cur_ctb_cu_tree_t *ps_child_node_br;
873 
874 } cur_ctb_cu_tree_t;
875 
876 typedef struct
877 {
878     WORD32 num_best_results;
879 
880     part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS];
881 
882 } block_data_32x32_t;
883 
884 /**
885 ******************************************************************************
886  *  @brief      Structure for storing data about all the 64x64
887  *              block in a 64x64 CTB
888 ******************************************************************************
889  */
890 typedef block_data_32x32_t block_data_64x64_t;
891 
892 /**
893 ******************************************************************************
894  *  @brief      Structure for storing data about all 16 16x16
895  *              blocks in a 64x64 CTB and each of their partitions
896 ******************************************************************************
897  */
898 typedef struct
899 {
900     WORD32 num_best_results;
901 
902     /**
903      * mask of active partitions, Totally 17 bits. For a given partition
904      * id, as per PART_ID_T enum the corresponding bit position is 1/0
905      * indicating that partition is active or inactive
906      */
907     /*WORD32 i4_part_mask;*/
908 
909     part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS];
910 
911 } block_data_16x16_t;
912 
913 typedef struct
914 {
915     WORD32 num_best_results;
916 
917     part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS];
918 } block_data_8x8_t;
919 
920 /**
921 ******************************************************************************
922  *  @brief      Structure for data export from ME to Enc_Loop
923 ******************************************************************************
924  */
typedef struct
{
    /* Per-8x8 block data: 64 entries (an 8x8 grid of 8x8 blocks covers a */
    /* 64x64 CTB). Ordering of entries is not visible here - TODO confirm */
    block_data_8x8_t as_8x8_block_data[64];

    /* Per-16x16 block data: 16 entries for a 64x64 CTB */
    block_data_16x16_t as_block_data[16];

    /* Per-32x32 block data: 4 entries for a 64x64 CTB */
    block_data_32x32_t as_32x32_block_data[4];

    /* Data for the single 64x64 block */
    block_data_64x64_t s_64x64_block_data;

} me_ctb_data_t;
936 
937 /**
938 ******************************************************************************
939  *  @brief   noise detection related structure
940  *
941 ******************************************************************************
942  */
943 
typedef struct
{
    /* CTB level noise indication; presumably non-zero when noise was */
    /* detected in the CTB - TODO confirm the exact value convention  */
    WORD32 i4_noise_present;

    /* Per-8x8 block noisy flag, MAX_CU_IN_CTB entries */
    UWORD8 au1_is_8x8Blk_noisy[MAX_CU_IN_CTB];

    /* Source variance values named for 16x16 blocks.                    */
    /* NOTE(review): the array is sized MAX_CU_IN_CTB, the same bound as */
    /* the 8x8 array above - confirm how many entries are actually used  */
    UWORD32 au4_variance_src_16x16[MAX_CU_IN_CTB];
} ihevce_ctb_noise_params;
952 
953 /**
954 ******************************************************************************
955  *  @brief      ctb structure for mode analysis/evaluation
956 ******************************************************************************
957  */
typedef struct
{
    /**
     * CU decision in a ctb is frozen by ME/IPE and populated in
     * u4_cu_split_flags.
     * @remarks
     * TODO:review comment
     * bit0     :  64x64 split flag,  (depth0 flag for 64x64 ctb unused for smaller ctb)
     * bits 1-3 :  not used
     * bits 4-7 :  32x32 split flags; (depth1 flags for 64x64ctb / only bit4 used for 32x32ctb)
     * bits 8-23:  16x16 split flags; (depth2 flags for 64x64 / depth1[bits8-11] for 32x32 [bit8 for ctb 16x16] )
     *
     * if a split flag of n is set for depth 1, check the following split flags
     * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2:
     *
     */
    UWORD32 u4_cu_split_flags;

    /** Number of CUs in this CTB */
    UWORD8 u1_num_cus_in_ctb;

    /** Pointer to a CU tree node for this CTB (presumably the root - TODO confirm) */
    cur_ctb_cu_tree_t *ps_cu_tree;

    /** Pointer to the ME results exported for this CTB */
    me_ctb_data_t *ps_me_ctb_data;

    /** Noise detection parameters for this CTB */
    ihevce_ctb_noise_params s_ctb_noise_params;

} ctb_analyse_t;
985 /**
986 ******************************************************************************
987  *  @brief Structures for tapping ssd and bit-estimate information for all CUs
988 ******************************************************************************
989  */
990 
/* Pairs a 64-bit cost with a 32-bit index */
typedef struct
{
    /* Cost value */
    LWORD64 i8_cost;
    /* Index associated with the cost; what it indexes is decided by the */
    /* user of this structure and is not visible here                    */
    WORD32 i4_idx;
} cost_idx_t;
996 
997 /**
998 ******************************************************************************
999  *  @brief      reference/non reference pic context for encoder
1000 ******************************************************************************
1001  */
typedef struct

{
    /**
     * YUV buffer descriptor for the recon
     * Allocation per frame for Y = ((ALIGN(frame width, MAX_CTB_SIZE)) +  2 * PAD_HORZ)*
     *                              ((ALIGN(frame height, MAX_CTB_SIZE)) + 2 * PAD_VERT)
     */
    iv_enc_yuv_buf_t s_yuv_buf_desc;

    /** Second YUV buffer descriptor; presumably for the source picture - TODO confirm */
    iv_enc_yuv_buf_src_t s_yuv_buf_desc_src;

    /* Pointer to Luma (Y) sub plane buffers Horz/ Vert / HV grid            */
    /* When (L0ME_IN_OPENLOOP_MODE == 1), additional buffer required to store */
    /* the fullpel plane for use as reference */
    UWORD8 *apu1_y_sub_pel_planes[3 + L0ME_IN_OPENLOOP_MODE];

    /**
     * frm level pointer to pu bank for colocated  mv access
     * Allocation per frame = (ALIGN(frame width, MAX_CTB_SIZE) / MIN_PU_SIZE) *
     *                         (ALIGN(frame height, MAX_CTB_SIZE) / MIN_PU_SIZE)
     */
    pu_col_mv_t *ps_frm_col_mv;
    /**
     ************************************************************************
     * Pointer to a PU map stored at frame level,
     * It contains a 7 bit pu index in encoder order w.r.t to a ctb at a min
     * granularity of MIN_PU_SIZE size.
     ************************************************************************
     */
    UWORD8 *pu1_frm_pu_map;

    /** CTB level frame buffer to store the accumulated sum of
     * number of PUs for every row */
    UWORD16 *pu2_num_pu_map;

    /** Offsets in the PU buffer at every CTB level */
    UWORD32 *pu4_pu_off;

    /**  Collocated POC for reference list 0
     * ToDo: Change the array size when multiple slices are to be supported */
    WORD32 ai4_col_l0_poc[HEVCE_MAX_REF_PICS];

    /** Collocated POC for reference list 1 */
    WORD32 ai4_col_l1_poc[HEVCE_MAX_REF_PICS];

    /** 0 = top field,  1 = bottom field  */
    WORD32 i4_bottom_field;

    /** top field first input in case of interlaced case */
    WORD32 i4_topfield_first;

    /** Picture order count (POC) of this picture
     *  NOTE(review): meaning inferred from the field name - confirm */
    WORD32 i4_poc;

    /** unique buffer id */
    WORD32 i4_buf_id;

    /** is this reference frame or not */
    WORD32 i4_is_reference;

    /** Picture type of current picture */
    WORD32 i4_pic_type;

    /** Flag to indicate whether current picture is free or in use */
    WORD32 i4_is_free;

    /** Bit0 -  of this Flag to indicate whether current picture needs to be deblocked,
        padded and hpel planes need to be generated.
        These are turned off typically in non reference pictures when psnr
        and recon dump is disabled.

        Bit1 - of this flag set to 1 if sao is enabled. This is to enable deblocking when sao is enabled
     */
    WORD32 i4_deblk_pad_hpel_cur_pic;

    /**
     * weight and offset for this ref pic. To be initialized for every pic
     * based on the lap output
     */
    ihevce_wght_offst_t s_weight_offset;

    /**
     * Reciprocal of the luma weight in q15 format
     */
    WORD32 i4_inv_luma_wt;

    /**
     * Log to base 2 of the common denominator used for luma weights across all ref pics
     */
    WORD32 i4_log2_wt_denom;

    /**
     * Used as Reference for encoding current picture flag
     */
    WORD32 i4_used_by_cur_pic_flag;

#if ADAPT_COLOCATED_FROM_L0_FLAG
    /* Frame QP; present only when ADAPT_COLOCATED_FROM_L0_FLAG is non-zero, */
    /* so the structure layout depends on that macro                        */
    WORD32 i4_frame_qp;
#endif
    /*
    * IDR GOP number
    */

    WORD32 i4_idr_gop_num;

    /*
    * non-ref-free_flag
    */
    WORD32 i4_non_ref_free_flag;
    /**
      * Dependency manager instance for ME - Prev recon dep
      */
    void *pv_dep_mngr_recon;

    /* display num */
    WORD32 i4_display_num;
} recon_pic_buf_t;
1120 
1121 /**
1122 ******************************************************************************
1123  *  @brief  Lambda values used for various cost computations
1124 ******************************************************************************
1125  */
typedef struct
{
    /************************************************************************/
    /* The fields with the string 'type2' in their names are required */
    /* when both 8bit and hbd lambdas are needed. The lambdas corresponding */
    /* to the bit_depth != internal_bit_depth are stored in these fields */
    /************************************************************************/

    /**
     * Closed loop SSD Lambda
     * This is multiplied with bits for RD cost computations in SSD mode
     * This is represented in q format with shift of LAMBDA_Q_SHIFT
     */
    LWORD64 i8_cl_ssd_lambda_qf;

    /** 'type2' counterpart of i8_cl_ssd_lambda_qf (see note at top of struct) */
    LWORD64 i8_cl_ssd_type2_lambda_qf;

    /**
     * Closed loop SSD Lambda for chroma residue (chroma qp is different from luma qp)
     * This is multiplied with bits for RD cost computations in SSD mode
     * This is represented in q format with shift of LAMBDA_Q_SHIFT
     */
    LWORD64 i8_cl_ssd_lambda_chroma_qf;

    /** 'type2' counterpart of i8_cl_ssd_lambda_chroma_qf */
    LWORD64 i8_cl_ssd_type2_lambda_chroma_qf;

    /**
     * Closed loop SAD Lambda
     * This is multiplied with bits for RD cost computations in SAD mode
     * This is represented in q format with shift of LAMBDA_Q_SHIFT
     */
    WORD32 i4_cl_sad_lambda_qf;

    /** 'type2' counterpart of i4_cl_sad_lambda_qf */
    WORD32 i4_cl_sad_type2_lambda_qf;

    /**
     * Open loop SAD Lambda
     * This is multiplied with bits for RD cost computations in SAD mode
     * This is represented in q format with shift of LAMBDA_Q_SHIFT
     */
    WORD32 i4_ol_sad_lambda_qf;

    /** 'type2' counterpart of i4_ol_sad_lambda_qf */
    WORD32 i4_ol_sad_type2_lambda_qf;

    /**
     * Closed loop SATD Lambda
     * This is multiplied with bits for RD cost computations in SATD mode
     * This is represented in q format with shift of LAMBDA_Q_SHIFT
     */
    WORD32 i4_cl_satd_lambda_qf;

    /** 'type2' counterpart of i4_cl_satd_lambda_qf */
    WORD32 i4_cl_satd_type2_lambda_qf;

    /**
     * Open loop SATD Lambda
     * This is multiplied with bits for RD cost computations in SATD mode
     * This is represented in q format with shift of LAMBDA_Q_SHIFT
     */
    WORD32 i4_ol_satd_lambda_qf;

    /** 'type2' counterpart of i4_ol_satd_lambda_qf */
    WORD32 i4_ol_satd_type2_lambda_qf;

    /** Lambda modifier (floating point, not in q format).
     *  NOTE(review): where it is applied is not visible here - confirm */
    double lambda_modifier;

    /** Lambda modifier for chroma (uv); same caveat as lambda_modifier */
    double lambda_uv_modifier;

    /** Weighing factor for chroma cost.
     *  NOTE(review): scale / q format is not visible here - confirm */
    UWORD32 u4_chroma_cost_weighing_factor;

} frm_lambda_ctxt_t;
1195 /**
1196 ******************************************************************************
1197 *  @brief  Mode attributes for 4x4 block populated by early decision
1198 ******************************************************************************
1199  */
typedef struct
{
    /** Indicates whether a best mode is present or not */
    UWORD8 mode_present;

    /** Best mode for the current 4x4 prediction block */
    UWORD8 best_mode;

    /** sad for the best mode for the current 4x4 prediction block */
    UWORD16 sad;

    /** cost for the best mode for the current 4x4 prediction block */
    UWORD16 sad_cost;

} ihevce_ed_mode_attr_t;  /* early decision */
1215 
1216 /**
1217 ******************************************************************************
1218  *  @brief  Structure at 4x4 block level which has parameters about early
1219  *          intra or inter decision
1220 ******************************************************************************
1221  */
typedef struct
{
    /**
     * Final parameter of Intra-Inter early decision for the current 4x4.
     * 0 - invalid decision
     * 1 - eval intra only
     * 2 - eval inter only
     * 3 - eval both intra and inter
     */
    UWORD8 intra_or_inter;

    /** Merge result flag; presumably non-zero when merge succeeded - TODO confirm */
    UWORD8 merge_success;

    /** Best mode for the current 4x4 prediction block */
    UWORD8 best_mode;

    /** Best merge mode for the current 4x4 prediction block
     *  NOTE(review): inferred from the field name - confirm */
    UWORD8 best_merge_mode;

    /** Store SATD at 4*4 level for current layer (L1) */
    WORD32 i4_4x4_satd;

} ihevce_ed_blk_t;  /* early decision */
1245 
1246 /* l1 ipe ctb analyze structure */
1247 /* Contains cu level qp mod related information for all possible cu
1248 sizes (16,32,64 in L0) in a CTB*/
typedef struct
{
    /* Sum and minimum of 4x4 SATD, 16 entries each; presumably one entry */
    /* per L1_8x8 block in the L1_32x32 (as for i4_8x8_satd) - TODO confirm */
    WORD32 i4_sum_4x4_satd[16];
    WORD32 i4_min_4x4_satd[16];

    /* satd for L1_8x8 blocks in L1_32x32
     * [16] : num L1_8x8 in L1_32x32
     * [2]  : 0 - sum of L1_4x4 @ L1_8x8
     *          - equivalent to transform size of 16x16 @ L0
     *        1 - min/median of L1_4x4 @ L1_8x8
     *          - equivalent to transform size of 8x8 @ L0
     */
    WORD32 i4_8x8_satd[16][2];

    /* satd for L1_16x16 blocks in L1_32x32
     * [4] : num L1_16x16 in L1_32x32
     * [3] : 0 - sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16
     *         - equivalent to transform size of 32x32 @ L0
     *       1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_16x16
     *         - equivalent to transform size of 16x16 @ L0
     *       2 - min/median of (min/median of L1_4x4 @ L1_8x8) @ L1_16x16
     *         - equivalent to transform size of 8x8 @ L0
     */
    WORD32 i4_16x16_satd[4][3];

    /* Please note that i4_32x32_satd[0][3] contains sum of all 32x32 */
    /* satd for L1_32x32 blocks in L1_32x32
     * [1] : num L1_32x32 in L1_32x32
     * [4] : 0 - min/median of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32
     *         - equivalent to transform size of 32x32 @ L0
     *       1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_32x32
     *         - equivalent to transform size of 16x16 @ L0
     *       2 - min/median of (min/median of L1_4x4 @ L1_8x8) @ L1_32x32
     *         - equivalent to transform size of 8x8 @ L0
     *       3 - sum of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32
     */
    WORD32 i4_32x32_satd[1][4];

    /* Store SATD at 8x8 level for current layer (L1) */
    WORD32 i4_best_satd_8x8[16];

    /* EIID: This will be used for early inter intra decisions */
    /* IPE cost based on sad at 8x8 level for current layer (l1) */
    WORD32 i4_best_sad_cost_8x8_l1_ipe[16];

    /* IPE SAD at 8x8 level for current layer (l1) */
    WORD32 i4_best_sad_8x8_l1_ipe[16];
    /* SAD cost at 8x8 level for ME. All other cost are IPE cost */
    WORD32 i4_best_sad_cost_8x8_l1_me[16];

    /* SAD cost at 8x8 level for ME, for a given reference */
    WORD32 i4_sad_cost_me_for_ref[16];

    /* SAD at 8x8 level for ME, for a given reference */
    WORD32 i4_sad_me_for_ref[16];

    /* SAD at 8x8 level for ME */
    WORD32 i4_best_sad_8x8_l1_me[16];

    /* ME SAD at 8x8 level kept for a decision step.                   */
    /* NOTE(review): which decision consumes it is not visible here   */
    WORD32 i4_best_sad_8x8_l1_me_for_decide[16];

    /* Mean @ L0 16x16 */
    WORD32 ai4_16x16_mean[16];

    /* Mean @ L0 32x32 */
    WORD32 ai4_32x32_mean[4];

    /* Mean @ L0 64x64 */
    WORD32 i4_64x64_mean;

} ihevce_ed_ctb_l1_t;  /* early decision */
1320 
1321 /**
1322 ******************************************************************************
1323  *  @brief   8x8 Intra analyze structure
1324 ******************************************************************************
1325  */
typedef struct
{
    /** Best intra modes for 8x8 transform.
     *  The list is terminated by inserting 255 after the last valid mode,
     *  hence the extra (+ 1) array entry
     */
    UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1];

    /** Best 8x8 intra modes for 4x4 transform
     *  The list is terminated by inserting 255 after the last valid mode
     */
    UWORD8 au1_best_modes_4x4_tu[MAX_INTRA_CU_CANDIDATES + 1];

    /** Best 4x4 intra modes, one list for each of the 4 entries
     *  (presumably the four 4x4 blocks of this 8x8 - TODO confirm)
     *  Each list is terminated by inserting 255 after the last valid mode
     */
    UWORD8 au1_4x4_best_modes[4][MAX_INTRA_CU_CANDIDATES + 1];

    /** flag to indicate if nxn pu mode (different pu at 4x4 level) is enabled */
    UWORD8 b1_enable_nxn : 1;

    /** valid cu flag : required for incomplete ctbs at frame boundaries */
    UWORD8 b1_valid_cu : 1;

    /** dummy bits; together with the two 1-bit flags above this totals 8 bits */
    UWORD8 b6_reserved : 6;

} intra8_analyse_t;
1353 
1354 /**
1355 ******************************************************************************
1356  *  @brief   16x16 Intra analyze structure
1357 ******************************************************************************
1358  */
typedef struct
{
    /** Best intra modes for 16x16 transform.
     *  The list is terminated by inserting 255 after the last valid mode,
     *  hence the extra (+ 1) array entry
     */
    UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1];

    /** Best 16x16 intra modes for 8x8 transform
     *  The list is terminated by inserting 255 after the last valid mode
     */
    UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1];

    /** 8x8 children intra analyze for this 16x16 */
    intra8_analyse_t as_intra8_analyse[4];

    /* indicates if 16x16 is best cu or 8x8 cu */
    UWORD8 b1_split_flag : 1;

    /* indicates if 8x8 vs 16x16 rdo evaluation needed */
    /* or only 8x8's rdo evaluation needed */
    UWORD8 b1_merge_flag : 1;

    /**
     * valid cu flag : required for incomplete ctbs at frame boundaries
     * or if CTB size is lower than 32
     */
    UWORD8 b1_valid_cu : 1;

    /** dummy bits
     *  NOTE: the field is 5 bits wide although it carries the b6_ prefix;
     *  with the three 1-bit flags above the bit-fields total 8 bits
     */
    UWORD8 b6_reserved : 5;

} intra16_analyse_t;
1391 
1392 /**
1393 ******************************************************************************
1394  *  @brief   32x32 Intra analyze structure
1395 ******************************************************************************
1396  */
typedef struct
{
    /** Best intra modes for 32x32 transform.
     *  The list is terminated by inserting 255 after the last valid mode,
     *  hence the extra (+ 1) array entry
     */
    UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1];

    /** Best 32x32 intra modes for 16x16 transform
     *  The list is terminated by inserting 255 after the last valid mode
     */
    UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1];

    /** 16x16 children intra analyze for this 32x32 */
    intra16_analyse_t as_intra16_analyse[4];

    /* indicates if 32x32 is best cu or 16x16 cu    */
    UWORD8 b1_split_flag : 1;

    /* indicates if 32x32 vs 16x16 rdo evaluation needed */
    /* or 16x16 vs 8x8 evaluation is needed */
    UWORD8 b1_merge_flag : 1;

    /**
     * valid cu flag : required for incomplete ctbs at frame boundaries
     * or if CTB size is lower than 64
     */
    UWORD8 b1_valid_cu : 1;

    /** dummy bits
     *  NOTE: the field is 5 bits wide although it carries the b6_ prefix;
     *  with the three 1-bit flags above the bit-fields total 8 bits
     */
    UWORD8 b6_reserved : 5;

} intra32_analyse_t;
1429 
1430 /**
1431 ******************************************************************************
1432  *  @brief  IPE L0 analyze structure for L0 ME to do intra/inter CU decisions
1433  *          This is a CTB level structure encapsulating IPE modes, cost at all
 *          level. IPE also recommends max intra CU sizes which is required
1435  *          by ME for CU size determination in intra dominant CTB
1436 ******************************************************************************
1437  */
typedef struct
{
    /** Best 64x64 intra modes for 32x32 transform.
     *  The list is terminated by inserting 255 after the last valid mode,
     *  hence the extra (+ 1) array entry
     */
    UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1];

    /** 32x32 children intra analyze for this 64x64 */
    intra32_analyse_t as_intra32_analyse[4];

    /* indicates if 64x64 is best CUs or 32x32 CUs      */
    UWORD8 u1_split_flag;

    /* CTB level best 8x8 intra costs  */
    WORD32 ai4_best8x8_intra_cost[MAX_CU_IN_CTB];

    /* CTB level best 16x16 intra costs (a quarter as many entries as 8x8) */
    WORD32 ai4_best16x16_intra_cost[MAX_CU_IN_CTB >> 2];

    /* CTB level best 32x32 intra costs (a sixteenth as many entries as 8x8) */
    WORD32 ai4_best32x32_intra_cost[MAX_CU_IN_CTB >> 4];

    /* best 64x64 intra cost */
    WORD32 i4_best64x64_intra_cost;

    /*
    @L0 level
    4 => 0 - 32x32 TU in 64x64 CU
         1 - 16x16 TU in 64x64 CU
         2 - 8x8  TU in 64x64 CU
         3 - 64x64 CU
    2 => Intra/Inter */
    WORD32 i4_64x64_act_factor[4][2];

    /*
    @L0 level
    4 => num 32x32 in CTB
    3 => 0 - 32x32 TU in 64x64 CU
         1 - 16x16 TU in 64x64 CU
         2 - 8x8  TU in 64x64 CU
    2 => Intra/Inter
    NOTE(review): the "in 64x64 CU" wording is identical to the 64x64 array
    above; confirm whether "in 32x32 CU" is meant here */
    WORD32 i4_32x32_act_factor[4][3][2];

    /*
    @L0 level
    16 => num 16x16 in CTB
    2 => 0 - 16x16 TU in 64x64 CU
         1 - 8x8  TU in 64x64 CU
    2 => Intra/Inter
    NOTE(review): as above, confirm whether "in 16x16 CU" is meant here */
    WORD32 i4_16x16_act_factor[16][2][2];

    /* Number of nodes created in the CU tree */
    WORD32 nodes_created_in_cu_tree;

    /* Root node of the CU tree for this CTB */
    cur_ctb_cu_tree_t *ps_cu_tree_root;

    /* Activity factors named for 8x8 blocks.                              */
    /* NOTE(review): sized 16, not MAX_CU_IN_CTB like the 8x8 arrays below */
    /* - confirm the intended granularity                                  */
    WORD32 ai4_8x8_act_factor[16];
    /* Best SAD / SAD cost at 8x8 level from L1 ME and L1 IPE (names mirror */
    /* the corresponding fields of ihevce_ed_ctb_l1_t)                      */
    WORD32 ai4_best_sad_8x8_l1_me[MAX_CU_IN_CTB];
    WORD32 ai4_best_sad_8x8_l1_ipe[MAX_CU_IN_CTB];
    WORD32 ai4_best_sad_cost_8x8_l1_me[MAX_CU_IN_CTB];
    WORD32 ai4_best_sad_cost_8x8_l1_ipe[MAX_CU_IN_CTB];

    /* Ctb level accumulated satd */
    WORD32 i4_ctb_acc_satd;

    /* Ctb level accumulated mpm bits */
    WORD32 i4_ctb_acc_mpm_bits;

} ipe_l0_ctb_analyse_for_me_t;
1506 
/* Global motion vector: a pair of signed 16-bit x / y components.       */
/* NOTE(review): the MV units (full-pel / sub-pel) are not visible here  */
typedef struct
{
    /* Horizontal (x) component */
    WORD16 i2_mv_x;
    /* Vertical (y) component */
    WORD16 i2_mv_y;
} global_mv_t;
1512 
1513 /**
1514 ******************************************************************************
1515  *  @brief  Pre Encode pass and ME pass shared variables and buffers
1516 ******************************************************************************
1517  */
typedef struct
{
    /**
     * Buffer id
     */
    WORD32 i4_buf_id;

    /**
    * Flag will be set to 1 by frame processing thread after receiving flush
    * command from application
    */
    WORD32 i4_end_flag;

    /** frame level ctb analyse buffer pointer */
    ctb_analyse_t *ps_ctb_analyse;

    /** frame level cu analyse  buffer pointer for IPE (member currently disabled) */
    //cu_analyse_t       *ps_cu_analyse;

    /** current input pointer */
    ihevce_lap_enc_buf_t *ps_curr_inp;

    /** current inp buffer id */
    WORD32 curr_inp_buf_id;

    /** Slice header parameters   */
    slice_header_t s_slice_hdr;

    /** sps parameters activated by current slice  */
    sps_t *ps_sps;

    /** pps parameters activated by current slice  */
    pps_t *ps_pps;

    /** vps parameters activated by current slice  */
    vps_t *ps_vps;
    /**  Pointer to Penultimate Layer context memory internally has MV bank buff and related params */
    void *pv_me_lyr_ctxt;

    /**  Pointer to Penultimate Layer MV bank context memory */
    void *pv_me_lyr_bnk_ctxt;

    /**  Pointer to Penultimate Layer MV bank buff */
    void *pv_me_mv_bank;

    /**  Pointer to Penultimate Layer reference idx buffer */
    void *pv_me_ref_idx;
    /**
     * Array to store 8x8 cost (partial 8x8 sad + level adjusted cost)
     * The order of storing is raster scan order within CTB and
     * CTB order is raster scan within frame.
     */
    double *plf_intra_8x8_cost;

    /**
     * L0 layer ctb analyse frame level buffer.
     * IPE will populate the cost and best modes at all levels in this buffer
     *  for every CTB in a frame
     */
    // moved to shorter buffer queue
    //ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb;

    /** Layer L1 buffer pointer */
    ihevce_ed_blk_t *ps_layer1_buf;

    /** Layer L2 buffer pointer */
    ihevce_ed_blk_t *ps_layer2_buf;

    /* ME reverse map info */
    UWORD8 *pu1_me_reverse_map_info;

    /** Buffer pointer for CTB level information in pre intra pass*/
    ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;

    /** SEI parameters (stored by value, unlike the sps/pps/vps pointers above) */
    sei_params_t s_sei;

    /** nal_type for the slice to be encoded  */
    WORD32 i4_slice_nal_type;

    /** input time stamp in terms of ticks: lower 32  */
    WORD32 i4_inp_timestamp_low;

    /** input time stamp in terms of ticks: higher 32 */
    WORD32 i4_inp_timestamp_high;

    /** input frame ctxt of app to be returned in output buffer */
    void *pv_app_frm_ctxt;

    /** current frm valid flag :
     * will be 1 if valid input was processed by frame proc thrd
     */
    WORD32 i4_frm_proc_valid_flag;

    /**
     * Qp to be used for current frame
     */
    WORD32 i4_curr_frm_qp;

    /**
     * Frame level Lambda parameters, one entry per bitrate instance
     * (array bound IHEVCE_MAX_NUM_BITRATES)
     */
    frm_lambda_ctxt_t as_lambda_prms[IHEVCE_MAX_NUM_BITRATES];

    /** Frame-level SATD cost accumulator */
    LWORD64 i8_frame_acc_satd_cost;

    /** Frame - L1 coarse me cost accumulated */
    LWORD64 i8_acc_frame_coarse_me_cost;
    /** Frame - L1 coarse me cost accumulated for ref (member currently disabled) */
    //LWORD64 i8_acc_frame_coarse_me_cost_for_ref;

    /** Frame - L1 coarse me sad accumulated */
    LWORD64 i8_acc_frame_coarse_me_sad;

    /* Average activity of 4x4 blocks from previous frame
    *  If L1, maps to 8*8 in L0
    */
    WORD32 i4_curr_frame_4x4_avg_act;

    /* Modulation factors derived from variance.                          */
    /* NOTE(review): meaning of the two entries is not visible here       */
    WORD32 ai4_mod_factor_derived_by_variance[2];

    /* Strength value (float); NOTE(review): what it scales is not visible here */
    float f_strength;

    /* Average activity of 8x8 blocks from previous frame
    *  If L1, maps to 16*16 in L0
    *  NOTE(review): the meaning of the [2] index on the fields below is
    *  not visible here - confirm
    */

    long double ld_curr_frame_8x8_log_avg[2];

    LWORD64 i8_curr_frame_8x8_avg_act[2];

    LWORD64 i8_curr_frame_8x8_sum_act[2];

    WORD32 i4_curr_frame_8x8_sum_act_for_strength[2];

    ULWORD64 u8_curr_frame_8x8_sum_act_sqr;

    WORD32 i4_curr_frame_8x8_num_blks[2];

    /* 'acc' counterparts of the 8x8 activity statistics above */
    LWORD64 i8_acc_frame_8x8_sum_act[2];
    LWORD64 i8_acc_frame_8x8_sum_act_sqr;
    WORD32 i4_acc_frame_8x8_num_blks[2];
    LWORD64 i8_acc_frame_8x8_sum_act_for_strength;
    LWORD64 i8_curr_frame_8x8_sum_act_for_strength;

    /* Average activity of 16x16 blocks from previous frame
    *  If L1, maps to 32*32 in L0
    *  NOTE(review): the meaning of the [3] index on the fields below is
    *  not visible here - confirm
    */

    long double ld_curr_frame_16x16_log_avg[3];

    LWORD64 i8_curr_frame_16x16_avg_act[3];

    LWORD64 i8_curr_frame_16x16_sum_act[3];

    WORD32 i4_curr_frame_16x16_num_blks[3];

    /* 'acc' counterparts of the 16x16 activity statistics above */
    LWORD64 i8_acc_frame_16x16_sum_act[3];
    WORD32 i4_acc_frame_16x16_num_blks[3];

    /* Average activity of 32x32 blocks from previous frame
    *  If L1, maps to 64*64 in L0
    */

    long double ld_curr_frame_32x32_log_avg[3];

    LWORD64 i8_curr_frame_32x32_avg_act[3];

    /* Global motion vectors, MAX_NUM_REF entries (presumably one per    */
    /* reference picture - TODO confirm). Declared in the middle of the  */
    /* 32x32 activity fields; not itself an activity statistic.          */
    global_mv_t s_global_mv[MAX_NUM_REF];
    LWORD64 i8_curr_frame_32x32_sum_act[3];

    WORD32 i4_curr_frame_32x32_num_blks[3];

    /* 'acc' counterparts of the 32x32 activity statistics above */
    LWORD64 i8_acc_frame_32x32_sum_act[3];
    WORD32 i4_acc_frame_32x32_num_blks[3];

    /* Accumulated number of blocks with high SAD */
    LWORD64 i8_acc_num_blks_high_sad;

    /* Total number of blocks */
    LWORD64 i8_total_blks;

    /* Complexity percentage; presumably derived from the two block counts */
    /* above - TODO confirm                                               */
    WORD32 i4_complexity_percentage;

    /* Flag for a high complexity region */
    WORD32 i4_is_high_complex_region;

    /* Average noise threshold at 4x4 level */
    WORD32 i4_avg_noise_thrshld_4x4;

    /* Frame level mean statistics: sum, number of blocks and average */
    LWORD64 i8_curr_frame_mean_sum;
    WORD32 i4_curr_frame_mean_num_blks;
    LWORD64 i8_curr_frame_avg_mean_act;

} pre_enc_me_ctxt_t;
1710 
1711 /**
1712 ******************************************************************************
1713  *  @brief  buffers from L0 IPE to ME and enc loop
1714 ******************************************************************************
1715  */
typedef struct
{
    /* Size value; NOTE(review): whether this is the structure size or the */
    /* size of the buffer behind ps_ipe_analyse_ctb is not visible here    */
    WORD32 i4_size;

    /* Pointer to the L0 IPE CTB analyse buffer */
    ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb;
} pre_enc_L0_ipe_encloop_ctxt_t;
1722 /**
1723 ******************************************************************************
1724  *  @brief  Frame process and Entropy coding pass shared variables and buffers
1725 ******************************************************************************
1726  */
1727 
/* Picture level accumulators of coding statistics: CU / PU / TU counts,   */
/* QP, reference information, estimated and generated bits, SAD and cost.  */
/* NOTE(review): several name prefixes do not match the declared types     */
/* (i8_ on ULWORD64, i1_ on UWORD8, i2_ on DOUBLE). The names are left     */
/* unchanged because they are part of the structure's interface.           */
typedef struct
{
    /* PIC level Info: CU counts */
    ULWORD64 i8_total_cu;
    ULWORD64 i8_total_cu_min_8x8;
    ULWORD64 i8_total_pu;
    ULWORD64 i8_total_intra_cu;
    ULWORD64 i8_total_inter_cu;
    ULWORD64 i8_total_skip_cu;
    /* CU count per CU size; 4 entries (index to size mapping not visible here) */
    ULWORD64 i8_total_cu_based_on_size[4];

    /* PU counts by prediction category */
    ULWORD64 i8_total_intra_pu;
    ULWORD64 i8_total_merge_pu;
    ULWORD64 i8_total_non_skipped_inter_pu;

    /* PU counts by partition type; array index meaning is not visible here */
    ULWORD64 i8_total_2nx2n_intra_pu[4];
    ULWORD64 i8_total_nxn_intra_pu;
    ULWORD64 i8_total_2nx2n_inter_pu[4];
    ULWORD64 i8_total_smp_inter_pu[4];
    ULWORD64 i8_total_amp_inter_pu[3];
    ULWORD64 i8_total_nxn_inter_pu[3];

    /* Counts per inter prediction direction (L0 / L1 / BI) */
    ULWORD64 i8_total_L0_mode;
    ULWORD64 i8_total_L1_mode;
    ULWORD64 i8_total_BI_mode;

    /* Counts per reference index for list 0 and list 1 */
    ULWORD64 i8_total_L0_ref_idx[MAX_DPB_SIZE];
    ULWORD64 i8_total_L1_ref_idx[MAX_DPB_SIZE];

    /* TU counts */
    ULWORD64 i8_total_tu;
    ULWORD64 i8_total_non_coded_tu;
    ULWORD64 i8_total_inter_coded_tu;
    ULWORD64 i8_total_intra_coded_tu;

    /* TU counts per TU size and per parent CU size (64 / 32 / 16 / 8) */
    ULWORD64 i8_total_tu_based_on_size[4];
    ULWORD64 i8_total_tu_cu64[4];
    ULWORD64 i8_total_tu_cu32[4];
    ULWORD64 i8_total_tu_cu16[3];
    ULWORD64 i8_total_tu_cu8[2];

    /* QP statistics */
    LWORD64 i8_total_qp;
    LWORD64 i8_total_qp_min_cu;
    WORD32 i4_min_qp;
    WORD32 i4_max_qp;
    LWORD64 i8_sum_squared_frame_qp;
    LWORD64 i8_total_frame_qp;
    WORD32 i4_max_frame_qp;
    /* Buffer underflow / overflow statistics (total and maximum) */
    float f_total_buffer_underflow;
    float f_total_buffer_overflow;
    float f_max_buffer_underflow;
    float f_max_buffer_overflow;

    /* Number of active reference indices in list 0 / list 1 (unsigned 8-bit) */
    UWORD8 i1_num_ref_idx_l0_active;
    UWORD8 i1_num_ref_idx_l1_active;

    /* Reference POCs for list 0 / list 1 */
    WORD32 i4_ref_poc_l0[MAX_DPB_SIZE];
    WORD32 i4_ref_poc_l1[MAX_DPB_SIZE];

    /* Per-list entry, luma weight and luma offset.                    */
    /* Note that the luma weights are stored as DOUBLE, not 16-bit.    */
    WORD8 i1_list_entry_l0[MAX_DPB_SIZE];
    DOUBLE i2_luma_weight_l0[MAX_DPB_SIZE];
    WORD16 i2_luma_offset_l0[MAX_DPB_SIZE];
    WORD8 i1_list_entry_l1[MAX_DPB_SIZE];
    DOUBLE i2_luma_weight_l1[MAX_DPB_SIZE];
    WORD16 i2_luma_offset_l1[MAX_DPB_SIZE];

    /* Estimated bits, split by syntax category */
    ULWORD64 u8_bits_estimated_intra;
    ULWORD64 u8_bits_estimated_inter;
    ULWORD64 u8_bits_estimated_slice_header;
    ULWORD64 u8_bits_estimated_sao;
    ULWORD64 u8_bits_estimated_split_cu_flag;
    ULWORD64 u8_bits_estimated_cu_hdr_bits;
    ULWORD64 u8_bits_estimated_split_tu_flag;
    ULWORD64 u8_bits_estimated_qp_delta_bits;
    ULWORD64 u8_bits_estimated_cbf_luma_bits;
    ULWORD64 u8_bits_estimated_cbf_chroma_bits;

    /* Estimated residual bits (luma / chroma) */
    ULWORD64 u8_bits_estimated_res_luma_bits;
    ULWORD64 u8_bits_estimated_res_chroma_bits;

    /* Estimated bits for reference id, mvd, merge flag and mpm */
    ULWORD64 u8_bits_estimated_ref_id;
    ULWORD64 u8_bits_estimated_mvd;
    ULWORD64 u8_bits_estimated_merge_flag;
    ULWORD64 u8_bits_estimated_mpm_luma;
    ULWORD64 u8_bits_estimated_mpm_chroma;

    /* Generated bits totals */
    ULWORD64 u8_total_bits_generated;
    ULWORD64 u8_total_bits_vbv;

    /* Generated bits per picture type (I / P / B) */
    ULWORD64 u8_total_I_bits_generated;
    ULWORD64 u8_total_P_bits_generated;
    ULWORD64 u8_total_B_bits_generated;

    /* Frame SAD: total, intra and inter (32-bit) */
    UWORD32 u4_frame_sad;
    UWORD32 u4_frame_intra_sad;
    UWORD32 u4_frame_inter_sad;

    /* Frame cost: total, intra and inter (unsigned 64-bit) */
    ULWORD64 i8_frame_cost;
    ULWORD64 i8_frame_intra_cost;
    ULWORD64 i8_frame_inter_cost;
} s_pic_level_acc_info_t;
1828 
/* Picture level SEI related values: target bit rate, buffer size and dbf. */
/* NOTE(review): the "_entropy" suffix suggests these are consumed by the  */
/* entropy stage; units and the meaning of "dbf" are not visible here      */
typedef struct
{
    UWORD32 u4_target_bit_rate_sei_entropy;
    UWORD32 u4_buffer_size_sei_entropy;
    UWORD32 u4_dbf_entropy;

} s_pic_level_sei_info_t;
1836 /**
1837 ******************************************************************************
*  @brief  ME pass and Main encode pass shared variables and buffers
1839 ******************************************************************************
1840 */
1841 typedef struct
1842 {
1843     /**
1844     * Buffer id
1845     */
1846     WORD32 i4_buf_id;
1847 
1848     /**
1849     * Flag will be set to 1 by frame processing thread after receiving flush
1850     * command from application
1851     */
1852     WORD32 i4_end_flag;
1853 
1854     /** current input pointer */
1855     ihevce_lap_enc_buf_t *ps_curr_inp;
1856 
1857     /** current inp buffer id */
1858     WORD32 curr_inp_buf_id;
1859 
1860     /** current input buffers from ME */
1861     pre_enc_me_ctxt_t *ps_curr_inp_from_me_prms;
1862 
1863     /** current inp buffer id from ME */
1864     WORD32 curr_inp_from_me_buf_id;
1865 
1866     /** current input buffers from L0 IPE */
1867     pre_enc_L0_ipe_encloop_ctxt_t *ps_curr_inp_from_l0_ipe_prms;
1868 
1869     /** current inp buffer id from L0 IPE */
1870     WORD32 curr_inp_from_l0_ipe_buf_id;
1871 
1872     /** Slice header parameters   */
1873     slice_header_t s_slice_hdr;
1874 
1875     /** current frm valid flag :
1876      * will be 1 if valid input was processed by frame proc thrd
1877      */
1878     WORD32 i4_frm_proc_valid_flag;
1879 
1880     /**
1881      * Array of reference picture list for ping instance
1882      * 2=> ref_pic_list0 and ref_pic_list1
1883      */
1884     recon_pic_buf_t as_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2];
1885 
1886     /**
1887      * Array of reference picture list
1888      * 2=> ref_pic_list0 and ref_pic_list1
1889      */
1890     recon_pic_buf_t *aps_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2];
1891 
1892     /**  Job Queue Memory encode */
1893     job_queue_t *ps_job_q_enc;
1894 
1895     /** Array of Job Queue handles of enc group for ping and pong instance*/
1896     job_queue_handle_t as_job_que_enc_hdls[NUM_ENC_JOBS_QUES];
1897 
1898     /** Array of Job Queue handles of enc group for re-encode*/
1899     job_queue_handle_t as_job_que_enc_hdls_reenc[NUM_ENC_JOBS_QUES];
1900     /** frame level me_ctb_data_t buffer pointer
1901       */
1902     me_ctb_data_t *ps_cur_ctb_me_data;
1903 
1904     /** frame level cur_ctb_cu_tree_t buffer pointer for ME
1905       */
1906     cur_ctb_cu_tree_t *ps_cur_ctb_cu_tree;
1907 
1908     /** Pointer to Dep. Mngr for CTBs processed in every row of a frame.
1909      * ME is producer, EncLoop is the consumer
1910      */
1911     void *pv_dep_mngr_encloop_dep_me;
1912 
1913 } me_enc_rdopt_ctxt_t;
1914 /** @brief One SEI payload entry: type, length and a pointer to the payload data */
1915 typedef struct
1916 {
1917     UWORD32 u4_payload_type; /**< SEI payload type value */
1918     UWORD32 u4_payload_length; /**< payload length -- presumably in bytes, TODO confirm */
1919     UWORD8 *pu1_sei_payload; /**< pointer to the payload data; who owns/frees it is not visible here */
1920 } sei_payload_t;
1921 /** @brief Frame-level data handed from frame processing to entropy coding (per the type name and the notes below) */
1922 typedef struct
1923 {
1924     /**
1925     * Flag will be set to 1 by frame processing thread after receiving flush
1926     * command from application
1927     */
1928     WORD32 i4_end_flag;
1929
1930     /** frame level ctb allocation for ctb after aligning to max cu size */
1931     ctb_enc_loop_out_t *ps_frm_ctb_data;
1932
1933     /** frame level cu allocation for ctb after aligning to max cu size  */
1934     cu_enc_loop_out_t *ps_frm_cu_data;
1935
1936     /** frame level tu allocation for ctb after aligning to max cu size  */
1937     tu_enc_loop_out_t *ps_frm_tu_data;
1938
1939     /** frame level pu allocation for ctb after aligning to max cu size  */
1940     pu_t *ps_frm_pu_data;
1941
1942     /**  frame level coeff allocation for ctb after aligning to max cu size */
1943     void *pv_coeff_data;
1944
1945     /** Slice header parameters   */
1946     slice_header_t s_slice_hdr;
1947
1948     /** sps parameters activated by current slice  */
1949     sps_t *ps_sps;
1950
1951     /** pps parameters activated by current slice  */
1952     pps_t *ps_pps;
1953
1954     /** vps parameters activated by current slice  */
1955     vps_t *ps_vps;
1956
1957     /** SEI parameters (sei_params_t); the earlier comment here was a copy of the vps one */
1958     sei_params_t s_sei;
1959
1960     /* Flag to indicate if AUD NAL is present */
1961     WORD8 i1_aud_present_flag;
1962
1963     /* Flag to indicate if EOS NAL is present */
1964     WORD8 i1_eos_present_flag;
1965
1966     /** nal_type for the slice to be encoded  */
1967     WORD32 i4_slice_nal_type;
1968
1969     /** input time stamp in terms of ticks: lower 32 bits */
1970     WORD32 i4_inp_timestamp_low;
1971
1972     /** input time stamp in terms of ticks: higher 32 bits */
1973     WORD32 i4_inp_timestamp_high;
1974
1975     /** input frame ctxt of app to be returned in output buffer */
1976     void *pv_app_frm_ctxt;
1977
1978     /** current frm valid flag :
1979      * will be 1 if valid input was processed by frame proc thrd
1980      */
1981     WORD32 i4_frm_proc_valid_flag;
1982
1983     /** To support entropy sync, the bitstream offset of each CTB row
1984      * is populated in this array and put in the slice header at the end
1985      */
1986     WORD32 ai4_entry_point_offset[MAX_NUM_CTB_ROWS_FRM];
1987
1988     /** RDopt estimate of bits generated, based on which rc update happens
1989      *  NOTE(review): the original comment said "bytes" while the name says bits -- confirm unit
1990      */
1991     WORD32 i4_rdopt_bits_generated_estimate;
1992
1993     /* These params are passed from enc-threads to entropy thread for
1994         passing params needed for PSNR calculation and encoding
1995         summary prints */
1996     DOUBLE lf_luma_mse;
1997     DOUBLE lf_cb_mse;
1998     DOUBLE lf_cr_mse;
1999
2000     DOUBLE lf_luma_ssim;
2001     DOUBLE lf_cb_ssim;
2002     DOUBLE lf_cr_ssim;
2003
2004     WORD32 i4_qp;
2005     WORD32 i4_poc;
2006     WORD32 i4_display_num;
2007     WORD32 i4_pic_type;
2008
2009     /** I-only SCD */
2010     WORD32 i4_is_I_scenecut;
2011
2012     WORD32 i4_is_non_I_scenecut; /**< presumably the non-I counterpart of the flag above -- TODO confirm */
2013     WORD32 i4_sub_pic_level_rc; /**< presumably a sub-picture level rate control flag/mode -- TODO confirm */
2014
2015     WORD32 ai4_frame_bits_estimated; /**< NOTE(review): scalar despite the array-style "ai4_" prefix */
2016     s_pic_level_acc_info_t s_pic_level_info; /**< picture-level accumulated info (s_pic_level_acc_info_t) */
2017
2018     LWORD64 i8_buf_level_bitrate_change;
2019
2020     WORD32 i4_is_end_of_idr_gop;
2021
2022     sei_payload_t as_sei_payload[MAX_NUMBER_OF_SEI_PAYLOAD]; /**< SEI payload entries */
2023
2024     UWORD32 u4_num_sei_payload; /**< presumably the number of valid entries in as_sei_payload -- TODO confirm */
2025     /* Flag used only in mres single output case to flush out one res and start with next */
2026     WORD32 i4_out_flush_flag;
2027
2028 } frm_proc_ent_cod_ctxt_t;
2029 
2030 /**
2031 ******************************************************************************
2032 *  @brief  Sub-picture rate control context (frame-level and till-NCTB accumulators)
2033 ******************************************************************************
2034 */
2035 typedef struct
2036 {
2037     /* Bitrate instance ID */
2038     WORD32 i4_br_id;
2039
2040     /* Frame ID */
2041     WORD32 i4_frm_id;
2042
2043     /* Number of CTBs after which data is populated (original read "ich"; presumably "which") */
2044     WORD32 i4_ctb_count_in_data;
2045
2046     /* Number of CTBs after which scale is computed (original read "ich"; presumably "which") */
2047     WORD32 i4_ctb_count_out_scale;
2048
2049     /*Bits estimated for the frame */
2050     /* For NON-I SCD max buf bits*/
2051     LWORD64 i8_frame_bits_estimated;
2052
2053     /* Bits consumed till the nctb*/
2054     LWORD64 i8_nctb_bits_consumed;
2055
2056     /* Accumulated bits consumed (original comment duplicated the nctb one above -- confirm scope) */
2057     LWORD64 i8_acc_bits_consumed;
2058
2059     /* Frame level best of IPE and ME SAD */
2060     LWORD64 i8_frame_l1_me_sad;
2061
2062     /* SAD accumulated till NCTB */
2063     LWORD64 i8_nctb_l1_me_sad;
2064
2065     /* Frame level IPE SAD */
2066     LWORD64 i8_frame_l1_ipe_sad;
2067
2068     /* IPE SAD accumulated till NCTB */
2069     LWORD64 i8_nctb_l1_ipe_sad;
2070
2071     /* Frame level L0 IPE SATD */
2072     LWORD64 i8_frame_l0_ipe_satd;
2073
2074     /* L0 SATD accumulated till NCTB */
2075     LWORD64 i8_nctb_l0_ipe_satd;
2076
2077     /* Frame level activity factor accumulated at 8x8 level */
2078     LWORD64 i8_frame_l1_activity_fact;
2079
2080     /* NCTB activity factor accumulated at 8x8 level */
2081     LWORD64 i8_nctb_l1_activity_fact;
2082
2083     /* L0 MPM bits accumulated till NCTB */
2084     LWORD64 i8_nctb_l0_mpm_bits;
2085
2086     /* Encoder header bits accumulated till NCTB */
2087     LWORD64 i8_nctb_hdr_bits_consumed;
2088
2089 } ihevce_sub_pic_rc_ctxt_t;
2090 
2091 /**
2092 ******************************************************************************
2093  *  @brief  Memory manager context (stores the memory tables allocated)
2094 ******************************************************************************
2095  */
2096 typedef struct
2097 {
2098     /**
2099     * Total number of memtabs (Modules and system)
2100     * during create time
2101     */
2102     WORD32 i4_num_create_memtabs;
2103
2104     /**
2105     * Pointer to the mem tabs
2106     * of create time
2107     */
2108     iv_mem_rec_t *ps_create_memtab;
2109
2110     /**
2111     * Total number of memtabs of Data and control Queues
2112     * during Queues create time
2113     */
2114     WORD32 i4_num_q_memtabs;
2115
2116     /**
2117     * Pointer to the mem tabs
2118     * of Queues create time (original text repeated the create-time wording)
2119     */
2120     iv_mem_rec_t *ps_q_memtab;
2121
2122 } enc_mem_mngr_ctxt;
2123 
2124 /**
2125 ******************************************************************************
2126  *  @brief  Encoder Interface Queues Context
2127 ******************************************************************************
2128  */
2129 typedef struct
2130 {
2131     /** Number of Queues at interface context level */
2132     WORD32 i4_num_queues;
2133
2134     /** Array of Queue handles (capacity IHEVCE_MAX_NUM_QUEUES) */
2135     void *apv_q_hdl[IHEVCE_MAX_NUM_QUEUES];
2136
2137     /** Mutex for ensuring thread safety of the access of the queues */
2138     void *pv_q_mutex_hdl;
2139
2140 } enc_q_ctxt_t;
2141 
2142 /**
2143 ******************************************************************************
2144  *  @brief  Module context of different modules in encoder (opaque void pointers)
2145 ******************************************************************************
2146  */
2147
2148 typedef struct
2149 {
2150     /** Motion estimation context pointer */
2151     void *pv_me_ctxt;
2152     /** Coarse Motion estimation context pointer */
2153     void *pv_coarse_me_ctxt;
2154
2155     /** Intra Prediction context pointer */
2156     void *pv_ipe_ctxt;
2157
2158     /** Encode Loop context pointer */
2159     void *pv_enc_loop_ctxt;
2160
2161     /** Entropy Coding context pointers, array sized by IHEVCE_MAX_NUM_BITRATES */
2162     void *apv_ent_cod_ctxt[IHEVCE_MAX_NUM_BITRATES];
2163
2164     /** Look Ahead Processing context pointer */
2165     void *pv_lap_ctxt;
2166     /** Rate control context pointers, array sized by IHEVCE_MAX_NUM_BITRATES */
2167     void *apv_rc_ctxt[IHEVCE_MAX_NUM_BITRATES];
2168     /** Decomposition pre intra context pointer */
2169     void *pv_decomp_pre_intra_ctxt;
2170
2171 } module_ctxt_t;
2172 
2173 /**
2174 ******************************************************************************
2175  *  @brief  Threads semaphore handles
2176 ******************************************************************************
2177  */
2178 typedef struct
2179 {
2180     /** LAP semaphore handle */
2181     void *pv_lap_sem_handle;
2182
2183     /** Encode frame Process semaphore handle */
2184     void *pv_enc_frm_proc_sem_handle;
2185
2186     /** Pre Encode frame Process semaphore handle */
2187     void *pv_pre_enc_frm_proc_sem_handle;
2188
2189     /** Entropy coding semaphore handle
2190         One semaphore for each entropy thread, i.e. for each bit-rate instance*/
2191     void *apv_ent_cod_sem_handle[IHEVCE_MAX_NUM_BITRATES];
2192
2193     /**
2194      *  Semaphore handle corresponding to get free inp frame buff
2195      *  function call from app if called in blocking mode
2196      */
2197     void *pv_inp_data_sem_handle;
2198
2199     /**
2200      *  Semaphore handle corresponding to get free inp control command buff
2201      *  function call from app if called in blocking mode
2202      */
2203     void *pv_inp_ctrl_sem_handle;
2204
2205     /**
2206      *  Semaphore handles (array sized by IHEVCE_MAX_NUM_BITRATES) corresponding to get
2207      *  filled out bitstream buff function call from app if called in blocking mode
2208      */
2209     void *apv_out_strm_sem_handle[IHEVCE_MAX_NUM_BITRATES];
2210
2211     /**
2212      *  Semaphore handles (array sized by IHEVCE_MAX_NUM_BITRATES) corresponding to get
2213      *  filled out recon buff function call from app if called in blocking mode
2214      */
2215     void *apv_out_recon_sem_handle[IHEVCE_MAX_NUM_BITRATES];
2216
2217     /**
2218      *  Semaphore handle corresponding to get filled out control status buff
2219      *  function call from app if called in blocking mode
2220      */
2221     void *pv_out_ctrl_sem_handle;
2222
2223     /**
2224      *  Semaphore handle for LAP input data (per the field name).
2225      *  NOTE(review): original text was a copy of the out-ctrl comment -- confirm usage
2226      */
2227     void *pv_lap_inp_data_sem_hdl;
2228
2229     /**
2230      *  Semaphore handle for pre-enc input data (per the field name).
2231      *  NOTE(review): original text was a copy of the out-ctrl comment -- confirm usage
2232      */
2233     void *pv_preenc_inp_data_sem_hdl;
2234
2235     /**
2236      *  Semaphore handles corresponding to Multi Res Single output case
2237      */
2238     void *pv_ent_common_mres_sem_hdl;
2239     void *pv_out_common_mres_sem_hdl;
2240
2241 } thrd_que_sem_hdl_t;
2242 
2243 /**
2244 ******************************************************************************
2245  *  @brief  Frame level structure which has parameters about CTBs
2246 ******************************************************************************
2247  */
2248 typedef struct
2249 {
2250     /** CTB size of all CTBs in a frame, in pixels.
2251      *  This is a create time value;
2252      *  run time change in this value is not supported
2253      */
2254     WORD32 i4_ctb_size;
2255
2256     /** Minimum CU size of a CTB in a frame, in pixels.
2257      *  This is a create time value;
2258      *  run time change in this value is not supported
2259      */
2260     WORD32 i4_min_cu_size;
2261
2262     /** Worst case num CUs in CTB based on i4_ctb_size */
2263     WORD32 i4_num_cus_in_ctb;
2264
2265     /** Worst case num PUs in CTB based on i4_ctb_size */
2266     WORD32 i4_num_pus_in_ctb;
2267
2268     /** Worst case num TUs in CTB based on i4_ctb_size */
2269     WORD32 i4_num_tus_in_ctb;
2270
2271     /** Number of CTBs in horizontal direction;
2272       * this is based on run time source width and i4_ctb_size
2273       */
2274     WORD32 i4_num_ctbs_horz;
2275
2276     /** Number of CTBs in vertical direction;
2277      *  this is based on run time source height and i4_ctb_size
2278      */
2279     WORD32 i4_num_ctbs_vert;
2280
2281     /** MAX CUs in horizontal direction;
2282      * this is based on run time source width, i4_ctb_size and  i4_num_cus_in_ctb
2283      */
2284     WORD32 i4_max_cus_in_row;
2285
2286     /** MAX PUs in horizontal direction;
2287      * this is based on run time source width, i4_ctb_size and  i4_num_pus_in_ctb
2288      */
2289     WORD32 i4_max_pus_in_row;
2290
2291     /** MAX TUs in horizontal direction;
2292      * this is based on run time source width, i4_ctb_size and  i4_num_tus_in_ctb
2293      */
2294     WORD32 i4_max_tus_in_row;
2295
2296     /**
2297      * CU aligned picture width (currently aligned to MAX CU size);
2298      * should be modified to be aligned to MIN CU size
2299      */
2300
2301     WORD32 i4_cu_aligned_pic_wd;
2302
2303     /**
2304      * CU aligned picture height (currently aligned to MAX CU size);
2305      * should be modified to be aligned to MIN CU size
2306      */
2307
2308     WORD32 i4_cu_aligned_pic_ht;
2309
2310     /* Pointer to a frame level memory,
2311     Stride is = 1 + (num ctbs in a ctb-row) + 1
2312     Height is = 1 + (num ctbs in a ctb-col)
2313     Contains tile-id of each ctb */
2314     WORD32 *pi4_tile_id_map;
2315
2316     /* stride of the tile-id map, in units of ctb */
2317     WORD32 i4_tile_id_ctb_map_stride;
2318
2319 } frm_ctb_ctxt_t;
2320 
2321 /**
2322 ******************************************************************************
2323  *  @brief  ME Job Queue descriptor (output/input dependency counts and offsets)
2324 ******************************************************************************
2325  */
2326 typedef struct
2327 {
2328     /** Number of output dependencies which need to be set after
2329      *  the current job is complete;
2330      *  should be less than or equal to MAX_OUT_DEP defined in
2331      *  ihevce_multi_thrd_structs.h
2332      */
2333     WORD32 i4_num_output_dep;
2334
2335     /** Array of offsets from the start of the output dependent layer's Job Queues
2336      *  which depend on the current Job being complete
2337      */
2338     WORD32 ai4_out_dep_unit_off[MAX_OUT_DEP];
2339
2340     /** Number of input dependencies to be resolved for the current job to start;
2341      *  this many jobs in the lower layer should be complete to
2342      *  start the current JOB
2343      */
2344     WORD32 i4_num_inp_dep;
2345
2346 } multi_thrd_me_job_q_prms_t;
2347 
2348 /**
2349  *  @brief  structure in which recon data
2350  *          and related parameters are sent from the Encoder
2351  */
2352 typedef struct
2353 {
2354     /** Kept for maintaining backwards compatibility in future */
2355     WORD32 i4_size;
2356
2357     /** Buffer id for the current buffer */
2358     WORD32 i4_buf_id;
2359
2360     /** POC of the current buffer */
2361     WORD32 i4_poc;
2362
2363     /** End flag to communicate this is last frame output from encoder */
2364     WORD32 i4_end_flag;
2365
2366     /** End flag to tell the encoder that this is the last buffer from the application:
2367         1 - Last buf, 0 - Not last buffer. No other values are supported.
2368         Application has to set the appropriate value before queuing in the encoder queue */
2369
2370     WORD32 i4_is_last_buf;
2371
2372     /** Recon luma buffer pointer */
2373     void *pv_y_buf;
2374
2375     /** Recon cb buffer pointer */
2376     void *pv_cb_buf;
2377
2378     /** Recon cr buffer pointer */
2379     void *pv_cr_buf;
2380
2381     /** Luma size (i4_y_pixels; the name suggests a pixel count -- TODO confirm) **/
2382     WORD32 i4_y_pixels;
2383
2384     /** Chroma size (i4_uv_pixels; the name suggests a pixel count -- TODO confirm) **/
2385     WORD32 i4_uv_pixels;
2386
2387 } iv_enc_recon_data_buffs_t;
2388 
2389 /**
2390 ******************************************************************************
2391  *  @brief  Multi Thread context structure
2392 ******************************************************************************
2393  */
2394 typedef struct
2395 {
2396     /* Flag to indicate to enc and pre-enc thrds that app has sent force end cmd*/
2397     WORD32 i4_force_end_flag;
2398 
2399     /** Force all active threads flag
2400       * This flag will be set to 1 if the number of cores given to the encoder
2401       * is less than or equal to MAX_NUM_CORES_SEQ_EXEC. In this mode
2402       * all pre enc threads and enc threads will run on the same cores with
2403       * time sharing at frame level
2404       */
2405     WORD32 i4_all_thrds_active_flag;
2406 
2407     /** Flag to indicate that core manager has been configured to enable
2408      * sequential execution
2409      */
2410     WORD32 i4_seq_mode_enabled_flag;
2411     /*-----------------------------------------------------------------------*/
2412     /*--------- Params related to encode group  -----------------------------*/
2413     /*-----------------------------------------------------------------------*/
2414 
2415     /** Number of processing threads created runtime in encode group */
2416     WORD32 i4_num_enc_proc_thrds;
2417 
2418     /** Number of processing threads active for a given frame
2419      * This value will be monitored at frame level, so as to
2420      * have provsion for increasing / decreasing threads
2421      * based on Load balance b/w stage in encoder
2422      */
2423     WORD32 i4_num_active_enc_thrds;
2424 
2425     /** Mutex for ensuring thread safety of the access of Job queues in encode group */
2426     void *pv_job_q_mutex_hdl_enc_grp_me;
2427 
2428     /** Mutex for ensuring thread safety of the access of Job queues in encode group */
2429     void *pv_job_q_mutex_hdl_enc_grp_enc_loop;
2430 
2431     /** Array of Semaphore handles (for each frame processing threads ) */
2432     void *apv_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC];
2433 
2434     /** Array for ME to export the Job que dependency for all layers */
2435     multi_thrd_me_job_q_prms_t as_me_job_q_prms[MAX_NUM_HME_LAYERS][MAX_NUM_VERT_UNITS_FRM];
2436 
2437     /* pointer to the mutex handle*/
2438     void *apv_mutex_handle[MAX_NUM_ME_PARALLEL];
2439 
2440     /* pointer to the mutex handle for frame init*/
2441     void *apv_mutex_handle_me_end[MAX_NUM_ME_PARALLEL];
2442 
2443     /* pointer to the mutex handle for frame init*/
2444     void *apv_mutex_handle_frame_init[MAX_NUM_ENC_LOOP_PARALLEL];
2445 
2446     /*pointer to the mutex handle*/
2447     void *apv_post_enc_mutex_handle[MAX_NUM_ENC_LOOP_PARALLEL];
2448 
2449     /* Flag to indicate that master has done ME init*/
2450     WORD32 ai4_me_master_done_flag[MAX_NUM_ME_PARALLEL];
2451 
2452     /* Counter to keep track of me num of thrds exiting critical section*/
2453     WORD32 me_num_thrds_exited[MAX_NUM_ME_PARALLEL];
2454 
2455     /* Flag to indicate that master has done the frame init*/
2456     WORD32 enc_master_done_frame_init[MAX_NUM_ENC_LOOP_PARALLEL];
2457 
2458     /* Counter to keep track of num of thrds exiting critical section*/
2459     WORD32 num_thrds_exited[MAX_NUM_ENC_LOOP_PARALLEL];
2460 
2461     /* Counter to keep track of num of thrds exiting critical section for re-encode*/
2462     WORD32 num_thrds_exited_for_reenc;
2463 
2464     /* Array to store the curr qp for ping and pong instance*/
2465     WORD32 cur_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2466 
2467     /* Pointers to store output buffers for ping and pong instance*/
2468     frm_proc_ent_cod_ctxt_t *ps_curr_out_enc_grp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2469 
2470     /* Pointer to store input buffers for me*/
2471     pre_enc_me_ctxt_t *aps_cur_inp_me_prms[MAX_NUM_ME_PARALLEL];
2472 
2473     /*pointers to store output buffers from me */
2474     me_enc_rdopt_ctxt_t *aps_cur_out_me_prms[NUM_ME_ENC_BUFS];
2475 
2476     /*pointers to store input buffers to enc-rdopt */
2477     me_enc_rdopt_ctxt_t *aps_cur_inp_enc_prms[NUM_ME_ENC_BUFS];
2478 
2479     /*Shared memory for Sub Pic rc */
2480     /*Qscale calulated by sub pic rc bit control for Intra Pic*/
2481     WORD32 ai4_curr_qp_estimated[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2482 
2483     /*Header bits error by sub pic rc bit control*/
2484     float af_acc_hdr_bits_scale_err[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2485 
2486     /*Accumalated ME SAD for NCTB*/
2487     LWORD64 ai8_nctb_me_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2488                            [MAX_NUM_FRM_PROC_THRDS_ENC];
2489 
2490     /*Accumalated IPE SAD for NCTB*/
2491     LWORD64 ai8_nctb_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2492                             [MAX_NUM_FRM_PROC_THRDS_ENC];
2493 
2494     /*Accumalated L0 IPE SAD for NCTB*/
2495     LWORD64 ai8_nctb_l0_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2496                                [MAX_NUM_FRM_PROC_THRDS_ENC];
2497 
2498     /*Accumalated Activity Factor for NCTB*/
2499     LWORD64 ai8_nctb_act_factor[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2500                                [MAX_NUM_FRM_PROC_THRDS_ENC];
2501 
2502     /*Accumalated Ctb counter across all threads*/
2503     WORD32 ai4_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2504 
2505     /*Bits threshold reached for across all threads*/
2506     WORD32 ai4_threshold_reached[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2507 
2508     /*To hold the Previous In-frame RC chunk QP*/
2509     WORD32 ai4_prev_chunk_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2510 
2511     /*Accumalated Ctb counter across all threads*/
2512     WORD32 ai4_acc_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2513 
2514     /*Flag to check if thread is initialized */
2515     WORD32 ai4_thrd_id_valid_flag[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2516                                  [MAX_NUM_FRM_PROC_THRDS_ENC];
2517 
2518     /*Accumalated Ctb counter across all threads*/
2519     //WORD32 ai4_acc_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES][MAX_NUM_FRM_PROC_THRDS_ENC];
2520 
2521     /*Accumalated bits consumed for nctbs across all threads*/
2522     LWORD64 ai8_nctb_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2523                                   [MAX_NUM_FRM_PROC_THRDS_ENC];
2524 
2525     /*Accumalated hdr bits consumed for nctbs across all threads*/
2526     LWORD64 ai8_nctb_hdr_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2527                                       [MAX_NUM_FRM_PROC_THRDS_ENC];
2528 
2529     /*Accumalated l0 mpm bits consumed for nctbs across all threads*/
2530     LWORD64 ai8_nctb_mpm_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2531                                       [MAX_NUM_FRM_PROC_THRDS_ENC];
2532 
2533     /*Accumalated bits consumed for total ctbs across all threads*/
2534     LWORD64 ai8_acc_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2535                                  [MAX_NUM_FRM_PROC_THRDS_ENC];
2536 
2537     /*Accumalated bits consumed for total ctbs across all threads*/
2538     LWORD64 ai8_acc_bits_mul_qs_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2539                                         [MAX_NUM_FRM_PROC_THRDS_ENC];
2540 
2541     /*Qscale calulated by sub pic rc bit control */
2542     WORD32 ai4_curr_qp_acc[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2543     /* End of Sub pic rc variables */
2544 
2545     /* Pointers to store input (only L0 IPE)*/
2546     pre_enc_L0_ipe_encloop_ctxt_t *aps_cur_L0_ipe_inp_prms[MAX_NUM_ME_PARALLEL];
2547 
2548     /* Array tp store L0 IPE input buf ids*/
2549     WORD32 ai4_in_frm_l0_ipe_id[MAX_NUM_ME_PARALLEL];
2550 
2551     /* Array to store output buffer ids for ping and pong instances*/
2552     WORD32 out_buf_id[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2553 
2554     /* Array of pointers to store the recon buf pointers*/
2555     iv_enc_recon_data_buffs_t *ps_recon_out[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2556 
2557     /* Array of pointers to frame recon for ping and pong instances*/
2558     recon_pic_buf_t *ps_frm_recon[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES];
2559 
2560     /* Array of recon buffer ids for ping and pong instance*/
2561     WORD32 recon_buf_id[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES];
2562 
2563     /* Counter to keep track of num thrds done*/
2564     WORD32 num_thrds_done;
2565 
2566     /* Flags to keep track of dumped ping pong recon buffer*/
2567     WORD32 is_recon_dumped[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2568 
2569     /* Flags to keep track of dumped ping pong output buffer*/
2570     WORD32 is_out_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2571 
2572     /* flag to produce output buffer by the thread who ever is finishing
2573     enc-loop processing first, so that the entropy thread can start processing */
2574     WORD32 ai4_produce_outbuf[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2575 
2576     /* Flags to keep track of dumped ping pong input buffer*/
2577     WORD32 is_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL];
2578 
2579     /* Flags to keep track of dumped ping pong L0 IPE to enc buffer*/
2580     WORD32 is_L0_ipe_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL];
2581 
2582     /** Dependency manager for checking whether prev. EncLoop done before
2583         current frame EncLoop starts */
2584     void *apv_dep_mngr_prev_frame_done[MAX_NUM_ENC_LOOP_PARALLEL];
2585 
2586     /** Dependency manager for checking whether prev. EncLoop done before
2587         re-encode of the current frame */
2588     void *pv_dep_mngr_prev_frame_enc_done_for_reenc;
2589 
2590     /** Dependency manager for checking whether prev. me done before
2591         current frame me starts */
2592     void *apv_dep_mngr_prev_frame_me_done[MAX_NUM_ME_PARALLEL];
2593 
2594     /** ME coarsest layer JOB queue type */
2595     WORD32 i4_me_coarsest_lyr_type;
2596 
2597     /** number of encloop frames running in parallel */
2598     WORD32 i4_num_enc_loop_frm_pllel;
2599 
2600     /** number of me frames running in parallel */
2601     WORD32 i4_num_me_frm_pllel;
2602 
2603     /*-----------------------------------------------------------------------*/
2604     /*--------- Params related to pre-enc stage -----------------------------*/
2605     /*-----------------------------------------------------------------------*/
2606 
2607     /** Number of processing threads created runtime in pre encode group */
2608     WORD32 i4_num_pre_enc_proc_thrds;
2609 
2610     /** Number of processing threads active for a given frame
2611      * This value will be monitored at frame level, so as to
2612      * have provsion for increasing / decreasing threads
2613      * based on Load balance b/w stage in encoder
2614      */
2615     WORD32 i4_num_active_pre_enc_thrds;
2616     /** number of threads that have done processing the current frame
2617         Use to find out the last thread that is coming out of pre-enc processing
2618         so that the last thread can do de-init of pre-enc stage */
2619     WORD32 ai4_num_thrds_processed_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2620 
2621     /** number of threads that have done processing the current frame
2622         Used to find out the first and last thread in order to get the qp query. As the query
2623         is not read only, the query should be done only once by the thread that comes first
2624         and other threads should get the same value*/
2625     WORD32 ai4_num_thrds_processed_L0_ipe_qp_init[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2626 
2627     /** number of threads that have done proessing decomp_intra
2628         Used to find out the last thread that is coming out so that
2629         the last thread can set flag for decomp_pre_intra_finish */
2630     WORD32 ai4_num_thrds_processed_decomp[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2631 
2632     /** number of threads that have done proessing coarse_me
2633         Used to find out the last thread that is coming out so that
2634         the last thread can set flag for coarse_me_finish */
2635     WORD32 ai4_num_thrds_processed_coarse_me[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2636 
2637     /*Flag to indicate if current instance (frame)'s Decomp_pre_intra and Coarse_ME is done.
2638       Used to check if previous frame is done proecessing decom_pre_intra and coarse_me */
2639     WORD32 ai4_decomp_coarse_me_complete_flag[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2640 
2641     /** Dependency manager for checking whether prev. frame decomp_intra
2642         done before current frame  decomp_intra starts */
2643     void *pv_dep_mngr_prev_frame_pre_enc_l1;
2644 
2645     /** Dependency manager for checking whether prev. frame L0 IPE done before
2646         current frame L0 IPE starts */
2647     void *pv_dep_mngr_prev_frame_pre_enc_l0;
2648 
2649     /** Dependency manager for checking whether prev. frame coarse_me done before
2650         current frame coarse_me starts */
2651     void *pv_dep_mngr_prev_frame_pre_enc_coarse_me;
2652 
2653     /** flag to indicate if pre_enc_init is done for current frame */
2654     WORD32 ai4_pre_enc_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2655 
2656     /** flag to indicate if pre_enc_hme_init is done for current frame */
2657     WORD32 ai4_pre_enc_hme_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2658 
2659     /** flag to indicate if pre_enc_deinit is done for current frame */
2660     WORD32 ai4_pre_enc_deinit_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2661 
2662     /** Flag to indicate the end of processing when all the frames are done processing */
2663     WORD32 ai4_end_flag_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2664 
2665     /** Flag to indicate the control blocking mode indicating input command to pre-enc
2666     group should be blocking or unblocking */
2667     WORD32 i4_ctrl_blocking_mode;
2668 
2669     /** Current input pointer */
2670     ihevce_lap_enc_buf_t *aps_curr_inp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2671 
2672     WORD32 i4_last_inp_buf;
2673 
2674     /* buffer id for input buffer */
2675     WORD32 ai4_in_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2676 
2677     /** Current output pointer */
2678     pre_enc_me_ctxt_t *aps_curr_out_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2679 
2680     /*Current L0 IPE to enc output pointer */
2681     pre_enc_L0_ipe_encloop_ctxt_t *ps_L0_IPE_curr_out_pre_enc;
2682 
2683     /** buffer id for output buffer */
2684     WORD32 ai4_out_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2685 
2686     /** buffer id for L0 IPE enc buffer*/
2687     WORD32 i4_L0_IPE_out_buf_id;
2688 
2689     /** Current picture Qp */
2690     WORD32 ai4_cur_frame_qp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2691 
2692     /** Decomp layer buffers indices */
2693     WORD32 ai4_decomp_lyr_buf_idx[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2694 
2695     /*since it is guaranteed that cur frame ipe will not start unless prev frame ipe is completely done,
2696       an array of MAX_PRE_ENC_STAGGER might not be required*/
2697     WORD32 i4_qp_update_l0_ipe;
2698 
2699     /** Current picture encoded is the last picture to be encoded flag */
2700     WORD32 i4_last_pic_flag;
2701 
2702     /** Mutex for ensuring thread safety of the access of Job queues in decomp stage */
2703     void *pv_job_q_mutex_hdl_pre_enc_decomp;
2704 
2705     /** Mutex for ensuring thread safety of the access of Job queues in HME group */
2706     void *pv_job_q_mutex_hdl_pre_enc_hme;
2707 
2708     /** Mutex for ensuring thread safety of the access of Job queues in l0 ipe stage */
2709     void *pv_job_q_mutex_hdl_pre_enc_l0ipe;
2710 
2711     /** mutex handle for pre-enc init */
2712     void *pv_mutex_hdl_pre_enc_init;
2713 
2714     /** mutex handle for pre-enc decomp deinit */
2715     void *pv_mutex_hdl_pre_enc_decomp_deinit;
2716 
2717     /** mutex handle for pre enc hme init */
2718     void *pv_mutex_hdl_pre_enc_hme_init;
2719 
2720     /** mutex handle for pre-enc hme deinit */
2721     void *pv_mutex_hdl_pre_enc_hme_deinit;
2722 
2723     /*qp query before l0 ipe is done by multiple frames*/
2724     /** mutex handle for L0 ipe(pre-enc init)*/
2725     void *pv_mutex_hdl_l0_ipe_init;
2726 
2727     /** mutex handle for pre-enc deinit */
2728     void *pv_mutex_hdl_pre_enc_deinit;
2729 
2730     /** Array of Semaphore handles (for each frame processing threads ) */
2731     void *apv_pre_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC];
2732     /** array which will tell the number of CTB processed in each row,
2733     *   used for Row level sync in IPE pass
2734     */
2735     WORD32 ai4_ctbs_in_row_proc_ipe_pass[MAX_NUM_CTB_ROWS_FRM];
2736 
2737     /**  Job Queue Memory pre encode */
2738     job_queue_t *aps_job_q_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2739 
2740     /** Array of Job Queue handles enc group */
2741     job_queue_handle_t as_job_que_preenc_hdls[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]
2742                                              [NUM_PRE_ENC_JOBS_QUES];
2743 
2744     /* accumulate intra sad across all thread to get qp before L0 IPE*/
2745     WORD32 ai4_intra_satd_acc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]
2746                              [MAX_NUM_FRM_PROC_THRDS_PRE_ENC];
2747 
2748     WORD32 i4_delay_pre_me_btw_l0_ipe;
2749 
2750     /*** This variable has the maximum delay between hme and l0ipe ***/
2751     /*** This is used for wrapping around L0IPE index ***/
2752     WORD32 i4_max_delay_pre_me_btw_l0_ipe;
2753 
2754     /* This is to register the handles of Dep Mngr b/w EncLoop and ME */
2755     /* This is used to delete the Mngr at the end                          */
2756     void *apv_dep_mngr_encloop_dep_me[NUM_ME_ENC_BUFS];
2757     /*flag to track buffer in me/enc que is produced or not*/
2758     WORD32 ai4_me_enc_buff_prod_flag[NUM_ME_ENC_BUFS];
2759 
2760     /*out buf que id for me */
2761     WORD32 ai4_me_out_buf_id[NUM_ME_ENC_BUFS];
2762 
2763     /*in buf que id for enc from me*/
2764     WORD32 i4_enc_in_buf_id[NUM_ME_ENC_BUFS];
2765 
2766     /* This is used to tell whether the free of recon buffers are done or not */
2767     WORD32 i4_is_recon_free_done;
2768 
2769     /* index for DVSR population */
2770     WORD32 i4_idx_dvsr_p;
2771     WORD32 aai4_l1_pre_intra_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]
2772                                  [(HEVCE_MAX_HEIGHT >> 1) / 8];
2773 
2774     WORD32 i4_rc_l0_qp;
2775 
2776     /* Used for mres single out cases. Checks whether a particular resolution is active or passive */
2777     /* Only one resolution should be active for mres_single_out case */
2778     WORD32 *pi4_active_res_id;
2779 
2780     /**
2781      * Sub Pic bit control mutex lock handle
2782      */
2783     void *pv_sub_pic_rc_mutex_lock_hdl;
2784 
2785     void *pv_sub_pic_rc_for_qp_update_mutex_lock_hdl;
2786 
2787     WORD32 i4_encode;
2788     WORD32 i4_in_frame_rc_enabled;
2789     WORD32 i4_num_re_enc;
2790 
2791 } multi_thrd_ctxt_t;
2792 
2793 /**
2794  *  @brief    Structure to describe tile params
2795  */
2796 typedef struct
2797 {
2798     /* flag to indicate tile encoding enabled/disabled */
2799     WORD32 i4_tiles_enabled_flag;
2800 
2801     /* flag to indicate unifrom spacing of tiles */
2802     WORD32 i4_uniform_spacing_flag;
2803 
2804     /* num tiles in a tile-row. num tiles in tile-col */
2805     WORD32 i4_num_tile_cols;
2806     WORD32 i4_num_tile_rows;
2807 
2808     /* Curr tile width and height*/
2809     WORD32 i4_curr_tile_width;
2810     WORD32 i4_curr_tile_height;
2811 
2812     /* Curr tile width and heignt in CTB units*/
2813     WORD32 i4_curr_tile_wd_in_ctb_unit;
2814     WORD32 i4_curr_tile_ht_in_ctb_unit;
2815 
2816     /* frame resolution */
2817     //WORD32  i4_frame_width;  /* encode-width  */
2818     //WORD32  i4_frame_height; /* encode-height */
2819 
2820     /* total num of tiles "in frame" */
2821     WORD32 i4_num_tiles;
2822 
2823     /* Curr tile id. Assigned by raster scan order in a frame */
2824     WORD32 i4_curr_tile_id;
2825 
2826     /* x-pos of first ctb of the slice in ctb */
2827     /* y-pos of first ctb of the slice in ctb */
2828     WORD32 i4_first_ctb_x;
2829     WORD32 i4_first_ctb_y;
2830 
2831     /* x-pos of first ctb of the slice in samples */
2832     /* y-pos of first ctb of the slice in samples */
2833     WORD32 i4_first_sample_x;
2834     WORD32 i4_first_sample_y;
2835 
2836 } ihevce_tile_params_t;
2837 
2838 /**
2839 ******************************************************************************
2840  *  @brief  Encoder context structure
2841 ******************************************************************************
2842  */
2843 
2844 typedef struct
2845 {
2846     /**
2847      *  vps parameters
2848      */
2849     vps_t as_vps[IHEVCE_MAX_NUM_BITRATES];
2850 
2851     /**
2852      *  sps parameters
2853      */
2854     sps_t as_sps[IHEVCE_MAX_NUM_BITRATES];
2855 
2856     /**
2857      *  pps parameters
2858      *  Required for each bitrate separately, mainly because
2859      *  init qp etc parameters needs to be different for each instance
2860      */
2861     pps_t as_pps[IHEVCE_MAX_NUM_BITRATES];
2862 
2863     /**
2864      * Rate control mutex lock handle
2865      */
2866     void *pv_rc_mutex_lock_hdl;
2867 
2868     /** frame level cu analyse  buffer pointer for ME
2869      * ME will get ps_ctb_analyse structure populated with ps_cu pointers
2870      * pointing to ps_cu_analyse buffer from IPE.
2871       */
2872     //cu_analyse_t       *ps_cu_analyse_inter[PING_PONG_BUF];
2873 
2874     /**
2875       *  CTB frame context between encoder (producer) and entropy (consumer)
2876       */
2877     enc_q_ctxt_t s_enc_ques;
2878 
2879     /**
2880      *  Encoder memory manager ctxt
2881      */
2882     enc_mem_mngr_ctxt s_mem_mngr;
2883 
2884     /**
2885      * Semaphores of all the threads created in HLE
2886      * and Que handle for buffers b/w frame process and entropy
2887      */
2888     thrd_que_sem_hdl_t s_thrd_sem_ctxt;
2889 
2890     /**
2891      *  Reference /recon buffer Que pointer
2892      */
2893     recon_pic_buf_t **pps_recon_buf_q[IHEVCE_MAX_NUM_BITRATES];
2894 
2895     /**
2896      * Number of buffers in Recon buffer queue
2897      */
2898     WORD32 ai4_num_buf_recon_q[IHEVCE_MAX_NUM_BITRATES];
2899 
2900     /**
2901      * Reference / recon buffer Que pointer for Pre Encode group
2902      * this will be just a container and no buffers will be allcoated
2903      */
2904     recon_pic_buf_t **pps_pre_enc_recon_buf_q;
2905 
2906     /**
2907      * Number of buffers in Recon buffer queue
2908      */
2909     WORD32 i4_pre_enc_num_buf_recon_q;
2910 
2911     /**
2912       * frame level CTB parameters and worst PU CU and TU in a CTB row
2913       */
2914     frm_ctb_ctxt_t s_frm_ctb_prms;
2915 
2916     /*
2917      * Moudle ctxt pointers of all modules
2918      */
2919     module_ctxt_t s_module_ctxt;
2920 
2921     /*
2922      * LAP static parameters
2923      */
2924     ihevce_lap_static_params_t s_lap_stat_prms;
2925 
2926     /*
2927      * Run time dynamic source params
2928      */
2929 
2930     ihevce_src_params_t s_runtime_src_prms;
2931 
2932     /*
2933      *Target params
2934      */
2935     ihevce_tgt_params_t s_runtime_tgt_params;
2936 
2937     /*
2938      *  Run time dynamic coding params
2939      */
2940     ihevce_coding_params_t s_runtime_coding_prms;
2941 
2942     /**
2943      * Pointer to static config params
2944      */
2945     ihevce_static_cfg_params_t *ps_stat_prms;
2946 
2947     /**
2948      * the following structure members used for copying recon buf info
2949      * in case of duplicate pics
2950      */
2951 
2952     /**
2953      * Array of reference picture list for pre enc group
2954      * Separate list for ping_pong instnaces
2955      * 2=> ref_pic_list0 and ref_pic_list1
2956      */
2957     recon_pic_buf_t as_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2]
2958                                         [HEVCE_MAX_REF_PICS * 2];
2959 
2960     /**
2961      * Array of reference picture list for pre enc group
2962      * Separate list for ping_pong instnaces
2963      * 2=> ref_pic_list0 and ref_pic_list1
2964      */
2965     recon_pic_buf_t *aps_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2]
2966                                           [HEVCE_MAX_REF_PICS * 2];
2967 
2968     /**
2969      *  Number of input frames per input queue
2970      */
2971     WORD32 i4_num_input_buf_per_queue;
2972 
2973     /**
2974      *  poc of the Clean Random Access(CRA)Ipic
2975      */
2976     WORD32 i4_cra_poc;
2977 
2978     /** Number of ref pics in list 0 for any given frame */
2979     WORD32 i4_num_ref_l0;
2980 
2981     /** Number of ref pics in list 1 for any given frame */
2982     WORD32 i4_num_ref_l1;
2983 
2984     /** Number of active ref pics in list 0 for cur frame */
2985     WORD32 i4_num_ref_l0_active;
2986 
2987     /** Number of active ref pics in list 1 for cur frame */
2988     WORD32 i4_num_ref_l1_active;
2989 
2990     /** Number of ref pics in list 0 for any given frame pre encode stage */
2991     WORD32 i4_pre_enc_num_ref_l0;
2992 
2993     /** Number of ref pics in list 1 for any given frame  pre encode stage */
2994     WORD32 i4_pre_enc_num_ref_l1;
2995 
2996     /** Number of active ref pics in list 0 for cur frame  pre encode stage */
2997     WORD32 i4_pre_enc_num_ref_l0_active;
2998 
2999     /** Number of active ref pics in list 1 for cur frame  pre encode stage */
3000     WORD32 i4_pre_enc_num_ref_l1_active;
3001 
3002     /**
3003      *  working mem to be used for frm level activities
3004      * One example is interplation at frame level. This requires memory
3005      * of (max width + 16) * (max_height + 7 + 16 ) * 2 bytes.
3006      * This is so since we generate interp output for max_width + 16 x
3007      * max_height + 16, and then the intermediate output is 16 bit and
3008      * is max_height + 16 + 7 rows
3009      */
3010     UWORD8 *pu1_frm_lvl_wkg_mem;
3011 
3012     /**
3013      * Multi thread processing context
3014      * This memory contains the variables and pointers shared across threads
3015      * in enc-group and pre-enc-group
3016      */
3017     multi_thrd_ctxt_t s_multi_thrd;
3018 
3019     /** I/O Queues created status */
3020     WORD32 i4_io_queues_created;
3021 
3022     WORD32 i4_end_flag;
3023 
3024     /** number of bit-rate instances running */
3025     WORD32 i4_num_bitrates;
3026 
3027     /** number of enc frames running in parallel */
3028     WORD32 i4_num_enc_loop_frm_pllel;
3029 
3030     /*ref bitrate id*/
3031     WORD32 i4_ref_mbr_id;
3032 
3033     /* Flag to indicate app, that end of processing has reached */
3034     WORD32 i4_frame_limit_reached;
3035 
3036     /*Structure to store the function selector
3037      * pointers for common and encoder */
3038     func_selector_t s_func_selector;
3039 
3040     /*ref resolution id*/
3041     WORD32 i4_resolution_id;
3042 
3043     /*hle context*/
3044     void *pv_hle_ctxt;
3045 
3046     rc_quant_t s_rc_quant;
3047     /*ME cost of P pic stored for the next ref B pic*/
3048     //LWORD64 i8_acc_me_cost_of_p_pic_for_b_pic[2];
3049 
3050     UWORD32 u4_cur_pic_encode_cnt;
3051     UWORD32 u4_cur_pic_encode_cnt_dbp;
3052     /*past 2 p pics high complexity status*/
3053     WORD32 ai4_is_past_pic_complex[2];
3054 
3055     WORD32 i4_is_I_reset_done;
3056     WORD32 i4_past_RC_reset_count;
3057 
3058     WORD32 i4_future_RC_reset;
3059 
3060     WORD32 i4_past_RC_scd_reset_count;
3061 
3062     WORD32 i4_future_RC_scd_reset;
3063     WORD32 i4_poc_reset_values;
3064 
3065     /*Place holder to store the length of LAP in first pass*/
3066     /** Number of frames to look-ahead for RC by -
3067      * counts 2 fields as one frame for interlaced
3068      */
3069     WORD32 i4_look_ahead_frames_in_first_pass;
3070 
3071     WORD32 ai4_mod_factor_derived_by_variance[2];
3072     float f_strength;
3073 
3074     /*for B frames use the avg activity
3075     from the layer 0 (I or P) which is the average over
3076     Lap2 window*/
3077     LWORD64 ai8_lap2_8x8_avg_act_from_T0[2];
3078 
3079     LWORD64 ai8_lap2_16x16_avg_act_from_T0[3];
3080 
3081     LWORD64 ai8_lap2_32x32_avg_act_from_T0[3];
3082 
3083     /*for B frames use the log of avg activity
3084     from the layer 0 (I or P) which is the average over
3085     Lap2 window*/
3086     long double ald_lap2_8x8_log_avg_act_from_T0[2];
3087 
3088     long double ald_lap2_16x16_log_avg_act_from_T0[3];
3089 
3090     long double ald_lap2_32x32_log_avg_act_from_T0[3];
3091 
3092     ihevce_tile_params_t *ps_tile_params_base;
3093 
3094     WORD32 ai4_column_width_array[MAX_TILE_COLUMNS];
3095 
3096     WORD32 ai4_row_height_array[MAX_TILE_ROWS];
3097 
3098     /* Architecture */
3099     IV_ARCH_T e_arch_type;
3100 
3101     UWORD8 u1_is_popcnt_available;
3102 
3103     WORD32 i4_active_scene_num;
3104 
3105     WORD32 i4_max_fr_enc_loop_parallel_rc;
3106     WORD32 ai4_rc_query[IHEVCE_MAX_NUM_BITRATES];
3107     WORD32 i4_active_enc_frame_id;
3108 
3109     /**
3110     * LAP interface ctxt pointer
3111     */
3112     void *pv_lap_interface_ctxt;
3113 
3114     /* If enable, enables blu ray compatibility of op*/
3115     WORD32 i4_blu_ray_spec;
3116 
3117 } enc_ctxt_t;
3118 
3119 /**
3120 ******************************************************************************
3121 *  @brief  This struct contains the inter CTB params needed for the decision
3122 *   of the best inter CU results
3123 ******************************************************************************
3124 */
3125 typedef struct
3126 {
3127     hme_pred_buf_mngr_t s_pred_buf_mngr;
3128 
3129     /** X and y offset of ctb w.r.t. start of pic */
3130     WORD32 i4_ctb_x_off;
3131     WORD32 i4_ctb_y_off;
3132 
3133     /**
3134      * Pred buffer ptr, updated inside subpel refinement process. This
3135      * location passed to the leaf fxn for copying the winner pred buf
3136      */
3137     UWORD8 **ppu1_pred;
3138 
3139     /** Working mem passed to leaf fxns */
3140     UWORD8 *pu1_wkg_mem;
3141 
3142     /** prediction buffer stride fo rleaf fxns to copy the pred winner buf */
3143     WORD32 i4_pred_stride;
3144 
3145     /** Stride of input buf, updated inside subpel fxn */
3146     WORD32 i4_inp_stride;
3147 
3148     /** stride of recon buffer */
3149     WORD32 i4_rec_stride;
3150 
3151     /** Indicates if bi dir is enabled or not */
3152     WORD32 i4_bidir_enabled;
3153 
3154     /**
3155      * Total number of references of current picture which is enocded
3156      */
3157     UWORD8 u1_num_ref;
3158 
3159     /** Recon Pic buffer pointers for L0 list */
3160     recon_pic_buf_t **pps_rec_list_l0;
3161 
3162     /** Recon Pic buffer pointers for L1 list */
3163     recon_pic_buf_t **pps_rec_list_l1;
3164 
3165     /**
3166      * These pointers point to modified input, one each for one ref idx.
3167      * Instead of weighting the reference, we weight the input with inverse
3168      * wt and offset for list 0 and list 1.
3169      */
3170     UWORD8 *apu1_wt_inp[2][MAX_NUM_REF];
3171 
3172     /* Since ME uses weighted inputs, we use reciprocal of the actual weights */
3173     /* that are signaled in the bitstream */
3174     WORD32 *pi4_inv_wt;
3175     WORD32 *pi4_inv_wt_shift_val;
3176 
3177     /* Map between L0 Reference indices and LC indices */
3178     WORD8 *pi1_past_list;
3179 
3180     /* Map between L1 Reference indices and LC indices */
3181     WORD8 *pi1_future_list;
3182 
3183     /**
3184      * Points to the non-weighted input data for the current CTB
3185      */
3186     UWORD8 *pu1_non_wt_inp;
3187 
3188     /**
3189      * Store the pred lambda and lamda_qshifts for all the reference indices
3190      */
3191     WORD32 i4_lamda;
3192 
3193     UWORD8 u1_lamda_qshift;
3194 
3195     WORD32 wpred_log_wdc;
3196 
3197     /**
3198      * Number of active references in l0
3199      */
3200     UWORD8 u1_num_active_ref_l0;
3201 
3202     /**
3203      * Number of active references in l1
3204      */
3205     UWORD8 u1_num_active_ref_l1;
3206 
3207     /** The max_depth for inter tu_tree */
3208     UWORD8 u1_max_tr_depth;
3209 
3210     /** Quality Preset */
3211     WORD8 i1_quality_preset;
3212 
3213     /** SATD or SAD */
3214     UWORD8 u1_use_satd;
3215 
3216     /* Frame level QP */
3217     WORD32 i4_qstep_ls8;
3218 
3219     /* Pointer to an array of PU level src variances */
3220     UWORD32 *pu4_src_variance;
3221 
3222     WORD32 i4_alpha_stim_multiplier;
3223 
3224     UWORD8 u1_is_cu_noisy;
3225 
3226     ULWORD64 *pu8_part_src_sigmaX;
3227 
3228     ULWORD64 *pu8_part_src_sigmaXSquared;
3229 
3230     UWORD8 u1_max_2nx2n_tu_recur_cands;
3231 
3232 } inter_ctb_prms_t;
3233 
3234 /*****************************************************************************/
3235 /* Extern Variable Declarations                                              */
3236 /*****************************************************************************/
3237 extern const double lamda_modifier_for_I_pic[8];
3238 
3239 /*****************************************************************************/
3240 /* Extern Function Declarations                                              */
3241 /*****************************************************************************/
3242 
3243 #endif /* _IHEVCE_ENC_STRUCTS_H_ */
3244