1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /*!
21 ******************************************************************************
22 * \file ihevce_enc_structs.h
23 *
24 * \brief
*    This file contains structure definitions of the encoder
26 *
27 * \date
28 *    18/09/2012
29 *
30 * \author
31 *    Ittiam
32 *
33 ******************************************************************************
34 */
35 
36 #ifndef _IHEVCE_ENC_STRUCTS_H_
37 #define _IHEVCE_ENC_STRUCTS_H_
38 
39 /*****************************************************************************/
40 /* Constant Macros                                                           */
41 /*****************************************************************************/
/** @brief Maximum supported picture dimensions in pixels */
#define HEVCE_MAX_WIDTH 1920
#define HEVCE_MAX_HEIGHT 1088

/** @brief Minimum supported picture dimensions in pixels */
#define HEVCE_MIN_WIDTH 64
#define HEVCE_MIN_HEIGHT 64

/*
 * Derived frame-level limits.
 * Every replacement list carries an outer pair of parentheses so that the
 * quotient stays a single operand when the macro is used inside a larger
 * expression (e.g. "x / MAX_NUM_VERT_UNITS_FRM" or "x % MAX_CTBS_IN_FRAME").
 */
#define MAX_CTBS_IN_FRAME ((HEVCE_MAX_WIDTH * HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE * MIN_CTB_SIZE))
#define MAX_NUM_CTB_ROWS_FRM ((HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE))

#define MIN_VERT_PROC_UNIT (8)
#define MAX_NUM_VERT_UNITS_FRM ((HEVCE_MAX_HEIGHT) / (MIN_VERT_PROC_UNIT))

#define HEVCE_MAX_REF_PICS 8
/** @brief DPB size: one picture more than the maximum reference count */
#define HEVCE_MAX_DPB_PICS (HEVCE_MAX_REF_PICS + 1)

/** @brief Horizontal / vertical padding values */
#define PAD_HORZ 80
#define PAD_VERT 80

#define DEFAULT_MAX_REFERENCE_PICS 4

#define BLU_RAY_SUPPORT 231457
63 
/** @brief Upper bound on the number of parts in a min CU (NxN yields 4) */
#define NUM_PU_PARTS 4
/** @brief Upper bound on the number of parts in an inter CU */
#define NUM_INTER_PU_PARTS (MAX_NUM_INTER_PARTS)
#define SEND_BI_RDOPT
/** @brief Inter CU candidate count: 4 when SEND_BI_RDOPT is defined, 3 otherwise */
#if defined(SEND_BI_RDOPT)
#define MAX_INTER_CU_CANDIDATES 4
#else
#define MAX_INTER_CU_CANDIDATES 3
#endif
/** @brief Intra CU candidate count */
#define MAX_INTRA_CU_CANDIDATES 3

/** @brief Intra candidate count */
#define MAX_INTRA_CANDIDATES 35
80 
/** For each resolution & bit-rate instance, one entropy thread is created */
#define NUM_ENTROPY_THREADS (IHEVCE_MAX_NUM_RESOLUTIONS * IHEVCE_MAX_NUM_BITRATES)

/* Number of buffers between Decomp and HME layers; 1 : sequential mode, >1 : parallel mode */
#define NUM_BUFS_DECOMP_HME 1

/** Macro to indicate the pre-ME and L0 IPE stagger in pre-enc */
/** Implies a maximum stagger of MAX_PRE_ENC_STAGGER - 1 */
#define MAX_PRE_ENC_STAGGER (NUM_LAP2_LOOK_AHEAD + 1 + MIN_L1_L0_STAGGER_NON_SEQ)

/** Number of ME -> enc buffers; one per parallel enc loop instance */
#define NUM_ME_ENC_BUFS (MAX_NUM_ENC_LOOP_PARALLEL)

/* Minimum stagger between L0 IPE and enc */
#define MIN_L0_IPE_ENC_STAGGER 1

/* Maximum stagger between L0 IPE and enc */
#define MAX_L0_IPE_ENC_STAGGER (NUM_ME_ENC_BUFS + (MIN_L0_IPE_ENC_STAGGER))

#define MAX_PRE_ENC_RC_DELAY (MAX_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME)

#define MIN_PRE_ENC_RC_DELAY (MIN_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME)

/** @brief number of context buffers maintained at frame level b/w pre-encode : encode */
/* Explanation for the minus 1: e.g. MAX_PRE_ENC_STAGGER = 31 and MAX_L0_IPE_ENC_STAGGER = 5. In this case L1 produces 30 buffers,
  L0 will start off with the 30th buffer and enc will work on the 33rd and 34th frames. */
/* NUM_BUFS_DECOMP_HME is added to take care of the pipeline between Decomp-preintra and HME */
#define MAX_NUM_PREENC_ENC_BUFS                                                                    \
    (MAX_PRE_ENC_STAGGER + MAX_L0_IPE_ENC_STAGGER + NUM_BUFS_DECOMP_HME - 1)  //22//5

/* Same as MAX_NUM_PREENC_ENC_BUFS but computed with the minimum L0 IPE -> enc stagger */
#define MIN_NUM_PREENC_ENC_BUFS                                                                    \
    (MAX_PRE_ENC_STAGGER + MIN_L0_IPE_ENC_STAGGER + NUM_BUFS_DECOMP_HME - 1)

/** @brief number of ctb contexts maintained at frame level b/w encode : entropy */
#define NUM_FRMPROC_ENTCOD_BUFS 8

/** @brief number of extra recon buffers required for the stagger design */
#define NUM_EXTRA_RECON_BUFS 0

/** recon picture buffer size needs to be increased to support EncLoop Parallelism **/
#define NUM_EXTRA_RECON_BUFS_FOR_ELP 0
120 
/** @brief maximum number of bytes in a 4x4 after scanning */
#define MAX_SCAN_COEFFS_BYTES_4x4 (48)

/** @brief maximum number of luma coeff bytes after scan at CTB level  */
#define MAX_LUMA_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * (MAX_TU_IN_CTB)*4)

/** @brief maximum number of chroma coeff bytes after scan at CTB level  */
#define MAX_CHRM_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * ((MAX_TU_IN_CTB >> 1)) * 4)

/** @brief maximum number of coeff bytes after scan at CTB level (luma + chroma) */
#define MAX_SCAN_COEFFS_CTB ((MAX_LUMA_COEFFS_CTB) + (MAX_CHRM_COEFFS_CTB))

/** @brief PU map CTB buffer bytes for neighbour availability */
#define MUN_PU_MAP_BYTES_PER_CTB (MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW)

/** @brief total system memory records */
#define TOTAL_SYSTEM_MEM_RECS 120

/** @brief number of input async command buffers */
#define NUM_AYSNC_CMD_BUFS 4

/** @brief Command buffer size */
#define ENC_COMMAND_BUFF_SIZE 512 /* 512 bytes */

/** @brief Number of output buffers */
#define NUM_OUTPUT_BUFS 4

/** @brief Lambda for SATD cost estimation */
#define LAMDA_SATD 1

/** @brief Maximum number of 1s in u2_sig_coeff_abs_gt1_flags */
#define MAX_GT_ONE 8

/** Maximum number of intra prediction modes */
#define MAX_NUM_IP_MODES 35

/** Number of best intra modes used for intra mode refinement */
#define NUM_BEST_MODES 3

/** Maximum number of parallel frame processing threads in the pre-encode group */
#define MAX_NUM_FRM_PROC_THRDS_PRE_ENC MAX_NUM_CORES

/** Maximum number of parallel frame processing threads in the encode group */
#define MAX_NUM_FRM_PROC_THRDS_ENC MAX_NUM_CORES

/** Macro to indicate the PING_PONG buffers for stagger */
#define PING_PONG_BUF 2
168 
/** Max number of layers in motion estimation;
 * should be greater than or equal to MAX_NUM_LAYERS defined in hme_interface.h
 */

#define MAX_NUM_HME_LAYERS 5
/**
******************************************************************************
 *  @brief      Maximum number of layers allowed
******************************************************************************
 */
#define MAX_NUM_LAYERS 4

/* Number of rate-control picture types (presumably; inferred from the name - confirm) */
#define NUM_RC_PIC_TYPE 9

/* 85 = 1 + 4 + 16 + 64, i.e. the node count of a complete 4-level quad tree */
#define MAX_NUM_NODES_CU_TREE (85)

/* macros to control dynamic load balance */
#define DYN_LOAD_BAL_UPPER_LIMIT 0.80

#define DYN_LOAD_BAL_LOWER_LIMIT 0.20

#define NUM_SUB_GOP_DYN_BAL 1

#define MIN_NUM_FRMS_DYN_BAL 4

#define CORES_SRES_OR_MRES 2

#define HME_HIGH_SAD_BLK_THRESH 35

/* Enable to compare cabac states of the final entropy thread with enc loop states */
#define VERIFY_ENCLOOP_CABAC_STATES 0

#define MAX_NUM_BLKS_IN_MAX_CU 64 /* max cu size is 64x64 */
202 
203 /*****************************************************************************/
204 /* Function Macros                                                           */
205 /*****************************************************************************/
206 
207 /*****************************************************************************/
208 /* Typedefs                                                                  */
209 /*****************************************************************************/
/**
 * Function pointer type for a routine taking 16-bit source, temporary and
 * dequant-coefficient buffers, an 8-bit prediction buffer and an 8-bit
 * destination buffer, plus the matching strides.
 * NOTE(review): going by the name this is the inverse-quant + inverse-transform
 * + recon kernel signature - confirm against the functions assigned to it.
 */
typedef void (*pf_iq_it_rec)(
    WORD16 *pi2_src,
    WORD16 *pi2_tmp,
    UWORD8 *pu1_pred,
    WORD16 *pi2_dequant_coeff,
    UWORD8 *pu1_dst,
    WORD32 qp_div, /* qpscaled / 6 */
    WORD32 qp_rem, /* qpscaled % 6 */
    WORD32 src_strd,
    WORD32 pred_strd,
    WORD32 dst_strd,
    WORD32 zero_cols,
    WORD32 zero_rows);
223 
/**
 * Function pointer type for a routine that receives a reference buffer and a
 * destination buffer (both 8-bit) with their strides, a size 'nt' and a 'mode'.
 * NOTE(review): presumably the intra prediction kernel signature, with 'nt'
 * the transform/block size - confirm against the functions assigned to it.
 */
typedef void (*pf_intra_pred)(
    UWORD8 *pu1_ref, WORD32 src_strd, UWORD8 *pu1_dst, WORD32 dst_strd, WORD32 nt, WORD32 mode);
226 
/**
 * Function pointer type for a routine taking 8-bit source and prediction
 * buffers, a 32-bit temporary buffer and a 16-bit destination buffer;
 * returns a UWORD32.
 * NOTE(review): presumably the residue + forward transform kernel signature;
 * the last argument appears to pack the destination stride with a chroma
 * flag - confirm the packing and the meaning of the return value.
 */
typedef UWORD32 (*pf_res_trans_luma)(
    UWORD8 *pu1_src,
    UWORD8 *pu1_pred,
    WORD32 *pi4_tmp,
    WORD16 *pi2_dst,
    WORD32 src_strd,
    WORD32 pred_strd,
    WORD32 dst_strd_chr_flag);
235 
/**
 * Function pointer type for a routine taking 16-bit coefficient,
 * quant-coefficient and destination buffers, QP terms, strides, a coded
 * sub-block flag buffer with its stride, and two WORD32 output pointers
 * (zero_cols / zero_row); returns a WORD32.
 * NOTE(review): presumably the quantisation kernel signature - confirm the
 * meaning of the return value against the functions assigned to it.
 */
typedef WORD32 (*pf_quant)(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_dst,
    WORD32 qp_div, /* qpscaled / 6 */
    WORD32 qp_rem, /* qpscaled % 6 */
    WORD32 q_add,
    WORD32 src_strd,
    WORD32 dst_strd,
    UWORD8 *pu1_csbf_buf,
    WORD32 csbf_strd,
    WORD32 *zero_cols,
    WORD32 *zero_row);
249 
250 /*****************************************************************************/
251 /* Enums                                                                     */
252 /*****************************************************************************/
/** @brief Supported partition shapes of a CU */
typedef enum
{
    /* Symmetric motion partitions */
    SIZE_2Nx2N = 0, /* 2Nx2N */
    SIZE_2NxN = 1, /* 2Nx N */
    SIZE_Nx2N = 2, /*  Nx2N */
    SIZE_NxN = 3, /*  Nx N */

    /* Asymmetric motion partitions */
    SIZE_2NxnU = 4, /* 2Nx( N/2) + 2Nx(3N/2) */
    SIZE_2NxnD = 5, /* 2Nx(3N/2) + 2Nx( N/2) */
    SIZE_nLx2N = 6, /* ( N/2)x2N + (3N/2)x2N */
    SIZE_nRx2N = 7 /* (3N/2)x2N + ( N/2)x2N */
} PART_SIZE_E;
265 
/** @brief  Identifiers of the interface level queues of the encoder */
typedef enum
{
    IHEVCE_INPUT_DATA_CTRL_Q = 0,
    IHEVCE_ENC_INPUT_Q = 1,
    IHEVCE_INPUT_ASYNCH_CTRL_Q = 2,
    IHEVCE_OUTPUT_DATA_Q = 3,
    IHEVCE_OUTPUT_STATUS_Q = 4,

    /* queue holding recon buffers */
    IHEVCE_RECON_DATA_Q = 5,

    /* queue holding output buffers of enc_loop | input buffers of entropy */
    IHEVCE_FRM_PRS_ENT_COD_Q = 6,

    /* queue holding input buffers to ME | output of pre-enc */
    IHEVCE_PRE_ENC_ME_Q = 7,

    /* queue holding output buffers of ME | input buffers of Enc-RDopt */
    IHEVCE_ME_ENC_RDOPT_Q = 8,

    /* queue holding L0 IPE data for the enc loop */
    IHEVCE_L0_IPE_ENC_Q = 9,

    /* must stay the last entry: evaluates to the number of queues */
    IHEVCE_MAX_NUM_QUEUES

} IHEVCE_Q_DESC_T;
289 
290 /*****************************************************************************/
291 /* Structure                                                                 */
292 /*****************************************************************************/
293 
/**
 * RC QP <-> qscale conversion structure.
 * NOTE(review): the per-field notes are inferred from the field names only;
 * table sizes and Q formats are not visible here - confirm against the
 * rate control module.
 */
typedef struct
{
    /* presumably the lower / upper QP bounds */
    WORD16 i2_min_qp;

    WORD16 i2_max_qp;

    /* presumably the lower / upper qscale bounds */
    WORD16 i2_min_qscale;

    WORD16 i2_max_qscale;

    /* pointer to a qscale -> qp lookup table (storage owned elsewhere) */
    WORD32 *pi4_qscale_to_qp;

    /* pointer to a qp -> qscale lookup table carrying a Q factor */
    WORD32 *pi4_qp_to_qscale_q_factor;

    /* pointer to a qp -> qscale lookup table */
    WORD32 *pi4_qp_to_qscale;

    WORD8 i1_qp_offset;

} rc_quant_t;
316 
/**
******************************************************************************
 *  @brief     4x4 level structure which contains all the parameters
 *             for neighbour prediction purposes
 *  @remarks   the bit-fields below add up to 21 bits
******************************************************************************
 */
typedef struct
{
    /** PU motion vectors */
    pu_mv_t mv;
    /** Intra or Inter flag for each partition - 0 or 1  */
    UWORD16 b1_intra_flag : 1;
    /** CU skip flag - 0 or 1  */
    UWORD16 b1_skip_flag : 1;
    /** CU depth in CTB tree (0-3)  */
    UWORD16 b2_cu_depth : 2;

    /** Y Qp for loop filter (declared with the signed WORD16 type) */
    WORD16 b8_qp : 8;

    /** Luma Intra Mode 0 - 34   */
    UWORD16 b6_luma_intra_mode : 6;

    /** Y CBF  for BS compute */
    UWORD16 b1_y_cbf : 1;
    /** Pred L0 flag of current 4x4 */
    UWORD16 b1_pred_l0_flag : 1;

    /** Pred L1 flag of current 4x4 */
    UWORD16 b1_pred_l1_flag : 1;
} nbr_4x4_t;
348 
/** @brief Neighbour availability flags, one byte per neighbour position */
typedef struct
{
    /** Bottom Left availability flag */
    UWORD8 u1_bot_lt_avail;

    /** Left availability flag */
    UWORD8 u1_left_avail;

    /** Top availability flag */
    UWORD8 u1_top_avail;

    /** Top Right availability flag */
    UWORD8 u1_top_rt_avail;

    /** Top Left availability flag */
    UWORD8 u1_top_lt_avail;

} nbr_avail_flags_t;
367 
/** @brief Luma intra mode signalling fields, packed into 8 bits (1 + 2 + 5) */
typedef struct
{
    /** prev intra luma pred flag */
    UWORD8 b1_prev_intra_luma_pred_flag : 1;

    /** mpm_idx */
    UWORD8 b2_mpm_idx : 2;

    /** remainder intra pred mode */
    UWORD8 b5_rem_intra_pred_mode : 5;

} intra_prev_rem_flags_t;
380 
/**
******************************************************************************
 *  @brief     calc (T+Q+RDOQ) output TU structure; entropy input TU structure
******************************************************************************
 */
typedef struct
{
    /** base tu structure */
    tu_t s_tu;

    /** offset of luma data in ecd buffer */
    WORD32 i4_luma_coeff_offset;

    /** offsets of cb data in ecd buffer (two entries; NOTE(review): presumably
     *  one per chroma sub-TU - confirm with the producer) */
    WORD32 ai4_cb_coeff_offset[2];

    /** offsets of cr data in ecd buffer (two entries, same layout as cb) */
    WORD32 ai4_cr_coeff_offset[2];

} tu_enc_loop_out_t;
401 
/**
 * @brief Per-PU motion data: L0/L1 motion vectors, reference indices and
 *        reference picture buffer ids.
 *        NOTE(review): the name suggests it is stored for collocated MV
 *        use - confirm.
 */
typedef struct
{
    /* L0 Motion Vector */
    mv_t s_l0_mv;

    /* L1 Motion Vector */
    mv_t s_l1_mv;

    /* L0 Ref index */
    WORD8 i1_l0_ref_idx;

    /* L1 Ref index */
    WORD8 i1_l1_ref_idx;

    /* L0 Ref Pic Buf ID */
    WORD8 i1_l0_pic_buf_id;

    /* L1 Ref Pic Buf ID */
    WORD8 i1_l1_pic_buf_id;

    /** intra flag */
    UWORD8 b1_intra_flag : 1;

    /* Pred mode */
    UWORD8 b2_pred_mode : 2;

    /* reserved byte, can be used for something later */
    UWORD8 u1_reserved;

} pu_col_mv_t;
432 
433 /*****************************************************************************/
434 /* Encoder uses same structure as pu_t for prediction unit                   */
435 /*****************************************************************************/
436 
/**
******************************************************************************
 *  @brief     Encode loop (T+Q+RDOQ) output CU structure; entropy input CU structure
 *  @remarks   the leading bit-fields add up to 22 bits
******************************************************************************
 */
typedef struct
{
    /* CU X position in terms of min CU (8x8) units */
    UWORD32 b3_cu_pos_x : 3;

    /* CU Y position in terms of min CU (8x8) units */
    UWORD32 b3_cu_pos_y : 3;

    /** CU size in terms of min CU (8x8) units */
    UWORD32 b4_cu_size : 4;

    /** transquant bypass flag ; 0 for this encoder */
    UWORD32 b1_tq_bypass_flag : 1;

    /** cu skip flag */
    UWORD32 b1_skip_flag : 1;

    /** intra / inter CU flag */
    UWORD32 b1_pred_mode_flag : 1;

    /** indicates partition information for CU
     *  For intra 0 : for 2Nx2N / 1 for NxN iff CU=minCBsize
     *  For inter 0 : @sa PART_SIZE_E
     */
    UWORD32 b3_part_mode : 3;

    /** 0 for this encoder */
    UWORD32 b1_pcm_flag : 1;

    /** only applicable for intra cu */
    UWORD32 b3_chroma_intra_pred_mode : 3;

    /** no residue flag for cu */
    UWORD32 b1_no_residual_syntax_flag : 1;

    /* flag to indicate if the current CU is the first
    CU of the quantisation group */
    UWORD32 b1_first_cu_in_qg : 1;

    /** Intra prev and remainder flags
     * if part is NxN the entries 1,2,3 will be valid,
     * otherwise only entry 0 will be set.
     */
    intra_prev_rem_flags_t as_prev_rem[NUM_PU_PARTS];

    /**
     *  Access valid number of pus in this array based on b3_part_mode
     *  Motion vector differentials and reference idx should be
     *  populated in this structure
     *  @remarks shall be accessed only for inter pus
     */
    pu_t *ps_pu;

    /**
     *  pointer to first tu of this cu. Each TU needs to be populated
     *  in TU order by calc. Total TUs in CU is given by u2_num_tus_in_cu
     */
    tu_enc_loop_out_t *ps_enc_tu;

    /** total TUs in this CU; shall be 0 if b1_no_residual_syntax_flag = 1 */
    UWORD16 u2_num_tus_in_cu;

    /** Coeff buffer pointer */
    /* Pointer to transform coeff data */
    /*************************************************************************/
    /* Following format is repeated for every coded TU                       */
    /* Luma Block                                                            */
    /* num_coeffs      : 16 bits                                             */
    /* zero_cols       : 8 bits ( 1 bit per 4 columns)                       */
    /* sig_coeff_map   : ((TU Size * TU Size) + 31) >> 5 number of WORD32s   */
    /* coeff_data      : Non zero coefficients                               */
    /* Cb Block (only for last TU in 4x4 case else for every luma TU)        */
    /* num_coeffs      : 16 bits                                             */
    /* zero_cols       : 8 bits ( 1 bit per 4 columns)                       */
    /* sig_coeff_map   : ((TU Size * TU Size) + 31) >> 5 number of WORD32s   */
    /* coeff_data      : Non zero coefficients                               */
    /* Cr Block (only for last TU in 4x4 case else for every luma TU)        */
    /* num_coeffs      : 16 bits                                             */
    /* zero_cols       : 8 bits ( 1 bit per 4 columns)                       */
    /* sig_coeff_map   : ((TU Size * TU Size) + 31) >> 5 number of WORD32s   */
    /* coeff_data      : Non zero coefficients                               */
    /*************************************************************************/
    void *pv_coeff;

    /** qp used for this CU */
    WORD8 i1_cu_qp;

} cu_enc_loop_out_t;
532 
/**
 * SAO parameters.
 * Note: the offset arrays carry a "u1_" prefix but are declared with the
 * signed WORD8 type.
 */
typedef struct
{
    /**
     * sao_type_idx_luma
     */
    UWORD32 b3_y_type_idx : 3;

    /**
     * luma sao_band_position
     */
    UWORD32 b5_y_band_pos : 5;

    /**
     * sao_type_idx_chroma (cb)
     */
    UWORD32 b3_cb_type_idx : 3;

    /**
     * cb sao_band_position
     */
    UWORD32 b5_cb_band_pos : 5;

    /**
     * sao_type_idx_chroma (cr)
     */
    UWORD32 b3_cr_type_idx : 3;

    /**
     * cr sao_band_position
     */
    UWORD32 b5_cr_band_pos : 5;

    /*SAO Offsets
     * In all these offsets, 0th element is not used
     */
    /**
     * luma SaoOffsetVal[i]
     */
    WORD8 u1_y_offset[5];

    /**
     * chroma cb SaoOffsetVal[i]
     */
    WORD8 u1_cb_offset[5];

    /**
     * chroma cr SaoOffsetVal[i]
     */
    WORD8 u1_cr_offset[5];

    /**
     * sao_merge_left_flag common for y,cb,cr
     */
    UWORD32 b1_sao_merge_left_flag : 1;

    /**
     * sao_merge_up_flag common for y,cb,cr
     */
    UWORD32 b1_sao_merge_up_flag : 1;

} sao_enc_t;
597 
/**
******************************************************************************
 *  @brief       ctb output structure; output of Encode loop, input to entropy
******************************************************************************
 */
typedef struct
{
    /**
     * bit0     :  depth0 split flag, (64x64 splits)
     * bits 1-3 :  not used
     * bits 4-7 :  depth1 split flags; valid iff depth0 split=1 (32x32 splits)
     * bits 8-23:  depth2 split flags; (if 0 16x16 is cu else 8x8 min cu)

     * if a split flag of n is set for depth 1, check the following split flags
     * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2:
     *
     */
    UWORD32 u4_cu_split_flags;

    /***************************************************************
     * For any given CU position CU_posx, CU_posy the entry of
     * au4_packed_tu_split_flags_cu[] is selected from (CU_posx >> 5) and
     * (CU_posy >> 5).
     * NOTE(review): the array is one-dimensional with 4 entries; the exact
     * mapping of the two shifted coordinates to the index is not visible
     * here - confirm with the producer.
     * Note : For CTB size smaller than 64x64 only use
     * au4_packed_tu_split_flags_cu[0]
     ****************************************************************/

    /**
     * access bits corresponding to actual CU size till leaf nodes
     * bit0     :  (32x32 TU split flag)
     * bits 1-3 :  not used
     * bits 4-7 :  (16x16 TUsplit flags)
     * bits 8-23:  (8x8  TU split flags)

     * if a split flag of n is set for depth 1, check the following split flags
     * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2:
     *
     * @remarks     As tu sizes are relative to CU sizes the producer has to
     * make sure the correctness of au4_packed_tu_split_flags_cu.
     *
     * @remarks     au4_packed_tu_split_flags_cu[1]/[2]/[3] to be used only
     *              for 64x64 ctb.
     */
    UWORD32 au4_packed_tu_split_flags_cu[4];

    /**
     *  pointer to first CU of CTB. Each CU needs to be populated
     *  in CU order by calc. Total CUs in CTB is given by u1_num_cus_in_ctb
     */
    cu_enc_loop_out_t *ps_enc_cu;

    /** total CUs in this CTB */
    UWORD8 u1_num_cus_in_ctb;

    /** CTB neighbour availability flags */
    nbr_avail_flags_t s_ctb_nbr_avail_flags;

    /* SAO parameters of the CTB */
    sao_enc_t s_sao;

} ctb_enc_loop_out_t;
657 
/**
******************************************************************************
 *  @brief      cu inter candidate for encoder
******************************************************************************
 */
typedef struct
{
    /** base pu structure
     *  access valid number of entries in this array based on b3_part_size
     */
    pu_t as_inter_pu[NUM_INTER_PU_PARTS];

    /* TU split flag : ai4_tu_split_flag[0] represents the transform splits
     *  for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds
     *  to respective 32x32  */
    /* For a 8x8 TU - 1 bit used to indicate split */
    /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */
    /* For a 32x32 TU - See above */
    WORD32 ai4_tu_split_flag[4];

    /* Early CBF flags, four entries like ai4_tu_split_flag.
     * NOTE(review): the original comment here was a verbatim copy of the
     * ai4_tu_split_flag description; presumably the bit layout mirrors the
     * split flags (entry [0] for CU size <= 32, one entry per 32x32 for
     * 64x64) - confirm with the code that fills this array. */
    WORD32 ai4_tu_early_cbf[4];

    /** Pointer to the buffer having predicted data after mc in SATD stage
     * Since we have 2 buffers for each candidate, pred data for the best merge
     * candidate can be in one of the 2 buffers.
     */
    UWORD8 *pu1_pred_data;

    /* 16-bit counterpart of pu1_pred_data */
    UWORD16 *pu2_pred_data;

    UWORD8 *pu1_pred_data_scr;

    UWORD16 *pu2_pred_data_src;

    /* Total cost: SATD cost + MV cost */
    WORD32 i4_total_cost;

    /** Stride for predicted data */
    WORD32 i4_pred_data_stride;

    /** @remarks b3_part_size can be non square only for Inter */
    UWORD8 b3_part_size : 3; /* @sa: PART_SIZE_E */

    /** evaluate transform for cusize iff this flag is 1 */
    /** this flag should be set 0 if CU is 64x64         */
    UWORD8 b1_eval_tx_cusize : 1;

    /** evaluate transform for cusize/2 iff this flag is 1 */
    UWORD8 b1_eval_tx_cusize_by2 : 1;

    /** Skip Flag : ME should always set this 0 for the candidates */
    UWORD8 b1_skip_flag : 1;

    UWORD8 b1_intra_has_won : 1;

    /* used to mark if this mode needs to be evaluated in auxiliary mode */
    /* if 1, this mode will be evaluated otherwise not.*/
    UWORD8 b1_eval_mark : 1;

} cu_inter_cand_t;
724 
/**
******************************************************************************
 *  @brief      cu intra candidate for encoder
******************************************************************************
 */
typedef struct
{
    /* per-partition table with one entry per intra candidate
     * (MAX_INTRA_CANDIDATES entries per partition) */
    UWORD8 au1_intra_luma_mode_nxn_hash[NUM_PU_PARTS][MAX_INTRA_CANDIDATES];

    /**
     *  List of NxN PU candidates in CU  for each partition
     *  valid only if current cusize = mincusize
     * +1 to signal the last flag invalid value of 255 needs to be stored
     */
    UWORD8 au1_intra_luma_modes_nxn[NUM_PU_PARTS][(MAX_INTRA_CU_CANDIDATES * (4)) + 2 + 1];

    /* used to mark if this mode needs to be evaluated in auxiliary mode */
    /* if 1, this mode will be evaluated otherwise not.*/
    UWORD8 au1_nxn_eval_mark[NUM_PU_PARTS][MAX_INTRA_CU_CANDIDATES + 1];

    /**
     *  List of 2Nx2N PU candidates in CU (TU size equal to CU size)
     * +1 to signal the last flag invalid value of 255 needs to be stored
     */
    UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu[MAX_INTRA_CU_CANDIDATES + 1];

    /**
     *  List of 2Nx2N PU candidates in CU (TU size equal to CU size / 2)
     * +1 to signal the last flag invalid value of 255 needs to be stored
     */
    UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[MAX_INTRA_CU_CANDIDATES + 1];

    /* used to mark if this mode needs to be evaluated in auxiliary mode */
    /* if 1, this mode will be evaluated otherwise not.*/
    UWORD8 au1_2nx2n_tu_eq_cu_eval_mark[MAX_INTRA_CU_CANDIDATES + 1];

    /* used to mark if this mode needs to be evaluated in auxiliary mode */
    /* if 1, this mode will be evaluated otherwise not.*/
    UWORD8 au1_2nx2n_tu_eq_cu_by_2_eval_mark[MAX_INTRA_CU_CANDIDATES + 1];

    /* number of modes added, one counter per partition */
    UWORD8 au1_num_modes_added[NUM_PU_PARTS];

    /** evaluate transform for cusize iff this flag is 1 */
    /** this flag should be set 0 if CU is 64x64         */
    UWORD8 b1_eval_tx_cusize : 1;

    /** evaluate transform for cusize/2 iff this flag is 1 */
    UWORD8 b1_eval_tx_cusize_by2 : 1;

    /** number of intra candidates for SATD evaluation */
    UWORD8 b6_num_intra_cands : 6;

} cu_intra_cand_t;
778 
/**
******************************************************************************
 *  @brief      cu structure for mode analysis/evaluation
******************************************************************************
 */
typedef struct
{
    /** CU X position in terms of min CU (8x8) units */
    UWORD8 b3_cu_pos_x : 3;

    /** CU Y position in terms of min CU (8x8) units */
    UWORD8 b3_cu_pos_y : 3;

    /** reserved bits */
    UWORD8 b2_reserved : 2;

    /** CU size 2N (width or height) in pixels */
    UWORD8 u1_cu_size;

    /** Intra CU candidates after FAST CU decision (output of IPE)
     *  8421 algo along with transform size evaluation will
     *  be done for these modes in Encode loop pass.
     */
    cu_intra_cand_t s_cu_intra_cand;

    /** indicates the angular mode (0 - 34) for chroma,
     *  Note : No provision currently to take chroma through RDOPT or SATD
     */
    UWORD8 u1_chroma_intra_pred_mode;

    /** number of inter candidates in as_cu_inter_cand[]
      * shall be 0 for intra frames.
      * These inters are evaluated for RDOPT apart from merge/skip candidates
      */
    UWORD8 u1_num_inter_cands;

    /** List of candidates to be evaluated (SATD/RDOPT) for this CU
      * @remarks : all  merge/skip candidates not a part of this list
      */
    cu_inter_cand_t as_cu_inter_cand[MAX_INTER_CU_CANDIDATES];

    /** MV cost, indexed by [inter candidate][PU part] */
    WORD32 ai4_mv_cost[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS];

    /* present only when REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING is
     * non-zero; same indexing as ai4_mv_cost */
#if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
    WORD32 ai4_err_metric[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS];
#endif

    /* Flag to convey if Intra or Inter is the best candidate among the
    candidates populated
     0: If inter is the winner and 1: if Intra is winner*/
    UWORD8 u1_best_is_intra;

    /** number of intra rdopt candidates
      * @remarks : shall be <= the number of intra candidates
      *            (b6_num_intra_cands of s_cu_intra_cand)
      */
    UWORD8 u1_num_intra_rdopt_cands;
    /** qp used for this CU */
    WORD8 i1_cu_qp;
    /** Activity factor used in pre enc thread for deriving the Qp
      * @remarks : This is in Q format
      */
    WORD32 i4_act_factor[4][2];

} cu_analyse_t;
845 
/**
******************************************************************************
 *  @brief      Structure for CU recursion: a quad tree node with four
 *              child pointers (top-left, top-right, bottom-left, bottom-right)
******************************************************************************
 */
typedef struct cur_ctb_cu_tree_t
{
    /** CU X position in terms of min CU (8x8) units */
    UWORD8 b3_cu_pos_x : 3;

    /** CU Y position in terms of min CU (8x8) units */
    UWORD8 b3_cu_pos_y : 3;

    /** reserved bits */
    UWORD8 b2_reserved : 2;

    UWORD8 u1_cu_size;

    UWORD8 u1_intra_eval_enable;

    UWORD8 u1_inter_eval_enable;

    /* Flag that indicates whether to evaluate this node */
    /* during RDOPT evaluation. This does not mean that */
    /* evaluation of the children needs to be abandoned */
    UWORD8 is_node_valid;

    /* best RDOPT cost recorded for this node */
    LWORD64 i8_best_rdopt_cost;

    /* top-left child */
    struct cur_ctb_cu_tree_t *ps_child_node_tl;

    /* top-right child */
    struct cur_ctb_cu_tree_t *ps_child_node_tr;

    /* bottom-left child */
    struct cur_ctb_cu_tree_t *ps_child_node_bl;

    /* bottom-right child */
    struct cur_ctb_cu_tree_t *ps_child_node_br;

} cur_ctb_cu_tree_t;
884 
/**
 * @brief Best ME results for one 32x32 block: a result count followed by
 *        up to NUM_BEST_ME_OUTPUTS result entries
 */
typedef struct
{
    /* number of valid entries in as_best_results[] (presumably; confirm with ME) */
    WORD32 num_best_results;

    part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS];

} block_data_32x32_t;
892 
/**
******************************************************************************
 *  @brief      Structure for storing data about the 64x64
 *              block in a 64x64 CTB; same layout as block_data_32x32_t
******************************************************************************
 */
typedef block_data_32x32_t block_data_64x64_t;
900 
/**
******************************************************************************
 *  @brief      Structure for storing data about one 16x16 block of a
 *              64x64 CTB and its partitions (a 64x64 CTB holds 16 of these)
******************************************************************************
 */
typedef struct
{
    /* number of valid entries in as_best_results[] (presumably; confirm with ME) */
    WORD32 num_best_results;

    /**
     * mask of active partitions, Totally 17 bits. For a given partition
     * id, as per PART_ID_T enum the corresponding bit position is 1/0
     * indicating that partition is active or inactive
     * (the member itself is currently commented out)
     */
    /*WORD32 i4_part_mask;*/

    part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS];

} block_data_16x16_t;
921 
922 typedef struct
923 {
924     WORD32 num_best_results; /* presumably the number of valid entries in as_best_results - confirm */
925 
926     part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS]; /* result entries, NUM_BEST_ME_OUTPUTS slots */
927 } block_data_8x8_t; /* result data for one 8x8 block; me_ctb_data_t holds 64 of these */
928 
929 /**
930 ******************************************************************************
931  *  @brief      Structure for data export from ME to Enc_Loop (block data at 8x8, 16x16, 32x32 and 64x64 sizes)
932 ******************************************************************************
933  */
934 typedef struct
935 {
936     block_data_8x8_t as_8x8_block_data[64]; /* 64 entries = number of 8x8 blocks in a 64x64 area */
937 
938     block_data_16x16_t as_block_data[16]; /* 16 entries = number of 16x16 blocks in a 64x64 area */
939 
940     block_data_32x32_t as_32x32_block_data[4]; /* 4 entries = number of 32x32 blocks in a 64x64 area */
941 
942     block_data_64x64_t s_64x64_block_data; /* single 64x64 block entry */
943 
944 } me_ctb_data_t;
945 
946 /**
947 ******************************************************************************
948  *  @brief   noise detection related structure (embedded in ctb_analyse_t as s_ctb_noise_params)
949  *
950 ******************************************************************************
951  */
952 
953 typedef struct
954 {
955     WORD32 i4_noise_present; /* presumably non-zero when noise was detected - confirm */
956 
957     UWORD8 au1_is_8x8Blk_noisy[MAX_CU_IN_CTB]; /* per-8x8-block noisy flag (per field name), MAX_CU_IN_CTB entries */
958 
959     UWORD32 au4_variance_src_16x16[MAX_CU_IN_CTB]; /* NOTE(review): named 16x16 but sized MAX_CU_IN_CTB - confirm indexing */
960 } ihevce_ctb_noise_params;
961 
962 /**
963 ******************************************************************************
964  *  @brief      ctb structure for mode analysis/evaluation
965 ******************************************************************************
966  */
967 typedef struct
968 {
969     /**
970      * CU decision in a ctb is frozen by ME/IPE and populated in
971      * u4_cu_split_flags.
972      * @remarks
973      * TODO:review comment
974      * bit0     :  64x64 split flag,  (depth0 flag for 64x64 ctb unused for smaller ctb)
975      * bits 1-3 :  not used
976      * bits 4-7 :  32x32 split flags; (depth1 flags for 64x64ctb / only bit4 used for 32x32ctb)
977      * bits 8-23:  16x16 split flags; (depth2 flags for 64x64 / depth1[bits8-11] for 32x32 [bit8 for ctb 16x16] )
978 
979      * if a split flag of n is set for depth 1, check the following split flags
980      * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2:
981      *
982      */
983     UWORD32 u4_cu_split_flags;
984 
985     UWORD8 u1_num_cus_in_ctb; /* number of CUs in this ctb (per field name) */
986 
987     cur_ctb_cu_tree_t *ps_cu_tree; /* pointer to a CU recursion tree node (presumably the root for this ctb) */
988 
989     me_ctb_data_t *ps_me_ctb_data; /* pointer to the ME -> Enc_Loop export data */
990 
991     ihevce_ctb_noise_params s_ctb_noise_params; /* noise detection parameters, stored by value */
992 
993 } ctb_analyse_t;
994 /**
995 ******************************************************************************
996  *  @brief Cost / index pair (original note: Structures for tapping ssd and bit-estimate information for all CUs)
997 ******************************************************************************
998  */
999 
1000 typedef struct
1001 {
1002     LWORD64 i8_cost; /* 64-bit cost value */
1003     WORD32 i4_idx; /* index paired with the cost (what it indexes is not visible here) */
1004 } cost_idx_t;
1005 
1006 /**
1007 ******************************************************************************
1008  *  @brief      reference/non reference pic context for encoder
1009 ******************************************************************************
1010  */
1011 typedef struct
1012 
1013 {
1014     /**
1015      * YUV buffer descriptor for the recon
1016      * Allocation per frame for Y = ((ALIGN(frame width, MAX_CTB_SIZE)) +  2 * PAD_HORZ)*
1017      *                              ((ALIGN(frame height, MAX_CTB_SIZE)) + 2 * PAD_VERT)
1018      */
1019     iv_enc_yuv_buf_t s_yuv_buf_desc;
1020 
1021     iv_enc_yuv_buf_src_t s_yuv_buf_desc_src; /* second YUV descriptor of the "_src" type (purpose not visible here) */
1022 
1023     /* Pointer to Luma (Y) sub plane buffers Horz/ Vert / HV grid            */
1024     /* When (L0ME_IN_OPENLOOP_MODE == 1), additional buffer required to store */
1025     /* the fullpel plane for use as reference */
1026     UWORD8 *apu1_y_sub_pel_planes[3 + L0ME_IN_OPENLOOP_MODE];
1027 
1028     /**
1029      * frm level pointer to pu bank for colocated  mv access
1030      * Allocation per frame = (ALIGN(frame width, MAX_CTB_SIZE) / MIN_PU_SIZE) *
1031      *                         (ALIGN(frame height, MAX_CTB_SIZE) / MIN_PU_SIZE)
1032      */
1033     pu_col_mv_t *ps_frm_col_mv;
1034     /**
1035      ************************************************************************
1036      * Pointer to a PU map stored at frame level,
1037      * It contains a 7 bit pu index in encoder order w.r.t. a ctb at a min
1038      * granularity of MIN_PU_SIZE size.
1039      ************************************************************************
1040      */
1041     UWORD8 *pu1_frm_pu_map;
1042 
1043     /** CTB level frame buffer to store the accumulated sum of
1044      * number of PUs for every row */
1045     UWORD16 *pu2_num_pu_map;
1046 
1047     /** Offsets in the PU buffer at every CTB level */
1048     UWORD32 *pu4_pu_off;
1049 
1050     /**  Collocated POC for reference list 0
1051      * ToDo: Change the array size when multiple slices are to be supported */
1052     WORD32 ai4_col_l0_poc[HEVCE_MAX_REF_PICS];
1053 
1054     /** Collocated POC for reference list 1 */
1055     WORD32 ai4_col_l1_poc[HEVCE_MAX_REF_PICS];
1056 
1057     /** 0 = top field,  1 = bottom field  */
1058     WORD32 i4_bottom_field;
1059 
1060     /** top field first input in case of interlaced case */
1061     WORD32 i4_topfield_first;
1062 
1063     /** POC (picture order count) of this picture - NOTE(review): inferred from the field name, confirm */
1064     WORD32 i4_poc;
1065 
1066     /** unique buffer id */
1067     WORD32 i4_buf_id;
1068 
1069     /** is this reference frame or not */
1070     WORD32 i4_is_reference;
1071 
1072     /** Picture type of current picture */
1073     WORD32 i4_pic_type;
1074 
1075     /** Flag to indicate whether current picture is free or in use */
1076     WORD32 i4_is_free;
1077 
1078     /** Bit0 - of this flag indicates whether current picture needs to be deblocked,
1079         padded and hpel planes need to be generated.
1080         These are turned off typically in non reference pictures when psnr
1081         and recon dump is disabled.
1082 
1083         Bit1 - of this flag set to 1 if sao is enabled. This is to enable deblocking when sao is enabled
1084      */
1085     WORD32 i4_deblk_pad_hpel_cur_pic;
1086 
1087     /**
1088      * weight and offset for this ref pic. To be initialized for every pic
1089      * based on the lap output
1090      */
1091     ihevce_wght_offst_t s_weight_offset;
1092 
1093     /**
1094      * Reciprocal of the luma weight in q15 format
1095      */
1096     WORD32 i4_inv_luma_wt;
1097 
1098     /**
1099      * Log to base 2 of the common denominator used for luma weights across all ref pics
1100      */
1101     WORD32 i4_log2_wt_denom;
1102 
1103     /**
1104      * Flag: this picture is used as reference for encoding the current picture
1105      */
1106     WORD32 i4_used_by_cur_pic_flag;
1107 
1108 #if ADAPT_COLOCATED_FROM_L0_FLAG
1109     WORD32 i4_frame_qp; /* frame qp; member exists only when ADAPT_COLOCATED_FROM_L0_FLAG is non-zero */
1110 #endif
1111     /*
1112     * IDR GOP number
1113     */
1114 
1115     WORD32 i4_idr_gop_num;
1116 
1117     /*
1118     * non-ref-free_flag (semantics not visible here - TODO confirm)
1119     */
1120     WORD32 i4_non_ref_free_flag;
1121     /**
1122       * Dependency manager instance for ME - Prev recon dep
1123       */
1124     void *pv_dep_mngr_recon;
1125 
1126     /* display number of this picture (per field name) */
1127     WORD32 i4_display_num;
1128 } recon_pic_buf_t;
1129 
1130 /**
1131 ******************************************************************************
1132  *  @brief  Lambda values used for various cost computations
1133 ******************************************************************************
1134  */
1135 typedef struct
1136 {
1137     /************************************************************************/
1138     /* The fields with the string 'type2' in their names are required */
1139     /* when both 8bit and hbd lambdas are needed. The lambdas corresponding */
1140     /* to the bit_depth != internal_bit_depth are stored in these fields */
1141     /************************************************************************/
1142 
1143     /**
1144      * Closed loop SSD Lambda
1145      * This is multiplied with bits for RD cost computations in SSD mode
1146      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1147      */
1148     LWORD64 i8_cl_ssd_lambda_qf;
1149 
1150     LWORD64 i8_cl_ssd_type2_lambda_qf; /* 'type2' counterpart of i8_cl_ssd_lambda_qf */
1151 
1152     /**
1153      * Closed loop SSD Lambda for chroma residue (chroma qp is different from luma qp)
1154      * This is multiplied with bits for RD cost computations in SSD mode
1155      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1156      */
1157     LWORD64 i8_cl_ssd_lambda_chroma_qf;
1158 
1159     LWORD64 i8_cl_ssd_type2_lambda_chroma_qf; /* 'type2' counterpart of i8_cl_ssd_lambda_chroma_qf */
1160 
1161     /**
1162      * Closed loop SAD Lambda
1163      * This is multiplied with bits for RD cost computations in SAD mode
1164      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1165      */
1166     WORD32 i4_cl_sad_lambda_qf;
1167 
1168     WORD32 i4_cl_sad_type2_lambda_qf; /* 'type2' counterpart of i4_cl_sad_lambda_qf */
1169 
1170     /**
1171      * Open loop SAD Lambda
1172      * This is multiplied with bits for RD cost computations in SAD mode
1173      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1174      */
1175     WORD32 i4_ol_sad_lambda_qf;
1176 
1177     WORD32 i4_ol_sad_type2_lambda_qf; /* 'type2' counterpart of i4_ol_sad_lambda_qf */
1178 
1179     /**
1180      * Closed loop SATD Lambda
1181      * This is multiplied with bits for RD cost computations in SATD mode
1182      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1183      */
1184     WORD32 i4_cl_satd_lambda_qf;
1185 
1186     WORD32 i4_cl_satd_type2_lambda_qf; /* 'type2' counterpart of i4_cl_satd_lambda_qf */
1187 
1188     /**
1189      * Open loop SATD Lambda
1190      * This is multiplied with bits for RD cost computations in SATD mode
1191      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1192      */
1193     WORD32 i4_ol_satd_lambda_qf;
1194 
1195     WORD32 i4_ol_satd_type2_lambda_qf; /* 'type2' counterpart of i4_ol_satd_lambda_qf */
1196 
1197     double lambda_modifier; /* floating-point lambda modifier (how it is applied is not visible here) */
1198 
1199     double lambda_uv_modifier; /* floating-point lambda modifier, presumably for chroma (uv) - confirm */
1200 
1201     UWORD32 u4_chroma_cost_weighing_factor; /* chroma cost weighting factor (format not stated here - TODO confirm) */
1202 
1203 } frm_lambda_ctxt_t;
1204 /**
1205 ******************************************************************************
1206 *  @brief  Mode attributes for 4x4 block populated by early decision
1207 ******************************************************************************
1208  */
1209 typedef struct
1210 {
1211     /** Whether a best mode is present or not */
1212     UWORD8 mode_present;
1213 
1214     /** Best mode for the current 4x4 prediction block */
1215     UWORD8 best_mode;
1216 
1217     /** sad for the best mode for the current 4x4 prediction block (16-bit) */
1218     UWORD16 sad;
1219 
1220     /** cost for the best mode for the current 4x4 prediction block (16-bit) */
1221     UWORD16 sad_cost;
1222 
1223 } ihevce_ed_mode_attr_t;  //early decision
1224 /**
1225 ******************************************************************************
1226  *  @brief  Structure at 8x8 block level which has parameters such as cur satd
1227  * for QP mod @ L0 level (currently a single SATD member)
1228 ******************************************************************************
1229  */
1230 typedef struct
1231 {
1232     /* SATD of current data at 8*8 level for current layer (L0) */
1233     WORD32 i4_8x8_cur_satd;
1234 } ihevce_8x8_L0_satd_t;
1235 /**
1236 ******************************************************************************
1237  *  @brief  Structure at 8x8 block level holding the mean, for MEAN based QP mod
1238 ******************************************************************************
1239  */
1240 typedef struct
1241 {
1242     /* Mean of current data at 8*8 level for current layer (L0), 16-bit */
1243     WORD16 i2_8x8_cur_mean;
1244 } ihevce_8x8_L0_mean_t;
1245 
1246 //#define DEBUG_ED_CTB_POS
1247 /**
1248 ******************************************************************************
1249  *  @brief  Structure at 4x4 block level which has parameters about early
1250  *          intra or inter decision
1251 ******************************************************************************
1252  */
1253 typedef struct
1254 {
1255     /**
1256      * Final parameter of Intra-Inter early decision for the current 4x4.
1257      * 0 - invalid decision
1258      * 1 - eval intra only
1259      * 2 - eval inter only
1260      * 3 - eval both intra and inter
1261      */
1262     UWORD8 intra_or_inter : 2;
1263 
1264     UWORD8 merge_success : 1; /* 1-bit flag; presumably set when merging succeeded - confirm */
1265 
1266     /** Best mode for the current 4x4 prediction block */
1267     UWORD8 best_mode;
1268 
1269     /* sad cost for the best prediction mode (field below is commented out) */
1270     //UWORD16 best_sad_cost;
1271 
1272     /** Best merge mode for the current 4x4 prediction block (per field name) */
1273     UWORD8 best_merge_mode;
1274 
1275     /* SATD at 4*4 level for current layer (L1) */
1276     WORD32 i4_4x4_satd;
1277 
1278     /* SATD of current data at 4*4 level for current layer (L1) */
1279     WORD32 i4_4x4_cur_satd;
1280 
1281 } ihevce_ed_blk_t;  //early decision
1282 
1283 /* l1 ipe ctb analyze structure */
1284 /* Contains cu level qp mod related information for all possible cu
1285 sizes (16,32,64 in L0) in a CTB*/
1286 typedef struct
1287 {
1288     WORD32 i4_sum_4x4_satd[16]; /* 16 entries; presumably sum of L1_4x4 satd per L1_8x8 - confirm */
1289     WORD32 i4_min_4x4_satd[16]; /* 16 entries; presumably min of L1_4x4 satd per L1_8x8 - confirm */
1290 
1291     /*satd for L1_8x8 blocks in L1_32x32
1292     16 - num L1_8x8 in L1_32x32
1293     2 =>
1294         0 - sum of L1_4x4 @ L1_8x8
1295           - equivalent to transform size of 16x16 @ L0
1296         1 - min/median of L1_4x4 @ L1_8x8
1297           - equivalent to transform size of 8x8 @ L0
1298     */
1299     WORD32 i4_8x8_satd[16][2];
1300 
1301     /*satd for L1_16x16 blocks in L1_32x32
1302     4 - num L1_16x16 in L1_32x32
1303     3 =>
1304         0 - sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16
1305           - equivalent to transform size of 32x32 @ L0
1306         1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_16x16
1307           - equivalent to transform size of 16x16 @ L0
1308         2 - min/median of (min/median of L1_4x4 @ L1_8x8) @ L1_16x16
1309           - equivalent to transform size of 8x8 @ L0
1310     */
1311     WORD32 i4_16x16_satd[4][3];
1312 
1313     /*satd for 32x32 block in L1*/
1314     /*Please note that i4_32x32_satd[0][3] contains sum of all 32x32 */
1315     /*satd for L1_32x32 blocks in L1_32x32
1316     1 - num L1_32x32 in L1_32x32
1317     4 =>
1318         0 - min/median of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32
1319           - equivalent to transform size of 32x32 @ L0
1320         1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_32x32
1321           - equivalent to transform size of 16x16 @ L0
1322         2 - min/median of (min/median of L1_4x4 @ L1_8x8) @ L1_32x32
1323           - equivalent to transform size of 8x8 @ L0
1324         3 - sum of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32
1325     */
1326     WORD32 i4_32x32_satd[1][4];
1327 
1328     /* Best SATD at 8x8 level for current layer (L1), 16 entries */
1329     WORD32 i4_best_satd_8x8[16];
1330 
1331     /* EIID: This will be used for early inter intra decisions */
1332     /* (the plain SAD at 8x8 level is kept in i4_best_sad_8x8_l1_ipe further down) */
1333     /* Cost based on sad at 8x8 level for current layer (l1), IPE */
1334     WORD32 i4_best_sad_cost_8x8_l1_ipe[16];
1335 
1336     WORD32 i4_best_sad_8x8_l1_ipe[16]; /* SAD at 8x8 level for current layer (l1), IPE (per field name) */
1337     /* SAD cost at 8x8 level for ME (per field name). All other cost are IPE cost */
1338     WORD32 i4_best_sad_cost_8x8_l1_me[16];
1339 
1340     /* SAD cost at 8x8 level for ME, for given reference (per field name) */
1341     WORD32 i4_sad_cost_me_for_ref[16];
1342 
1343     /* SAD at 8x8 level for ME. for given reference */
1344     WORD32 i4_sad_me_for_ref[16];
1345 
1346     /* SAD at 8x8 level for ME. All other cost are IPE cost */
1347     WORD32 i4_best_sad_8x8_l1_me[16];
1348 
1349     WORD32 i4_best_sad_8x8_l1_me_for_decide[16]; /* 16 entries; ME SAD variant "for decide" (use not visible here) */
1350 
1351     /*Mean @ L0 16x16 (16 entries)*/
1352     WORD32 ai4_16x16_mean[16];
1353 
1354     /*Mean @ L0 32x32 (4 entries)*/
1355     WORD32 ai4_32x32_mean[4];
1356 
1357     /*Mean @ L0 64x64 (single value)*/
1358     WORD32 i4_64x64_mean;
1359 
1360 } ihevce_ed_ctb_l1_t;  //early decision
1361 
1362 /**
1363 ******************************************************************************
1364  *  @brief   8x8 Intra analyze structure (intra16_analyse_t embeds 4 of these)
1365 ******************************************************************************
1366  */
1367 typedef struct
1368 {
1369     /** Best intra modes for 8x8 transform.
1370      *  The value 255 is inserted at the end to limit the number of modes
1371      */
1372     UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1];
1373 
1374     /** Best 8x8 intra modes for 4x4 transform
1375      *  The value 255 is inserted at the end to limit the number of modes
1376      */
1377     UWORD8 au1_best_modes_4x4_tu[MAX_INTRA_CU_CANDIDATES + 1];
1378 
1379     /** Best 4x4 intra modes, one list for each of the 4 entries of the first dimension
1380      *  The value 255 is inserted at the end to limit the number of modes
1381      */
1382     UWORD8 au1_4x4_best_modes[4][MAX_INTRA_CU_CANDIDATES + 1];
1383 
1384     /** best 8x8 intra sad/SATD cost */
1385     WORD32 i4_best_intra_cost;
1386 
1387     /** flag to indicate if nxn pu mode (different pu at 4x4 level) is enabled */
1388     UWORD8 b1_enable_nxn : 1;
1389 
1390     /** valid cu flag : required for incomplete ctbs at frame boundaries */
1391     UWORD8 b1_valid_cu : 1;
1392 
1393     /** dummy bits (1 + 1 + 6 = 8 bits in total) */
1394     UWORD8 b6_reserved : 6;
1395 
1396 } intra8_analyse_t;
1397 
1398 /**
1399 ******************************************************************************
1400  *  @brief   16x16 Intra analyze structure (intra32_analyse_t embeds 4 of these)
1401 ******************************************************************************
1402  */
1403 typedef struct
1404 {
1405     /** Best intra modes for 16x16 transform.
1406      *  The value 255 is inserted at the end to limit the number of modes
1407      */
1408     UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1];
1409 
1410     /** Best 16x16 intra modes for 8x8 transform
1411      *  The value 255 is inserted at the end to limit the number of modes
1412      */
1413     UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1];
1414 
1415     /** 8x8 children intra analyze for this 16x16 */
1416     intra8_analyse_t as_intra8_analyse[4];
1417 
1418     /* best 16x16 intra sad/SATD cost */
1419     WORD32 i4_best_intra_cost;
1420 
1421     /* indicates if 16x16 is best cu or 8x8 cu */
1422     UWORD8 b1_split_flag : 1;
1423 
1424     /* indicates if 8x8 vs 16x16 rdo evaluation needed */
1425     /* or only 8x8's rdo evaluation needed */
1426     UWORD8 b1_merge_flag : 1;
1427 
1428     /**
1429      * valid cu flag : required for incomplete ctbs at frame boundaries
1430      * or if CTB size is lower than 32
1431      */
1432     UWORD8 b1_valid_cu : 1;
1433 
1434     /** dummy bits; NOTE: despite the 'b6' prefix this field is 5 bits wide (1 + 1 + 1 + 5 = 8) */
1435     UWORD8 b6_reserved : 5;
1436 
1437 } intra16_analyse_t;
1438 
1439 /**
1440 ******************************************************************************
1441  *  @brief   32x32 Intra analyze structure (ipe_l0_ctb_analyse_for_me_t embeds 4 of these)
1442 ******************************************************************************
1443  */
1444 typedef struct
1445 {
1446     /** Best intra modes for 32x32 transform.
1447      *  The value 255 is inserted at the end to limit the number of modes
1448      */
1449     UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1];
1450 
1451     /** Best 32x32 intra modes for 16x16 transform
1452      *  The value 255 is inserted at the end to limit the number of modes
1453      */
1454     UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1];
1455 
1456     /** 16x16 children intra analyze for this 32x32 */
1457     intra16_analyse_t as_intra16_analyse[4];
1458 
1459     /* best 32x32 intra sad/SATD cost               */
1460     WORD32 i4_best_intra_cost;
1461 
1462     /* indicates if 32x32 is best cu or 16x16 cu    */
1463     UWORD8 b1_split_flag : 1;
1464 
1465     /* indicates if 32x32 vs 16x16 rdo evaluation needed */
1466     /* or 16x16 vs 8x8 evaluation is needed */
1467     UWORD8 b1_merge_flag : 1;
1468 
1469     /**
1470      * valid cu flag : required for incomplete ctbs at frame boundaries
1471      * or if CTB size is lower than 64
1472      */
1473     UWORD8 b1_valid_cu : 1;
1474 
1475     /** dummy bits; NOTE: despite the 'b6' prefix this field is 5 bits wide (1 + 1 + 1 + 5 = 8) */
1476     UWORD8 b6_reserved : 5;
1477 
1478 } intra32_analyse_t;
1479 
1480 /**
1481 ******************************************************************************
1482  *  @brief  IPE L0 analyze structure for L0 ME to do intra/inter CU decisions
1483  *          This is a CTB level structure encapsulating IPE modes, cost at all
1484  *          levels. IPE also recommends max intra CU sizes which is required
1485  *          by ME for CU size determination in intra dominant CTB
1486 ******************************************************************************
1487  */
1488 typedef struct
1489 {
1490     /** Best 64x64 intra modes for 32x32 transform.
1491      *  The value 255 is inserted at the end to limit the number of modes
1492      */
1493     UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1];
1494 
1495     /** 32x32 children intra analyze for this 64x64    */
1496     intra32_analyse_t as_intra32_analyse[4];
1497 
1498     /* indicates if 64x64 is best CUs or 32x32 CUs      */
1499     UWORD8 u1_split_flag;
1500 
1501     /* CTB level best 8x8 intra costs  */
1502     WORD32 ai4_best8x8_intra_cost[MAX_CU_IN_CTB];
1503 
1504     /* CTB level best 16x16 intra costs (MAX_CU_IN_CTB / 4 entries) */
1505     WORD32 ai4_best16x16_intra_cost[MAX_CU_IN_CTB >> 2];
1506 
1507     /* CTB level best 32x32 intra costs (MAX_CU_IN_CTB / 16 entries) */
1508     WORD32 ai4_best32x32_intra_cost[MAX_CU_IN_CTB >> 4];
1509 
1510     /* best 64x64 intra cost */
1511     WORD32 i4_best64x64_intra_cost;
1512 
1513     /**
1514      * CTB level early intra / inter decision at 8x8 block level
1515      * 0 - invalid decision
1516      * 1 - eval intra only
1517      * 2 - eval inter only
1518      * 3 - eval both intra and inter
1519      */
1520     /* Z scan format */
1521     WORD8 ai1_early_intra_inter_decision[MAX_CU_IN_CTB];
1522 
1523     /*
1524     @L0 level
1525     4 => 0 - 32x32 TU in 64x64 CU
1526          1 - 16x16 TU in 64x64 CU
1527          2 - 8x8  TU in 64x64 CU
1528          3 - 64x64 CU
1529     2 => Intra/Inter */
1530     WORD32 i4_64x64_act_factor[4][2];
1531 
1532     /*
1533     @L0 level
1534     4 => num 32x32 in CTB
1535     3 => 0 - 32x32 TU in 64x64 CU
1536          1 - 16x16 TU in 64x64 CU
1537          2 - 8x8  TU in 64x64 CU
1538     2 => Intra/Inter. NOTE(review): TU labels say "in 64x64 CU" for a per-32x32 array - confirm */
1539     WORD32 i4_32x32_act_factor[4][3][2];
1540 
1541     /*
1542     @L0 level
1543     16 => num 16x16 in CTB
1544     2 => 0 - 16x16 TU in 64x64 CU
1545          1 - 8x8  TU in 64x64 CU
1546     2 => Intra/Inter. NOTE(review): TU labels say "in 64x64 CU" for a per-16x16 array - confirm */
1547     WORD32 i4_16x16_act_factor[16][2][2];
1548 
1549     WORD32 nodes_created_in_cu_tree; /* count of nodes created in the cu tree (per field name) */
1550 
1551     cur_ctb_cu_tree_t *ps_cu_tree_root; /* root node of the CU recursion tree (per field name) */
1552 
1553     WORD32 ai4_8x8_act_factor[16]; /* 16 entries; indexing not documented here - TODO confirm */
1554     WORD32 ai4_best_sad_8x8_l1_me[MAX_CU_IN_CTB]; /* ME SAD at 8x8 level, l1 (per field name) */
1555     WORD32 ai4_best_sad_8x8_l1_ipe[MAX_CU_IN_CTB]; /* IPE SAD at 8x8 level, l1 (per field name) */
1556     WORD32 ai4_best_sad_cost_8x8_l1_me[MAX_CU_IN_CTB]; /* ME SAD cost at 8x8 level, l1 (per field name) */
1557     WORD32 ai4_best_sad_cost_8x8_l1_ipe[MAX_CU_IN_CTB]; /* IPE SAD cost at 8x8 level, l1 (per field name) */
1558 
1559     /*Ctb level accumulated satd*/
1560     WORD32 i4_ctb_acc_satd;
1561 
1562     /*Ctb level accumulated mpm bits*/
1563     WORD32 i4_ctb_acc_mpm_bits;
1564 
1565 } ipe_l0_ctb_analyse_for_me_t;
1566 
1567 typedef struct
1568 {
1569     WORD16 i2_mv_x; /* x component of the motion vector (16-bit signed; units not stated here) */
1570     WORD16 i2_mv_y; /* y component of the motion vector (16-bit signed; units not stated here) */
1571 } global_mv_t; /* global motion vector; pre_enc_me_ctxt_t keeps one per reference (MAX_NUM_REF) */
1572 
1573 /**
1574 ******************************************************************************
1575  *  @brief  Pre Encode pass and ME pass shared variables and buffers
1576 ******************************************************************************
1577  */
1578 typedef struct
1579 {
1580     /**
1581      * Buffer id
1582      */
1583     WORD32 i4_buf_id;
1584 
1585     /**
1586     * Flag will be set to 1 by frame processing thread after receiving flush
1587     * command from application
1588     */
1589     WORD32 i4_end_flag;
1590 
1591     /** frame level ctb analyse  buffer pointer */
1592     ctb_analyse_t *ps_ctb_analyse;
1593 
1594     /** frame level cu analyse  buffer pointer for IPE (field below is commented out) */
1595     //cu_analyse_t       *ps_cu_analyse;
1596 
1597     /** current input pointer */
1598     ihevce_lap_enc_buf_t *ps_curr_inp;
1599 
1600     /** current inp buffer id */
1601     WORD32 curr_inp_buf_id;
1602 
1603     /** Slice header parameters (stored by value)   */
1604     slice_header_t s_slice_hdr;
1605 
1606     /** sps parameters activated by current slice  */
1607     sps_t *ps_sps;
1608 
1609     /** pps parameters activated by current slice  */
1610     pps_t *ps_pps;
1611 
1612     /** vps parameters activated by current slice  */
1613     vps_t *ps_vps;
1614     /**  Pointer to Penultimate Layer context memory internally has MV bank buff and related params */
1615     void *pv_me_lyr_ctxt;
1616 
1617     /**  Pointer to Penultimate Layer bank context memory (original text said "NV bank", presumably MV bank) */
1618     void *pv_me_lyr_bnk_ctxt;
1619 
1620     /**  Pointer to Penultimate Layer MV bank buff */
1621     void *pv_me_mv_bank;
1622 
1623     /**  Pointer to Penultimate Layer reference idx buffer */
1624     void *pv_me_ref_idx;
1625     /**
1626      * Array to store 8x8 cost (partial 8x8 sad + level adjusted cost)
1627      * The order of storing is raster scan order within CTB and
1628      * CTB order is raster scan within frame.
1629      */
1630     double *plf_intra_8x8_cost;
1631 
1632     /**
1633      * L0 layer ctb analyse frame level buffer.
1634      * IPE will populate the cost and best modes at all levels in this buffer
1635      *  for every CTB in a frame
1636      */
1637     // moved to shorter buffer queue
1638     //ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb;
1639 
1640     /** Layer L1 buffer pointer */
1641     ihevce_ed_blk_t *ps_layer1_buf;
1642 
1643     /** Layer L2 buffer pointer */
1644     ihevce_ed_blk_t *ps_layer2_buf;
1645 
1646     /*ME reverse map info*/
1647     UWORD8 *pu1_me_reverse_map_info;
1648 
1649     /** Buffer pointer for CTB level information in pre intra pass*/
1650     ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;
1651 
1652     /* L0 cur 8x8 satd for QP mod*/
1653     ihevce_8x8_L0_satd_t *ps_layer0_cur_satd;
1654 
1655     /* L0 cur 8x8 mean for QP mod*/
1656     ihevce_8x8_L0_mean_t *ps_layer0_cur_mean;
1657 
1658     /** SEI parameters (sei_params_t, stored by value)  */
1659     sei_params_t s_sei;
1660 
1661     /** nal_type for the slice to be encoded  */
1662     WORD32 i4_slice_nal_type;
1663 
1664     /** input time stamp in terms of ticks: lower 32  */
1665     WORD32 i4_inp_timestamp_low;
1666 
1667     /** input time stamp in terms of ticks: higher 32 */
1668     WORD32 i4_inp_timestamp_high;
1669 
1670     /** input frame ctxt of app to be returned in output buffer */
1671     void *pv_app_frm_ctxt;
1672 
1673     /** current frm valid flag :
1674      * will be 1 if valid input was processed by frame proc thrd
1675      */
1676     WORD32 i4_frm_proc_valid_flag;
1677 
1678     /**
1679      * Qp to be used for current frame
1680      */
1681     WORD32 i4_curr_frm_qp;
1682 
1683     /**
1684      * Frame level Lambda parameters, one entry per bitrate (IHEVCE_MAX_NUM_BITRATES entries)
1685      */
1686     frm_lambda_ctxt_t as_lambda_prms[IHEVCE_MAX_NUM_BITRATES];
1687 
1688     /** Frame-level SATD cost accumulator */
1689     LWORD64 i8_frame_acc_satd_cost;
1690 
1691     /** Frame - L1 coarse me cost accumulated */
1692     LWORD64 i8_acc_frame_coarse_me_cost;
1693     /** Frame - L1 coarse me cost accumulated for ref (field below is commented out) */
1694     //LWORD64 i8_acc_frame_coarse_me_cost_for_ref;
1695 
1696     /** Frame - L1 coarse me sad accumulated */
1697     LWORD64 i8_acc_frame_coarse_me_sad;
1698 
1699     /* Average activity of 4x4 blocks from previous frame (NOTE(review): field name says curr_frame - confirm)
1700     *  If L1, maps to 8*8 in L0
1701     */
1702     WORD32 i4_curr_frame_4x4_avg_act;
1703 
1704     WORD32 ai4_mod_factor_derived_by_variance[2]; /* 2 entries; modulation factors derived from variance (per field name) */
1705 
1706     float f_strength; /* floating-point strength value (use not visible here - TODO confirm) */
1707 
1708     /* Average activity of 8x8 blocks from previous frame
1709     *  If L1, maps to 16*16 in L0. The 8x8 activity arrays below have 2 entries each.
1710     */
1711 
1712     long double ld_curr_frame_8x8_log_avg[2];
1713 
1714     LWORD64 i8_curr_frame_8x8_avg_act[2];
1715 
1716     LWORD64 i8_curr_frame_8x8_sum_act[2];
1717 
1718     WORD32 i4_curr_frame_8x8_sum_act_for_strength[2];
1719 
1720     ULWORD64 u8_curr_frame_8x8_sum_act_sqr;
1721 
1722     WORD32 i4_curr_frame_8x8_num_blks[2];
1723 
1724     LWORD64 i8_acc_frame_8x8_sum_act[2];
1725     LWORD64 i8_acc_frame_8x8_sum_act_sqr;
1726     WORD32 i4_acc_frame_8x8_num_blks[2];
1727     LWORD64 i8_acc_frame_8x8_sum_act_for_strength;
1728     LWORD64 i8_curr_frame_8x8_sum_act_for_strength; /* NOTE(review): 64-bit scalar; a WORD32[2] field with a near-identical name (i4_ prefix) exists above */
1729 
1730     /* Average activity of 16x16 blocks from previous frame
1731     *  If L1, maps to 32*32 in L0. The 16x16 activity arrays below have 3 entries each.
1732     */
1733 
1734     long double ld_curr_frame_16x16_log_avg[3];
1735 
1736     LWORD64 i8_curr_frame_16x16_avg_act[3];
1737 
1738     LWORD64 i8_curr_frame_16x16_sum_act[3];
1739 
1740     WORD32 i4_curr_frame_16x16_num_blks[3];
1741 
1742     LWORD64 i8_acc_frame_16x16_sum_act[3];
1743     WORD32 i4_acc_frame_16x16_num_blks[3];
1744 
1745     /* Average activity of 32x32 blocks from previous frame
1746     *  If L1, maps to 64*64 in L0. The 32x32 activity arrays below have 3 entries each.
1747     */
1748 
1749     long double ld_curr_frame_32x32_log_avg[3];
1750 
1751     LWORD64 i8_curr_frame_32x32_avg_act[3];
1752 
1753     global_mv_t s_global_mv[MAX_NUM_REF]; /* global motion vectors, MAX_NUM_REF entries (sits between the 32x32 activity fields) */
1754     LWORD64 i8_curr_frame_32x32_sum_act[3];
1755 
1756     WORD32 i4_curr_frame_32x32_num_blks[3];
1757 
1758     LWORD64 i8_acc_frame_32x32_sum_act[3];
1759     WORD32 i4_acc_frame_32x32_num_blks[3];
1760 
1761     LWORD64 i8_acc_num_blks_high_sad; /* accumulated number of blocks with high sad (per field name) */
1762 
1763     LWORD64 i8_total_blks; /* total number of blocks (per field name) */
1764 
1765     WORD32 i4_complexity_percentage; /* complexity percentage (derivation not visible here - TODO confirm) */
1766 
1767     WORD32 i4_is_high_complex_region; /* presumably non-zero for a high-complexity region - confirm */
1768 
1769     WORD32 i4_avg_noise_thrshld_4x4; /* average noise threshold at 4x4 level (per field name) */
1770 
1771     LWORD64 i8_curr_frame_mean_sum; /* current-frame sum of means (per field name) */
1772     WORD32 i4_curr_frame_mean_num_blks; /* current-frame number of blocks counted for the mean (per field name) */
1773     LWORD64 i8_curr_frame_avg_mean_act; /* current-frame average mean activity (per field name) */
1774 
1775 } pre_enc_me_ctxt_t;
1776 
1777 /**
1778 ******************************************************************************
1779  *  @brief  buffers from L0 IPE to ME and enc loop
1780 ******************************************************************************
1781  */
1782 typedef struct
1783 {
1784     WORD32 i4_size; /* size value (what it measures is not visible here - TODO confirm) */
1785 
1786     ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb; /* pointer to IPE L0 ctb analyse data */
1787 } pre_enc_L0_ipe_encloop_ctxt_t;
1788 /**
1789 ******************************************************************************
1790  *  @brief  Frame process and Entropy coding pass shared variables and buffers
      *
      *  The structure that immediately follows (s_pic_level_acc_info_t) holds
      *  picture-level accumulated statistics. It is embedded by value as the
      *  s_pic_level_info member of frm_proc_ent_cod_ctxt_t.
1791 ******************************************************************************
1792  */
1793 
1794 typedef struct
1795 {
1796     /* PIC level info: CU counters.
          * Note: the i8_total_* counters in this structure are declared
          * ULWORD64 (unsigned) even though they carry the i8_ prefix. */
1797     ULWORD64 i8_total_cu;
1798     ULWORD64 i8_total_cu_min_8x8;
1799     ULWORD64 i8_total_pu;
1800     ULWORD64 i8_total_intra_cu;
1801     ULWORD64 i8_total_inter_cu;
1802     ULWORD64 i8_total_skip_cu;
         /* NOTE(review): 4 entries, presumably one per CU size - the index
          * order is not visible in this file section, confirm. */
1803     ULWORD64 i8_total_cu_based_on_size[4];
1804 
         /* PU counters by prediction kind */
1805     ULWORD64 i8_total_intra_pu;
1806     ULWORD64 i8_total_merge_pu;
1807     ULWORD64 i8_total_non_skipped_inter_pu;
1808 
         /* PU counters by partition type.
          * NOTE(review): the meaning of each array index (and why some arrays
          * have 4 entries and others 3) is not visible here - confirm against
          * the code that increments them. */
1809     ULWORD64 i8_total_2nx2n_intra_pu[4];
1810     ULWORD64 i8_total_nxn_intra_pu;
1811     ULWORD64 i8_total_2nx2n_inter_pu[4];
1812     ULWORD64 i8_total_smp_inter_pu[4];
1813     ULWORD64 i8_total_amp_inter_pu[3];
1814     ULWORD64 i8_total_nxn_inter_pu[3];
1815 
         /* Counters per prediction direction (L0 / L1 / BI) */
1816     ULWORD64 i8_total_L0_mode;
1817     ULWORD64 i8_total_L1_mode;
1818     ULWORD64 i8_total_BI_mode;
1819 
         /* Counters per reference index for list 0 and list 1;
          * each array has MAX_DPB_SIZE entries. */
1820     ULWORD64 i8_total_L0_ref_idx[MAX_DPB_SIZE];
1821     ULWORD64 i8_total_L1_ref_idx[MAX_DPB_SIZE];
1822 
         /* TU counters */
1823     ULWORD64 i8_total_tu;
1824     ULWORD64 i8_total_non_coded_tu;
1825     ULWORD64 i8_total_inter_coded_tu;
1826     ULWORD64 i8_total_intra_coded_tu;
1827 
         /* TU counters by size and by parent CU size (64 / 32 / 16 / 8).
          * NOTE(review): index meaning is not visible here - confirm. */
1828     ULWORD64 i8_total_tu_based_on_size[4];
1829     ULWORD64 i8_total_tu_cu64[4];
1830     ULWORD64 i8_total_tu_cu32[4];
1831     ULWORD64 i8_total_tu_cu16[3];
1832     ULWORD64 i8_total_tu_cu8[2];
1833 
         /* QP statistics (signed accumulators plus min / max values) */
1834     LWORD64 i8_total_qp;
1835     LWORD64 i8_total_qp_min_cu;
1836     WORD32 i4_min_qp;
1837     WORD32 i4_max_qp;
1838     LWORD64 i8_sum_squared_frame_qp;
1839     LWORD64 i8_total_frame_qp;
1840     WORD32 i4_max_frame_qp;
         /* Buffer underflow / overflow statistics (totals and maxima).
          * NOTE(review): units are not visible here - confirm. */
1841     float f_total_buffer_underflow;
1842     float f_total_buffer_overflow;
1843     float f_max_buffer_underflow;
1844     float f_max_buffer_overflow;
1845 
         /* Number of active reference indices for list 0 / list 1.
          * Note: declared UWORD8 (unsigned) despite the i1_ prefix. */
1846     UWORD8 i1_num_ref_idx_l0_active;
1847     UWORD8 i1_num_ref_idx_l1_active;
1848 
         /* Reference POCs for list 0 / list 1, MAX_DPB_SIZE entries each */
1849     WORD32 i4_ref_poc_l0[MAX_DPB_SIZE];
1850     WORD32 i4_ref_poc_l1[MAX_DPB_SIZE];
1851 
         /* Per-reference list entry, luma weight and luma offset for
          * list 0 and list 1, MAX_DPB_SIZE entries each.
          * Note: i2_luma_weight_l0 / i2_luma_weight_l1 are declared DOUBLE
          * although they share the i2_ prefix with the WORD16 offset arrays. */
1852     WORD8 i1_list_entry_l0[MAX_DPB_SIZE];
1853     DOUBLE i2_luma_weight_l0[MAX_DPB_SIZE];
1854     WORD16 i2_luma_offset_l0[MAX_DPB_SIZE];
1855     WORD8 i1_list_entry_l1[MAX_DPB_SIZE];
1856     DOUBLE i2_luma_weight_l1[MAX_DPB_SIZE];
1857     WORD16 i2_luma_offset_l1[MAX_DPB_SIZE];
1858 
         /* Estimated bits, split by category as named by each field */
1859     ULWORD64 u8_bits_estimated_intra;
1860     ULWORD64 u8_bits_estimated_inter;
1861     ULWORD64 u8_bits_estimated_slice_header;
1862     ULWORD64 u8_bits_estimated_sao;
1863     ULWORD64 u8_bits_estimated_split_cu_flag;
1864     ULWORD64 u8_bits_estimated_cu_hdr_bits;
1865     ULWORD64 u8_bits_estimated_split_tu_flag;
1866     ULWORD64 u8_bits_estimated_qp_delta_bits;
1867     ULWORD64 u8_bits_estimated_cbf_luma_bits;
1868     ULWORD64 u8_bits_estimated_cbf_chroma_bits;
1869 
1870     ULWORD64 u8_bits_estimated_res_luma_bits;
1871     ULWORD64 u8_bits_estimated_res_chroma_bits;
1872 
1873     ULWORD64 u8_bits_estimated_ref_id;
1874     ULWORD64 u8_bits_estimated_mvd;
1875     ULWORD64 u8_bits_estimated_merge_flag;
1876     ULWORD64 u8_bits_estimated_mpm_luma;
1877     ULWORD64 u8_bits_estimated_mpm_chroma;
1878 
         /* Generated-bit totals (overall, VBV, and per I / P / B picture type) */
1879     ULWORD64 u8_total_bits_generated;
1880     ULWORD64 u8_total_bits_vbv;
1881 
1882     ULWORD64 u8_total_I_bits_generated;
1883     ULWORD64 u8_total_P_bits_generated;
1884     ULWORD64 u8_total_B_bits_generated;
1885 
         /* Frame SAD values (overall / intra / inter), 32-bit unsigned */
1886     UWORD32 u4_frame_sad;
1887     UWORD32 u4_frame_intra_sad;
1888     UWORD32 u4_frame_inter_sad;
1889 
         /* Frame cost values (overall / intra / inter).
          * Note: declared ULWORD64 (unsigned) despite the i8_ prefix. */
1890     ULWORD64 i8_frame_cost;
1891     ULWORD64 i8_frame_intra_cost;
1892     ULWORD64 i8_frame_inter_cost;
1893 } s_pic_level_acc_info_t;
1894 
1895 typedef struct
1896 {
         /* Picture-level SEI related values; every field name ends in
          * _entropy.
          * NOTE(review): presumably consumed by the entropy coding stage when
          * writing SEI - the consumer is not visible in this part of the
          * file, confirm. */
1897     UWORD32 u4_target_bit_rate_sei_entropy;
1898     UWORD32 u4_buffer_size_sei_entropy;
         /* NOTE(review): "dbf" is not expanded anywhere in view - confirm
          * its meaning before relying on it. */
1899     UWORD32 u4_dbf_entropy;
1900 
1901 } s_pic_level_sei_info_t;
1902 /**
1903 ******************************************************************************
1904 *  @brief  ME pass and Main encode pass shared variables and buffers
1905 ******************************************************************************
1906 */
1907 typedef struct
1908 {
1909     /**
1910     * Buffer id
1911     */
1912     WORD32 i4_buf_id;
1913 
1914     /**
1915     * Flag will be set to 1 by frame processing thread after receiving flush
1916     * command from application
1917     */
1918     WORD32 i4_end_flag;
1919 
1920     /** Pointer to the current input buffer (ihevce_lap_enc_buf_t) */
1921     ihevce_lap_enc_buf_t *ps_curr_inp;
1922 
1923     /** Buffer id of the current input buffer */
1924     WORD32 curr_inp_buf_id;
1925 
1926     /** Pointer to the current input buffer from ME (pre_enc_me_ctxt_t) */
1927     pre_enc_me_ctxt_t *ps_curr_inp_from_me_prms;
1928 
1929     /** Buffer id of the current input buffer from ME */
1930     WORD32 curr_inp_from_me_buf_id;
1931 
1932     /** Pointer to the current input buffer from L0 IPE
          *  (pre_enc_L0_ipe_encloop_ctxt_t) */
1933     pre_enc_L0_ipe_encloop_ctxt_t *ps_curr_inp_from_l0_ipe_prms;
1934 
1935     /** Buffer id of the current input buffer from L0 IPE */
1936     WORD32 curr_inp_from_l0_ipe_buf_id;
1937 
1938     /** Slice header parameters (stored by value) */
1939     slice_header_t s_slice_hdr;
1940 
1941     /** current frm valid flag :
1942      * will be 1 if valid input was processed by frame proc thrd
1943      */
1944     WORD32 i4_frm_proc_valid_flag;
1945 
1946     /**
1947      * Array of reference picture list for ping instance (entries by value)
1948      * 2=> ref_pic_list0 and ref_pic_list1
          * Dimensions: [IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2]
1949      */
1950     recon_pic_buf_t as_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2];
1951 
1952     /**
1953      * Array of pointers to reference pictures, same dimensions as as_ref_list
1954      * 2=> ref_pic_list0 and ref_pic_list1
1955      */
1956     recon_pic_buf_t *aps_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2];
1957 
1958     /**  Job Queue Memory encode */
1959     job_queue_t *ps_job_q_enc;
1960 
1961     /** Array of Job Queue handles of enc group, NUM_ENC_JOBS_QUES entries.
          *  NOTE(review): the original comment said "for ping and pong
          *  instance", but this array has no ping-pong dimension - confirm.
          */
1962     job_queue_handle_t as_job_que_enc_hdls[NUM_ENC_JOBS_QUES];
1963 
1964     /** Array of Job Queue handles of enc group for re-encode */
1965     job_queue_handle_t as_job_que_enc_hdls_reenc[NUM_ENC_JOBS_QUES];
1966     /** frame level me_ctb_data_t buffer pointer
1967       */
1968     me_ctb_data_t *ps_cur_ctb_me_data;
1969 
1970     /** frame level cur_ctb_cu_tree_t buffer pointer for ME
1971       */
1972     cur_ctb_cu_tree_t *ps_cur_ctb_cu_tree;
1973 
1974     /** Pointer to Dep. Mngr for CTBs processed in every row of a frame.
1975      * ME is producer, EncLoop is the consumer
1976      */
1977     void *pv_dep_mngr_encloop_dep_me;
1978 
1979 } me_enc_rdopt_ctxt_t;
1980 
1981 typedef struct
1982 {
         /* Describes one SEI payload: its type, its length and a pointer to
          * the payload data. */

         /** SEI payload type */
1983     UWORD32 u4_payload_type;
         /** SEI payload length.
          *  NOTE(review): presumably in bytes - confirm against the writer. */
1984     UWORD32 u4_payload_length;
         /** Pointer to the payload data; ownership / lifetime of the buffer is
          *  not visible in this part of the file. */
1985     UWORD8 *pu1_sei_payload;
1986 } sei_payload_t;
1987 
1988 typedef struct
1989 {
         /* Frame-level data handed to entropy coding: frame-level CTB / CU /
          * TU / PU / coefficient buffers, the slice header and active
          * parameter sets, time stamps, and per-frame statistics that the
          * comments below describe as passed from the enc threads to the
          * entropy thread. */

1990     /**
1991     * Flag will be set to 1 by frame processing thread after receiving flush
1992     * command from application
1993     */
1994     WORD32 i4_end_flag;
1995 
1996     /** frame level ctb allocation for ctb after aligning to max cu size */
1997     ctb_enc_loop_out_t *ps_frm_ctb_data;
1998 
1999     /** frame level cu allocation for ctb after aligning to max cu size  */
2000     cu_enc_loop_out_t *ps_frm_cu_data;
2001 
2002     /** frame level tu allocation for ctb after aligning to max cu size  */
2003     tu_enc_loop_out_t *ps_frm_tu_data;
2004 
2005     /** frame level pu allocation for ctb after aligning to max cu size  */
2006     pu_t *ps_frm_pu_data;
2007 
2008     /**  frame level coeff allocation for ctb after aligning to max cu size */
2009     void *pv_coeff_data;
2010 
2011     /** Slice header parameters (stored by value) */
2012     slice_header_t s_slice_hdr;
2013 
2014     /** sps parameters activated by current slice  */
2015     sps_t *ps_sps;
2016 
2017     /** pps parameters activated by current slice  */
2018     pps_t *ps_pps;
2019 
2020     /** vps parameters activated by current slice  */
2021     vps_t *ps_vps;
2022 
2023     /** SEI parameters (sei_params_t, stored by value) */
2024     sei_params_t s_sei;
2025 
2026     /* Flag to indicate if AUD NAL is present */
2027     WORD8 i1_aud_present_flag;
2028 
2029     /* Flag to indicate if EOS NAL is present */
2030     WORD8 i1_eos_present_flag;
2031 
2032     /** nal_type for the slice to be encoded  */
2033     WORD32 i4_slice_nal_type;
2034 
2035     /** input time stamp in terms of ticks: lower 32  */
2036     WORD32 i4_inp_timestamp_low;
2037 
2038     /** input time stamp in terms of ticks: higher 32 */
2039     WORD32 i4_inp_timestamp_high;
2040 
2041     /** input frame ctxt of app to be returned in output buffer */
2042     void *pv_app_frm_ctxt;
2043 
2044     /** current frm valid flag :
2045      * will be 1 if valid input was processed by frame proc thrd
2046      */
2047     WORD32 i4_frm_proc_valid_flag;
2048 
2049     /** To support entropy sync the bitstream offset of each CTB row
2050      * is populated in this array and put in slice header in the end
2051      */
2052     WORD32 ai4_entry_point_offset[MAX_NUM_CTB_ROWS_FRM];
2053 
2054     /** RDopt estimation of bytes generated based on which rc update happens
2055      *
          *  NOTE(review): the field name says "bits" while the original
          *  comment says "bytes" - confirm the unit.
2056      */
2057     WORD32 i4_rdopt_bits_generated_estimate;
2058 
2059     /* These params are passed from enc-threads to entropy thread for
2060         passing params needed for PSNR calculation and encoding
2061         summary prints */
2062     DOUBLE lf_luma_mse;
2063     DOUBLE lf_cb_mse;
2064     DOUBLE lf_cr_mse;
2065 
2066     DOUBLE lf_luma_ssim;
2067     DOUBLE lf_cb_ssim;
2068     DOUBLE lf_cr_ssim;
2069 
         /* Per-frame QP, POC, display number and picture type */
2070     WORD32 i4_qp;
2071     WORD32 i4_poc;
2072     WORD32 i4_display_num;
2073     WORD32 i4_pic_type;
2074 
2075     /** I-only SCD */
2076     WORD32 i4_is_I_scenecut;
2077 
         /* Non-I scene cut flag and sub-pic level RC setting.
          * NOTE(review): exact value semantics are not visible here. */
2078     WORD32 i4_is_non_I_scenecut;
2079     WORD32 i4_sub_pic_level_rc;
2080 
         /* Note: a scalar WORD32 despite the ai4_ (array) prefix */
2081     WORD32 ai4_frame_bits_estimated;
         /* Picture-level accumulated statistics (stored by value) */
2082     s_pic_level_acc_info_t s_pic_level_info;
2083 
2084     LWORD64 i8_buf_level_bitrate_change;
2085 
2086     WORD32 i4_is_end_of_idr_gop;
2087 
         /* SEI payload descriptors, MAX_NUMBER_OF_SEI_PAYLOAD slots */
2088     sei_payload_t as_sei_payload[MAX_NUMBER_OF_SEI_PAYLOAD];
2089 
         /* NOTE(review): presumably the number of valid entries in
          * as_sei_payload - confirm against the code that fills it. */
2090     UWORD32 u4_num_sei_payload;
2091     /* Flag used only in mres single output case to flush out one res and start with next */
2092     WORD32 i4_out_flush_flag;
2093 
2094 } frm_proc_ent_cod_ctxt_t;
2095 
2096 /**
2097 ******************************************************************************
2098 *  @brief  Sub-pic RC context (ihevce_sub_pic_rc_ctxt_t): bit-rate / frame ids,
      *          CTB counts and accumulated bits, SAD / SATD and activity values
      *
      *  NOTE(review): the original brief here was a verbatim copy of the
      *  me_enc_rdopt_ctxt_t one ("ME pass and Main encode pass shared
      *  variables and buffers"); the description above is derived from the
      *  field names and comments below.
2099 ******************************************************************************
2100 */
2101 typedef struct
2102 {
2103     /*BitRate ID*/
2104     WORD32 i4_br_id;
2105 
2106     /*Frame ID*/
2107     WORD32 i4_frm_id;
2108 
2109     /*Number of CTB, after which data is populated*/
2110     WORD32 i4_ctb_count_in_data;
2111 
2112     /*Number of CTB, after which scale is computed*/
2113     WORD32 i4_ctb_count_out_scale;
2114 
2115     /*Bits estimated for the frame */
2116     /* For NON-I SCD max buf bits*/
2117     LWORD64 i8_frame_bits_estimated;
2118 
2119     /* Bits consumed till the nctb*/
2120     LWORD64 i8_nctb_bits_consumed;
2121 
2122     /* Accumulated bits consumed.
          * NOTE(review): the original comment duplicated the one on
          * i8_nctb_bits_consumed ("Bits consumed till the nctb") - confirm
          * how the two differ. */
2123     LWORD64 i8_acc_bits_consumed;
2124 
2125     /*Frame level Best of Ipe and ME sad*/
2126     LWORD64 i8_frame_l1_me_sad;
2127 
2128     /*SAD accumulated till NCTB*/
2129     LWORD64 i8_nctb_l1_me_sad;
2130 
2131     /*Frame level IPE sad*/
2132     LWORD64 i8_frame_l1_ipe_sad;
2133 
2134     /*IPE SAD accumulated till NCTB*/
2135     LWORD64 i8_nctb_l1_ipe_sad;
2136 
2137     /*Frame level L0 IPE satd*/
2138     LWORD64 i8_frame_l0_ipe_satd;
2139 
2140     /*L0 SATD accumulated till NCTB*/
2141     LWORD64 i8_nctb_l0_ipe_satd;
2142 
2143     /*Frame level Activity factor acc at 8x8 level */
2144     LWORD64 i8_frame_l1_activity_fact;
2145 
2146     /*NCTB Activity factor acc at 8x8 level */
2147     LWORD64 i8_nctb_l1_activity_fact;
2148 
2149     /*L0 MPM bits accumulated till NCTB*/
2150     LWORD64 i8_nctb_l0_mpm_bits;
2151 
2152     /*Encoder hdr bits accumulated till NCTB*/
2153     LWORD64 i8_nctb_hdr_bits_consumed;
2154 
2155 } ihevce_sub_pic_rc_ctxt_t;
2156 
2157 /**
2158 ******************************************************************************
2159  *  @brief  Memory manager context (stores the memory tables allocated)
2160 ******************************************************************************
2161  */
2162 typedef struct
2163 {
2164     /**
2165     * Total number of memtabs (Modules and system)
2166     * during create time
2167     */
2168     WORD32 i4_num_create_memtabs;
2169 
2170     /**
2171     * Pointer to the mem tabs
2172     * of create time
2173     */
2174     iv_mem_rec_t *ps_create_memtab;
2175 
2176     /**
2177     * Total number of memtabs Data and control Ques
2178     * during Ques create time
2179     */
2180     WORD32 i4_num_q_memtabs;
2181 
2182     /**
2183     * Pointer to the mem tabs
2184     * of Ques create time (counted by i4_num_q_memtabs above)
2185     */
2186     iv_mem_rec_t *ps_q_memtab;
2187 
2188 } enc_mem_mngr_ctxt;
2189 
2190 /**
2191 ******************************************************************************
2192  *  @brief  Encoder Interface Queues Context
2193 ******************************************************************************
2194  */
2195 typedef struct
2196 {
2197     /** Number of Queues at interface context level */
2198     WORD32 i4_num_queues;
2199 
2200     /** Array of Queue handles (opaque pointers), IHEVCE_MAX_NUM_QUEUES slots */
2201     void *apv_q_hdl[IHEVCE_MAX_NUM_QUEUES];
2202 
2203     /** Mutex for ensuring thread safety of the access of the queues */
2204     void *pv_q_mutex_hdl;
2205 
2206 } enc_q_ctxt_t;
2207 
2208 /**
2209 ******************************************************************************
2210  *  @brief  Module context of different modules in encoder
      *
      *  Every member is an opaque (void *) handle to a module's own context.
2211 ******************************************************************************
2212  */
2213 
2214 typedef struct
2215 {
2216     /** Motion estimation context pointer */
2217     void *pv_me_ctxt;
2218     /** Coarse Motion estimation context pointer */
2219     void *pv_coarse_me_ctxt;
2220 
2221     /** Intra Prediction context pointer */
2222     void *pv_ipe_ctxt;
2223 
2224     /** Encode Loop context pointer */
2225     void *pv_enc_loop_ctxt;
2226 
2227     /** Entropy Coding context pointers, array of IHEVCE_MAX_NUM_BITRATES */
2228     void *apv_ent_cod_ctxt[IHEVCE_MAX_NUM_BITRATES];
2229 
2230     /** Look Ahead Processing context pointer */
2231     void *pv_lap_ctxt;
2232     /** Rate control context pointers, array of IHEVCE_MAX_NUM_BITRATES */
2233     void *apv_rc_ctxt[IHEVCE_MAX_NUM_BITRATES];
2234     /** Decomposition pre intra context pointer */
2235     void *pv_decomp_pre_intra_ctxt;
2236 
2237 } module_ctxt_t;
2238 
2239 /**
2240 ******************************************************************************
2241  *  @brief  Threads semaphore handles
      *
      *  Every member is an opaque (void *) semaphore handle.
2242 ******************************************************************************
2243  */
2244 typedef struct
2245 {
2246     /** LAP semaphore handle */
2247     void *pv_lap_sem_handle;
2248 
2249     /** Encode frame Process semaphore handle */
2250     void *pv_enc_frm_proc_sem_handle;
2251 
2252     /** Pre Encode frame Process semaphore handle */
2253     void *pv_pre_enc_frm_proc_sem_handle;
2254 
2255     /** Entropy coding semaphore handle
2256         One semaphore for each entropy thread, i.e. for each bit-rate instance*/
2257     void *apv_ent_cod_sem_handle[IHEVCE_MAX_NUM_BITRATES];
2258 
2259     /**
2260      *  Semaphore handle corresponding to get free inp frame buff
2261      *  function call from app if called in blocking mode
2262      */
2263     void *pv_inp_data_sem_handle;
2264 
2265     /**
2266      *  Semaphore handle corresponding to get free inp control command buff
2267      *  function call from app if called in blocking mode
2268      */
2269     void *pv_inp_ctrl_sem_handle;
2270 
2271     /**
2272      *  Semaphore handles corresponding to get filled out bitstream buff
2273      *  function call from app if called in blocking mode
          *  (array of IHEVCE_MAX_NUM_BITRATES)
2274      */
2275     void *apv_out_strm_sem_handle[IHEVCE_MAX_NUM_BITRATES];
2276 
2277     /**
2278      *  Semaphore handles corresponding to get filled out recon buff
2279      *  function call from app if called in blocking mode
          *  (array of IHEVCE_MAX_NUM_BITRATES)
2280      */
2281     void *apv_out_recon_sem_handle[IHEVCE_MAX_NUM_BITRATES];
2282 
2283     /**
2284      *  Semaphore handle corresponding to get filled out control status buff
2285      *  function call from app if called in blocking mode
2286      */
2287     void *pv_out_ctrl_sem_handle;
2288 
2289     /**
2290      *  Semaphore handle for LAP input data.
2291      *  NOTE(review): the original comment was a copy of the out control
2292      *  status one above; actual usage is not visible here - confirm.
2293      */
2293     void *pv_lap_inp_data_sem_hdl;
2294 
2295     /**
2296      *  Semaphore handle for pre-enc input data.
2297      *  NOTE(review): the original comment was a copy of the out control
2298      *  status one above; actual usage is not visible here - confirm.
2298      */
2299     void *pv_preenc_inp_data_sem_hdl;
2300 
2301     /**
2302      *  Semaphore handles corresponding to Multi Res Single output case
          *  (one named "ent", one named "out")
2303      */
2304     void *pv_ent_common_mres_sem_hdl;
2305     void *pv_out_common_mres_sem_hdl;
2306 
2307 } thrd_que_sem_hdl_t;
2308 
2309 /**
2310 ******************************************************************************
2311  *  @brief  Frame level structure which has parameters about CTBs
2312 ******************************************************************************
2313  */
2314 typedef struct
2315 {
2316     /** CTB size of all CTBs in a frame, in pixels.
2317      *  This is a create time value;
2318      *  run time change in this value is not supported
2319      */
2320     WORD32 i4_ctb_size;
2321 
2322     /** Minimum CU size of a CTB in a frame, in pixels.
2323      *  This is a create time value;
2324      *  run time change in this value is not supported
2325      */
2326     WORD32 i4_min_cu_size;
2327 
2328     /** Worst case num CUs in CTB based on i4_ctb_size */
2329     WORD32 i4_num_cus_in_ctb;
2330 
2331     /** Worst case num PUs in CTB based on i4_ctb_size */
2332     WORD32 i4_num_pus_in_ctb;
2333 
2334     /** Worst case num TUs in CTB based on i4_ctb_size */
2335     WORD32 i4_num_tus_in_ctb;
2336 
2337     /** Number of CTBs in horizontal direction
2338       * this is based on run time source width and i4_ctb_size
2339       */
2340     WORD32 i4_num_ctbs_horz;
2341 
2342     /** Number of CTBs in vertical direction
2343      *  this is based on run time source height and i4_ctb_size
2344      */
2345     WORD32 i4_num_ctbs_vert;
2346 
2347     /** MAX CUs in horizontal direction
2348      * this is based on run time source width, i4_ctb_size and  i4_num_cus_in_ctb
2349      */
2350     WORD32 i4_max_cus_in_row;
2351 
2352     /** MAX PUs in horizontal direction
2353      * this is based on run time source width, i4_ctb_size and  i4_num_pus_in_ctb
2354      */
2355     WORD32 i4_max_pus_in_row;
2356 
2357     /** MAX TUs in horizontal direction
2358      * this is based on run time source width, i4_ctb_size and  i4_num_tus_in_ctb
2359      */
2360     WORD32 i4_max_tus_in_row;
2361 
2362     /**
2363      * CU aligned picture width (currently aligned to MAX CU size)
2364      * should be modified to be aligned to MIN CU size
2365      */
2366 
2367     WORD32 i4_cu_aligned_pic_wd;
2368 
2369     /**
2370      * CU aligned picture height (currently aligned to MAX CU size)
2371      * should be modified to be aligned to MIN CU size
2372      */
2373 
2374     WORD32 i4_cu_aligned_pic_ht;
2375 
2376     /* Pointer to a frame level memory holding the tile-id of each ctb.
2377     Stride is = 1 + (num ctbs in a ctb-row) + 1
2378     Height is = 1 + (num ctbs in a ctb-col)
2379     (the stride is also stored in i4_tile_id_ctb_map_stride below) */
2380     WORD32 *pi4_tile_id_map;
2381 
2382     /* stride of pi4_tile_id_map, in units of ctb */
2383     WORD32 i4_tile_id_ctb_map_stride;
2384 
2385 } frm_ctb_ctxt_t;
2386 
2387 /**
2388 ******************************************************************************
2389  *  @brief  ME Job Queue desc: input / output dependency description of a job
2390 ******************************************************************************
2391  */
2392 typedef struct
2393 {
2394     /** Number of output dependencies which need to be set after
2395      *  current job is complete,
2396      *  should be less than or equal to MAX_OUT_DEP defined in
2397      *  ihevce_multi_thrd_structs.h
          *  (MAX_OUT_DEP is also the size of ai4_out_dep_unit_off below)
2398      */
2399     WORD32 i4_num_output_dep;
2400 
2401     /** Array of offsets from the start of output dependent layer's Job Ques
2402      *  which are dependent on current Job to be complete
2403      */
2404     WORD32 ai4_out_dep_unit_off[MAX_OUT_DEP];
2405 
2406     /** Number of input dependencies to be resolved for current job to start:
2407      *  this many jobs in the lower layer should be complete before
2408      *  the current JOB can start
2409      */
2410     WORD32 i4_num_inp_dep;
2411 
2412 } multi_thrd_me_job_q_prms_t;
2413 
2414 /**
2415  *  @brief  structure in which recon data
2416  *          and related parameters are sent from Encoder
2417  */
2418 typedef struct
2419 {
2420     /** Kept for maintaining backwards compatibility in future */
2421     WORD32 i4_size;
2422 
2423     /** Buffer id for the current buffer */
2424     WORD32 i4_buf_id;
2425 
2426     /** POC of the current buffer */
2427     WORD32 i4_poc;
2428 
2429     /** End flag to communicate this is last frame output from encoder */
2430     WORD32 i4_end_flag;
2431 
2432     /** End flag to communicate to the encoder that this is the last buffer from
2433         the application. 1 - Last buf, 0 - Not last buffer. No other values are
2434         supported. Application has to set the appropriate value before queuing
         in the encoder queue */
2435 
2436     WORD32 i4_is_last_buf;
2437 
2438     /** Recon luma buffer pointer */
2439     void *pv_y_buf;
2440 
2441     /** Recon cb buffer pointer */
2442     void *pv_cb_buf;
2443 
2444     /** Recon cr buffer pointer */
2445     void *pv_cr_buf;
2446 
2447     /** Luma size; NOTE(review): presumably a pixel count per the field name - confirm **/
2448     WORD32 i4_y_pixels;
2449 
2450     /** Chroma size; NOTE(review): presumably a pixel count per the field name - confirm **/
2451     WORD32 i4_uv_pixels;
2452 
2453 } iv_enc_recon_data_buffs_t;
2454 
2455 /**
2456 ******************************************************************************
2457  *  @brief  Multi Thread context structure
2458 ******************************************************************************
2459  */
2460 typedef struct
2461 {
2462     /* Flag to indicate to enc and pre-enc thrds that app has sent force end cmd*/
2463     WORD32 i4_force_end_flag;
2464 
2465     /** Force all active threads flag
2466       * This flag will be set to 1 if all Number of cores givento the encoder
2467       * is less than or Equal to MAX_NUM_CORES_SEQ_EXEC. In this mode
2468       * All pre enc threads and enc threads will run of the same cores with
2469       * time sharing ar frame level
2470       */
2471     WORD32 i4_all_thrds_active_flag;
2472 
2473     /** Flag to indicate that core manager has been configured to enable
2474      * sequential execution
2475      */
2476     WORD32 i4_seq_mode_enabled_flag;
2477     /*-----------------------------------------------------------------------*/
2478     /*--------- Params related to encode group  -----------------------------*/
2479     /*-----------------------------------------------------------------------*/
2480 
2481     /** Number of processing threads created runtime in encode group */
2482     WORD32 i4_num_enc_proc_thrds;
2483 
2484     /** Number of processing threads active for a given frame
2485      * This value will be monitored at frame level, so as to
2486      * have provsion for increasing / decreasing threads
2487      * based on Load balance b/w stage in encoder
2488      */
2489     WORD32 i4_num_active_enc_thrds;
2490     /**  Job Queue Memory encode */
2491     job_queue_t *ps_job_q_enc[PING_PONG_BUF];
2492 
2493     /** Array of Job Queue handles of enc group for ping and pong instance*/
2494     job_queue_handle_t as_job_que_enc_hdls[NUM_ENC_JOBS_QUES][PING_PONG_BUF];
2495 
2496     /** Mutex for ensuring thread safety of the access of Job queues in encode group */
2497     void *pv_job_q_mutex_hdl_enc_grp_me;
2498 
2499     /** Mutex for ensuring thread safety of the access of Job queues in encode group */
2500     void *pv_job_q_mutex_hdl_enc_grp_enc_loop;
2501 
2502     /** Array of Semaphore handles (for each frame processing threads ) */
2503     void *apv_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC];
2504 
2505     /** Array for communcating start processing from master thread to indivisual
2506     *   threads in Enocde group of threads
2507     *  till 0 : wait
2508     *  1  : start
2509     * After reading the start signal, corresponding thread hould reset it to 0
2510     */
2511     WORD32 ai4_enc_frm_proc_start[MAX_NUM_FRM_PROC_THRDS_ENC];
2512 
2513     /** Note: For Enc loop pass similar memory is used whihc is part of frm_proc_ent_cod_ctxt_t
2514     *  for Row level Sync hence not explicitly declared here
2515     */
2516 
2517     /** Array for ME to export the Job que dependency for all layers */
2518     multi_thrd_me_job_q_prms_t as_me_job_q_prms[MAX_NUM_HME_LAYERS][MAX_NUM_VERT_UNITS_FRM];
2519 
2520     /* pointer to the mutex handle*/
2521     void *apv_mutex_handle[MAX_NUM_ME_PARALLEL];
2522 
2523     /* pointer to the mutex handle for frame init*/
2524     void *apv_mutex_handle_me_end[MAX_NUM_ME_PARALLEL];
2525 
2526     /* pointer to the mutex handle for frame init*/
2527     void *apv_mutex_handle_frame_init[MAX_NUM_ENC_LOOP_PARALLEL];
2528 
2529     /*pointer to the mutex handle*/
2530     void *apv_post_enc_mutex_handle[MAX_NUM_ENC_LOOP_PARALLEL];
2531 
2532     /* Flag to indicate that master has done ME init*/
2533     WORD32 ai4_me_master_done_flag[MAX_NUM_ME_PARALLEL];
2534 
2535     /* Counter to keep track of me num of thrds exiting critical section*/
2536     WORD32 me_num_thrds_exited[MAX_NUM_ME_PARALLEL];
2537 
2538     /* Flag to indicate that master has done the frame init*/
2539     WORD32 enc_master_done_frame_init[MAX_NUM_ENC_LOOP_PARALLEL];
2540 
2541     /* Counter to keep track of num of thrds exiting critical section*/
2542     WORD32 num_thrds_exited[MAX_NUM_ENC_LOOP_PARALLEL];
2543 
2544     /* Counter to keep track of num of thrds exiting critical section for re-encode*/
2545     WORD32 num_thrds_exited_for_reenc;
2546 
2547     /* Array to store the curr qp for ping and pong instance*/
2548     WORD32 cur_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2549 
2550     /* Pointers to store output buffers for ping and pong instance*/
2551     frm_proc_ent_cod_ctxt_t *ps_curr_out_enc_grp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2552 
2553     /* Pointer to store input buffers for me*/
2554     pre_enc_me_ctxt_t *aps_cur_inp_me_prms[MAX_NUM_ME_PARALLEL];
2555 
2556     /*pointers to store output buffers from me */
2557     me_enc_rdopt_ctxt_t *aps_cur_out_me_prms[NUM_ME_ENC_BUFS];
2558 
2559     /*pointers to store input buffers to enc-rdopt */
2560     me_enc_rdopt_ctxt_t *aps_cur_inp_enc_prms[NUM_ME_ENC_BUFS];
2561 
2562     /*Shared memory for Sub Pic rc */
2563     /*Qscale calulated by sub pic rc bit control for Intra Pic*/
2564     WORD32 ai4_curr_qp_estimated[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2565 
2566     /*Header bits error by sub pic rc bit control*/
2567     float af_acc_hdr_bits_scale_err[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2568 
2569     /*Accumalated ME SAD for NCTB*/
2570     LWORD64 ai8_nctb_me_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2571                            [MAX_NUM_FRM_PROC_THRDS_ENC];
2572 
2573     /*Accumalated IPE SAD for NCTB*/
2574     LWORD64 ai8_nctb_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2575                             [MAX_NUM_FRM_PROC_THRDS_ENC];
2576 
2577     /*Accumalated L0 IPE SAD for NCTB*/
2578     LWORD64 ai8_nctb_l0_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2579                                [MAX_NUM_FRM_PROC_THRDS_ENC];
2580 
2581     /*Accumalated Activity Factor for NCTB*/
2582     LWORD64 ai8_nctb_act_factor[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2583                                [MAX_NUM_FRM_PROC_THRDS_ENC];
2584 
2585     /*Accumalated Ctb counter across all threads*/
2586     WORD32 ai4_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2587 
2588     /*Bits threshold reached for across all threads*/
2589     WORD32 ai4_threshold_reached[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2590 
2591     /*To hold the Previous In-frame RC chunk QP*/
2592     WORD32 ai4_prev_chunk_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2593 
2594     /*Accumalated Ctb counter across all threads*/
2595     WORD32 ai4_acc_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2596 
2597     /*Flag to check if thread is initialized */
2598     WORD32 ai4_thrd_id_valid_flag[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2599                                  [MAX_NUM_FRM_PROC_THRDS_ENC];
2600 
2601     /*Accumalated Ctb counter across all threads*/
2602     //WORD32 ai4_acc_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES][MAX_NUM_FRM_PROC_THRDS_ENC];
2603 
2604     /*Accumalated bits consumed for nctbs across all threads*/
2605     LWORD64 ai8_nctb_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2606                                   [MAX_NUM_FRM_PROC_THRDS_ENC];
2607 
2608     /*Accumalated hdr bits consumed for nctbs across all threads*/
2609     LWORD64 ai8_nctb_hdr_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2610                                       [MAX_NUM_FRM_PROC_THRDS_ENC];
2611 
2612     /*Accumalated l0 mpm bits consumed for nctbs across all threads*/
2613     LWORD64 ai8_nctb_mpm_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2614                                       [MAX_NUM_FRM_PROC_THRDS_ENC];
2615 
2616     /*Accumalated bits consumed for total ctbs across all threads*/
2617     LWORD64 ai8_acc_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2618                                  [MAX_NUM_FRM_PROC_THRDS_ENC];
2619 
2620     /*Accumalated bits consumed for total ctbs across all threads*/
2621     LWORD64 ai8_acc_bits_mul_qs_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2622                                         [MAX_NUM_FRM_PROC_THRDS_ENC];
2623 
2624     /*Qscale calulated by sub pic rc bit control */
2625     WORD32 ai4_curr_qp_acc[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2626     /* End of Sub pic rc variables */
2627 
2628     /* Pointers to store input (only L0 IPE)*/
2629     pre_enc_L0_ipe_encloop_ctxt_t *aps_cur_L0_ipe_inp_prms[MAX_NUM_ME_PARALLEL];
2630 
2631     /** Slice header parameters   */
2632     /** temporarily store the slice header parameters in enc-loop thread
2633     which will be copied to curr_out when buffer is aquired */
2634     //slice_header_t      as_slice_hdr[PING_PONG_BUF];
2635 
2636     /* Array to store input buffer ids for ping and pong instances*/
2637     //WORD32 in_buf_id[PING_PONG_BUF];
2638 
2639     /* Array tp store L0 IPE input buf ids*/
2640     WORD32 ai4_in_frm_l0_ipe_id[MAX_NUM_ME_PARALLEL];
2641 
2642     /* Array to store output buffer ids for ping and pong instances*/
2643     WORD32 out_buf_id[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];  //[PING_PONG_BUF];
2644 
2645     /* Variable to indicate ping and pong instance for each thread*/
2646     WORD32 ping_pong[MAX_NUM_FRM_PROC_THRDS_ENC];
2647 
2648     /* Array of pointers to store the recon buf pointers*/
2649     iv_enc_recon_data_buffs_t
2650         *ps_recon_out[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];  //[PING_PONG_BUF];
2651 
2652     /* Array of pointers to frame recon for ping and pong instances*/
2653     recon_pic_buf_t *ps_frm_recon[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES];
2654 
2655     /* Array of recon buffer ids for ping and pong instance*/
2656     WORD32 recon_buf_id[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES];  //[PING_PONG_BUF];
2657 
2658     /* End flag to signal end of all the frames in me*/
2659     WORD32 me_end_flag;
2660 
2661     /* End flag to signal end of all the frames in enc*/
2662     WORD32 enc_end_flag;
2663 
2664     /* Counter to keep track of num thrds done*/
2665     WORD32 num_thrds_done;
2666 
2667     /* Flags to keep track of dumped ping pong recon buffer*/
2668     WORD32 is_recon_dumped[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];  //[PING_PONG_BUF];
2669 
2670     /* Flags to keep track of dumped ping pong output buffer*/
2671     WORD32 is_out_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];  //[PING_PONG_BUF];
2672 
2673     /* flag to produce output buffer by the thread who ever is finishing
2674     enc-loop processing first, so that the entropy thread can start processing */
2675     WORD32 ai4_produce_outbuf[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2676 
2677     /* Flags to keep track of dumped ping pong input buffer*/
2678     WORD32 is_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL];
2679 
2680     /* Flags to keep track of dumped ping pong L0 IPE to enc buffer*/
2681     WORD32 is_L0_ipe_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL];
2682 
2683     /** Dependency manager for checking whether prev. EncLoop done before
2684         current frame EncLoop starts */
2685     void *apv_dep_mngr_prev_frame_done[MAX_NUM_ENC_LOOP_PARALLEL];
2686 
2687     /** Dependency manager for checking whether prev. EncLoop done before
2688         re-encode of the current frame */
2689     void *pv_dep_mngr_prev_frame_enc_done_for_reenc;
2690 
2691     /** Dependency manager for checking whether prev. me done before
2692         current frame me starts */
2693     void *apv_dep_mngr_prev_frame_me_done[MAX_NUM_ME_PARALLEL];
2694 
2695     /** ME coarsest layer JOB queue type */
2696     WORD32 i4_me_coarsest_lyr_type;
2697 
2698     /** number of encloop frames running in parallel */
2699     WORD32 i4_num_enc_loop_frm_pllel;
2700 
2701     /** number of me frames running in parallel */
2702     WORD32 i4_num_me_frm_pllel;
2703 
2704     /*-----------------------------------------------------------------------*/
2705     /*--------- Params related to pre-enc stage -----------------------------*/
2706     /*-----------------------------------------------------------------------*/
2707 
2708     /** Number of processing threads created runtime in pre encode group */
2709     WORD32 i4_num_pre_enc_proc_thrds;
2710 
    /** Number of processing threads active for a given frame
     * This value will be monitored at frame level, so as to
     * have provision for increasing / decreasing threads
     * based on load balance between stages in the encoder
     */
2716     WORD32 i4_num_active_pre_enc_thrds;
2717     /** number of threads that have done processing the current frame
2718         Use to find out the last thread that is coming out of pre-enc processing
2719         so that the last thread can do de-init of pre-enc stage */
2720     WORD32 ai4_num_thrds_processed_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2721 
    /** number of threads that have done processing the current frame
        Used to find out the first and the last thread in order to do the qp query. As the query
        is not read only, the query should be done only once, by the thread that comes first,
        and the other threads should get the same value */
2726     WORD32 ai4_num_thrds_processed_L0_ipe_qp_init[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2727 
    /** number of threads that have done processing decomp_intra
        Used to find out the last thread that is coming out so that
        the last thread can set flag for decomp_pre_intra_finish */
2731     WORD32 ai4_num_thrds_processed_decomp[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2732 
    /** number of threads that have done processing coarse_me
        Used to find out the last thread that is coming out so that
        the last thread can set flag for coarse_me_finish */
2736     WORD32 ai4_num_thrds_processed_coarse_me[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2737 
    /* Flag to indicate if the current instance (frame)'s decomp_pre_intra and coarse_me are done.
       Used to check if the previous frame is done processing decomp_pre_intra and coarse_me */
2740     WORD32 ai4_decomp_coarse_me_complete_flag[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2741 
2742     /** Dependency manager for checking whether prev. frame decomp_intra
2743         done before current frame  decomp_intra starts */
2744     void *pv_dep_mngr_prev_frame_pre_enc_l1;
2745 
2746     /** Dependency manager for checking whether prev. frame L0 IPE done before
2747         current frame L0 IPE starts */
2748     void *pv_dep_mngr_prev_frame_pre_enc_l0;
2749 
2750     /** Dependency manager for checking whether prev. frame coarse_me done before
2751         current frame coarse_me starts */
2752     void *pv_dep_mngr_prev_frame_pre_enc_coarse_me;
2753 
2754     /** flag to indicate if pre_enc_init is done for current frame */
2755     WORD32 ai4_pre_enc_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2756 
2757     /** flag to indicate if pre_enc_hme_init is done for current frame */
2758     WORD32 ai4_pre_enc_hme_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2759 
2760     /** flag to indicate if pre_enc_deinit is done for current frame */
2761     WORD32 ai4_pre_enc_deinit_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2762 
2763     /** Flag to indicate the end of processing when all the frames are done processing */
2764     WORD32 ai4_end_flag_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2765 
2766     /** Flag to indicate the control blocking mode indicating input command to pre-enc
2767     group should be blocking or unblocking */
2768     WORD32 i4_ctrl_blocking_mode;
2769 
2770     /** Current input pointer */
2771     ihevce_lap_enc_buf_t *aps_curr_inp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2772 
2773     WORD32 i4_last_inp_buf;
2774 
2775     /* buffer id for input buffer */
2776     WORD32 ai4_in_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2777 
2778     /** Current output pointer */
2779     pre_enc_me_ctxt_t *aps_curr_out_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2780 
2781     /*Current L0 IPE to enc output pointer */
2782     pre_enc_L0_ipe_encloop_ctxt_t *ps_L0_IPE_curr_out_pre_enc;
2783 
2784     /** buffer id for output buffer */
2785     WORD32 ai4_out_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2786 
2787     /** buffer id for L0 IPE enc buffer*/
2788     WORD32 i4_L0_IPE_out_buf_id;
2789 
2790     /** current frame recon pointer */
2791     recon_pic_buf_t *aps_frm_recon_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2792 
2793     /** Current picture Qp */
2794     WORD32 ai4_cur_frame_qp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2795 
    /** Decomp layer buffer indices */
2797     WORD32 ai4_decomp_lyr_buf_idx[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2798 
    /* Since it is guaranteed that the current frame's IPE will not start unless the previous
       frame's IPE is completely done, an array of MAX_PRE_ENC_STAGGER might not be required */
2801     WORD32 i4_qp_update_l0_ipe;
2802 
2803     /** Current picture encoded is the last picture to be encoded flag */
2804     WORD32 i4_last_pic_flag;
2805 
2806     /** Mutex for ensuring thread safety of the access of Job queues in decomp stage */
2807     void *pv_job_q_mutex_hdl_pre_enc_decomp;
2808 
2809     /** Mutex for ensuring thread safety of the access of Job queues in HME group */
2810     void *pv_job_q_mutex_hdl_pre_enc_hme;
2811 
2812     /** Mutex for ensuring thread safety of the access of Job queues in l0 ipe stage */
2813     void *pv_job_q_mutex_hdl_pre_enc_l0ipe;
2814 
2815     /** mutex handle for pre-enc init */
2816     void *pv_mutex_hdl_pre_enc_init;
2817 
2818     /** mutex handle for pre-enc decomp deinit */
2819     void *pv_mutex_hdl_pre_enc_decomp_deinit;
2820 
2821     /** mutex handle for pre enc hme init */
2822     void *pv_mutex_hdl_pre_enc_hme_init;
2823 
2824     /** mutex handle for pre-enc hme deinit */
2825     void *pv_mutex_hdl_pre_enc_hme_deinit;
2826 
    /* qp query before L0 IPE is done by multiple frames */
2828     /** mutex handle for L0 ipe(pre-enc init)*/
2829     void *pv_mutex_hdl_l0_ipe_init;
2830 
2831     /** mutex handle for pre-enc deinit */
2832     void *pv_mutex_hdl_pre_enc_deinit;
2833 
2834     /** Array of Semaphore handles (for each frame processing threads ) */
2835     void *apv_pre_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC];
2836     /** array which will tell the number of CTB processed in each row,
2837     *   used for Row level sync in IPE pass
2838     */
2839     WORD32 ai4_ctbs_in_row_proc_ipe_pass[MAX_NUM_CTB_ROWS_FRM];
2840 
2841     /**  Job Queue Memory pre encode */
2842     job_queue_t *aps_job_q_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2843 
2844     /** Array of Job Queue handles enc group */
2845     job_queue_handle_t as_job_que_preenc_hdls[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]
2846                                              [NUM_PRE_ENC_JOBS_QUES];
2847 
2848     /* accumulate intra sad across all thread to get qp before L0 IPE*/
2849     WORD32 ai4_intra_satd_acc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]
2850                              [MAX_NUM_FRM_PROC_THRDS_PRE_ENC];
2851 
2852     WORD32 i4_delay_pre_me_btw_l0_ipe;
2853 
2854     /*** This variable has the maximum delay between hme and l0ipe ***/
2855     /*** This is used for wrapping around L0IPE index ***/
2856     WORD32 i4_max_delay_pre_me_btw_l0_ipe;
2857 
2858     /* This is to register the handles of Dep Mngr b/w EncLoop and ME */
2859     /* This is used to delete the Mngr at the end                          */
2860     void *apv_dep_mngr_encloop_dep_me[NUM_ME_ENC_BUFS];
2861     /*flag to track buffer in me/enc que is produced or not*/
2862     WORD32 ai4_me_enc_buff_prod_flag[NUM_ME_ENC_BUFS];
2863 
2864     /*out buf que id for me */
2865     WORD32 ai4_me_out_buf_id[NUM_ME_ENC_BUFS];
2866 
2867     /*in buf que id for enc from me*/
2868     WORD32 i4_enc_in_buf_id[NUM_ME_ENC_BUFS];
2869 
2870     /* This is used to tell whether the free of recon buffers are done or not */
2871     WORD32 i4_is_recon_free_done;
2872 
2873     /* index for DVSR population */
2874     WORD32 i4_idx_dvsr_p;
2875     WORD32 aai4_l1_pre_intra_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]
2876                                  [(HEVCE_MAX_HEIGHT >> 1) / 8];
2877 
2878     WORD32 i4_rc_l0_qp;
2879 
2880     /* Used for mres single out cases. Checks whether a particular resolution is active or passive */
2881     /* Only one resolution should be active for mres_single_out case */
2882     WORD32 *pi4_active_res_id;
2883 
2884     /**
2885      * Sub Pic bit control mutex lock handle
2886      */
2887     void *pv_sub_pic_rc_mutex_lock_hdl;
2888 
2889     void *pv_sub_pic_rc_for_qp_update_mutex_lock_hdl;
2890 
2891     WORD32 i4_encode;
2892     WORD32 i4_in_frame_rc_enabled;
2893     WORD32 i4_num_re_enc;
2894 
2895 } multi_thrd_ctxt_t;
2896 
/**
 *  @brief    Structure to describe tile params
 *
 *  Combines frame-level tile configuration (enable flag, uniform-spacing
 *  flag, number of tile rows/columns, total number of tiles) with the
 *  description of one tile, referred to as the "current" tile: its id,
 *  its dimensions and the position of its first CTB.
 */
typedef struct
{
    /* flag to indicate tile encoding enabled/disabled */
    WORD32 i4_tiles_enabled_flag;

    /* flag to indicate uniform spacing of tiles */
    WORD32 i4_uniform_spacing_flag;

    /* number of tile columns and number of tile rows */
    WORD32 i4_num_tile_cols;
    WORD32 i4_num_tile_rows;

    /* Curr tile width and height */
    /* NOTE(review): presumably in samples, by contrast with the CTB-unit */
    /* members that follow - confirm                                      */
    WORD32 i4_curr_tile_width;
    WORD32 i4_curr_tile_height;

    /* Curr tile width and height in CTB units */
    WORD32 i4_curr_tile_wd_in_ctb_unit;
    WORD32 i4_curr_tile_ht_in_ctb_unit;

    /* frame resolution */
    //WORD32  i4_frame_width;  /* encode-width  */
    //WORD32  i4_frame_height; /* encode-height */

    /* total num of tiles "in frame" */
    WORD32 i4_num_tiles;

    /* Curr tile id. Assigned by raster scan order in a frame */
    WORD32 i4_curr_tile_id;

    /* x-pos of first ctb of the slice in ctb */
    /* y-pos of first ctb of the slice in ctb */
    /* NOTE(review): the wording says "slice"; since this structure describes */
    /* a tile it presumably means the first ctb of the current tile - confirm */
    WORD32 i4_first_ctb_x;
    WORD32 i4_first_ctb_y;

    /* x-pos of first ctb of the slice in samples */
    /* y-pos of first ctb of the slice in samples */
    /* NOTE(review): same "slice" vs "tile" wording question as above */
    WORD32 i4_first_sample_x;
    WORD32 i4_first_sample_y;

} ihevce_tile_params_t;
2941 
/**
******************************************************************************
 *  @brief  Encoder context structure
 *
 *  Aggregates the encoder state: per-bitrate parameter sets (VPS/SPS/PPS),
 *  the queue, memory-manager and semaphore contexts, recon buffer queues,
 *  module contexts, static and run-time parameters, reference lists of the
 *  pre-encode group, the multi-thread context and rate-control related
 *  bookkeeping.
******************************************************************************
 */

typedef struct
{
    /**
     *  vps parameters (one entry per bitrate instance)
     */
    vps_t as_vps[IHEVCE_MAX_NUM_BITRATES];

    /**
     *  sps parameters (one entry per bitrate instance)
     */
    sps_t as_sps[IHEVCE_MAX_NUM_BITRATES];

    /**
     *  pps parameters
     *  Required for each bitrate separately, mainly because
     *  init qp etc parameters need to be different for each instance
     */
    pps_t as_pps[IHEVCE_MAX_NUM_BITRATES];

    /**
     * Rate control mutex lock handle
     */
    void *pv_rc_mutex_lock_hdl;

    /** frame level cu analyse  buffer pointer for ME
     * ME will get ps_ctb_analyse structure populated with ps_cu pointers
     * pointing to ps_cu_analyse buffer from IPE.
      */
    //cu_analyse_t       *ps_cu_analyse_inter[PING_PONG_BUF];

    /**
      *  CTB frame context between encoder (producer) and entropy (consumer)
      */
    enc_q_ctxt_t s_enc_ques;

    /**
     *  Encoder memory manager ctxt
     */
    enc_mem_mngr_ctxt s_mem_mngr;

    /**
     * Semaphores of all the threads created in HLE
     * and Que handle for buffers b/w frame process and entropy
     */
    thrd_que_sem_hdl_t s_thrd_sem_ctxt;

    /**
     *  Reference / recon buffer Que pointer (one queue per bitrate instance)
     */
    recon_pic_buf_t **pps_recon_buf_q[IHEVCE_MAX_NUM_BITRATES];

    /**
     * Number of buffers in Recon buffer queue (one count per bitrate instance)
     */
    WORD32 ai4_num_buf_recon_q[IHEVCE_MAX_NUM_BITRATES];

    /**
     * Reference / recon buffer Que pointer for Pre Encode group
     * this will be just a container and no buffers will be allocated
     */
    recon_pic_buf_t **pps_pre_enc_recon_buf_q;

    /**
     * Number of buffers in the Pre Encode group recon buffer queue
     */
    WORD32 i4_pre_enc_num_buf_recon_q;

    /**
      * frame level CTB parameters and worst PU CU and TU in a CTB row
      */
    frm_ctb_ctxt_t s_frm_ctb_prms;

    /*
     * Module ctxt pointers of all modules
     */
    module_ctxt_t s_module_ctxt;

    /*
     * LAP static parameters
     */
    ihevce_lap_static_params_t s_lap_stat_prms;

    /*
     * Run time dynamic source params
     */

    ihevce_src_params_t s_runtime_src_prms;

    /*
     * Target params
     */
    ihevce_tgt_params_t s_runtime_tgt_params;

    /*
     *  Run time dynamic coding params
     */
    ihevce_coding_params_t s_runtime_coding_prms;

    /**
     * Pointer to static config params
     */
    ihevce_static_cfg_params_t *ps_stat_prms;

    /**
     * the following structure members used for copying recon buf info
     * in case of duplicate pics
     */

    /**
     * Array of reference picture list entries for pre enc group,
     * stored by value (recon_pic_buf_t structures)
     * Separate list for ping_pong instances
     * 2=> ref_pic_list0 and ref_pic_list1
     */
    recon_pic_buf_t as_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2]
                                        [HEVCE_MAX_REF_PICS * 2];

    /**
     * Array of reference picture list pointers for pre enc group
     * (same dimensions as as_pre_enc_ref_lists, but holding pointers)
     * Separate list for ping_pong instances
     * 2=> ref_pic_list0 and ref_pic_list1
     */
    recon_pic_buf_t *aps_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2]
                                          [HEVCE_MAX_REF_PICS * 2];

    /**
     *  Number of input frames per input queue
     */
    WORD32 i4_num_input_buf_per_queue;

    /**
     *  poc of the Clean Random Access (CRA) I pic
     */
    WORD32 i4_cra_poc;

    /** Number of ref pics in list 0 for any given frame */
    WORD32 i4_num_ref_l0;

    /** Number of ref pics in list 1 for any given frame */
    WORD32 i4_num_ref_l1;

    /** Number of active ref pics in list 0 for cur frame */
    WORD32 i4_num_ref_l0_active;

    /** Number of active ref pics in list 1 for cur frame */
    WORD32 i4_num_ref_l1_active;

    /** Number of ref pics in list 0 for any given frame pre encode stage */
    WORD32 i4_pre_enc_num_ref_l0;

    /** Number of ref pics in list 1 for any given frame  pre encode stage */
    WORD32 i4_pre_enc_num_ref_l1;

    /** Number of active ref pics in list 0 for cur frame  pre encode stage */
    WORD32 i4_pre_enc_num_ref_l0_active;

    /** Number of active ref pics in list 1 for cur frame  pre encode stage */
    WORD32 i4_pre_enc_num_ref_l1_active;

    /**
     *  working mem to be used for frm level activities
     * One example is interpolation at frame level. This requires memory
     * of (max width + 16) * (max_height + 7 + 16 ) * 2 bytes.
     * This is so since we generate interp output for max_width + 16 x
     * max_height + 16, and then the intermediate output is 16 bit and
     * is max_height + 16 + 7 rows
     */
    UWORD8 *pu1_frm_lvl_wkg_mem;

    /**
     * Multi thread processing context
     * This memory contains the variables and pointers shared across threads
     * in enc-group and pre-enc-group
     */
    multi_thrd_ctxt_t s_multi_thrd;

    /** I/O Queues created status */
    WORD32 i4_io_queues_created;

    /* NOTE(review): presumably set once end of processing is reached - confirm */
    WORD32 i4_end_flag;

    /** number of bit-rate instances running */
    WORD32 i4_num_bitrates;

    /** number of enc frames running in parallel */
    WORD32 i4_num_enc_loop_frm_pllel;

    /* ref bitrate id */
    WORD32 i4_ref_mbr_id;

    /* Flag to indicate to the app that end of processing has been reached */
    WORD32 i4_frame_limit_reached;

    /* Structure to store the function selector
     * pointers for common and encoder */
    func_selector_t s_func_selector;

    /* ref resolution id */
    WORD32 i4_resolution_id;

    /* hle context */
    void *pv_hle_ctxt;

    /* NOTE(review): rate-control quantisation data judging by the type name;
     * its contents are not visible here - confirm */
    rc_quant_t s_rc_quant;
    /*ME cost of P pic stored for the next ref B pic*/
    //LWORD64 i8_acc_me_cost_of_p_pic_for_b_pic[2];

    /* NOTE(review): counters of pictures encoded so far judging by the names;
     * the meaning of the "_dbp" variant is not visible here - confirm */
    UWORD32 u4_cur_pic_encode_cnt;
    UWORD32 u4_cur_pic_encode_cnt_dbp;
    /* past 2 p pics high complexity status */
    WORD32 ai4_is_past_pic_complex[2];

    /* NOTE(review): the reset related members below carry no description in
     * this header; their exact semantics must be taken from the code that
     * updates them */
    WORD32 i4_is_I_reset_done;
    WORD32 i4_past_RC_reset_count;

    WORD32 i4_future_RC_reset;

    WORD32 i4_past_RC_scd_reset_count;

    WORD32 i4_future_RC_scd_reset;
    WORD32 i4_poc_reset_values;

    /* Place holder to store the length of LAP in first pass */
    /** Number of frames to look-ahead for RC by -
     * counts 2 fields as one frame for interlaced
     */
    WORD32 i4_look_ahead_frames_in_first_pass;

    /* NOTE(review): no description available for the two members below;
     * meaning of the 2 array entries is not visible here */
    WORD32 ai4_mod_factor_derived_by_variance[2];
    float f_strength;

    /* for B frames use the avg activity
    from the layer 0 (I or P) which is the average over
    Lap2 window */
    /* NOTE(review): the 8x8 array has 2 entries while the 16x16 and 32x32
     * arrays have 3; what the index selects is not visible here */
    LWORD64 ai8_lap2_8x8_avg_act_from_T0[2];

    LWORD64 ai8_lap2_16x16_avg_act_from_T0[3];

    LWORD64 ai8_lap2_32x32_avg_act_from_T0[3];

    /* for B frames use the log of avg activity
    from the layer 0 (I or P) which is the average over
    Lap2 window */
    /* (array sizes mirror the non-log arrays above: 2, 3 and 3 entries) */
    long double ald_lap2_8x8_log_avg_act_from_T0[2];

    long double ald_lap2_16x16_log_avg_act_from_T0[3];

    long double ald_lap2_32x32_log_avg_act_from_T0[3];

    /* Pointer to tile params.
     * NOTE(review): "base" suggests the first element of an array of
     * ihevce_tile_params_t - confirm */
    ihevce_tile_params_t *ps_tile_params_base;

    /* Width of each tile column (up to MAX_TILE_COLUMNS entries).
     * NOTE(review): units are not visible here - confirm */
    WORD32 ai4_column_width_array[MAX_TILE_COLUMNS];

    /* Height of each tile row (up to MAX_TILE_ROWS entries).
     * NOTE(review): units are not visible here - confirm */
    WORD32 ai4_row_height_array[MAX_TILE_ROWS];

    /* Architecture */
    IV_ARCH_T e_arch_type;

    /* NOTE(review): presumably non-zero when a popcount instruction can be
     * used on the running target - confirm */
    UWORD8 u1_is_popcnt_available;

    WORD32 i4_active_scene_num;

    /* NOTE(review): rate-control bookkeeping for frames encoded in parallel
     * judging by the names; ai4_rc_query holds one value per bitrate
     * instance - confirm semantics */
    WORD32 i4_max_fr_enc_loop_parallel_rc;
    WORD32 ai4_rc_query[IHEVCE_MAX_NUM_BITRATES];
    WORD32 i4_active_enc_frame_id;

    /**
    * LAP interface ctxt pointer
    */
    void *pv_lap_interface_ctxt;

    /* If enabled, enables blu ray compatibility of the output */
    WORD32 i4_blu_ray_spec;

} enc_ctxt_t;
3222 
/**
******************************************************************************
*  @brief  This struct contains the inter CTB params needed for the decision
*   of the best inter CU results
*
*   It groups the prediction buffer manager, CTB position, buffer pointers
*   and strides, reference list pointers, weighted-input data, lambda
*   values and a set of per-frame / per-CU control values.
******************************************************************************
*/
typedef struct
{
    /* Prediction buffer manager */
    hme_pred_buf_mngr_t s_pred_buf_mngr;

    /** X and y offset of ctb w.r.t. start of pic */
    WORD32 i4_ctb_x_off;
    WORD32 i4_ctb_y_off;

    /**
     * Pred buffer ptr, updated inside subpel refinement process. This
     * location passed to the leaf fxn for copying the winner pred buf
     */
    UWORD8 **ppu1_pred;

    /** Working mem passed to leaf fxns */
    UWORD8 *pu1_wkg_mem;

    /** prediction buffer stride for leaf fxns to copy the pred winner buf */
    WORD32 i4_pred_stride;

    /** Stride of input buf, updated inside subpel fxn */
    WORD32 i4_inp_stride;

    /** stride of recon buffer */
    WORD32 i4_rec_stride;

    /** Indicates if bi dir is enabled or not */
    WORD32 i4_bidir_enabled;

    /**
     * Total number of references of current picture which is encoded
     */
    UWORD8 u1_num_ref;

    /** Recon Pic buffer pointers for L0 list */
    recon_pic_buf_t **pps_rec_list_l0;

    /** Recon Pic buffer pointers for L1 list */
    recon_pic_buf_t **pps_rec_list_l1;

    /**
     * These pointers point to modified input, one each for one ref idx.
     * Instead of weighting the reference, we weight the input with inverse
     * wt and offset for list 0 and list 1.
     */
    UWORD8 *apu1_wt_inp[2][MAX_NUM_REF];

    /* Since ME uses weighted inputs, we use reciprocal of the actual weights */
    /* that are signaled in the bitstream */
    WORD32 *pi4_inv_wt;
    WORD32 *pi4_inv_wt_shift_val;

    /* Map between L0 Reference indices and LC indices */
    WORD8 *pi1_past_list;

    /* Map between L1 Reference indices and LC indices */
    WORD8 *pi1_future_list;

    /**
     * Points to the non-weighted input data for the current CTB
     */
    UWORD8 *pu1_non_wt_inp;

    /**
     * Pred lambda and lambda q-shift
     * NOTE(review): the original description said these are stored "for all
     * the reference indices", but both members are scalars - confirm
     */
    WORD32 i4_lamda;

    UWORD8 u1_lamda_qshift;

    /* NOTE(review): presumably the log2 weight denominator used for weighted
     * prediction - confirm */
    WORD32 wpred_log_wdc;

    /**
     * Number of active references in l0
     */
    UWORD8 u1_num_active_ref_l0;

    /**
     * Number of active references in l1
     */
    UWORD8 u1_num_active_ref_l1;

    /** The max_depth for inter tu_tree */
    UWORD8 u1_max_tr_depth;

    /** Quality Preset */
    WORD8 i1_quality_preset;

    /** SATD or SAD */
    UWORD8 u1_use_satd;

    /* Frame level QP */
    /* NOTE(review): the member name suggests a quantiser step size scaled by */
    /* 2^8 rather than the QP itself - confirm                                */
    WORD32 i4_qstep_ls8;

    /* Pointer to an array of PU level src variances */
    UWORD32 *pu4_src_variance;

    WORD32 i4_alpha_stim_multiplier;

    /* NOTE(review): presumably non-zero when the current CU is classified as
     * noisy - confirm */
    UWORD8 u1_is_cu_noisy;

    /* NOTE(review): presumably per-partition sums of source samples and of
     * squared source samples - confirm */
    ULWORD64 *pu8_part_src_sigmaX;

    ULWORD64 *pu8_part_src_sigmaXSquared;

    UWORD8 u1_max_2nx2n_tu_recur_cands;

} inter_ctb_prms_t;
3337 
3338 /*****************************************************************************/
3339 /* Extern Variable Declarations                                              */
3340 /*****************************************************************************/
/* Read-only table of 8 doubles; only declared here, the definition is not in
 * this header.
 * NOTE(review): the name suggests lambda modifiers applied for I pictures;
 * what the 8 entries are indexed by is not visible here - confirm */
extern const double lamda_modifier_for_I_pic[8];
3342 
3343 /*****************************************************************************/
3344 /* Extern Function Declarations                                              */
3345 /*****************************************************************************/
3346 
3347 #endif /* _IHEVCE_ENC_STRUCTS_H_ */
3348