1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 ******************************************************************************
22 * @file ihevce_cabac_tu.c
23 *
24 * @brief
25 *  This file contains function definitions for cabac entropy coding of
26 *  transform units of HEVC syntax
27 *
28 * @author
29 *  ittiam
30 *
31 * @List of Functions
32 *  ihevce_cabac_encode_qp_delta()
33 *  ihevce_cabac_encode_last_coeff_x_y()
34 *  ihevce_encode_transform_tree()
35 *  ihevce_cabac_residue_encode()
36 *  ihevce_cabac_residue_encode_rdopt()
37 *  ihevce_cabac_residue_encode_rdoq()
38 *  ihevce_code_all_sig_coeffs_as_0_explicitly()
39 *  ihevce_find_new_last_csb()
40 *  ihevce_copy_backup_ctxt()
41 *  ihevce_estimate_num_bits_till_next_non_zero_coeff()
42 *
43 ******************************************************************************
44 */
45 
46 /*****************************************************************************/
47 /* File Includes                                                             */
48 /*****************************************************************************/
49 
50 /* System include files */
51 #include <stdio.h>
52 #include <string.h>
53 #include <stdlib.h>
54 #include <assert.h>
55 #include <stdarg.h>
56 #include <math.h>
57 
58 /* User include files */
59 #include "ihevc_typedefs.h"
60 #include "itt_video_api.h"
61 #include "ihevce_api.h"
62 
63 #include "rc_cntrl_param.h"
64 #include "rc_frame_info_collector.h"
65 #include "rc_look_ahead_params.h"
66 
67 #include "ihevc_defs.h"
68 #include "ihevc_structs.h"
69 #include "ihevc_platform_macros.h"
70 #include "ihevc_deblk.h"
71 #include "ihevc_itrans_recon.h"
72 #include "ihevc_chroma_itrans_recon.h"
73 #include "ihevc_chroma_intra_pred.h"
74 #include "ihevc_intra_pred.h"
75 #include "ihevc_inter_pred.h"
76 #include "ihevc_mem_fns.h"
77 #include "ihevc_padding.h"
78 #include "ihevc_weighted_pred.h"
79 #include "ihevc_sao.h"
80 #include "ihevc_resi_trans.h"
81 #include "ihevc_quant_iquant_ssd.h"
82 #include "ihevc_cabac_tables.h"
83 #include "ihevc_trans_macros.h"
84 #include "ihevc_trans_tables.h"
85 
86 #include "ihevce_defs.h"
87 #include "ihevce_lap_enc_structs.h"
88 #include "ihevce_multi_thrd_structs.h"
89 #include "ihevce_me_common_defs.h"
90 #include "ihevce_had_satd.h"
91 #include "ihevce_error_codes.h"
92 #include "ihevce_bitstream.h"
93 #include "ihevce_cabac.h"
94 #include "ihevce_rdoq_macros.h"
95 #include "ihevce_function_selector.h"
96 #include "ihevce_enc_structs.h"
97 #include "ihevce_entropy_structs.h"
98 #include "ihevce_cmn_utils_instr_set_router.h"
99 #include "ihevce_enc_loop_structs.h"
100 #include "ihevce_bs_compute_ctb.h"
101 #include "ihevce_global_tables.h"
102 #include "ihevce_common_utils.h"
103 #include "ihevce_trace.h"
104 
105 /*****************************************************************************/
106 /* Globals                                                                   */
107 /*****************************************************************************/
108 extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
109 
110 /**
111 ******************************************************************************
112 * @brief  LUT for deriving of last significant coeff prefix.
113 *
114 * @input   : last_significant_coeff
115 *
116 * @output  : last_significant_prefix (does not include the
117 *
118 * @remarks Look up tables taken frm HM-8.0-dev
119 ******************************************************************************
120 */
121 const UWORD8 gu1_hevce_last_coeff_prefix[32] = { 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
122                                                  8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9 };
123 
124 /**
125 *****************************************************************************
126 * @brief  LUT for deriving of last significant coeff suffix
127 *
128 * @input   : last significant prefix
129 *
130 * @output  : prefix code that needs to be subtracted from last_pos to get
131 *           suffix as per equation 7-55 in section 7.4.12.
132 *
133 *           It returns the following code for last_significant_prefix > 3
134 *            ((1 << ((last_significant_coeff_x_prefix >> 1) - 1))  *
135 *            (2 + (last_significant_coeff_x_prefix & 1))
136 *
137 *
138 * @remarks Look up tables taken frm HM-8.0-dev
139 *****************************************************************************
140 */
141 const UWORD8 gu1_hevce_last_coeff_prefix_code[10] = { 0, 1, 2, 3, 4, 6, 8, 12, 16, 24 };
142 
143 /**
144 *****************************************************************************
145 * @brief  returns raster index of 4x4 block for diag up-right/horz/vert scans
146 *
147 * @input   : scan type and scan idx
148 *
149 * @output  : packed y pos(msb 4bit) and x pos(lsb 2bit)
150 *
151 *****************************************************************************
152 */
153 const UWORD8 gu1_hevce_scan4x4[3][16] = {
154     /* diag up right */
155     { 0, 4, 1, 8, 5, 2, 12, 9, 6, 3, 13, 10, 7, 14, 11, 15 },
156 
157     /* horz */
158     { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
159 
160     /* vert */
161     { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }
162 };
163 
164 /**
165 *****************************************************************************
166 * @brief  returns context increment for sig coeff based on csbf neigbour
167 *         flags (bottom and right) and current coeff postion in 4x4 block
168 *         See section 9.3.3.1.4 for details on this context increment
169 *
170 * @input   : neigbour csbf flags(bit0:rightcsbf, bit1:bottom csbf)
171 *           coeff idx in raster order (0-15)
172 *
173 * @output  : context increment for sig coeff flag
174 *
175 *****************************************************************************
176 */
177 const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16] = {
178     /* nbr csbf = 0:  sigCtx = (xP+yP == 0) ? 2 : (xP+yP < 3) ? 1: 0 */
179     { 2, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
180 
181     /* nbr csbf = 1:  sigCtx = (yP == 0) ? 2 : (yP == 1) ? 1: 0      */
182     { 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
183 
184     /* nbr csbf = 2:  sigCtx = (xP == 0) ? 2 : (xP == 1) ? 1: 0      */
185     { 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0 },
186 
187     /* nbr csbf = 3:  sigCtx = 2                                     */
188     { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }
189 };
190 
191 const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
192 
193 /**
194 *****************************************************************************
195 * @brief  returns context increment for sig coeff for 4x4 tranform size as
196 *         per Table 9-39 in section 9.3.3.1.4
197 *
198 * @input   : coeff idx in raster order (0-15)
199 *
200 * @output  : context increment for sig coeff flag
201 *
202 *****************************************************************************
203 */
204 const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16] = { 0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 0 };
205 
206 #define DISABLE_ZCSBF 0
207 
208 #define TEST_CABAC_BITESTIMATE 0
209 
210 /*****************************************************************************/
211 /* Function Definitions                                                      */
212 /*****************************************************************************/
213 /**
214 ******************************************************************************
215 *
216 *  @brief Entropy encoding of qp_delta in a tu as per sec 9.3.2 Table 9-32
217 *
218 *  @par   Description
219 *  trunacted unary binarization is done based upto abs_delta of 5 and the rest
220 *  is coded as 0th order Exponential Golomb code
221 *
222 *  @param[inout]   ps_cabac
223 *  pointer to cabac encoding context (handle)
224 *
225 *  @param[in]      qp_delta
226 *  delta qp that needs to be encoded
227 *
228 *  @return      success or failure error code
229 *
230 ******************************************************************************
231 */
ihevce_cabac_encode_qp_delta(cab_ctxt_t * ps_cabac,WORD32 qp_delta)232 WORD32 ihevce_cabac_encode_qp_delta(cab_ctxt_t *ps_cabac, WORD32 qp_delta)
233 {
234     WORD32 qp_delta_abs = ABS(qp_delta);
235     WORD32 c_max = TU_MAX_QP_DELTA_ABS;
236     WORD32 ctxt_inc = IHEVC_CAB_QP_DELTA_ABS;
237     WORD32 ctxt_inc_max = CTXT_MAX_QP_DELTA_ABS;
238     WORD32 ret = IHEVCE_SUCCESS;
239 
240     /* qp_delta_abs is coded as combination of tunary and eg0 code  */
241     /* See Table 9-32 and Table 9-37 for details on cu_qp_delta_abs */
242     ret |= ihevce_cabac_encode_tunary(
243         ps_cabac, MIN(qp_delta_abs, c_max), c_max, ctxt_inc, 0, ctxt_inc_max);
244     if(qp_delta_abs >= c_max)
245     {
246         ret |= ihevce_cabac_encode_egk(ps_cabac, qp_delta_abs - c_max, 0);
247     }
248     AEV_TRACE("cu_qp_delta_abs", qp_delta_abs, ps_cabac->u4_range);
249 
250     /* code the qp delta sign flag */
251     if(qp_delta_abs)
252     {
253         WORD32 sign = (qp_delta < 0) ? 1 : 0;
254         ret |= ihevce_cabac_encode_bypass_bin(ps_cabac, sign);
255         AEV_TRACE("cu_qp_delta_sign", sign, ps_cabac->u4_range);
256     }
257 
258     return (ret);
259 }
260 
261 /**
262 ******************************************************************************
263 *
264 *  @brief Encodes position of the last coded coeff (in scan order) of TU
265 *
266 *  @par   Description
267 *  Entropy encode of last coded coeff of a TU as per section:7.3.13
268 *
269 *  @param[inout]   ps_cabac
270 *  pointer to cabac context (handle)
271 *
272 *  @param[in]      last_coeff_x
273 *  x co-ordinate of the last coded coeff of TU(in scan order)
274 *
275 *  @param[in]      last_coeff_y
276 *  x co-ordinate of the last coded coeff of TU (in scan order
277 *
278 *  @param[in]      log2_tr_size
279 *  transform block size corresponding to this node in quad tree
280 *
281 *  @param[in]      is_luma
282 *  indicates if residual block corresponds to luma or chroma block
283 *
284 *  @return      success or failure error code
285 *
286 ******************************************************************************
287 */
ihevce_cabac_encode_last_coeff_x_y(cab_ctxt_t * ps_cabac,WORD32 last_coeff_x,WORD32 last_coeff_y,WORD32 log2_tr_size,WORD32 is_luma)288 WORD32 ihevce_cabac_encode_last_coeff_x_y(
289     cab_ctxt_t *ps_cabac,
290     WORD32 last_coeff_x,
291     WORD32 last_coeff_y,
292     WORD32 log2_tr_size,
293     WORD32 is_luma)
294 {
295     WORD32 ret = IHEVCE_SUCCESS;
296 
297     WORD32 last_coeff_x_prefix;
298     WORD32 last_coeff_y_prefix;
299     WORD32 suffix, suf_length;
300     WORD32 c_max;
301     WORD32 ctxt_idx_x, ctxt_idx_y, ctx_shift;
302 
303     /* derive the prefix code */
304     last_coeff_x_prefix = gu1_hevce_last_coeff_prefix[last_coeff_x];
305     last_coeff_y_prefix = gu1_hevce_last_coeff_prefix[last_coeff_y];
306 
307     c_max = gu1_hevce_last_coeff_prefix[(1 << log2_tr_size) - 1];
308 
309     /* context increment as per section 9.3.3.1.2 */
310     if(is_luma)
311     {
312         WORD32 ctx_offset = (3 * (log2_tr_size - 2)) + ((log2_tr_size - 1) >> 2);
313 
314         ctxt_idx_x = IHEVC_CAB_COEFFX_PREFIX + ctx_offset;
315         ctxt_idx_y = IHEVC_CAB_COEFFY_PREFIX + ctx_offset;
316         ctx_shift = (log2_tr_size + 1) >> 2;
317     }
318     else
319     {
320         ctxt_idx_x = IHEVC_CAB_COEFFX_PREFIX + 15;
321         ctxt_idx_y = IHEVC_CAB_COEFFY_PREFIX + 15;
322         ctx_shift = log2_tr_size - 2;
323     }
324 
325     /* code the last_coeff_x_prefix as tunary binarized code */
326     ret |= ihevce_cabac_encode_tunary(
327         ps_cabac, last_coeff_x_prefix, c_max, ctxt_idx_x, ctx_shift, c_max);
328 
329     AEV_TRACE("last_coeff_x_prefix", last_coeff_x_prefix, ps_cabac->u4_range);
330 
331     /* code the last_coeff_y_prefix as tunary binarized code */
332     ret |= ihevce_cabac_encode_tunary(
333         ps_cabac, last_coeff_y_prefix, c_max, ctxt_idx_y, ctx_shift, c_max);
334 
335     AEV_TRACE("last_coeff_y_prefix", last_coeff_y_prefix, ps_cabac->u4_range);
336 
337     if(last_coeff_x_prefix > 3)
338     {
339         /* code the last_coeff_x_suffix as FLC bypass code */
340         suffix = last_coeff_x - gu1_hevce_last_coeff_prefix_code[last_coeff_x_prefix];
341 
342         suf_length = ((last_coeff_x_prefix - 2) >> 1);
343 
344         ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, suffix, suf_length);
345 
346         AEV_TRACE("last_coeff_x_suffix", suffix, ps_cabac->u4_range);
347     }
348 
349     if(last_coeff_y_prefix > 3)
350     {
351         /* code the last_coeff_y_suffix as FLC bypass code */
352         suffix = last_coeff_y - gu1_hevce_last_coeff_prefix_code[last_coeff_y_prefix];
353 
354         suf_length = ((last_coeff_y_prefix - 2) >> 1);
355 
356         ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, suffix, suf_length);
357 
358         AEV_TRACE("last_coeff_y_suffix", suffix, ps_cabac->u4_range);
359     }
360 
361     return (ret);
362 }
363 
/**
******************************************************************************
*
*  @brief Encodes a transform tree as per section 7.3.11
*
*  @par   Description
*  Uses recursion till a leaf node is reached where a transform unit
*  is coded. While recursing, split_transform_flag and parent chroma cbf
*  flags are coded before recursing to the leaf node
*
*  @param[inout]   ps_entropy_ctxt
*  pointer to entropy context (handle)
*
*  @param[in]      x0_ctb
*  x co-ordinate w.r.t ctb start of current tu node of coding tree
*
*  @param[in]      y0_ctb
*  y co-ordinate w.r.t ctb start of current tu node of coding tree
*
*  @param[in]      log2_tr_size
*  transform block size corresponding to this node in quad tree
*
*  @param[in]      tr_depth
*  current depth of the tree
*
*  @param[in]      blk_num
*  current block number in the quad tree (required for chroma 4x4 coding)
*
*  @param[in]      ps_enc_cu
*  pointer to the enc loop output of the cu whose transform units are coded
*
*  @return      success or failure error code
*
******************************************************************************
*/
ihevce_encode_transform_tree(entropy_context_t * ps_entropy_ctxt,WORD32 x0_ctb,WORD32 y0_ctb,WORD32 log2_tr_size,WORD32 tr_depth,WORD32 blk_num,cu_enc_loop_out_t * ps_enc_cu)399 WORD32 ihevce_encode_transform_tree(
400     entropy_context_t *ps_entropy_ctxt,
401     WORD32 x0_ctb,
402     WORD32 y0_ctb,
403     WORD32 log2_tr_size,
404     WORD32 tr_depth,
405     WORD32 blk_num,
406     cu_enc_loop_out_t *ps_enc_cu)
407 {
408     WORD32 ret = IHEVCE_SUCCESS;
409     sps_t *ps_sps = ps_entropy_ctxt->ps_sps;
410     WORD32 split_tr_flag;
411 
412     WORD32 tu_idx = ps_entropy_ctxt->i4_tu_idx;
413     tu_enc_loop_out_t *ps_enc_tu = ps_enc_cu->ps_enc_tu + tu_idx;
414 
415     /* TU size in pels */
416     WORD32 tu_size = 4 << ps_enc_tu->s_tu.b3_size;
417 
418     cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
419 
420     WORD32 max_tr_depth;
421     WORD32 is_intra = (ps_enc_cu->b1_pred_mode_flag == PRED_MODE_INTRA);
422     WORD32 log2_min_trafo_size, log2_max_trafo_size;
423     UWORD32 u4_bits_estimated_prev;
424 
425     WORD32 intra_nxn_pu = 0;
426     WORD32 ctxt_inc;
427     WORD32 cbf_luma = 0;
428     WORD32 ai4_cbf_cb[2] = { 0, 0 };
429     WORD32 ai4_cbf_cr[2] = { 0, 0 };
430     UWORD32 tu_split_bits = 0;
431     UWORD8 u1_is_422 = (ps_sps->i1_chroma_format_idc == 2);
432 
433     tu_split_bits = ps_cabac->u4_bits_estimated_q12;
434     /* intialize min / max transform sizes based on sps */
435     log2_min_trafo_size = ps_sps->i1_log2_min_transform_block_size;
436 
437     log2_max_trafo_size = log2_min_trafo_size + ps_sps->i1_log2_diff_max_min_transform_block_size;
438 
439     /* intialize max transform depth for intra / inter signalled in sps */
440     if(is_intra)
441     {
442         max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_intra;
443         intra_nxn_pu = ps_enc_cu->b3_part_mode == PART_NxN;
444     }
445     else
446     {
447         max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter;
448     }
449 
450     /* Sanity checks */
451     ASSERT(tr_depth <= 4);
452     ASSERT(log2_min_trafo_size >= 2);
453     ASSERT(log2_max_trafo_size <= 5);
454     ASSERT((tu_idx >= 0) && (tu_idx < ps_enc_cu->u2_num_tus_in_cu));
455     ASSERT((tu_size >= 4) && (tu_size <= (1 << log2_tr_size)));
456 
457     /* Encode split transform flag based on following conditions; sec 7.3.11 */
458     if((log2_tr_size <= log2_max_trafo_size) && (log2_tr_size > log2_min_trafo_size) &&
459        (tr_depth < max_tr_depth) && (!(intra_nxn_pu && (tr_depth == 0))))
460     {
461         /* encode the split transform flag, context derived as per Table9-37 */
462         ctxt_inc = IHEVC_CAB_SPLIT_TFM + (5 - log2_tr_size);
463 
464         /* split if actual tu size is smaller than target tu size */
465         split_tr_flag = tu_size < (1 << log2_tr_size);
466         u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
467         ret |= ihevce_cabac_encode_bin(ps_cabac, split_tr_flag, ctxt_inc);
468 
469         if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
470         {  // clang-format off
471             /*PIC INFO : populate cu split flag*/
472             ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_split_tu_flag +=
473                 (ps_cabac->u4_bits_estimated_q12 - u4_bits_estimated_prev);
474         }  // clang-format on
475 
476         AEV_TRACE("split_transform_flag", split_tr_flag, ps_cabac->u4_range);
477     }
478     else
479     {
480         WORD32 inter_split;
481         /*********************************************************************/
482         /*                                                                   */
483         /* split tr is implicitly derived as 1 if  (see section 7.4.10)      */
484         /*  a. log2_tr_size > log2_max_trafo_size                            */
485         /*  b. intra cu has NXN pu                                           */
486         /*  c. inter cu is not 2Nx2N && max_transform_hierarchy_depth_inter=0*/
487         /*                                                                   */
488         /* split tu is implicitly derived as 0 otherwise                     */
489         /*********************************************************************/
490         inter_split = (!is_intra) && (max_tr_depth == 0) && (tr_depth == 0) &&
491                       (ps_enc_cu->b3_part_mode != PART_2Nx2N);
492 
493         if((log2_tr_size > log2_max_trafo_size) || (intra_nxn_pu && (tr_depth == 0)) ||
494            (inter_split))
495         {
496             split_tr_flag = 1;
497         }
498         else
499         {
500             split_tr_flag = 0;
501         }
502     }
503     /*accumulate only tu tree bits*/
504     ps_cabac->u4_true_tu_split_flag_q12 += ps_cabac->u4_bits_estimated_q12 - tu_split_bits;
505 
506     /* Encode the cbf flags for chroma before the split as per sec 7.3.11   */
507     if(log2_tr_size > 2)
508     {
509         /* encode the cbf cb, context derived as per Table 9-37 */
510         ctxt_inc = IHEVC_CAB_CBCR_IDX + tr_depth;
511 
512         /* Note chroma cbf is coded for depth=0 or if parent cbf was coded */
513         if((tr_depth == 0) || (ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth - 1]) ||
514            (ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth - 1]))
515         {
516 #if CABAC_BIT_EFFICIENT_CHROMA_PARENT_CBF
517             /*************************************************************/
518             /* Bit-Efficient chroma cbf signalling                       */
519             /* if children nodes have 0 cbf parent cbf can be coded as 0 */
520             /* peeking through all the child nodes for cb to check if    */
521             /* parent can be coded as 0                                  */
522             /*************************************************************/
523             WORD32 tu_cnt = 0;
524             while(1)
525             {
526                 WORD32 trans_size = 1 << (ps_enc_tu[tu_cnt].s_tu.b3_size + 2);
527                 WORD32 tu_x = (ps_enc_tu[tu_cnt].s_tu.b4_pos_x << 2);
528                 WORD32 tu_y = (ps_enc_tu[tu_cnt].s_tu.b4_pos_y << 2);
529 
530                 ASSERT(tu_cnt < ps_enc_cu->u2_num_tus_in_cu);
531 
532                 if((ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf) || (ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1))
533                 {
534                     ai4_cbf_cb[0] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf;
535                     ai4_cbf_cb[1] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1;
536                     break;
537                 }
538 
539                 /* 8x8 parent has only one 4x4 valid chroma block for 420 */
540                 if(3 == log2_tr_size)
541                     break;
542 
543                 if((tu_x + trans_size == (x0_ctb + (1 << log2_tr_size))) &&
544                    (tu_y + trans_size == (y0_ctb + (1 << log2_tr_size))))
545                 {
546                     ai4_cbf_cb[0] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf;
547                     ai4_cbf_cb[1] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1;
548                     ASSERT(
549                         (0 == ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf) &&
550                         (0 == ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1));
551                     break;
552                 }
553 
554                 tu_cnt++;
555             }
556 #else
557             /* read cbf only when split is 0 (child node) else force cbf=1 */
558             ai4_cbf_cb[0] = (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cb_cbf;
559             ai4_cbf_cb[1] =
560                 (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cb_cbf_subtu1;
561 
562 #endif
563             if((u1_is_422) && ((!split_tr_flag) || (3 == log2_tr_size)))
564             {
565                 u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
566                 ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cb[0], ctxt_inc);
567 
568                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
569                 {  // clang-format off
570                     /*PIC INFO : Populate CBF cr bits*/
571                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
572                         (ps_cabac->u4_bits_estimated_q12 -
573                             u4_bits_estimated_prev);
574                 }  // clang-format on
575 
576                 AEV_TRACE("cbf_cb", ai4_cbf_cb[0], ps_cabac->u4_range);
577 
578                 u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
579                 ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cb[1], ctxt_inc);
580 
581                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
582                 {  // clang-format off
583                     /*PIC INFO : Populate CBF cr bits*/
584                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
585                         (ps_cabac->u4_bits_estimated_q12 -
586                             u4_bits_estimated_prev);
587                 }  // clang-format on
588 
589                 AEV_TRACE("cbf_cb", ai4_cbf_cb[1], ps_cabac->u4_range);
590             }
591             else
592             {
593                 u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
594                 ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cb[0] || ai4_cbf_cb[1], ctxt_inc);
595 
596                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
597                 {  // clang-format off
598                     /*PIC INFO : Populate CBF cr bits*/
599                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
600                         (ps_cabac->u4_bits_estimated_q12 -
601                             u4_bits_estimated_prev);
602                 }  // clang-format on
603 
604                 AEV_TRACE("cbf_cb", ai4_cbf_cb[0] || ai4_cbf_cb[1], ps_cabac->u4_range);
605             }
606         }
607         else
608         {
609             ai4_cbf_cb[0] = ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth - 1];
610             ai4_cbf_cb[1] = ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth - 1];
611         }
612 
613         if((tr_depth == 0) || (ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth - 1]) ||
614            (ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth - 1]))
615         {
616 #if CABAC_BIT_EFFICIENT_CHROMA_PARENT_CBF
617             /*************************************************************/
618             /* Bit-Efficient chroma cbf signalling                       */
619             /* if children nodes have 0 cbf parent cbf can be coded as 0 */
620             /* peeking through all the child nodes for cr to check if    */
621             /* parent can be coded as 0                                  */
622             /*************************************************************/
623             WORD32 tu_cnt = 0;
624             while(1)
625             {
626                 WORD32 trans_size = 1 << (ps_enc_tu[tu_cnt].s_tu.b3_size + 2);
627                 WORD32 tu_x = (ps_enc_tu[tu_cnt].s_tu.b4_pos_x << 2);
628                 WORD32 tu_y = (ps_enc_tu[tu_cnt].s_tu.b4_pos_y << 2);
629 
630                 ASSERT(tu_cnt < ps_enc_cu->u2_num_tus_in_cu);
631 
632                 if((ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf) || (ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1))
633                 {
634                     ai4_cbf_cr[0] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf;
635                     ai4_cbf_cr[1] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1;
636                     break;
637                 }
638 
639                 /* 8x8 parent has only one 4x4 valid chroma block for 420 */
640                 if(3 == log2_tr_size)
641                     break;
642 
643                 if((tu_x + trans_size == (x0_ctb + (1 << log2_tr_size))) &&
644                    (tu_y + trans_size == (y0_ctb + (1 << log2_tr_size))))
645                 {
646                     ai4_cbf_cr[0] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf;
647                     ai4_cbf_cr[1] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1;
648                     ASSERT(
649                         (0 == ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf) &&
650                         (0 == ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1));
651                     break;
652                 }
653 
654                 tu_cnt++;
655             }
656 #else
657             /* read cbf only when split is 0 (child node) else force cbf=1 */
658             ai4_cbf_cr[0] = (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cr_cbf;
659             ai4_cbf_cr[1] =
660                 (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cr_cbf_subtu1;
661 #endif
662 
663             if((u1_is_422) && ((!split_tr_flag) || (3 == log2_tr_size)))
664             {
665                 u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
666                 ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cr[0], ctxt_inc);
667 
668                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
669                 {  // clang-format off
670                     /*PIC INFO : Populate CBF cr bits*/
671                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
672                         (ps_cabac->u4_bits_estimated_q12 -
673                             u4_bits_estimated_prev);
674                 }  // clang-format on
675 
676                 AEV_TRACE("cbf_cr", ai4_cbf_cr[0], ps_cabac->u4_range);
677 
678                 u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
679                 ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cr[1], ctxt_inc);
680 
681                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
682                 {  // clang-format off
683                     /*PIC INFO : Populate CBF cr bits*/
684                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
685                         (ps_cabac->u4_bits_estimated_q12 -
686                             u4_bits_estimated_prev);
687                 }  // clang-format on
688 
689                 AEV_TRACE("cbf_cr", ai4_cbf_cr[1], ps_cabac->u4_range);
690             }
691             else
692             {
693                 u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
694                 ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cr[0] || ai4_cbf_cr[1], ctxt_inc);
695 
696                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
697                 {  // clang-format off
698                     /*PIC INFO : Populate CBF cr bits*/
699                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
700                         (ps_cabac->u4_bits_estimated_q12 -
701                             u4_bits_estimated_prev);
702                 }  // clang-format on
703 
704                 AEV_TRACE("cbf_cr", ai4_cbf_cr[0] || ai4_cbf_cr[1], ps_cabac->u4_range);
705             }
706         }
707         else
708         {
709             ai4_cbf_cr[0] = ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth - 1];
710             ai4_cbf_cr[1] = ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth - 1];
711         }
712 
713         ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth] = ai4_cbf_cb[0];
714         ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth] = ai4_cbf_cr[0];
715         ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth] = ai4_cbf_cb[1];
716         ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth] = ai4_cbf_cr[1];
717     }
718     else
719     {
720         ai4_cbf_cb[0] = ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth - 1];
721         ai4_cbf_cr[0] = ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth - 1];
722         ai4_cbf_cb[1] = ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth - 1];
723         ai4_cbf_cr[1] = ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth - 1];
724     }
725 
726     if(split_tr_flag)
727     {
728         /* recurse into quad child nodes till a leaf node is reached */
729         WORD32 x1_ctb = x0_ctb + ((1 << log2_tr_size) >> 1);
730         WORD32 y1_ctb = y0_ctb + ((1 << log2_tr_size) >> 1);
731 
732         /* node0 of quad tree */
733         ret |= ihevce_encode_transform_tree(
734             ps_entropy_ctxt,
735             x0_ctb,
736             y0_ctb,
737             log2_tr_size - 1,
738             tr_depth + 1,
739             0, /* block 0 */
740             ps_enc_cu);
741 
742         /* node1 of quad tree */
743         ret |= ihevce_encode_transform_tree(
744             ps_entropy_ctxt,
745             x1_ctb,
746             y0_ctb,
747             log2_tr_size - 1,
748             tr_depth + 1,
749             1, /* block 1 */
750             ps_enc_cu);
751 
752         /* node2 of quad tree */
753         ret |= ihevce_encode_transform_tree(
754             ps_entropy_ctxt,
755             x0_ctb,
756             y1_ctb,
757             log2_tr_size - 1,
758             tr_depth + 1,
759             2, /* block 2 */
760             ps_enc_cu);
761 
762         /* node3 of quad tree */
763         ret |= ihevce_encode_transform_tree(
764             ps_entropy_ctxt,
765             x1_ctb,
766             y1_ctb,
767             log2_tr_size - 1,
768             tr_depth + 1,
769             3, /* block 3 */
770             ps_enc_cu);
771     }
772     else
773     {
774         /* leaf node is reached! Encode the TU */
775         WORD32 encode_delta_qp;
776         void *pv_coeff;
777         void *pv_cu_coeff = ps_enc_cu->pv_coeff;
778 
779         /* condition to encode qp of cu in first coded tu */
780         encode_delta_qp = ps_entropy_ctxt->i1_encode_qp_delta &&
781                           (ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS);
782 
783         if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
784         {  // clang-format off
785             /*PIC INFO : Tota TUs based on size*/
786             if(32 == tu_size)
787             {
788                 ps_entropy_ctxt->ps_pic_level_info->i8_total_tu_based_on_size[3]++;
789             }
790             else
791             {
792                 ps_entropy_ctxt->ps_pic_level_info->i8_total_tu_based_on_size[tu_size >> 3]++;
793             }
794         }  // clang-format on
795 
796         /* sanity checks */
797         ASSERT(ps_entropy_ctxt->i1_ctb_num_pcm_blks == 0);
798         ASSERT((ps_enc_tu->s_tu.b4_pos_x << 2) == x0_ctb);
799         ASSERT((ps_enc_tu->s_tu.b4_pos_y << 2) == y0_ctb);
800         ASSERT(tu_size == (1 << log2_tr_size));
801 
802         /********************************************************************/
803         /* encode luma cbf if any of following conditions are true          */
804         /* intra cu | transform depth > 0 | any of chroma cbfs are coded    */
805         /*                                                                  */
806         /* Note that these conditions mean that cbf_luma need not be        */
807         /* signalled and implicitly derived as 1 for inter cu whose tfr size*/
808         /* is same as cu size and cbf for cb+cr are zero as no_residue_flag */
809         /* at cu level = 1 indicated cbf luma is coded                      */
810         /********************************************************************/
811         if(is_intra || (tr_depth != 0) || ai4_cbf_cb[0] || ai4_cbf_cr[0] ||
812            ((u1_is_422) && (ai4_cbf_cb[1] || ai4_cbf_cr[1])))
813         {
814             /* encode  cbf luma, context derived as per Table 9-37 */
815             cbf_luma = ps_enc_tu->s_tu.b1_y_cbf;
816 
817             ctxt_inc = IHEVC_CAB_CBF_LUMA_IDX;
818             ctxt_inc += (tr_depth == 0) ? 1 : 0;
819 
820             if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
821             {
822                 if(1 == cbf_luma)
823                 {
824                     // clang-format off
825                     /*PIC INFO: Populated coded Intra/Inter TUs in CU*/
826                     if(1 == is_intra)
827                         ps_entropy_ctxt->ps_pic_level_info->i8_total_intra_coded_tu++;
828                     else
829                         ps_entropy_ctxt->ps_pic_level_info->i8_total_inter_coded_tu++;
830                     // clang-format on
831                 }
832                 else
833                 { /*PIC INFO: Populated coded non-coded TUs in CU*/
834                     ps_entropy_ctxt->ps_pic_level_info->i8_total_non_coded_tu++;
835                 }
836             }
837             u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
838             ret |= ihevce_cabac_encode_bin(ps_cabac, cbf_luma, ctxt_inc);
839 
840             if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
841             {  // clang-format off
842                 /*PIC INFO : Populate CBF luma bits*/
843                 ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_luma_bits +=
844                     (ps_cabac->u4_bits_estimated_q12 - u4_bits_estimated_prev);
845             }  // clang-format on
846             AEV_TRACE("cbf_luma", cbf_luma, ps_cabac->u4_range);
847         }
848         else
849         {
850             if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
851             {
852                 /*PIC INFO: Populated coded Inter TUs in CU*/
853                 ps_entropy_ctxt->ps_pic_level_info->i8_total_inter_coded_tu++;
854             }
855 
856             /* shall be 1 as no_residue_flag was encoded as 1 in inter cu */
857             ASSERT(1 == ps_enc_tu->s_tu.b1_y_cbf);
858             cbf_luma = ps_enc_tu->s_tu.b1_y_cbf;
859         }
860 
861         /*******************************************************************/
862         /* code qp delta conditionally if following conditions are true    */
863         /* any cbf coded (luma/cb/cr) and qp_delta_coded is 0 for this cu  */
864         /* see section 7.3.12 Transform unit Syntax                        */
865         /*******************************************************************/
866         {
867             WORD32 cbf_chroma = (ai4_cbf_cb[0] || ai4_cbf_cr[0]) ||
868                                 (u1_is_422 && (ai4_cbf_cb[1] || ai4_cbf_cr[1]));
869 
870             if((cbf_luma || cbf_chroma) && encode_delta_qp)
871             {
872                 WORD32 tu_qp = ps_enc_tu->s_tu.b7_qp;
873                 WORD32 qp_pred, qp_left, qp_top;
874                 WORD32 qp_delta = tu_qp - ps_entropy_ctxt->i1_cur_qp;
875                 WORD32 x_nbr_indx, y_nbr_indx;
876 
877                 /* Added code for handling the QP neighbour population depending
878                    on the diff_cu_qp_delta_depth: Lokesh  */
879                 /* minus 2 becoz the pos_x and pos_y are given in the order of
880                  * 8x8 blocks rather than pixels */
881                 WORD32 log2_min_cu_qp_delta_size =
882                     ps_entropy_ctxt->i1_log2_ctb_size -
883                     ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
884                 //WORD32 min_cu_qp_delta_size = 1 << log2_min_cu_qp_delta_size;
885 
886                 //WORD32 curr_pos_x = ps_enc_cu->b3_cu_pos_x << 3;
887                 //WORD32 curr_pos_y = ps_enc_cu->b3_cu_pos_y << 3;
888 
889                 WORD32 block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
890 
891                 ps_entropy_ctxt->i4_qg_pos_x = ps_enc_cu->b3_cu_pos_x & block_addr_align;
892                 ps_entropy_ctxt->i4_qg_pos_y = ps_enc_cu->b3_cu_pos_y & block_addr_align;
893 
894                 x_nbr_indx = ps_entropy_ctxt->i4_qg_pos_x - 1;
895                 y_nbr_indx = ps_entropy_ctxt->i4_qg_pos_y - 1;
896 
897                 if(ps_entropy_ctxt->i4_qg_pos_x > 0)
898                 {
899                     // clang-format off
900                     qp_left =
901                         ps_entropy_ctxt->ai4_8x8_cu_qp[x_nbr_indx +
902                                             (ps_entropy_ctxt->i4_qg_pos_y * 8)];
903                     // clang-format on
904                 }
905                 if(ps_entropy_ctxt->i4_qg_pos_y > 0)
906                 {
907                     // clang-format off
908                     qp_top = ps_entropy_ctxt->ai4_8x8_cu_qp[ps_entropy_ctxt->i4_qg_pos_x +
909                                                  y_nbr_indx * 8];
910                     // clang-format on
911                 }
912                 if(ps_entropy_ctxt->i4_qg_pos_x == 0)
913                 {
914                     /*previous coded Qp*/
915                     qp_left = ps_entropy_ctxt->i1_cur_qp;
916                 }
917                 if(ps_entropy_ctxt->i4_qg_pos_y == 0)
918                 {
919                     /*previous coded Qp*/
920                     qp_top = ps_entropy_ctxt->i1_cur_qp;
921                 }
922 
923                 qp_pred = (qp_left + qp_top + 1) >> 1;
924                 // clang-format off
925                 /* start of every frame encode qp delta wrt slice qp when entrop
926                  * sync is enabled */
927                 if(ps_entropy_ctxt->i4_ctb_x == 0 &&
928                     ps_entropy_ctxt->i4_qg_pos_x == 0 &&
929                     ps_entropy_ctxt->i4_qg_pos_y == 0 &&
930                     ps_entropy_ctxt->s_cabac_ctxt.i1_entropy_coding_sync_enabled_flag)
931                 // clang-format on
932                 {
933                     qp_pred = ps_entropy_ctxt->ps_slice_hdr->i1_slice_qp_delta +
934                               ps_entropy_ctxt->ps_pps->i1_pic_init_qp;
935                 }
936                 qp_delta = tu_qp - qp_pred;
937 
938                 /*PIC INFO : Populate QP delta bits*/
939                 u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
940 
941                 /* code the qp delta */
942                 ret |= ihevce_cabac_encode_qp_delta(ps_cabac, qp_delta);
943 
944                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
945                 {
946                     // clang-format off
947                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_qp_delta_bits +=
948                         (ps_cabac->u4_bits_estimated_q12 -
949                             u4_bits_estimated_prev);
950                     // clang-format on
951                 }
952 
953                 ps_entropy_ctxt->i1_cur_qp = tu_qp;
954                 //ps_entropy_ctxt->i1_cur_qp = Qp_pred;
955                 ps_entropy_ctxt->i1_encode_qp_delta = 0;
956                 //ps_entropy_ctxt->i4_is_cu_cbf_zero = 0;
957             }
958 
959             if(cbf_luma || cbf_chroma)
960             {
961                 ps_entropy_ctxt->i4_is_cu_cbf_zero = 0;
962             }
963 
964             /* code the residue of for luma and chroma tu based on cbf */
965             if((cbf_luma) && (1 == ps_entropy_ctxt->i4_enable_res_encode))
966             {
967                 u4_bits_estimated_prev = ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12;
968                 /* code the luma residue */
969                 pv_coeff = (void *)((UWORD8 *)pv_cu_coeff + ps_enc_tu->i4_luma_coeff_offset);
970 
971                 ret |= ihevce_cabac_residue_encode(ps_entropy_ctxt, pv_coeff, log2_tr_size, 1);
972 
973                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
974                 {  // clang-format off
975                     /*PIC INFO : Populate Residue Luma Bits*/
976                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_res_luma_bits +=
977                         (ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 -
978                             u4_bits_estimated_prev);
979                 }  // clang-format on
980             }
981 
982             /* code chroma residue based on tranform size                  */
983             /* For Inta 4x4 pu chroma is coded after all 4 luma blks coded */
984             /* Note: chroma not encoded in rdopt mode                      */
985             if(((log2_tr_size > 2) || (3 == blk_num)) /* &&
986                 (CABAC_MODE_ENCODE_BITS == ps_cabac->e_cabac_op_mode) */
987             )
988             {
989                 WORD32 log2_chroma_tr_size;
990                 WORD32 i4_subtu_idx;
991                 void *pv_coeff_cb, *pv_coeff_cr;
992 
993                 WORD32 i4_num_subtus = u1_is_422 + 1;
994 
995                 if(1 == ps_entropy_ctxt->i4_enable_res_encode)
996                 {
997                     for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_subtus; i4_subtu_idx++)
998                     {
999                         if(ai4_cbf_cb[i4_subtu_idx])
1000                         {
1001                             /* initailize chroma transform size and coeff based
1002                              * on luma size */
1003                             if(2 == log2_tr_size)
1004                             {
1005                                 /*********************************************************/
1006                                 /* For Intra 4x4, chroma transform size is 4 and chroma  */
1007                                 /* coeff offset is present  in the first Luma block      */
1008                                 /*********************************************************/
1009                                 log2_chroma_tr_size = 2;
1010 
1011                                 /* -3 is for going to first luma tu of the 4 TUs in min CU */
1012                                 pv_coeff_cb =
1013                                     (void
1014                                          *)((UWORD8 *)pv_cu_coeff + ps_enc_tu[-3].ai4_cb_coeff_offset[i4_subtu_idx]);
1015                             }
1016                             else
1017                             {
1018                                 log2_chroma_tr_size = (log2_tr_size - 1);
1019 
1020                                 pv_coeff_cb =
1021                                     (void
1022                                          *)((UWORD8 *)pv_cu_coeff + ps_enc_tu->ai4_cb_coeff_offset[i4_subtu_idx]);
1023                             }
1024                             // clang-format off
1025                             u4_bits_estimated_prev =
1026                                 ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12;
1027                             // clang-format on
1028                             /* code the cb residue */
1029                             ret |= ihevce_cabac_residue_encode(
1030                                 ps_entropy_ctxt, pv_coeff_cb, log2_chroma_tr_size, 0);
1031 
1032                             if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
1033                             {  // clang-format off
1034                                 /*PIC INFO : Populate Residue Chroma cr Bits*/
1035                                 ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_res_chroma_bits +=
1036                                     (ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 -
1037                                         u4_bits_estimated_prev);
1038                             }  // clang-format on
1039                         }
1040                     }
1041                 }
1042 
1043                 if(1 == ps_entropy_ctxt->i4_enable_res_encode)
1044                 {
1045                     for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_subtus; i4_subtu_idx++)
1046                     {
1047                         if(ai4_cbf_cr[i4_subtu_idx])
1048                         {
1049                             /* initailize chroma transform size and coeff based on luma size */
1050                             if(2 == log2_tr_size)
1051                             {
1052                                 /*********************************************************/
1053                                 /* For Intra 4x4, chroma transform size is 4 and chroma  */
1054                                 /* coeff offset is present  in the first Luma block      */
1055                                 /*********************************************************/
1056                                 log2_chroma_tr_size = 2;
1057 
1058                                 pv_coeff_cr =
1059                                     (void
1060                                          *)((UWORD8 *)pv_cu_coeff + ps_enc_tu[-3].ai4_cr_coeff_offset[i4_subtu_idx]);
1061                             }
1062                             else
1063                             {
1064                                 log2_chroma_tr_size = (log2_tr_size - 1);
1065 
1066                                 pv_coeff_cr =
1067                                     (void
1068                                          *)((UWORD8 *)pv_cu_coeff + ps_enc_tu->ai4_cr_coeff_offset[i4_subtu_idx]);
1069                             }
1070                             // clang-format off
1071                             u4_bits_estimated_prev =
1072                                 ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12;
1073                             // clang-format on
1074                             /* code the cb residue */
1075                             ret |= ihevce_cabac_residue_encode(
1076                                 ps_entropy_ctxt, pv_coeff_cr, log2_chroma_tr_size, 0);
1077                             if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
1078                             {  // clang-format off
1079                                 /*PIC INFO : Populate Residue Chroma cr Bits*/
1080                                 ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_res_chroma_bits +=
1081                                     (ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 -
1082                                         u4_bits_estimated_prev);
1083                             }  // clang-format on
1084                         }
1085                     }
1086                 }
1087             }
1088         }
1089 
1090         /* update tu_idx after encoding current tu */
1091         ps_entropy_ctxt->i4_tu_idx++;
1092     }
1093 
1094     return ret;
1095 }
1096 
1097 /**
1098 ******************************************************************************
1099 *
1100 *  @brief Encodes a transform residual block as per section 7.3.13
1101 *
1102 *  @par   Description
1103 *   The residual block is read from a compressed coeff buffer populated during
1104 *   the scanning of the quantized coeffs. The contents of the buffer are
1105 *   briefly explained in param description of pv_coeff
1106 *
1107 *  @remarks Does not support sign data hiding and transform skip flag currently
1108 *
1109 *  @remarks Need to resolve the differences between JVT-J1003_d7 spec and
1110 *           HM.8.0-dev for related abs_greater_than_1 context initialization
1111 *           and rice_max parameter used for coeff abs level remaining
1112 *
1113 *  @param[inout]   ps_entropy_ctxt
1114 *  pointer to entropy context (handle)
1115 *
1116 *  @param[in]      pv_coeff
1117 *  Compressed residue buffer containing following information:
1118 *
1119 *  HEADER(4 bytes) : last_coeff_x, last_coeff_y, scantype, last_subblock_num
1120 *
1121 *  For each 4x4 subblock starting from last_subblock_num (in scan order)
1122 *     Read 2 bytes  : MSB 12bits (0xBAD marker), bit0 cur_csbf, bit1-2 nbr csbf
1123 *
1124 *     If cur_csbf
1125 *      Read 2 bytes : sig_coeff_map (16bits in scan_order 1:coded, 0:not coded)
1126 *      Read 2 bytes : abs_gt1_flags (max of 8 only)
1127 *      Read 2 bytes : coeff_sign_flags
1128 *
1129 *      Based on abs_gt1_flags and sig_coeff_map read remaining abs levels
1130 *      Read 2 bytes : remaining_abs_coeffs_minus1 (this is in a loop)
1131 *
1132 *  @param[in]      log2_tr_size
1133 *  transform size of the current TU
1134 *
1135 *  @param[in]      is_luma
1136 *  boolean indicating if the texture type is luma / chroma
1137 *
1138 *
1139 *  @return      success or failure error code
1140 *
1141 ******************************************************************************
1142 */
ihevce_cabac_residue_encode(entropy_context_t * ps_entropy_ctxt,void * pv_coeff,WORD32 log2_tr_size,WORD32 is_luma)1143 WORD32 ihevce_cabac_residue_encode(
1144     entropy_context_t *ps_entropy_ctxt, void *pv_coeff, WORD32 log2_tr_size, WORD32 is_luma)
1145 {
1146     WORD32 ret = IHEVCE_SUCCESS;
1147     cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
1148     WORD32 i4_sign_data_hiding_flag, cu_tq_bypass_flag;
1149 
1150     UWORD8 *pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
1151     UWORD16 *pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
1152 
1153     /* last sig coeff indices in scan order */
1154     WORD32 last_sig_coeff_x = pu1_coeff_buf_hdr[0];
1155     WORD32 last_sig_coeff_y = pu1_coeff_buf_hdr[1];
1156 
1157     /* read the scan type : upright diag / horz / vert */
1158     WORD32 scan_type = pu1_coeff_buf_hdr[2];
1159 
1160     /************************************************************************/
1161     /* position of the last coded sub block. This sub block contains coeff  */
1162     /* corresponding to last_sig_coeff_x, last_sig_coeff_y. Althoug this can*/
1163     /* be derived here it better to be populated by scanning module         */
1164     /************************************************************************/
1165     WORD32 last_csb = pu1_coeff_buf_hdr[3];
1166 
1167     WORD32 cur_csbf = 0, nbr_csbf;
1168     WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag    */
1169     WORD32 abs_gt1_base_ctxt; /* cabac context for abslevel > 1 flag */
1170 
1171     WORD32 gt1_ctxt = 1; /* required for abs_gt1_ctxt modelling */
1172 
1173     WORD32 i;
1174 
1175     /* sanity checks */
1176     /* transform skip not supported */
1177     ASSERT(0 == ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag);
1178 
1179     cu_tq_bypass_flag = ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag;
1180 
1181     i4_sign_data_hiding_flag = ps_entropy_ctxt->ps_pps->i1_sign_data_hiding_flag;
1182 
1183     if(SCAN_VERT == scan_type)
1184     {
1185         /* last coeff x and y are swapped for vertical scan */
1186         SWAP(last_sig_coeff_x, last_sig_coeff_y);
1187     }
1188 
1189     /* Encode the last_sig_coeff_x and last_sig_coeff_y */
1190     ret |= ihevce_cabac_encode_last_coeff_x_y(
1191         ps_cabac, last_sig_coeff_x, last_sig_coeff_y, log2_tr_size, is_luma);
1192 
1193     /*************************************************************************/
1194     /* derive base context index for sig coeff as per section 9.3.3.1.4      */
1195     /* TODO; convert to look up based on luma/chroma, scan type and tfr size */
1196     /*************************************************************************/
1197     if(is_luma)
1198     {
1199         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
1200         abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
1201 
1202         if(3 == log2_tr_size)
1203         {
1204             /* 8x8 transform size */
1205             sig_coeff_base_ctxt += (scan_type == SCAN_DIAG_UPRIGHT) ? 9 : 15;
1206         }
1207         else if(3 < log2_tr_size)
1208         {
1209             /* larger transform sizes */
1210             sig_coeff_base_ctxt += 21;
1211         }
1212     }
1213     else
1214     {
1215         /* chroma context initializations */
1216         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
1217         abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
1218 
1219         if(3 == log2_tr_size)
1220         {
1221             /* 8x8 transform size */
1222             sig_coeff_base_ctxt += 9;
1223         }
1224         else if(3 < log2_tr_size)
1225         {
1226             /* larger transform sizes */
1227             sig_coeff_base_ctxt += 12;
1228         }
1229     }
1230 
1231     /* go to csbf flags */
1232     pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + COEFF_BUF_HEADER_LEN);
1233 
1234     /************************************************************************/
1235     /* encode the csbf, sig_coeff_map, abs_grt1_flags, abs_grt2_flag, sign  */
1236     /* and abs_coeff_remaining for each 4x4 starting from last scan to first*/
1237     /************************************************************************/
1238     for(i = last_csb; i >= 0; i--)
1239     {
1240         UWORD16 u2_marker_csbf;
1241         WORD32 ctxt_idx;
1242 
1243         u2_marker_csbf = *pu2_sig_coeff_buf;
1244         pu2_sig_coeff_buf++;
1245 
1246         /* sanity checks for marker present in every csbf flag */
1247         ASSERT((u2_marker_csbf >> 4) == 0xBAD);
1248 
1249         /* extract the current and neigbour csbf flags */
1250         cur_csbf = u2_marker_csbf & 0x1;
1251         nbr_csbf = (u2_marker_csbf >> 1) & 0x3;
1252 
1253         /*********************************************************************/
1254         /* code the csbf flags; last and first csb not sent as it is derived */
1255         /*********************************************************************/
1256         if((i < last_csb) && (i > 0))
1257         {
1258             ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
1259 
1260             /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
1261             ctxt_idx += nbr_csbf ? 1 : 0;
1262             ctxt_idx += is_luma ? 0 : 2;
1263 
1264             ret |= ihevce_cabac_encode_bin(ps_cabac, cur_csbf, ctxt_idx);
1265             AEV_TRACE("coded_sub_block_flag", cur_csbf, ps_cabac->u4_range);
1266         }
1267         else
1268         {
1269             /* sanity check, this csb contains the last_sig_coeff */
1270             if(i == last_csb)
1271             {
1272                 ASSERT(cur_csbf == 1);
1273             }
1274         }
1275 
1276         if(cur_csbf)
1277         {
1278             /*****************************************************************/
1279             /* encode the sig coeff map as per section 7.3.13                */
1280             /* significant_coeff_flags: msb=coeff15-lsb=coeff0 in scan order */
1281             /*****************************************************************/
1282 
1283             /* Added for Sign bit data hiding*/
1284             WORD32 first_scan_pos = 16;
1285             WORD32 last_scan_pos = -1;
1286             WORD32 sign_hidden = 0;
1287 
1288             UWORD16 u2_gt0_flags = *pu2_sig_coeff_buf;
1289             WORD32 gt1_flags = *(pu2_sig_coeff_buf + 1);
1290             WORD32 sign_flags = *(pu2_sig_coeff_buf + 2);
1291 
1292             WORD32 sig_coeff_map = u2_gt0_flags;
1293 
1294             WORD32 gt1_bins = 0; /* bins for coeffs with abslevel > 1 */
1295 
1296             WORD32 sign_bins = 0; /* bins for sign flags of coded coeffs  */
1297             WORD32 num_coded = 0; /* total coeffs coded in 4x4            */
1298 
1299             WORD32 infer_coeff; /* infer when 0,0 is the only coded coeff */
1300             WORD32 bit; /* temp boolean */
1301 
1302             /* total count of coeffs to be coded as abs level remaining */
1303             WORD32 num_coeffs_remaining = 0;
1304 
1305             /* count of coeffs to be coded as  abslevel-1 */
1306             WORD32 num_coeffs_base1 = 0;
1307             WORD32 scan_pos;
1308             WORD32 first_gt1_coeff = 0;
1309 
1310             if((i != 0) || (0 == last_csb))
1311             {
1312                 /* sanity check, atleast one coeff is coded as csbf is set */
1313                 ASSERT(sig_coeff_map != 0);
1314             }
1315 
1316             pu2_sig_coeff_buf += 3;
1317 
1318             scan_pos = 15;
1319             if(i == last_csb)
1320             {
1321                 /*************************************************************/
1322                 /* clear last_scan_pos for last block in scan order as this  */
1323                 /* is communicated  throught last_coeff_x and last_coeff_y   */
1324                 /*************************************************************/
1325                 WORD32 next_sig = CLZ(sig_coeff_map) + 1;
1326 
1327                 scan_pos = WORD_SIZE - next_sig;
1328 
1329                 /* prepare the bins for gt1 flags */
1330                 EXTRACT_BIT(bit, gt1_flags, scan_pos);
1331 
1332                 /* insert gt1 bin in lsb */
1333                 gt1_bins |= bit;
1334 
1335                 /* prepare the bins for sign flags */
1336                 EXTRACT_BIT(bit, sign_flags, scan_pos);
1337 
1338                 /* insert sign bin in lsb */
1339                 sign_bins |= bit;
1340 
1341                 sig_coeff_map = CLEAR_BIT(sig_coeff_map, scan_pos);
1342 
1343                 if(-1 == last_scan_pos)
1344                     last_scan_pos = scan_pos;
1345 
1346                 scan_pos--;
1347                 num_coded++;
1348             }
1349 
1350             /* infer 0,0 coeff for all 4x4 blocks except fitst and last */
1351             infer_coeff = (i < last_csb) && (i > 0);
1352 
1353             /* encode the required sigcoeff flags (abslevel > 0)   */
1354             while(scan_pos >= 0)
1355             {
1356                 WORD32 y_pos_x_pos;
1357                 WORD32 sig_ctxinc = 0; /* 0 is default inc for DC coeff */
1358 
1359                 WORD32 sig_coeff;
1360 
1361                 EXTRACT_BIT(sig_coeff, sig_coeff_map, scan_pos);
1362 
1363                 /* derive the x,y pos */
1364                 y_pos_x_pos = gu1_hevce_scan4x4[scan_type][scan_pos];
1365 
1366                 /* derive the context inc as per section 9.3.3.1.4 */
1367                 if(2 == log2_tr_size)
1368                 {
1369                     /* 4x4 transform size increment uses lookup */
1370                     sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
1371                 }
1372                 else if(scan_pos || i)
1373                 {
1374                     /* ctxt for AC coeff depends on curpos and neigbour csbf */
1375                     sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][y_pos_x_pos];
1376 
1377                     /* based on luma subblock pos */
1378                     sig_ctxinc += (i && is_luma) ? 3 : 0;
1379                 }
1380                 else
1381                 {
1382                     /* DC coeff has fixed context for luma and chroma */
1383                     sig_coeff_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG
1384                                                   : IHEVC_CAB_COEFF_FLAG + 27;
1385                 }
1386 
1387                 /*************************************************************/
1388                 /* encode sig coeff only if required                         */
1389                 /* decoder infers 0,0 coeff when all the other coeffs are 0  */
1390                 /*************************************************************/
1391                 if(scan_pos || (!infer_coeff))
1392                 {
1393                     ctxt_idx = sig_ctxinc + sig_coeff_base_ctxt;
1394                     ret |= ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_idx);
1395                     AEV_TRACE("significant_coeff_flag", sig_coeff, ps_cabac->u4_range);
1396                 }
1397 
1398                 if(sig_coeff)
1399                 {
1400                     /* prepare the bins for gt1 flags */
1401                     EXTRACT_BIT(bit, gt1_flags, scan_pos);
1402 
1403                     /* shift and insert gt1 bin in lsb */
1404                     gt1_bins <<= 1;
1405                     gt1_bins |= bit;
1406 
1407                     /* prepare the bins for sign flags */
1408                     EXTRACT_BIT(bit, sign_flags, scan_pos);
1409 
1410                     /* shift and insert sign bin in lsb */
1411                     sign_bins <<= 1;
1412                     sign_bins |= bit;
1413 
1414                     num_coded++;
1415 
1416                     /* 0,0 coeff can no more be inferred :( */
1417                     infer_coeff = 0;
1418 
1419                     if(-1 == last_scan_pos)
1420                         last_scan_pos = scan_pos;
1421 
1422                     first_scan_pos = scan_pos;
1423                 }
1424 
1425                 scan_pos--;
1426             }
1427 
1428             /* Added for sign bit hiding*/
1429             sign_hidden = ((last_scan_pos - first_scan_pos) > 3 && !cu_tq_bypass_flag);
1430 
1431             /****************************************************************/
1432             /* encode the abs level greater than 1 bins; Section 7.3.13     */
1433             /* These have already been prepared during sig_coeff_map encode */
1434             /* Context modelling done as per section 9.3.3.1.5              */
1435             /****************************************************************/
1436             {
1437                 WORD32 j;
1438 
1439                 /* context set based on luma subblock pos */
1440                 WORD32 ctxt_set = (i && is_luma) ? 2 : 0;
1441 
1442                 /* count of coeffs with abslevel > 1; max of 8 to be coded */
1443                 WORD32 num_gt1_bins = MIN(8, num_coded);
1444 
1445                 if(num_coded > 8)
1446                 {
1447                     /* pull back the bins to required number */
1448                     gt1_bins >>= (num_coded - 8);
1449 
1450                     num_coeffs_remaining += (num_coded - 8);
1451                     num_coeffs_base1 = (num_coded - 8);
1452                 }
1453 
1454                 /* See section 9.3.3.1.5           */
1455                 ctxt_set += (0 == gt1_ctxt) ? 1 : 0;
1456 
1457                 gt1_ctxt = 1;
1458 
1459                 for(j = num_gt1_bins - 1; j >= 0; j--)
1460                 {
1461                     /* Encodet the abs level gt1 bins */
1462                     ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1463 
1464                     EXTRACT_BIT(bit, gt1_bins, j);
1465 
1466                     ret |= ihevce_cabac_encode_bin(ps_cabac, bit, ctxt_idx);
1467 
1468                     AEV_TRACE("coeff_abs_level_greater1_flag", bit, ps_cabac->u4_range);
1469 
1470                     if(bit)
1471                     {
1472                         gt1_ctxt = 0;
1473                         num_coeffs_remaining++;
1474                     }
1475                     else if(gt1_ctxt && (gt1_ctxt < 3))
1476                     {
1477                         gt1_ctxt++;
1478                     }
1479                 }
1480 
1481                 /*************************************************************/
1482                 /* encode abs level greater than 2 bin; Section 7.3.13       */
1483                 /*************************************************************/
1484                 if(gt1_bins)
1485                 {
1486                     WORD32 gt2_bin;
1487 
1488                     first_gt1_coeff = pu2_sig_coeff_buf[0] + 1;
1489                     gt2_bin = (first_gt1_coeff > 2);
1490 
1491                     /* atleast one level > 2 */
1492                     ctxt_idx = IHEVC_CAB_COEFABS_GRTR2_FLAG;
1493 
1494                     ctxt_idx += (is_luma) ? ctxt_set : (ctxt_set + 4);
1495 
1496                     ret |= ihevce_cabac_encode_bin(ps_cabac, gt2_bin, ctxt_idx);
1497 
1498                     if(!gt2_bin)
1499                     {
1500                         /* sanity check */
1501                         ASSERT(first_gt1_coeff == 2);
1502 
1503                         /* no need to send this coeff as bypass bins */
1504                         pu2_sig_coeff_buf++;
1505                         num_coeffs_remaining--;
1506                     }
1507 
1508                     AEV_TRACE("coeff_abs_level_greater2_flag", gt2_bin, ps_cabac->u4_range);
1509                 }
1510             }
1511 
1512             /*************************************************************/
1513             /* encode the coeff signs and abs remaing levels             */
1514             /*************************************************************/
1515             if(num_coded)
1516             {
1517                 WORD32 base_level;
1518                 WORD32 rice_param = 0;
1519                 WORD32 j;
1520 
1521                 /*************************************************************/
1522                 /* encode the coeff signs populated in sign_bins             */
1523                 /*************************************************************/
1524 
1525                 if(sign_hidden && i4_sign_data_hiding_flag)
1526                 {
1527                     sign_bins >>= 1;
1528                     num_coded--;
1529                 }
1530 
1531                 if(num_coded > 0)
1532                 {
1533                     ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, sign_bins, num_coded);
1534                 }
1535 
1536                 AEV_TRACE("sign_flags", sign_bins, ps_cabac->u4_range);
1537 
1538                 /*************************************************************/
1539                 /* encode the coeff_abs_level_remaining as TR / EGK bins     */
1540                 /* See section 9.3.2.7 for details                           */
1541                 /*************************************************************/
1542 
1543                 /* first remaining coeff baselevel */
1544                 if(first_gt1_coeff > 2)
1545                 {
1546                     base_level = 3;
1547                 }
1548                 else if(num_coeffs_remaining > num_coeffs_base1)
1549                 {
1550                     /* atleast one coeff in first 8 is gt > 1 */
1551                     base_level = 2;
1552                 }
1553                 else
1554                 {
1555                     /* all coeffs have base of 1 */
1556                     base_level = 1;
1557                 }
1558 
1559                 for(j = 0; j < num_coeffs_remaining; j++)
1560                 {
1561                     WORD32 abs_coeff = pu2_sig_coeff_buf[0] + 1;
1562                     WORD32 abs_coeff_rem;
1563                     WORD32 rice_max = (4 << rice_param);
1564 
1565                     pu2_sig_coeff_buf++;
1566 
1567                     /* sanity check */
1568                     ASSERT(abs_coeff >= base_level);
1569 
1570                     abs_coeff_rem = (abs_coeff - base_level);
1571 
1572                     /* TODO://HM-8.0-dev uses (3 << rice_param) as rice_max */
1573                     /* TODO://HM-8.0-dev does either TR or EGK but not both */
1574                     if(abs_coeff_rem >= rice_max)
1575                     {
1576                         UWORD32 u4_suffix = (abs_coeff_rem - rice_max);
1577 
1578                         /* coeff exceeds max rice limit                    */
1579                         /* encode the TR prefix as tunary code             */
1580                         /* prefix = 1111 as (rice_max >> rice_praram) = 4  */
1581                         ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, 0xF, 4);
1582 
1583                         /* encode the exponential golomb code suffix */
1584                         ret |= ihevce_cabac_encode_egk(ps_cabac, u4_suffix, (rice_param + 1));
1585                     }
1586                     else
1587                     {
1588                         /* code coeff as truncated rice code  */
1589                         ret |= ihevce_cabac_encode_trunc_rice(
1590                             ps_cabac, abs_coeff_rem, rice_param, rice_max);
1591                     }
1592 
1593                     AEV_TRACE("coeff_abs_level_remaining", abs_coeff_rem, ps_cabac->u4_range);
1594 
1595                     /* update the rice param based on coeff level */
1596                     if((abs_coeff > (3 << rice_param)) && (rice_param < 4))
1597                     {
1598                         rice_param++;
1599                     }
1600 
1601                     /* change base level to 1 if more than 8 coded coeffs */
1602                     if((j + 1) < (num_coeffs_remaining - num_coeffs_base1))
1603                     {
1604                         base_level = 2;
1605                     }
1606                     else
1607                     {
1608                         base_level = 1;
1609                     }
1610                 }
1611             }
1612         }
1613     }
1614     /*tap texture bits*/
1615     if(ps_cabac->e_cabac_op_mode == CABAC_MODE_COMPUTE_BITS)
1616     {  // clang-format off
1617         ps_cabac->u4_texture_bits_estimated_q12 +=
1618             (ps_cabac->u4_bits_estimated_q12 -
1619                 ps_cabac->u4_header_bits_estimated_q12);  //(ps_cabac->u4_bits_estimated_q12 - temp_tex_bits_q12);
1620     }  // clang-format on
1621 
1622     return (ret);
1623 }
1624 
1625 /**
1626 ******************************************************************************
1627 *
1628 *  @brief Get the bits estimate for a transform residual block as per section
1629 *   7.3.13
1630 *
1631 *  @par   Description
1632 *   The residual block is read from a compressed coeff buffer populated during
1633 *   the scanning of the quantized coeffs. The contents of the buffer are
1634 *   briefly explained in param description of pv_coeff
1635 *
1636 *  @remarks Does not support sign data hiding and transform skip flag currently
1637 *
1638 *  @remarks Need to resolve the differences between JVT-J1003_d7 spec and
1639 *           HM.8.0-dev for related abs_greater_than_1 context initialization
1640 *           and rice_max parameter used for coeff abs level remaining
1641 *
1642 *  @param[inout]   ps_entropy_ctxt
1643 *  pointer to entropy context (handle)
1644 *
1645 *  @param[in]      pv_coeff
1646 *  Compressed residue buffer containing following information:
1647 *
1648 *  HEADER(4 bytes) : last_coeff_x, last_coeff_y, scantype, last_subblock_num
1649 *
1650 *  For each 4x4 subblock starting from last_subblock_num (in scan order)
1651 *     Read 2 bytes  : MSB 12bits (0xBAD marker), bit0 cur_csbf, bit1-2 nbr csbf
1652 *
1653 *    If cur_csbf
1654 *      Read 2 bytes : sig_coeff_map (16bits in scan_order 1:coded, 0:not coded)
1655 *      Read 2 bytes : abs_gt1_flags (max of 8 only)
1656 *      Read 2 bytes : coeff_sign_flags
1657 *
1658 *      Based on abs_gt1_flags and sig_coeff_map read remaining abs levels
1659 *      Read 2 bytes : remaining_abs_coeffs_minus1 (this is in a loop)
1660 *
1661 *  @param[in]      log2_tr_size
1662 *  transform size of the current TU
1663 *
1664 *  @param[in]      is_luma
1665 *  boolean indicating if the texture type is luma / chroma
1666 *
1667 *
1668 *  @return      success or failure error code
1669 *
1670 ******************************************************************************
1671 */
ihevce_cabac_residue_encode_rdopt(entropy_context_t * ps_entropy_ctxt,void * pv_coeff,WORD32 log2_tr_size,WORD32 is_luma,WORD32 perform_sbh)1672 WORD32 ihevce_cabac_residue_encode_rdopt(
1673     entropy_context_t *ps_entropy_ctxt,
1674     void *pv_coeff,
1675     WORD32 log2_tr_size,
1676     WORD32 is_luma,
1677     WORD32 perform_sbh)
1678 {
1679     WORD32 ret = IHEVCE_SUCCESS;
1680     cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
1681     UWORD32 temp_tex_bits_q12;
1682     WORD32 i4_sign_data_hiding_flag, cu_tq_bypass_flag;
1683 
1684     UWORD8 *pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
1685     UWORD16 *pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
1686 
1687     /* last sig coeff indices in scan order */
1688     WORD32 last_sig_coeff_x = pu1_coeff_buf_hdr[0];
1689     WORD32 last_sig_coeff_y = pu1_coeff_buf_hdr[1];
1690 
1691     /* read the scan type : upright diag / horz / vert */
1692     WORD32 scan_type = pu1_coeff_buf_hdr[2];
1693 
1694     /************************************************************************/
1695     /* position of the last coded sub block. This sub block contains coeff  */
1696     /* corresponding to last_sig_coeff_x, last_sig_coeff_y. Althoug this can*/
1697     /* be derived here it better to be populated by scanning module         */
1698     /************************************************************************/
1699     WORD32 last_csb = pu1_coeff_buf_hdr[3];
1700 
1701     WORD32 cur_csbf = 0, nbr_csbf;
1702     WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag    */
1703     WORD32 abs_gt1_base_ctxt; /* cabac context for abslevel > 1 flag */
1704 
1705     WORD32 gt1_ctxt = 1; /* required for abs_gt1_ctxt modelling */
1706 
1707     WORD32 i;
1708 
1709     UWORD8 *pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
1710 
1711     /* sanity checks */
1712     /* transform skip not supported */
1713     ASSERT(0 == ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag);
1714 
1715     cu_tq_bypass_flag = ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag;
1716 
1717     i4_sign_data_hiding_flag = ps_entropy_ctxt->ps_pps->i1_sign_data_hiding_flag;
1718 
1719     {
1720         temp_tex_bits_q12 = ps_cabac->u4_bits_estimated_q12;
1721     }
1722 
1723     if(SCAN_VERT == scan_type)
1724     {
1725         /* last coeff x and y are swapped for vertical scan */
1726         SWAP(last_sig_coeff_x, last_sig_coeff_y);
1727     }
1728 
1729     /* Encode the last_sig_coeff_x and last_sig_coeff_y */
1730     ret |= ihevce_cabac_encode_last_coeff_x_y(
1731         ps_cabac, last_sig_coeff_x, last_sig_coeff_y, log2_tr_size, is_luma);
1732 
1733     /*************************************************************************/
1734     /* derive base context index for sig coeff as per section 9.3.3.1.4      */
1735     /* TODO; convert to look up based on luma/chroma, scan type and tfr size */
1736     /*************************************************************************/
1737     if(is_luma)
1738     {
1739         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
1740         abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
1741 
1742         if(3 == log2_tr_size)
1743         {
1744             /* 8x8 transform size */
1745             sig_coeff_base_ctxt += (scan_type == SCAN_DIAG_UPRIGHT) ? 9 : 15;
1746         }
1747         else if(3 < log2_tr_size)
1748         {
1749             /* larger transform sizes */
1750             sig_coeff_base_ctxt += 21;
1751         }
1752     }
1753     else
1754     {
1755         /* chroma context initializations */
1756         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
1757         abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
1758 
1759         if(3 == log2_tr_size)
1760         {
1761             /* 8x8 transform size */
1762             sig_coeff_base_ctxt += 9;
1763         }
1764         else if(3 < log2_tr_size)
1765         {
1766             /* larger transform sizes */
1767             sig_coeff_base_ctxt += 12;
1768         }
1769     }
1770 
1771     /* go to csbf flags */
1772     pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + COEFF_BUF_HEADER_LEN);
1773 
1774     /************************************************************************/
1775     /* encode the csbf, sig_coeff_map, abs_grt1_flags, abs_grt2_flag, sign  */
1776     /* and abs_coeff_remaining for each 4x4 starting from last scan to first*/
1777     /************************************************************************/
1778     for(i = last_csb; i >= 0; i--)
1779     {
1780         UWORD16 u2_marker_csbf;
1781         WORD32 ctxt_idx;
1782 
1783         u2_marker_csbf = *pu2_sig_coeff_buf;
1784         pu2_sig_coeff_buf++;
1785 
1786         /* sanity checks for marker present in every csbf flag */
1787         ASSERT((u2_marker_csbf >> 4) == 0xBAD);
1788 
1789         /* extract the current and neigbour csbf flags */
1790         cur_csbf = u2_marker_csbf & 0x1;
1791         nbr_csbf = (u2_marker_csbf >> 1) & 0x3;
1792 
1793         /*********************************************************************/
1794         /* code the csbf flags; last and first csb not sent as it is derived */
1795         /*********************************************************************/
1796         if((i < last_csb) && (i > 0))
1797         {
1798             ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
1799 
1800             /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
1801             ctxt_idx += nbr_csbf ? 1 : 0;
1802             ctxt_idx += is_luma ? 0 : 2;
1803 
1804             {
1805                 WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
1806 
1807                 /* increment bits generated based on state and bin encoded */
1808                 ps_cabac->u4_bits_estimated_q12 +=
1809                     gau2_ihevce_cabac_bin_to_bits[state_mps ^ cur_csbf];
1810 
1811                 /* update the context model from state transition LUT */
1812                 pu1_ctxt_model[ctxt_idx] = gau1_ihevc_next_state[(state_mps << 1) | cur_csbf];
1813             }
1814         }
1815         else
1816         {
1817             /* sanity check, this csb contains the last_sig_coeff */
1818             if(i == last_csb)
1819             {
1820                 ASSERT(cur_csbf == 1);
1821             }
1822         }
1823 
1824         if(cur_csbf)
1825         {
1826             /*****************************************************************/
1827             /* encode the sig coeff map as per section 7.3.13                */
1828             /* significant_coeff_flags: msb=coeff15-lsb=coeff0 in scan order */
1829             /*****************************************************************/
1830 
1831             /* Added for Sign bit data hiding*/
1832             WORD32 first_scan_pos = 16;
1833             WORD32 last_scan_pos = -1;
1834             WORD32 sign_hidden;
1835 
1836             UWORD16 u2_gt0_flags = *pu2_sig_coeff_buf;
1837             WORD32 gt1_flags = *(pu2_sig_coeff_buf + 1);
1838             WORD32 sign_flags = *(pu2_sig_coeff_buf + 2);
1839 
1840             WORD32 sig_coeff_map = u2_gt0_flags;
1841 
1842             WORD32 gt1_bins = 0; /* bins for coeffs with abslevel > 1 */
1843 
1844             WORD32 sign_bins = 0; /* bins for sign flags of coded coeffs  */
1845             WORD32 num_coded = 0; /* total coeffs coded in 4x4            */
1846 
1847             WORD32 infer_coeff; /* infer when 0,0 is the only coded coeff */
1848             WORD32 bit; /* temp boolean */
1849 
1850             /* total count of coeffs to be coded as abs level remaining */
1851             WORD32 num_coeffs_remaining = 0;
1852 
1853             /* count of coeffs to be coded as  abslevel-1 */
1854             WORD32 num_coeffs_base1 = 0;
1855             WORD32 scan_pos;
1856             WORD32 first_gt1_coeff = 0;
1857 
1858             if((i != 0) || (0 == last_csb))
1859             {
1860                 /* sanity check, atleast one coeff is coded as csbf is set */
1861                 ASSERT(sig_coeff_map != 0);
1862             }
1863 
1864             pu2_sig_coeff_buf += 3;
1865 
1866             scan_pos = 15;
1867             if(i == last_csb)
1868             {
1869                 /*************************************************************/
1870                 /* clear last_scan_pos for last block in scan order as this  */
1871                 /* is communicated  throught last_coeff_x and last_coeff_y   */
1872                 /*************************************************************/
1873                 WORD32 next_sig = CLZ(sig_coeff_map) + 1;
1874 
1875                 scan_pos = WORD_SIZE - next_sig;
1876 
1877                 /* prepare the bins for gt1 flags */
1878                 EXTRACT_BIT(bit, gt1_flags, scan_pos);
1879 
1880                 /* insert gt1 bin in lsb */
1881                 gt1_bins |= bit;
1882 
1883                 /* prepare the bins for sign flags */
1884                 EXTRACT_BIT(bit, sign_flags, scan_pos);
1885 
1886                 /* insert sign bin in lsb */
1887                 sign_bins |= bit;
1888 
1889                 sig_coeff_map = CLEAR_BIT(sig_coeff_map, scan_pos);
1890 
1891                 if(-1 == last_scan_pos)
1892                     last_scan_pos = scan_pos;
1893 
1894                 scan_pos--;
1895                 num_coded++;
1896             }
1897 
1898             /* infer 0,0 coeff for all 4x4 blocks except fitst and last */
1899             infer_coeff = (i < last_csb) && (i > 0);
1900 
1901             /* encode the required sigcoeff flags (abslevel > 0)   */
1902             while(scan_pos >= 0)
1903             {
1904                 WORD32 y_pos_x_pos;
1905                 WORD32 sig_ctxinc = 0; /* 0 is default inc for DC coeff */
1906 
1907                 WORD32 sig_coeff;
1908 
1909                 EXTRACT_BIT(sig_coeff, sig_coeff_map, scan_pos);
1910 
1911                 /* derive the x,y pos */
1912                 y_pos_x_pos = gu1_hevce_scan4x4[scan_type][scan_pos];
1913 
1914                 /* derive the context inc as per section 9.3.3.1.4 */
1915                 if(2 == log2_tr_size)
1916                 {
1917                     /* 4x4 transform size increment uses lookup */
1918                     sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
1919                 }
1920                 else if(scan_pos || i)
1921                 {
1922                     /* ctxt for AC coeff depends on curpos and neigbour csbf */
1923                     sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][y_pos_x_pos];
1924 
1925                     /* based on luma subblock pos */
1926                     sig_ctxinc += (i && is_luma) ? 3 : 0;
1927                 }
1928                 else
1929                 {
1930                     /* DC coeff has fixed context for luma and chroma */
1931                     sig_coeff_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG
1932                                                   : IHEVC_CAB_COEFF_FLAG + 27;
1933                 }
1934 
1935                 /*************************************************************/
1936                 /* encode sig coeff only if required                         */
1937                 /* decoder infers 0,0 coeff when all the other coeffs are 0  */
1938                 /*************************************************************/
1939                 if(scan_pos || (!infer_coeff))
1940                 {
1941                     ctxt_idx = sig_ctxinc + sig_coeff_base_ctxt;
1942 
1943                     //ret |= ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_idx);
1944                     {
1945                         WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
1946 
1947                         /* increment bits generated based on state and bin encoded */
1948                         ps_cabac->u4_bits_estimated_q12 +=
1949                             gau2_ihevce_cabac_bin_to_bits[state_mps ^ sig_coeff];
1950 
1951                         /* update the context model from state transition LUT */
1952                         pu1_ctxt_model[ctxt_idx] =
1953                             gau1_ihevc_next_state[(state_mps << 1) | sig_coeff];
1954                     }
1955                 }
1956 
1957                 if(sig_coeff)
1958                 {
1959                     /* prepare the bins for gt1 flags */
1960                     EXTRACT_BIT(bit, gt1_flags, scan_pos);
1961 
1962                     /* shift and insert gt1 bin in lsb */
1963                     gt1_bins <<= 1;
1964                     gt1_bins |= bit;
1965 
1966                     /* prepare the bins for sign flags */
1967                     EXTRACT_BIT(bit, sign_flags, scan_pos);
1968 
1969                     /* shift and insert sign bin in lsb */
1970                     sign_bins <<= 1;
1971                     sign_bins |= bit;
1972 
1973                     num_coded++;
1974 
1975                     /* 0,0 coeff can no more be inferred :( */
1976                     infer_coeff = 0;
1977 
1978                     if(-1 == last_scan_pos)
1979                         last_scan_pos = scan_pos;
1980 
1981                     first_scan_pos = scan_pos;
1982                 }
1983 
1984                 scan_pos--;
1985             }
1986 
1987             /* Added for sign bit hiding*/
1988             sign_hidden =
1989                 (((last_scan_pos - first_scan_pos) > 3 && !cu_tq_bypass_flag) && (perform_sbh));
1990 
1991             /****************************************************************/
1992             /* encode the abs level greater than 1 bins; Section 7.3.13     */
1993             /* These have already been prepared during sig_coeff_map encode */
1994             /* Context modelling done as per section 9.3.3.1.5              */
1995             /****************************************************************/
1996             {
1997                 WORD32 j;
1998 
1999                 /* context set based on luma subblock pos */
2000                 WORD32 ctxt_set = (i && is_luma) ? 2 : 0;
2001 
2002                 /* count of coeffs with abslevel > 1; max of 8 to be coded */
2003                 WORD32 num_gt1_bins = MIN(8, num_coded);
2004 
2005                 if(num_coded > 8)
2006                 {
2007                     /* pull back the bins to required number */
2008                     gt1_bins >>= (num_coded - 8);
2009 
2010                     num_coeffs_remaining += (num_coded - 8);
2011                     num_coeffs_base1 = (num_coded - 8);
2012                 }
2013 
2014                 /* See section 9.3.3.1.5           */
2015                 ctxt_set += (0 == gt1_ctxt) ? 1 : 0;
2016 
2017                 gt1_ctxt = 1;
2018 
2019                 for(j = num_gt1_bins - 1; j >= 0; j--)
2020                 {
2021                     /* Encodet the abs level gt1 bins */
2022                     ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
2023 
2024                     EXTRACT_BIT(bit, gt1_bins, j);
2025 
2026                     //ret |= ihevce_cabac_encode_bin(ps_cabac, bit, ctxt_idx);
2027                     {
2028                         WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
2029 
2030                         /* increment bits generated based on state and bin encoded */
2031                         ps_cabac->u4_bits_estimated_q12 +=
2032                             gau2_ihevce_cabac_bin_to_bits[state_mps ^ bit];
2033 
2034                         /* update the context model from state transition LUT */
2035                         pu1_ctxt_model[ctxt_idx] = gau1_ihevc_next_state[(state_mps << 1) | bit];
2036                     }
2037 
2038                     if(bit)
2039                     {
2040                         gt1_ctxt = 0;
2041                         num_coeffs_remaining++;
2042                     }
2043                     else if(gt1_ctxt && (gt1_ctxt < 3))
2044                     {
2045                         gt1_ctxt++;
2046                     }
2047                 }
2048 
2049                 /*************************************************************/
2050                 /* encode abs level greater than 2 bin; Section 7.3.13       */
2051                 /*************************************************************/
2052                 if(gt1_bins)
2053                 {
2054                     WORD32 gt2_bin;
2055 
2056                     first_gt1_coeff = pu2_sig_coeff_buf[0] + 1;
2057                     gt2_bin = (first_gt1_coeff > 2);
2058 
2059                     /* atleast one level > 2 */
2060                     ctxt_idx = IHEVC_CAB_COEFABS_GRTR2_FLAG;
2061 
2062                     ctxt_idx += (is_luma) ? ctxt_set : (ctxt_set + 4);
2063 
2064                     //ret |= ihevce_cabac_encode_bin(ps_cabac, gt2_bin, ctxt_idx);
2065                     {
2066                         WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
2067 
2068                         /* increment bits generated based on state and bin encoded */
2069                         ps_cabac->u4_bits_estimated_q12 +=
2070                             gau2_ihevce_cabac_bin_to_bits[state_mps ^ gt2_bin];
2071 
2072                         /* update the context model from state transition LUT */
2073                         pu1_ctxt_model[ctxt_idx] =
2074                             gau1_ihevc_next_state[(state_mps << 1) | gt2_bin];
2075                     }
2076 
2077                     if(!gt2_bin)
2078                     {
2079                         /* sanity check */
2080                         ASSERT(first_gt1_coeff == 2);
2081 
2082                         /* no need to send this coeff as bypass bins */
2083                         pu2_sig_coeff_buf++;
2084                         num_coeffs_remaining--;
2085                     }
2086                 }
2087             }
2088 
2089             /*************************************************************/
2090             /* encode the coeff signs and abs remaing levels             */
2091             /*************************************************************/
2092             if(num_coded)
2093             {
2094                 WORD32 base_level;
2095                 WORD32 rice_param = 0;
2096                 WORD32 j;
2097 
2098                 /*************************************************************/
2099                 /* encode the coeff signs populated in sign_bins             */
2100                 /*************************************************************/
2101                 if(sign_hidden && i4_sign_data_hiding_flag)
2102                 {
2103                     sign_bins >>= 1;
2104                     num_coded--;
2105                 }
2106 
2107                 if(num_coded > 0)
2108                 {
2109                     /* ret |= ihevce_cabac_encode_bypass_bins(ps_cabac,
2110                                                        sign_bins,
2111                                                        num_coded);
2112                     */
2113 
2114                     /* increment bits generated based on num bypass bins */
2115                     ps_cabac->u4_bits_estimated_q12 += (num_coded << CABAC_FRAC_BITS_Q);
2116                 }
2117 
2118                 /*************************************************************/
2119                 /* encode the coeff_abs_level_remaining as TR / EGK bins     */
2120                 /* See section 9.3.2.7 for details                           */
2121                 /*************************************************************/
2122 
2123                 /* first remaining coeff baselevel */
2124                 if(first_gt1_coeff > 2)
2125                 {
2126                     base_level = 3;
2127                 }
2128                 else if(num_coeffs_remaining > num_coeffs_base1)
2129                 {
2130                     /* atleast one coeff in first 8 is gt > 1 */
2131                     base_level = 2;
2132                 }
2133                 else
2134                 {
2135                     /* all coeffs have base of 1 */
2136                     base_level = 1;
2137                 }
2138 
2139                 for(j = 0; j < num_coeffs_remaining; j++)
2140                 {
2141                     WORD32 abs_coeff = pu2_sig_coeff_buf[0] + 1;
2142                     WORD32 abs_coeff_rem;
2143                     WORD32 rice_max = (4 << rice_param);
2144                     WORD32 num_bins, unary_length;
2145                     UWORD32 u4_sym_shiftk_plus1;
2146 
2147                     pu2_sig_coeff_buf++;
2148 
2149                     /* sanity check */
2150                     ASSERT(abs_coeff >= base_level);
2151 
2152                     abs_coeff_rem = (abs_coeff - base_level);
2153 
2154                     /* TODO://HM-8.0-dev uses (3 << rice_param) as rice_max */
2155                     /* TODO://HM-8.0-dev does either TR or EGK but not both */
2156                     if(abs_coeff_rem >= rice_max)
2157                     {
2158                         UWORD32 u4_suffix = (abs_coeff_rem - rice_max);
2159 
2160                         /* coeff exceeds max rice limit                    */
2161                         /* encode the TR prefix as tunary code             */
2162                         /* prefix = 1111 as (rice_max >> rice_praram) = 4  */
2163                         /* ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, 0xF, 4); */
2164 
2165                         /* increment bits generated based on num bypass bins */
2166                         ps_cabac->u4_bits_estimated_q12 += (4 << CABAC_FRAC_BITS_Q);
2167 
2168                         /* encode the exponential golomb code suffix */
2169                         /*ret |= ihevce_cabac_encode_egk(ps_cabac,
2170                                                        u4_suffix,
2171                                                        (rice_param+1)
2172                                                       ); */
2173 
2174                         /* k = rice_param+1 */
2175                         /************************************************************************/
2176                         /* shift symbol by k bits to find unary code prefix (111110)            */
2177                         /* Use GETRANGE to elminate the while loop in sec 9.3.2.4 of HEVC spec  */
2178                         /************************************************************************/
2179                         u4_sym_shiftk_plus1 = (u4_suffix >> (rice_param + 1)) + 1;
2180 
2181                         /* GETRANGE(unary_length, (u4_sym_shiftk_plus1 + 1)); */
2182                         GETRANGE(unary_length, u4_sym_shiftk_plus1);
2183 
2184                         /* length of the code = 2 *(unary_length - 1) + 1 + k */
2185                         num_bins = (2 * unary_length) + rice_param;
2186 
2187                         /* increment bits generated based on num bypass bins */
2188                         ps_cabac->u4_bits_estimated_q12 += (num_bins << CABAC_FRAC_BITS_Q);
2189                     }
2190                     else
2191                     {
2192                         /* code coeff as truncated rice code  */
2193                         /* ret |= ihevce_cabac_encode_trunc_rice(ps_cabac,
2194                                                               abs_coeff_rem,
2195                                                               rice_param,
2196                                                               rice_max);
2197                                                               */
2198 
2199                         /************************************************************************/
2200                         /* shift symbol by c_rice_param bits to find unary code prefix (111.10) */
2201                         /************************************************************************/
2202                         unary_length = (abs_coeff_rem >> rice_param) + 1;
2203 
2204                         /* length of the code */
2205                         num_bins = unary_length + rice_param;
2206 
2207                         /* increment bits generated based on num bypass bins */
2208                         ps_cabac->u4_bits_estimated_q12 += (num_bins << CABAC_FRAC_BITS_Q);
2209                     }
2210 
2211                     /* update the rice param based on coeff level */
2212                     if((abs_coeff > (3 << rice_param)) && (rice_param < 4))
2213                     {
2214                         rice_param++;
2215                     }
2216 
2217                     /* change base level to 1 if more than 8 coded coeffs */
2218                     if((j + 1) < (num_coeffs_remaining - num_coeffs_base1))
2219                     {
2220                         base_level = 2;
2221                     }
2222                     else
2223                     {
2224                         base_level = 1;
2225                     }
2226                 }
2227             }
2228         }
2229     }
2230     /*tap texture bits*/
2231     {
2232         ps_cabac->u4_texture_bits_estimated_q12 +=
2233             (ps_cabac->u4_bits_estimated_q12 - temp_tex_bits_q12);
2234     }
2235 
2236     return (ret);
2237 }
2238 
2239 /**
2240 ******************************************************************************
2241 *
2242 *  @brief Encodes a transform residual block as per section 7.3.13
2243 *
2244 *  @par   Description
2245 *  RDOQ optimization is carried out here. When sub-blk RDOQ is turned on, we calculate
2246 *  the distortion(D) and bits(R) for when the sub blk is coded and when not coded. We
2247 *  then use the D+lambdaR metric to decide whether the sub-blk should be coded or not, and
2248 *  appropriately signal it. When coeff RDOQ is turned on, we traverse through the TU to
2249 *  find all non-zero coeffs. If the non-zero coeff is a 1, then we make a decision (based on the
2250 *  D+lambdaR metric) as to whether to code it as a 0 or 1. In case the coeff is > 1 (say L where L>1) we choose between
2251 *  L and L+1
2252 *
2253 *  @remarks Does not support sign data hiding and transform skip flag currently
2254 *
2255 *  @remarks Need to resolve the differences between JVT-J1003_d7 spec and
2256 *           HM.8.0-dev for related abs_greater_than_1 context initialization
2257 *           and rice_max parameter used for coeff abs level remaining
2258 *
2259 *  @param[inout]   ps_entropy_ctxt
2260 *  pointer to entropy context (handle)
2261 *
2262 *  @param[in]      pv_coeff
2263 *  Compressed residue buffer containing following information:
2264 *
2265 *
2266 *  HEADER(4 bytes) : last_coeff_x, last_coeff_y, scantype, last_subblock_num
2267 *
2268 *  For each 4x4 subblock starting from last_subblock_num (in scan order)
2269 *     Read 2 bytes  : MSB 12bits (0xBAD marker), bit0 cur_csbf, bit1-2 nbr csbf
2270 *
2271 *     If cur_csbf
2272 *      Read 2 bytes : sig_coeff_map (16bits in scan_order 1:coded, 0:not coded)
2273 *      Read 2 bytes : abs_gt1_flags (max of 8 only)
2274 *      Read 2 bytes : coeff_sign_flags
2275 *
2276 *      Based on abs_gt1_flags and sig_coeff_map read remaining abs levels
2277 *      Read 2 bytes : remaining_abs_coeffs_minus1 (this is in a loop)
2278 *
2279 *  @param[in]      log2_tr_size
2280 *  transform size of the current TU
2281 *
2282 *  @param[in]      is_luma
2283 *  boolean indicating if the texture type is luma / chroma
2284 *
2285 *  @param[out]    pi8_tu_coded_dist
2286 *  The distortion when the TU is coded(not all coeffs are set to 0) is stored here
2287 *
2288 *  @param[out]    pi8_tu_not_coded_dist
2289 *  The distortion when the entire TU is not coded(all coeffs are set to 0) is stored here
2290 *
2291 *
2292 *  @return      success or failure error code
2293 *
2294 ******************************************************************************
2295 */
2296 
ihevce_cabac_residue_encode_rdoq(entropy_context_t * ps_entropy_ctxt,void * pv_coeff,WORD32 log2_tr_size,WORD32 is_luma,void * pv_rdoq_ctxt,LWORD64 * pi8_tu_coded_dist,LWORD64 * pi8_tu_not_coded_dist,WORD32 perform_sbh)2297 WORD32 ihevce_cabac_residue_encode_rdoq(
2298     entropy_context_t *ps_entropy_ctxt,
2299     void *pv_coeff,
2300     WORD32 log2_tr_size,
2301     WORD32 is_luma,
2302     void *pv_rdoq_ctxt,
2303     LWORD64 *pi8_tu_coded_dist,
2304     LWORD64 *pi8_tu_not_coded_dist,
2305     WORD32 perform_sbh)
2306 {
2307     WORD32 *pi4_subBlock2csbfId_map;
2308 
2309     WORD32 ret = IHEVCE_SUCCESS;
2310 
2311     cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
2312     cab_ctxt_t s_sub_blk_not_coded_cabac_ctxt;
2313     backup_ctxt_t s_backup_ctxt;
2314     backup_ctxt_t s_backup_ctxt_sub_blk_not_coded;
2315 
2316     UWORD32 temp_tex_bits_q12;
2317 
2318     UWORD8 *pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
2319     UWORD16 *pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
2320 
2321     LWORD64 i8_sub_blk_not_coded_dist = 0, i8_sub_blk_coded_dist = 0;
2322     WORD32 i4_sub_blk_not_coded_bits = 0, i4_sub_blk_coded_bits = 0;
2323     LWORD64 i8_sub_blk_not_coded_metric, i8_sub_blk_coded_metric;
2324     LWORD64 i8_tu_not_coded_dist = 0, i8_tu_coded_dist = 0;
2325     WORD32 i4_tu_coded_bits = 0;
2326     WORD32 temp_zero_col = 0, temp_zero_row = 0;
2327 
2328     UWORD8 *pu1_last_sig_coeff_x;
2329     UWORD8 *pu1_last_sig_coeff_y;
2330     WORD32 scan_type;
2331     WORD32 last_csb;
2332 
2333     WORD32 cur_csbf = 0, nbr_csbf;
2334     // WORD32 i4_temp_bits;
2335 
2336     WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag    */
2337     WORD32 abs_gt1_base_ctxt; /* cabac context for abslevel > 1 flag */
2338 
2339     UWORD8 *pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
2340 
2341     rdoq_sbh_ctxt_t *ps_rdoq_ctxt = (rdoq_sbh_ctxt_t *)pv_rdoq_ctxt;
2342     WORD16 *pi2_coeffs = ps_rdoq_ctxt->pi2_quant_coeffs;
2343     WORD16 *pi2_tr_coeffs = ps_rdoq_ctxt->pi2_trans_values;
2344     WORD32 trans_size = ps_rdoq_ctxt->i4_trans_size;
2345     WORD32 i4_round_val = ps_rdoq_ctxt->i4_round_val_ssd_in_td;
2346     WORD32 i4_shift_val = ps_rdoq_ctxt->i4_shift_val_ssd_in_td;
2347     WORD32 scan_idx = ps_rdoq_ctxt->i4_scan_idx;
2348 
2349     UWORD8 *pu1_csb_table, *pu1_trans_table;
2350     WORD32 shift_value, mask_value;
2351 
2352     WORD32 gt1_ctxt = 1; /* required for abs_gt1_ctxt modelling */
2353     WORD32 temp_gt1_ctxt = gt1_ctxt;
2354 
2355     WORD32 i;
2356 #if DISABLE_ZCSBF
2357     WORD32 i4_skip_zero_cbf = 0;
2358     WORD32 i4_skip_zero_csbf = 0;
2359     WORD32 i4_num_abs_1_coeffs = 0;
2360 #endif
2361     (void)perform_sbh;
2362     pi4_subBlock2csbfId_map = ps_rdoq_ctxt->pi4_subBlock2csbfId_map;
2363 
2364     /* scan order inside a csb */
2365     pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
2366     /*Initializing the backup_ctxt structures*/
2367     s_backup_ctxt.i4_num_bits = 0;
2368     s_backup_ctxt_sub_blk_not_coded.i4_num_bits = 0;
2369 
2370     memset(&s_backup_ctxt.au1_ctxt_to_backup, 0, MAX_NUM_CONTEXT_ELEMENTS);
2371     memset(&s_backup_ctxt_sub_blk_not_coded.au1_ctxt_to_backup, 0, MAX_NUM_CONTEXT_ELEMENTS);
2372 
2373     pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
2374     pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
2375 
2376     /* last sig coeff indices in scan order */
2377     pu1_last_sig_coeff_x = &pu1_coeff_buf_hdr[0];
2378     pu1_last_sig_coeff_y = &pu1_coeff_buf_hdr[1];
2379 
2380     /* read the scan type : upright diag / horz / vert */
2381     scan_type = pu1_coeff_buf_hdr[2];
2382 
2383     /************************************************************************/
2384     /* position of the last coded sub block. This sub block contains coeff  */
2385     /* corresponding to last_sig_coeff_x, last_sig_coeff_y. Althoug this can*/
2386     /* be derived here it better to be populated by scanning module         */
2387     /************************************************************************/
2388     last_csb = pu1_coeff_buf_hdr[3];
2389 
2390     shift_value = ps_rdoq_ctxt->i4_log2_trans_size + 1;
2391     /* for finding. row no. from scan index */
2392     shift_value = shift_value - 3;
2393     /*for finding the col. no. from scan index*/
2394     mask_value = (ps_rdoq_ctxt->i4_trans_size / 4) - 1;
2395 
2396     switch(ps_rdoq_ctxt->i4_trans_size)
2397     {
2398     case 32:
2399         pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
2400         break;
2401     case 16:
2402         pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
2403         break;
2404     case 8:
2405         pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
2406         break;
2407     case 4:
2408         pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
2409         break;
2410     default:
2411         DBG_PRINTF("Invalid Trans Size\n");
2412         return -1;
2413         break;
2414     }
2415 
2416     /* sanity checks */
2417     /* transform skip not supported */
2418     ASSERT(0 == ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag);
2419     {
2420         temp_tex_bits_q12 = ps_cabac->u4_bits_estimated_q12;
2421     }
2422     /*************************************************************************/
2423     /* derive base context index for sig coeff as per section 9.3.3.1.4      */
2424     /* TODO; convert to look up based on luma/chroma, scan type and tfr size */
2425     /*************************************************************************/
2426     if(is_luma)
2427     {
2428         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
2429         abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
2430 
2431         if(3 == log2_tr_size)
2432         {
2433             /* 8x8 transform size */
2434             sig_coeff_base_ctxt += (scan_type == SCAN_DIAG_UPRIGHT) ? 9 : 15;
2435         }
2436         else if(3 < log2_tr_size)
2437         {
2438             /* larger transform sizes */
2439             sig_coeff_base_ctxt += 21;
2440         }
2441     }
2442     else
2443     {
2444         /* chroma context initializations */
2445         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
2446         abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
2447 
2448         if(3 == log2_tr_size)
2449         {
2450             /* 8x8 transform size */
2451             sig_coeff_base_ctxt += 9;
2452         }
2453         else if(3 < log2_tr_size)
2454         {
2455             /* larger transform sizes */
2456             sig_coeff_base_ctxt += 12;
2457         }
2458     }
2459 
2460     /* go to csbf flags */
2461     pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + COEFF_BUF_HEADER_LEN);
2462 
2463     /*Calculating the distortion produced by all the zero coeffs in the TU*/
2464     for(i = (trans_size * trans_size) - 1; i >= 0; i--)
2465     {
2466         WORD32 i4_dist;
2467         WORD16 *pi2_orig_coeff = ps_rdoq_ctxt->pi2_trans_values;
2468 
2469         if(pi2_coeffs[i] == 0)
2470         {
2471             i4_dist = CALC_SSD_IN_TRANS_DOMAIN(pi2_orig_coeff[i], 0, 0, 0);
2472             i8_tu_not_coded_dist += i4_dist;
2473             i8_tu_coded_dist += i4_dist;
2474         }
2475     }
2476 
2477     /*Backup of the various cabac ctxts*/
2478     memcpy(&s_sub_blk_not_coded_cabac_ctxt, ps_cabac, sizeof(cab_ctxt_t));
2479     /************************************************************************/
2480     /* encode the csbf, sig_coeff_map, abs_grt1_flags, abs_grt2_flag, sign  */
2481     /* and abs_coeff_remaining for each 4x4 starting from last scan to first*/
2482     /************************************************************************/
2483 
2484     for(i = last_csb; i >= 0; i--)
2485     {
2486         UWORD16 u2_marker_csbf;
2487         WORD32 ctxt_idx;
2488         WORD32 i4_sub_blk_is_coded = 0;
2489         WORD32 blk_row, blk_col;
2490         WORD32 scaled_blk_row;
2491         WORD32 scaled_blk_col;
2492         WORD32 infer_coeff;
2493 
2494         gt1_ctxt = temp_gt1_ctxt;
2495 #if DISABLE_ZCSBF
2496         /*Initialize skip zero cbf flag to 0*/
2497         i4_skip_zero_csbf = 0;
2498         i4_num_abs_1_coeffs = 0;
2499 #endif
2500 
2501 #if OPT_MEMCPY
2502         ihevce_copy_backup_ctxt(
2503             (void *)&s_sub_blk_not_coded_cabac_ctxt,
2504             (void *)ps_cabac,
2505             (void *)&s_backup_ctxt_sub_blk_not_coded,
2506             (void *)&s_backup_ctxt);
2507         memset(s_backup_ctxt_sub_blk_not_coded.au1_ctxt_to_backup, 0, 5);
2508         memset(s_backup_ctxt.au1_ctxt_to_backup, 0, 5);
2509 #else
2510         memcpy(&s_sub_blk_not_coded_cabac_ctxt, ps_cabac, sizeof(cab_ctxt_t));
2511 #endif
2512         // i4_temp_bits = s_sub_blk_not_coded_cabac_ctxt.u4_bits_estimated_q12;
2513 
2514         blk_row = pu1_trans_table[i] >> shift_value; /*row of csb*/
2515         blk_col = pu1_trans_table[i] & mask_value; /*col of csb*/
2516 
2517         scaled_blk_row = blk_row << 2;
2518         scaled_blk_col = blk_col << 2;
2519 
2520         infer_coeff = (i < last_csb) && (i > 0);
2521         u2_marker_csbf = *pu2_sig_coeff_buf;
2522 
2523         if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
2524         {
2525             if(!ps_rdoq_ctxt
2526                     ->pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
2527             {
2528                 /* clear the 2nd bit if the right csb is 0 */
2529                 u2_marker_csbf = u2_marker_csbf & (~(1 << 1));
2530             }
2531         }
2532         if((blk_row + 1 < trans_size / 4)) /* checking bottom boundary */
2533         {
2534             if(!ps_rdoq_ctxt
2535                     ->pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
2536             {
2537                 /* clear the 3rd bit if the bottom csb is 0*/
2538                 u2_marker_csbf = u2_marker_csbf & (~(1 << 2));
2539             }
2540         }
2541         pu2_sig_coeff_buf++;
2542 
2543         /* sanity checks for marker present in every csbf flag */
2544         ASSERT((u2_marker_csbf >> 4) == 0xBAD);
2545 
2546         /* extract the current and neigbour csbf flags */
2547         cur_csbf = u2_marker_csbf & 0x1;
2548         nbr_csbf = (u2_marker_csbf >> 1) & 0x3;
2549 
2550         if((i < last_csb) && (i > 0))
2551         {
2552             ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
2553 
2554             /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
2555             ctxt_idx += nbr_csbf ? 1 : 0;
2556             ctxt_idx += is_luma ? 0 : 2;
2557 
2558             ret |= ihevce_cabac_encode_bin(ps_cabac, cur_csbf, ctxt_idx);
2559 
2560             s_backup_ctxt.au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 1;
2561 
2562             if(cur_csbf)
2563             {
2564                 ret |= ihevce_cabac_encode_bin(&s_sub_blk_not_coded_cabac_ctxt, 0, ctxt_idx);
2565                 // clang-format off
2566                 i4_sub_blk_not_coded_bits =
2567                     s_sub_blk_not_coded_cabac_ctxt.u4_bits_estimated_q12;  // - i4_temp_bits;
2568                 s_backup_ctxt_sub_blk_not_coded.au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 1;
2569                 // clang-format on
2570             }
2571         }
2572         else
2573         {
2574             /* sanity check, this csb contains the last_sig_coeff */
2575             if(i == last_csb)
2576             {
2577                 ASSERT(cur_csbf == 1);
2578             }
2579         }
2580         /*If any block in the TU is coded and the 0th block is not coded, the 0th
2581           block is still signalled as csbf = 1, and with all sig_coeffs sent as
2582           0(HEVC requirement)*/
2583         if((ps_rdoq_ctxt->i1_tu_is_coded == 1) && (i == 0))
2584         {
2585             i4_sub_blk_not_coded_bits = ihevce_code_all_sig_coeffs_as_0_explicitly(
2586                 (void *)ps_rdoq_ctxt,
2587                 i,
2588                 pu1_trans_table,
2589                 is_luma,
2590                 scan_type,
2591                 infer_coeff,
2592                 nbr_csbf,
2593                 &s_sub_blk_not_coded_cabac_ctxt);
2594         }
2595 
2596         if(i == last_csb)
2597         {
2598             WORD32 i4_last_x = *pu1_last_sig_coeff_x;
2599             WORD32 i4_last_y = *pu1_last_sig_coeff_y;
2600             if(SCAN_VERT == scan_type)
2601             {
2602                 /* last coeff x and y are swapped for vertical scan */
2603                 SWAP(i4_last_x, i4_last_y);
2604             }
2605             /* Encode the last_sig_coeff_x and last_sig_coeff_y */
2606             ret |= ihevce_cabac_encode_last_coeff_x_y(
2607                 ps_cabac, i4_last_x, i4_last_y, log2_tr_size, is_luma);
2608             s_backup_ctxt.au1_ctxt_to_backup[LASTXY] = 1;
2609         }
2610 
2611         if(cur_csbf)
2612         {
2613             /*****************************************************************/
2614             /* encode the sig coeff map as per section 7.3.13                */
2615             /* significant_coeff_flags: msb=coeff15-lsb=coeff0 in scan order */
2616             /*****************************************************************/
2617 
2618             WORD32 i4_bit_depth;
2619             WORD32 i4_shift_iq;
2620             WORD32 i4_dequant_val;
2621             WORD32 bit; /* temp boolean */
2622 
2623             UWORD16 u2_gt0_flags = *pu2_sig_coeff_buf;
2624             WORD32 sig_coeff_map = u2_gt0_flags;
2625             WORD32 gt1_flags = *(pu2_sig_coeff_buf + 1);
2626             WORD32 sign_flags = *(pu2_sig_coeff_buf + 2);
2627 
2628             WORD32 gt1_bins = 0; /* bins for coeffs with abslevel > 1 */
2629 
2630             WORD16 *pi2_dequant_coeff = ps_rdoq_ctxt->pi2_dequant_coeff;
2631             WORD16 i2_qp_rem = ps_rdoq_ctxt->i2_qp_rem;
2632             WORD32 i4_qp_div = ps_rdoq_ctxt->i4_qp_div;
2633 
2634             WORD32 sign_bins = 0; /* bins for sign flags of coded coeffs  */
2635             WORD32 num_coded = 0; /* total coeffs coded in 4x4            */
2636 
2637             /* total count of coeffs to be coded as abs level remaining */
2638             WORD32 num_coeffs_remaining = 0;
2639 
2640             /* count of coeffs to be coded as  abslevel-1 */
2641             WORD32 num_coeffs_base1 = 0;
2642             WORD32 scan_pos;
2643             WORD32 first_gt1_coeff = 0;
2644 
2645             i4_bit_depth = ps_entropy_ctxt->ps_sps->i1_bit_depth_luma_minus8 + 8;
2646             i4_shift_iq = i4_bit_depth + ps_rdoq_ctxt->i4_log2_trans_size - 5;
2647 
2648             i4_sub_blk_is_coded = 1;
2649 
2650             if((i != 0) || (0 == last_csb))
2651             {
2652                 /* sanity check, atleast one coeff is coded as csbf is set */
2653                 ASSERT(sig_coeff_map != 0);
2654             }
2655             /*Calculating the distortions produced*/
2656             {
2657                 WORD32 k, j;
2658                 WORD16 *pi2_temp_coeff =
2659                     &pi2_coeffs[scaled_blk_col + (scaled_blk_row * trans_size)];
2660                 WORD16 *pi2_temp_tr_coeff =
2661                     &pi2_tr_coeffs[scaled_blk_col + (scaled_blk_row * trans_size)];
2662                 WORD16 *pi2_temp_dequant_coeff =
2663                     &pi2_dequant_coeff[scaled_blk_col + (scaled_blk_row * trans_size)];
2664 
2665                 for(k = 0; k < 4; k++)
2666                 {
2667                     for(j = 0; j < 4; j++)
2668                     {
2669                         if(*pi2_temp_coeff)
2670                         {
2671                             /*Inverse quantizing for distortion calculation*/
2672                             if(ps_rdoq_ctxt->i4_trans_size != 4)
2673                             {
2674                                 IQUANT(
2675                                     i4_dequant_val,
2676                                     *pi2_temp_coeff,
2677                                     *pi2_temp_dequant_coeff * g_ihevc_iquant_scales[i2_qp_rem],
2678                                     i4_shift_iq,
2679                                     i4_qp_div);
2680                             }
2681                             else
2682                             {
2683                                 IQUANT_4x4(
2684                                     i4_dequant_val,
2685                                     *pi2_temp_coeff,
2686                                     *pi2_temp_dequant_coeff * g_ihevc_iquant_scales[i2_qp_rem],
2687                                     i4_shift_iq,
2688                                     i4_qp_div);
2689                             }
2690 
2691                             i8_sub_blk_coded_dist +=
2692                                 CALC_SSD_IN_TRANS_DOMAIN(*pi2_temp_tr_coeff, i4_dequant_val, 0, 0);
2693 
2694                             i8_sub_blk_not_coded_dist +=
2695                                 CALC_SSD_IN_TRANS_DOMAIN(*pi2_temp_tr_coeff, 0, 0, 0);
2696                         }
2697 #if DISABLE_ZCSBF
2698                         if(abs(*pi2_temp_coeff) > 1)
2699                         {
2700                             i4_skip_zero_csbf = 1;
2701                         }
2702                         else if(abs(*pi2_temp_coeff) == 1)
2703                         {
2704                             i4_num_abs_1_coeffs++;
2705                         }
2706 #endif
2707                         pi2_temp_coeff++;
2708                         pi2_temp_tr_coeff++;
2709                         pi2_temp_dequant_coeff++;
2710                     }
2711                     pi2_temp_tr_coeff += ps_rdoq_ctxt->i4_trans_size - 4;
2712                     pi2_temp_coeff += ps_rdoq_ctxt->i4_q_data_strd - 4;
2713                     pi2_dequant_coeff += ps_rdoq_ctxt->i4_trans_size - 4;
2714                 }
2715             }
2716 
2717 #if DISABLE_ZCSBF
2718             i4_skip_zero_csbf = i4_skip_zero_csbf || (i4_num_abs_1_coeffs > 3);
2719 #endif
2720             pu2_sig_coeff_buf += 3;
2721 
2722             scan_pos = 15;
2723             if(i == last_csb)
2724             {
2725                 /*************************************************************/
2726                 /* clear last_scan_pos for last block in scan order as this  */
2727                 /* is communicated  throught last_coeff_x and last_coeff_y   */
2728                 /*************************************************************/
2729                 WORD32 next_sig = CLZ(sig_coeff_map) + 1;
2730 
2731                 scan_pos = WORD_SIZE - next_sig;
2732 
2733                 /* prepare the bins for gt1 flags */
2734                 EXTRACT_BIT(bit, gt1_flags, scan_pos);
2735 
2736                 /* insert gt1 bin in lsb */
2737                 gt1_bins |= bit;
2738 
2739                 /* prepare the bins for sign flags */
2740                 EXTRACT_BIT(bit, sign_flags, scan_pos);
2741 
2742                 /* insert sign bin in lsb */
2743                 sign_bins |= bit;
2744 
2745                 sig_coeff_map = CLEAR_BIT(sig_coeff_map, scan_pos);
2746 
2747                 scan_pos--;
2748                 num_coded++;
2749             }
2750 
2751             /* encode the required sigcoeff flags (abslevel > 0)   */
2752             while(scan_pos >= 0)
2753             {
2754                 WORD32 y_pos_x_pos;
2755                 WORD32 sig_ctxinc = 0; /* 0 is default inc for DC coeff */
2756 
2757                 WORD32 sig_coeff;
2758 
2759                 EXTRACT_BIT(sig_coeff, sig_coeff_map, scan_pos);
2760 
2761                 /* derive the x,y pos */
2762                 y_pos_x_pos = gu1_hevce_scan4x4[scan_type][scan_pos];
2763 
2764                 /* derive the context inc as per section 9.3.3.1.4 */
2765                 if(2 == log2_tr_size)
2766                 {
2767                     /* 4x4 transform size increment uses lookup */
2768                     sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
2769                 }
2770                 else if(scan_pos || i)
2771                 {
2772                     /* ctxt for AC coeff depends on curpos and neigbour csbf */
2773                     sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][y_pos_x_pos];
2774 
2775                     /* based on luma subblock pos */
2776                     sig_ctxinc += (i && is_luma) ? 3 : 0;
2777                 }
2778                 else
2779                 {
2780                     /* DC coeff has fixed context for luma and chroma */
2781                     sig_coeff_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG
2782                                                   : IHEVC_CAB_COEFF_FLAG + 27;
2783                 }
2784 
2785                 /*************************************************************/
2786                 /* encode sig coeff only if required                         */
2787                 /* decoder infers 0,0 coeff when all the other coeffs are 0  */
2788                 /*************************************************************/
2789                 if(scan_pos || (!infer_coeff))
2790                 {
2791                     ctxt_idx = sig_ctxinc + sig_coeff_base_ctxt;
2792                     //ret |= ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_idx);
2793                     {
2794                         WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
2795 
2796                         /* increment bits generated based on state and bin encoded */
2797                         ps_cabac->u4_bits_estimated_q12 +=
2798                             gau2_ihevce_cabac_bin_to_bits[state_mps ^ sig_coeff];
2799 
2800                         /* update the context model from state transition LUT */
2801                         pu1_ctxt_model[ctxt_idx] =
2802                             gau1_ihevc_next_state[(state_mps << 1) | sig_coeff];
2803                     }
2804                 }
2805 
2806                 if(sig_coeff)
2807                 {
2808                     /* prepare the bins for gt1 flags */
2809                     EXTRACT_BIT(bit, gt1_flags, scan_pos);
2810 
2811                     /* shift and insert gt1 bin in lsb */
2812                     gt1_bins <<= 1;
2813                     gt1_bins |= bit;
2814 
2815                     /* prepare the bins for sign flags */
2816                     EXTRACT_BIT(bit, sign_flags, scan_pos);
2817 
2818                     /* shift and insert sign bin in lsb */
2819                     sign_bins <<= 1;
2820                     sign_bins |= bit;
2821 
2822                     num_coded++;
2823 
2824                     /* 0,0 coeff can no more be inferred :( */
2825                     infer_coeff = 0;
2826                 }
2827 
2828                 scan_pos--;
2829             }
2830 
2831             s_backup_ctxt.au1_ctxt_to_backup[SIG_COEFF] = 1;
2832 
2833             /****************************************************************/
2834             /* encode the abs level greater than 1 bins; Section 7.3.13     */
2835             /* These have already been prepared during sig_coeff_map encode */
2836             /* Context modelling done as per section 9.3.3.1.5              */
2837             /****************************************************************/
2838             {
2839                 WORD32 j;
2840 
2841                 /* context set based on luma subblock pos */
2842                 WORD32 ctxt_set = (i && is_luma) ? 2 : 0;
2843 
2844                 /* count of coeffs with abslevel > 1; max of 8 to be coded */
2845                 WORD32 num_gt1_bins = MIN(8, num_coded);
2846 
2847                 if(num_coded > 8)
2848                 {
2849                     /* pull back the bins to required number */
2850                     gt1_bins >>= (num_coded - 8);
2851 
2852                     num_coeffs_remaining += (num_coded - 8);
2853                     num_coeffs_base1 = (num_coded - 8);
2854                 }
2855 
2856                 /* See section 9.3.3.1.5           */
2857                 ctxt_set += (0 == gt1_ctxt) ? 1 : 0;
2858 
2859                 gt1_ctxt = 1;
2860 
2861                 for(j = num_gt1_bins - 1; j >= 0; j--)
2862                 {
2863                     /* Encodet the abs level gt1 bins */
2864                     ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
2865 
2866                     EXTRACT_BIT(bit, gt1_bins, j);
2867 
2868                     //ret |= ihevce_cabac_encode_bin(ps_cabac, bit, ctxt_idx);
2869                     {
2870                         WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
2871 
2872                         /* increment bits generated based on state and bin encoded */
2873                         ps_cabac->u4_bits_estimated_q12 +=
2874                             gau2_ihevce_cabac_bin_to_bits[state_mps ^ bit];
2875 
2876                         /* update the context model from state transition LUT */
2877                         pu1_ctxt_model[ctxt_idx] = gau1_ihevc_next_state[(state_mps << 1) | bit];
2878                     }
2879 
2880                     if(bit)
2881                     {
2882                         gt1_ctxt = 0;
2883                         num_coeffs_remaining++;
2884                     }
2885                     else if(gt1_ctxt && (gt1_ctxt < 3))
2886                     {
2887                         gt1_ctxt++;
2888                     }
2889                 }
2890                 s_backup_ctxt.au1_ctxt_to_backup[GRTR_THAN_1] = 1;
2891                 /*************************************************************/
2892                 /* encode abs level greater than 2 bin; Section 7.3.13       */
2893                 /*************************************************************/
2894                 if(gt1_bins)
2895                 {
2896                     WORD32 gt2_bin;
2897 
2898                     first_gt1_coeff = pu2_sig_coeff_buf[0] + 1;
2899                     gt2_bin = (first_gt1_coeff > 2);
2900 
2901                     /* atleast one level > 2 */
2902                     ctxt_idx = IHEVC_CAB_COEFABS_GRTR2_FLAG;
2903 
2904                     ctxt_idx += (is_luma) ? ctxt_set : (ctxt_set + 4);
2905 
2906                     //ret |= ihevce_cabac_encode_bin(ps_cabac, gt2_bin, ctxt_idx);
2907                     {
2908                         WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
2909 
2910                         /* increment bits generated based on state and bin encoded */
2911                         ps_cabac->u4_bits_estimated_q12 +=
2912                             gau2_ihevce_cabac_bin_to_bits[state_mps ^ gt2_bin];
2913 
2914                         /* update the context model from state transition LUT */
2915                         pu1_ctxt_model[ctxt_idx] =
2916                             gau1_ihevc_next_state[(state_mps << 1) | gt2_bin];
2917                     }
2918 
2919                     if(!gt2_bin)
2920                     {
2921                         /* sanity check */
2922                         ASSERT(first_gt1_coeff == 2);
2923 
2924                         /* no need to send this coeff as bypass bins */
2925                         pu2_sig_coeff_buf++;
2926                         num_coeffs_remaining--;
2927                     }
2928                     s_backup_ctxt.au1_ctxt_to_backup[GRTR_THAN_2] = 1;
2929                 }
2930             }
2931 
2932             /*************************************************************/
2933             /* encode the coeff signs and abs remaing levels             */
2934             /*************************************************************/
2935             if(num_coded)
2936             {
2937                 WORD32 base_level;
2938                 WORD32 rice_param = 0;
2939                 WORD32 j;
2940 
2941                 /*************************************************************/
2942                 /* encode the coeff signs populated in sign_bins             */
2943                 /*************************************************************/
2944                 if(num_coded > 0)
2945                 {
2946                     ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, sign_bins, num_coded);
2947                 }
2948                 /*************************************************************/
2949                 /* encode the coeff_abs_level_remaining as TR / EGK bins     */
2950                 /* See section 9.3.2.7 for details                           */
2951                 /*************************************************************/
2952 
2953                 /* first remaining coeff baselevel */
2954                 if(first_gt1_coeff > 2)
2955                 {
2956                     base_level = 3;
2957                 }
2958                 else if(num_coeffs_remaining > num_coeffs_base1)
2959                 {
2960                     /* atleast one coeff in first 8 is gt > 1 */
2961                     base_level = 2;
2962                 }
2963                 else
2964                 {
2965                     /* all coeffs have base of 1 */
2966                     base_level = 1;
2967                 }
2968 
2969                 for(j = 0; j < num_coeffs_remaining; j++)
2970                 {
2971                     WORD32 abs_coeff = pu2_sig_coeff_buf[0] + 1;
2972                     WORD32 abs_coeff_rem;
2973                     WORD32 rice_max = (4 << rice_param);
2974 
2975                     pu2_sig_coeff_buf++;
2976 
2977                     /* sanity check */
2978                     ASSERT(abs_coeff >= base_level);
2979 
2980                     abs_coeff_rem = (abs_coeff - base_level);
2981 
2982                     /* TODO://HM-8.0-dev uses (3 << rice_param) as rice_max */
2983                     /* TODO://HM-8.0-dev does either TR or EGK but not both */
2984                     if(abs_coeff_rem >= rice_max)
2985                     {
2986                         UWORD32 u4_suffix = (abs_coeff_rem - rice_max);
2987 
2988                         /* coeff exceeds max rice limit                    */
2989                         /* encode the TR prefix as tunary code             */
2990                         /* prefix = 1111 as (rice_max >> rice_praram) = 4  */
2991                         ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, 0xF, 4);
2992 
2993                         /* encode the exponential golomb code suffix */
2994                         ret |= ihevce_cabac_encode_egk(ps_cabac, u4_suffix, (rice_param + 1));
2995                     }
2996                     else
2997                     {
2998                         /* code coeff as truncated rice code  */
2999                         ret |= ihevce_cabac_encode_trunc_rice(
3000                             ps_cabac, abs_coeff_rem, rice_param, rice_max);
3001                     }
3002 
3003                     /* update the rice param based on coeff level */
3004                     if((abs_coeff > (3 << rice_param)) && (rice_param < 4))
3005                     {
3006                         rice_param++;
3007                     }
3008 
3009                     /* change base level to 1 if more than 8 coded coeffs */
3010                     if((j + 1) < (num_coeffs_remaining - num_coeffs_base1))
3011                     {
3012                         base_level = 2;
3013                     }
3014                     else
3015                     {
3016                         base_level = 1;
3017                     }
3018                 }
3019             }
3020 
3021             i4_sub_blk_coded_bits = ps_cabac->u4_bits_estimated_q12;
3022             /**********************************************************/
3023             /**********************************************************/
3024             /**********************************************************/
3025             /*Decide whether sub block should be coded or not*/
3026             /**********************************************************/
3027             /**********************************************************/
3028             /**********************************************************/
3029             i8_sub_blk_coded_metric = CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(
3030                                           i8_sub_blk_coded_dist, 0, i4_round_val, i4_shift_val) +
3031                                       COMPUTE_RATE_COST_CLIP30_RDOQ(
3032                                           i4_sub_blk_coded_bits,
3033                                           ps_rdoq_ctxt->i8_cl_ssd_lambda_qf,
3034                                           (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
3035             i8_sub_blk_not_coded_metric =
3036                 CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(
3037                     i8_sub_blk_not_coded_dist, 0, i4_round_val, i4_shift_val) +
3038                 COMPUTE_RATE_COST_CLIP30_RDOQ(
3039                     i4_sub_blk_not_coded_bits,
3040                     ps_rdoq_ctxt->i8_cl_ssd_lambda_qf,
3041                     (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
3042 
3043 #if DISABLE_ZCSBF
3044             if(((i8_sub_blk_not_coded_metric < i8_sub_blk_coded_metric) ||
3045                 (i4_sub_blk_is_coded == 0)) &&
3046                (i4_skip_zero_csbf == 0))
3047 #else
3048             if((i8_sub_blk_not_coded_metric < i8_sub_blk_coded_metric) ||
3049                (i4_sub_blk_is_coded == 0))
3050 #endif
3051             {
3052 #if OPT_MEMCPY
3053                 ihevce_copy_backup_ctxt(
3054                     (void *)ps_cabac,
3055                     (void *)&s_sub_blk_not_coded_cabac_ctxt,
3056                     (void *)&s_backup_ctxt,
3057                     (void *)&s_backup_ctxt_sub_blk_not_coded);
3058 #else
3059                 memcpy(ps_cabac, &s_sub_blk_not_coded_cabac_ctxt, sizeof(cab_ctxt_t));
3060 #endif
3061                 scan_pos = 15;
3062                 i4_sub_blk_is_coded = 0;
3063 
3064                 {
3065                     WORD32 k, j;
3066                     WORD16 *pi2_temp_coeff =
3067                         &pi2_coeffs[scaled_blk_col + (scaled_blk_row * ps_rdoq_ctxt->i4_q_data_strd)];
3068                     WORD16 *pi2_temp_iquant_coeff =
3069                         &ps_rdoq_ctxt->pi2_iquant_coeffs
3070                              [scaled_blk_col + (scaled_blk_row * ps_rdoq_ctxt->i4_iq_data_strd)];
3071                     for(k = 0; k < 4; k++)
3072                     {
3073                         for(j = 0; j < 4; j++)
3074                         {
3075                             *pi2_temp_coeff = 0;
3076                             *pi2_temp_iquant_coeff = 0;
3077 
3078                             pi2_temp_coeff++;
3079                             pi2_temp_iquant_coeff++;
3080                         }
3081                         pi2_temp_coeff += ps_rdoq_ctxt->i4_q_data_strd - 4;
3082                         pi2_temp_iquant_coeff += ps_rdoq_ctxt->i4_iq_data_strd - 4;
3083                     }
3084                 }
3085 
3086                 /* If the csb to be masked is the last csb, then we should
3087                  * signal last x and last y from the next coded sub_blk */
3088                 if(i == last_csb)
3089                 {
3090                     pu1_coeff_buf_hdr = (UWORD8 *)pu2_sig_coeff_buf;
3091 
3092                     ps_rdoq_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[i]]] = 0;
3093                     last_csb = ihevce_find_new_last_csb(
3094                         pi4_subBlock2csbfId_map,
3095                         i,
3096                         (void *)ps_rdoq_ctxt,
3097                         pu1_trans_table,
3098                         pu1_csb_table,
3099                         pi2_coeffs,
3100                         shift_value,
3101                         mask_value,
3102                         &pu1_coeff_buf_hdr);
3103                     /*We are in a for loop. This means that the decrement to i happens immediately right
3104                       at the end of the for loop. This would decrement the value of i to (last_csb - 1).
3105                       Hence we increment i by 1, so that after the decrement i becomes last_csb.*/
3106                     i = last_csb + 1;
3107                     pu1_last_sig_coeff_x = &pu1_coeff_buf_hdr[0];
3108                     pu1_last_sig_coeff_y = &pu1_coeff_buf_hdr[1];
3109                     scan_type = pu1_coeff_buf_hdr[2];
3110                     pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + 4);
3111                 }
3112                 i8_tu_coded_dist += i8_sub_blk_not_coded_dist;
3113                 i4_tu_coded_bits += i4_sub_blk_not_coded_bits;
3114             }
3115             else
3116             {
3117                 ps_rdoq_ctxt->i1_tu_is_coded = 1;
3118                 temp_gt1_ctxt = gt1_ctxt;
3119 
3120                 i8_tu_coded_dist += i8_sub_blk_coded_dist;
3121                 i4_tu_coded_bits += i4_sub_blk_coded_bits;
3122             }
3123 #if DISABLE_ZCSBF
3124             i4_skip_zero_cbf = i4_skip_zero_cbf || i4_skip_zero_csbf;
3125 #endif
3126             /*Cumulating the distortion for the entire TU*/
3127             i8_tu_not_coded_dist += i8_sub_blk_not_coded_dist;
3128             //i4_tu_coded_dist                += i4_sub_blk_coded_dist;
3129             //i4_tu_coded_bits                += i4_sub_blk_coded_bits;
3130             i8_sub_blk_not_coded_dist = 0;
3131             i4_sub_blk_not_coded_bits = 0;
3132             i8_sub_blk_coded_dist = 0;
3133             i4_sub_blk_coded_bits = 0;
3134 
3135             if(i4_sub_blk_is_coded)
3136             {
3137                 ps_rdoq_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[i]]] = 1;
3138                 temp_zero_col = (temp_zero_col) | (0xF << scaled_blk_col);
3139                 temp_zero_row = (temp_zero_row) | (0xF << scaled_blk_row);
3140             }
3141             else
3142             {
3143                 if(!((ps_rdoq_ctxt->i1_tu_is_coded == 1) && (i == 0)))
3144                 {
3145                     ps_rdoq_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[i]]] = 0;
3146                 }
3147             }
3148         }
3149     }
3150 
3151     /*tap texture bits*/
3152     {
3153         ps_cabac->u4_texture_bits_estimated_q12 +=
3154             (ps_cabac->u4_bits_estimated_q12 - temp_tex_bits_q12);
3155     }
3156 
3157     i8_tu_not_coded_dist =
3158         CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(i8_tu_not_coded_dist, 0, i4_round_val, i4_shift_val);
3159 
3160     /* i4_tu_coded_dist = CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(
3161         i4_tu_coded_dist, 0, i4_round_val, i4_shift_val); */
3162     *pi8_tu_coded_dist = i8_tu_coded_dist;
3163     *pi8_tu_not_coded_dist = i8_tu_not_coded_dist;
3164 #if DISABLE_ZCSBF
3165     if(i4_skip_zero_cbf == 1)
3166     {
3167         *pi8_tu_not_coded_dist = 0x7FFFFFFF;
3168     }
3169 #endif
3170 
3171     *ps_rdoq_ctxt->pi4_zero_col = ~temp_zero_col;
3172     *ps_rdoq_ctxt->pi4_zero_row = ~temp_zero_row;
3173 
3174     return (ret);
3175 }
3176 
3177 /**
3178 ******************************************************************************
3179 *
3180 *  @brief Codes all the sig coeffs as 0
3181 *
3182 *  @param[in]   i
3183 *  Index of the current csb
3184 *
3185 *  @param[in]   pu1_trans_table
3186 *  Pointer to the trans table
3187 *
3188 *  @param[in]  scan_type
3189 *  Determines the scan order
3190 *
3191 *  @param[in]  infer_coeff
3192 *  Indicates whether the 0,0 coeff can be inferred or not
3193 *
3194 *  @param[in]   nbr_csbf
3195 *  Talks about if the neighboour csbs(right and bottom) are coded or not
3196 *
3197 *  @param[in]    ps_cabac
3198 *  Cabac state
3199 *
3200 *  @param[out]    pi4_tu_not_coded_dist
3201 *  The distortion when the entire TU is not coded(all coeffs are set to 0) is stored here
3202 *
3203 *  @return    The number of bits generated when the 0th sub blk is coded as all 0s
3204 *             This is the cumulate bits(i.e. for all blocks in the TU), and not only
3205 *             the bits generated for this block
3206 *
3207 ******************************************************************************
3208 */
ihevce_code_all_sig_coeffs_as_0_explicitly(void * pv_rdoq_ctxt,WORD32 i,UWORD8 * pu1_trans_table,WORD32 is_luma,WORD32 scan_type,WORD32 infer_coeff,WORD32 nbr_csbf,cab_ctxt_t * ps_cabac)3209 WORD32 ihevce_code_all_sig_coeffs_as_0_explicitly(
3210     void *pv_rdoq_ctxt,
3211     WORD32 i,
3212     UWORD8 *pu1_trans_table,
3213     WORD32 is_luma,
3214     WORD32 scan_type,
3215     WORD32 infer_coeff,
3216     WORD32 nbr_csbf,
3217     cab_ctxt_t *ps_cabac)
3218 {
3219     WORD32 sig_coeff_base_ctxt;
3220     WORD32 scan_pos = 15;
3221     WORD32 ctxt_idx;
3222     WORD32 ret = 0;
3223 
3224     rdoq_sbh_ctxt_t *ps_rdoq_ctxt = (rdoq_sbh_ctxt_t *)pv_rdoq_ctxt;
3225 
3226     WORD32 log2_tr_size = ps_rdoq_ctxt->i4_log2_trans_size;
3227 
3228     (void)pu1_trans_table;
3229     if(is_luma)
3230     {
3231         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
3232         if(3 == log2_tr_size)
3233         {
3234             /* 8x8 transform size */
3235             sig_coeff_base_ctxt += (scan_type == SCAN_DIAG_UPRIGHT) ? 9 : 15;
3236         }
3237         else if(3 < log2_tr_size)
3238         {
3239             /* larger transform sizes */
3240             sig_coeff_base_ctxt += 21;
3241         }
3242     }
3243     else
3244     {
3245         /* chroma context initializations */
3246         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
3247 
3248         if(3 == log2_tr_size)
3249         {
3250             /* 8x8 transform size */
3251             sig_coeff_base_ctxt += 9;
3252         }
3253         else if(3 < log2_tr_size)
3254         {
3255             /* larger transform sizes */
3256             sig_coeff_base_ctxt += 12;
3257         }
3258     }
3259     while(scan_pos >= 0)
3260     {
3261         WORD32 sig_ctxinc = 0; /* 0 is default inc for DC coeff */
3262         WORD32 sig_coeff = 0;
3263         /* derive the x,y pos */
3264         WORD32 y_pos_x_pos = gu1_hevce_scan4x4[scan_type][scan_pos];
3265 
3266         /* derive the context inc as per section 9.3.3.1.4 */
3267         if(2 == log2_tr_size)
3268         {
3269             /* 4x4 transform size increment uses lookup */
3270             sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
3271         }
3272         else if(scan_pos || i)
3273         {
3274             /* ctxt for AC coeff depends on curpos and neigbour csbf */
3275             sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][y_pos_x_pos];
3276 
3277             /* based on luma subblock pos */
3278             sig_ctxinc += (i && is_luma) ? 3 : 0;
3279         }
3280         else
3281         {
3282             /* DC coeff has fixed context for luma and chroma */
3283             sig_coeff_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
3284         }
3285 
3286         if(scan_pos || (!infer_coeff))
3287         {
3288             ctxt_idx = sig_ctxinc + sig_coeff_base_ctxt;
3289             ret |= ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_idx);
3290             AEV_TRACE("significant_coeff_flag", sig_coeff, ps_cabac->u4_range);
3291         }
3292         scan_pos--;
3293     }
3294     return (ps_cabac->u4_bits_estimated_q12);  // - i4_temp_bits);
3295 }
3296 
3297 /**
3298 ******************************************************************************
3299 *
3300 *  @brief Finds the next csb with a non-zero coeff
3301 *
3302 *  @paramp[in]  cur_last_csb_pos
3303 *  The index of the current csb with a non-zero coeff
3304 *
3305 *  @param[inout]   pv_rdoq_ctxt
3306 *  RODQ context structure
3307 *
3308 *  @param[in]   pu1_trans_table
3309 *  Pointer to the trans table
3310 *
3311 *  @param[in]   pi2_coeffs
3312 *  Pointer to all the quantized coefficients
3313 *
3314 *  @param[in]  shift_value
3315 *  Determines the shifting value for determining appropriate position of coeff
3316 *
3317 *  @param[in]  mask_value
3318 *  Determines the masking value for determining appropriate position of coeff
3319 *
3320 *  @param[in]   nbr_csbf
3321 *  Talks about if the neighboour csbs(right and bottom) are coded or not
3322 *
3323 *  @param[in]    ps_cabac
3324 *  Cabac state
3325 *
3326 *  @param[inout] ppu1_addr
3327 *  Pointer to the header(i.e. pointer used for traversing the ecd data generated
3328 *  in ihevce_scan_coeffs)
3329 *
3330 *  @return    The index of the csb with the next non-zero coeff
3331 *
3332 ******************************************************************************
3333 */
ihevce_find_new_last_csb(WORD32 * pi4_subBlock2csbfId_map,WORD32 cur_last_csb_pos,void * pv_rdoq_ctxt,UWORD8 * pu1_trans_table,UWORD8 * pu1_csb_table,WORD16 * pi2_coeffs,WORD32 shift_value,WORD32 mask_value,UWORD8 ** ppu1_addr)3334 WORD32 ihevce_find_new_last_csb(
3335     WORD32 *pi4_subBlock2csbfId_map,
3336     WORD32 cur_last_csb_pos,
3337     void *pv_rdoq_ctxt,
3338     UWORD8 *pu1_trans_table,
3339     UWORD8 *pu1_csb_table,
3340     WORD16 *pi2_coeffs,
3341     WORD32 shift_value,
3342     WORD32 mask_value,
3343     UWORD8 **ppu1_addr)
3344 {
3345     WORD32 blk_row;
3346     WORD32 blk_col;
3347     WORD32 x_pos;
3348     WORD32 y_pos;
3349     WORD32 i;
3350     WORD32 j;
3351     UWORD16 *pu2_out_data_coeff;
3352     rdoq_sbh_ctxt_t *ps_rdoq_ctxt = (rdoq_sbh_ctxt_t *)pv_rdoq_ctxt;
3353     WORD32 trans_size = ps_rdoq_ctxt->i4_trans_size;
3354     UWORD8 *pu1_out_data_header = *ppu1_addr;
3355 
3356     for(i = cur_last_csb_pos - 1; i >= 0; i--)
3357     {
3358         /* check for the first csb flag in our scan order */
3359         if(ps_rdoq_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[i]]])
3360         {
3361             UWORD8 u1_last_x, u1_last_y;
3362             WORD32 quant_coeff;
3363 
3364             pu1_out_data_header -= 4;  //To move the pointer back to the appropriate position
3365             /* row of csb */
3366             blk_row = pu1_trans_table[i] >> shift_value;
3367             /* col of csb */
3368             blk_col = pu1_trans_table[i] & mask_value;
3369 
3370             /*check for the 1st non-0 values inside the csb in our scan order*/
3371             for(j = 15; j >= 0; j--)
3372             {
3373                 x_pos = (pu1_csb_table[j] & 0x3) + blk_col * 4;
3374                 y_pos = (pu1_csb_table[j] >> 2) + blk_row * 4;
3375 
3376                 quant_coeff = pi2_coeffs[x_pos + (y_pos * trans_size)];
3377 
3378                 if(quant_coeff != 0)
3379                     break;
3380             }
3381 
3382             ASSERT(j >= 0);
3383 
3384             u1_last_x = x_pos;
3385             u1_last_y = y_pos;
3386 
3387             /* storing last_x and last_y */
3388             *(pu1_out_data_header) = u1_last_x;
3389             *(pu1_out_data_header + 1) = u1_last_y;
3390 
3391             /* storing the scan order */
3392             *(pu1_out_data_header + 2) = ps_rdoq_ctxt->i4_scan_idx;
3393 
3394             /* storing last_sub_block pos. in scan order count */
3395             *(pu1_out_data_header + 3) = i;
3396 
3397             /*stored the first 4 bytes, now all are word16. So word16 pointer*/
3398             pu2_out_data_coeff = (UWORD16 *)(pu1_out_data_header + 4);
3399 
3400             *pu2_out_data_coeff = 0xBAD0 | 1; /*since right&bottom csbf is 0*/
3401             *ppu1_addr = pu1_out_data_header;
3402 
3403             break; /*We just need this loop for finding 1st non-zero csb only*/
3404         }
3405         else
3406             pu1_out_data_header += 2;
3407     }
3408     return i;
3409 }
3410 
3411 /**
3412 ******************************************************************************
3413 *
3414 *  @brief Used to optimize the memcpy of cabac states. It copies only those
3415 *  states in the cabac context which have been altered.
3416 *
3417 *  @paramp[inout]  pv_dest
3418 *  Pointer to desitination cabac state.
3419 *
3420 *  @param[inout]   pv_backup_ctxt_dest
3421 *  Pointer to destination backup context
3422 *
3423 *  @param[inout]   pv_backup_ctxt_src
3424 *  Pointer to source backup context
3425 *
3426 *  @Desc:
3427 *  We go through each element in the backup_ctxt structure which will tell us
3428 *  if the states corresponding to lastxlasty, sigcoeffs, grtr_than_1_bins,
3429 *  grtr_than_2_bins and sub_blk_coded_flag(i.e. 0xBAD0) context elements
3430 *  have been altered. If they have been altered, we will memcpy the states
3431 *  corresponding to these context elements alone
3432 *
3433 *  @return  Nothing
3434 *
3435 ******************************************************************************
3436 */
ihevce_copy_backup_ctxt(void * pv_dest,void * pv_src,void * pv_backup_ctxt_dest,void * pv_backup_ctxt_src)3437 void ihevce_copy_backup_ctxt(
3438     void *pv_dest, void *pv_src, void *pv_backup_ctxt_dest, void *pv_backup_ctxt_src)
3439 {
3440     UWORD8 *pu1_dest = (UWORD8 *)(((cab_ctxt_t *)pv_dest)->au1_ctxt_models);
3441     UWORD8 *pu1_src = (UWORD8 *)(((cab_ctxt_t *)pv_src)->au1_ctxt_models);
3442     backup_ctxt_t *ps_backup_dest_ctxt = ((backup_ctxt_t *)pv_backup_ctxt_dest);
3443     backup_ctxt_t *ps_backup_src_ctxt = ((backup_ctxt_t *)pv_backup_ctxt_src);
3444     WORD32 i4_i;
3445 
3446     /*
3447     0       IHEVC_CAB_COEFFX_PREFIX         lastx last y has been coded
3448     1       IHEVC_CAB_CODED_SUBLK_IDX       sub-blk coded or not flag has been coded
3449     2       IHEVC_CAB_COEFF_FLAG            sigcoeff has been coded
3450     3       IHEVC_CAB_COEFABS_GRTR1_FLAG    greater than 1 bin has been coded
3451     4       IHEVC_CAB_COEFABS_GRTR2_FLAG    greater than 2 bin has been coded*/
3452     assert(MAX_NUM_CONTEXT_ELEMENTS == 5);
3453     for(i4_i = 0; i4_i < MAX_NUM_CONTEXT_ELEMENTS; i4_i++)
3454     {
3455         if((ps_backup_src_ctxt->au1_ctxt_to_backup[SIG_COEFF]) ||
3456            (ps_backup_dest_ctxt->au1_ctxt_to_backup[SIG_COEFF]))
3457         {
3458             memcpy(&pu1_dest[IHEVC_CAB_COEFF_FLAG], &pu1_src[IHEVC_CAB_COEFF_FLAG], 42);
3459             ps_backup_dest_ctxt->au1_ctxt_to_backup[SIG_COEFF] = 0;
3460             ps_backup_src_ctxt->au1_ctxt_to_backup[SIG_COEFF] = 0;
3461         }
3462         if((ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_1]) ||
3463            (ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_1]))
3464         {
3465             memcpy(
3466                 &pu1_dest[IHEVC_CAB_COEFABS_GRTR1_FLAG],
3467                 &pu1_src[IHEVC_CAB_COEFABS_GRTR1_FLAG],
3468                 24);
3469             ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_1] = 0;
3470             ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_1] = 0;
3471         }
3472         if((ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_2]) ||
3473            (ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_2]))
3474         {
3475             memcpy(
3476                 &pu1_dest[IHEVC_CAB_COEFABS_GRTR2_FLAG], &pu1_src[IHEVC_CAB_COEFABS_GRTR2_FLAG], 6);
3477             ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_2] = 0;
3478             ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_2] = 0;
3479         }
3480         if((ps_backup_src_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG]) ||
3481            (ps_backup_dest_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG]))
3482         {
3483             memcpy(&pu1_dest[IHEVC_CAB_CODED_SUBLK_IDX], &pu1_src[IHEVC_CAB_CODED_SUBLK_IDX], 4);
3484             ps_backup_dest_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 0;
3485             ps_backup_src_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 0;
3486         }
3487         if((ps_backup_src_ctxt->au1_ctxt_to_backup[LASTXY]) ||
3488            (ps_backup_dest_ctxt->au1_ctxt_to_backup[LASTXY]))
3489         {
3490             memcpy(&pu1_dest[IHEVC_CAB_COEFFX_PREFIX], &pu1_src[IHEVC_CAB_COEFFX_PREFIX], 36);
3491             ps_backup_dest_ctxt->au1_ctxt_to_backup[LASTXY] = 0;
3492             ps_backup_src_ctxt->au1_ctxt_to_backup[LASTXY] = 0;
3493         }
3494     }
3495     ((cab_ctxt_t *)pv_dest)->u4_bits_estimated_q12 = ((cab_ctxt_t *)pv_src)->u4_bits_estimated_q12;
3496 }
3497