1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22  *******************************************************************************
23  * @file
24  *  ih264e_core_coding.c
25  *
26  * @brief
27  *  This file contains routines that perform luma and chroma core coding for
28  *  intra macroblocks
29  *
30  * @author
31  *  ittiam
32  *
33  * @par List of Functions:
34  *  - ih264e_pack_l_mb_i16()
35  *  - ih264e_pack_c_mb_i8()
36  *  - ih264e_code_luma_intra_macroblock_16x16()
37  *  - ih264e_code_luma_intra_macroblock_4x4()
38  *  - ih264e_code_chroma_intra_macroblock_8x8()
39  *
40  * @remarks
41  *  None
42  *
43  *******************************************************************************
44  */
45 
46 /*****************************************************************************/
47 /* File Includes                                                             */
48 /*****************************************************************************/
49 
50 /* System include files */
51 #include <stdio.h>
52 #include <string.h>
53 #include <assert.h>
54 
55 /* User include files */
56 #include "ih264e_config.h"
57 #include "ih264_typedefs.h"
58 #include "ih264_platform_macros.h"
59 #include "iv2.h"
60 #include "ive2.h"
61 #include "ih264_macros.h"
62 #include "ih264_defs.h"
63 #include "ih264e_defs.h"
64 #include "ih264_trans_data.h"
65 #include "ih264e_error.h"
66 #include "ih264e_bitstream.h"
67 #include "ime_distortion_metrics.h"
68 #include "ime_defs.h"
69 #include "ime_structs.h"
70 #include "ih264_structs.h"
71 #include "ih264_trans_quant_itrans_iquant.h"
72 #include "ih264_inter_pred_filters.h"
73 #include "ih264_mem_fns.h"
74 #include "ih264_padding.h"
75 #include "ih264_intra_pred_filters.h"
76 #include "ih264_deblk_edge_filters.h"
77 #include "ih264_cabac_tables.h"
78 #include "irc_cntrl_param.h"
79 #include "irc_frame_info_collector.h"
80 #include "ih264e_rate_control.h"
81 #include "ih264e_cabac_structs.h"
82 #include "ih264e_structs.h"
83 #include "ih264e_globals.h"
84 #include "ih264e_core_coding.h"
85 #include "ih264e_mc.h"
86 
87 
88 /*****************************************************************************/
89 /* Function Definitions                                                      */
90 /*****************************************************************************/
91 
92 /**
93 *******************************************************************************
94 *
95 * @brief
96 *  This function performs does the DCT transform then Hadamard transform
97 *  and quantization for a macroblock when the mb mode is intra 16x16 mode
98 *
99 * @par Description:
100 *  First  cf4 is done on all 16 4x4 blocks of the 16x16 input block.
101 *  Then hadamard transform is done on the DC coefficients
102 *  Quantization is then performed on the 16x16 block, 4x4 wise
103 *
104 * @param[in] pu1_src
105 *  Pointer to source sub-block
106 *
107 * @param[in] pu1_pred
108 *  Pointer to prediction sub-block
109 *
110 * @param[in] pi2_out
111 *  Pointer to residual sub-block
112 *  The output will be in linear format
113 *  The first 16 continuous locations will contain the values of Dc block
114 *  After DC block and a stride 1st AC block will follow
115 *  After one more stride next AC block will follow
116 *  The blocks will be in raster scan order
117 *
118 * @param[in] src_strd
119 *  Source stride
120 *
121 * @param[in] pred_strd
122 *  Prediction stride
123 *
124 * @param[in] dst_strd
125 *  Destination stride
126 *
127 * @param[in] pu2_scale_matrix
128 *  The quantization matrix for 4x4 transform
129 *
130 * @param[in] pu2_threshold_matrix
131 *  Threshold matrix
132 *
133 * @param[in] u4_qbits
134 *  15+QP/6
135 *
136 * @param[in] u4_round_factor
137 *  Round factor for quant
138 *
139 * @param[out] pu1_nnz
140 *  Memory to store the non-zeros after transform
141 *  The first byte will be the nnz of DC block
142 *  From the next byte the AC nnzs will be stored in raster scan order
143 *
144 * @param u4_dc_flag
145 *  Signals if Dc transform is to be done or not
146 *   1 -> Dc transform will be done
147 *   0 -> Dc transform will not be done
148 *
149 * @remarks
150 *
151 *******************************************************************************
152 */
ih264e_luma_16x16_resi_trans_dctrans_quant(codec_t * ps_codec,UWORD8 * pu1_src,UWORD8 * pu1_pred,WORD16 * pi2_out,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,const UWORD16 * pu2_scale_matrix,const UWORD16 * pu2_threshold_matrix,UWORD32 u4_qbits,UWORD32 u4_round_factor,UWORD8 * pu1_nnz,UWORD32 u4_dc_flag)153 void ih264e_luma_16x16_resi_trans_dctrans_quant(codec_t *ps_codec,
154                                                 UWORD8 *pu1_src,
155                                                 UWORD8 *pu1_pred,
156                                                 WORD16 *pi2_out,
157                                                 WORD32 src_strd,
158                                                 WORD32 pred_strd,
159                                                 WORD32 dst_strd,
160                                                 const UWORD16 *pu2_scale_matrix,
161                                                 const UWORD16 *pu2_threshold_matrix,
162                                                 UWORD32 u4_qbits,
163                                                 UWORD32 u4_round_factor,
164                                                 UWORD8 *pu1_nnz,
165                                                 UWORD32 u4_dc_flag)
166 
167 {
168     WORD32 blk_cntr;
169     WORD32 i4_offsetx, i4_offsety;
170     UWORD8 *pu1_curr_src, *pu1_curr_pred;
171 
172     WORD16 *pi2_dc_str = pi2_out;
173 
174     /* Move to the ac addresses */
175     pu1_nnz++;
176     pi2_out += dst_strd;
177 
178     for (blk_cntr = 0; blk_cntr < NUM_LUMA4x4_BLOCKS_IN_MB; blk_cntr++)
179     {
180         IND2SUB_LUMA_MB(blk_cntr, i4_offsetx, i4_offsety);
181 
182         pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd;
183         pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd;
184 
185         ps_codec->pf_resi_trans_quant_4x4(pu1_curr_src, pu1_curr_pred,
186                                           pi2_out + blk_cntr * dst_strd,
187                                           src_strd, pred_strd, pu2_scale_matrix,
188                                           pu2_threshold_matrix, u4_qbits,
189                                           u4_round_factor, &pu1_nnz[blk_cntr],
190                                           &pi2_dc_str[blk_cntr]);
191 
192     }
193 
194     if (!u4_dc_flag)
195         return;
196 
197     /*
198      * In case of i16x16, we need to remove the contribution of dc coeffs into
199      * nnz of each block. We are doing that in the packing function
200      */
201 
202     /* Adjust pointers to point to dc values */
203     pi2_out -= dst_strd;
204     pu1_nnz--;
205 
206     u4_qbits++;
207     u4_round_factor <<= 1;
208 
209     ps_codec->pf_hadamard_quant_4x4(pi2_dc_str, pi2_out, pu2_scale_matrix,
210                                     pu2_threshold_matrix, u4_qbits,
211                                     u4_round_factor, &pu1_nnz[0]);
212 }
213 
214 /**
215 *******************************************************************************
216 *
217 * @brief
218 *  This function performs the intra 16x16 inverse transform process for H264
219 *  it includes inverse Dc transform, inverse quant and then inverse transform
220 *
221 * @par Description:
222 *
223 * @param[in] pi2_src
224 *  Input data, 16x16 size
225 *  First 16 mem locations will have the Dc coffs in rater scan order in linear fashion
226 *  after a stride 1st AC clock will be present again in raster can order
227 *  Then each AC block of the 16x16 block will follow in raster scan order
228 *
229 * @param[in] pu1_pred
230 *  The predicted data, 16x16 size
231 *  Block by block form
232 *
233 * @param[in] pu1_out
234 *  Output 16x16
235 *  In block by block form
236 *
237 * @param[in] src_strd
238 *  Source stride
239 *
240 * @param[in] pred_strd
241 *  input stride for prediction buffer
242 *
243 * @param[in] out_strd
244 *  input stride for output buffer
245 *
246 * @param[in] pu2_iscale_mat
247 *  Inverse quantization matrix for 4x4 transform
248 *
249 * @param[in] pu2_weigh_mat
250 *  weight matrix of 4x4 transform
251 *
252 * @param[in] qp_div
253 *  QP/6
254 *
255 * @param[in] pi4_tmp
256 *  Input temporary buffer
257 *  needs to be at least 20 in size
258 *
259 * @param[in] pu4_cntrl
260 *  Controls the transform path
261 *  total Last 17 bits are used
262 *  the 16th th bit will correspond to DC block
263 *  and 32-17 will correspond to the ac blocks in raster scan order
264 *  bit equaling zero indicates that the entire 4x4 block is zero for DC
265 *  For AC blocks a bit equaling zero will mean that all 15 AC coffs of the block is nonzero
266 *
267 * @param[in] pi4_tmp
268 *  Input temporary buffer
269 *  needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size
270 *
271 * @returns
272 *  none
273 *
274 * @remarks
275 *  The all zero case must be taken care outside
276 *
277 *******************************************************************************
278 */
ih264e_luma_16x16_idctrans_iquant_itrans_recon(codec_t * ps_codec,WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 src_strd,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscale_mat,const UWORD16 * pu2_weigh_mat,UWORD32 qp_div,UWORD32 u4_cntrl,UWORD32 u4_dc_trans_flag,WORD32 * pi4_tmp)279 void ih264e_luma_16x16_idctrans_iquant_itrans_recon(codec_t *ps_codec,
280                                                     WORD16 *pi2_src,
281                                                     UWORD8 *pu1_pred,
282                                                     UWORD8 *pu1_out,
283                                                     WORD32 src_strd,
284                                                     WORD32 pred_strd,
285                                                     WORD32 out_strd,
286                                                     const UWORD16 *pu2_iscale_mat,
287                                                     const UWORD16 *pu2_weigh_mat,
288                                                     UWORD32 qp_div,
289                                                     UWORD32 u4_cntrl,
290                                                     UWORD32 u4_dc_trans_flag,
291                                                     WORD32 *pi4_tmp)
292 {
293     /* Start index for inverse quant in a 4x4 block */
294     WORD32 iq_start_idx = (u4_dc_trans_flag == 0) ? 0 : 1;
295 
296     /* Cntrl bits for 4x4 transforms
297      * u4_blk_cntrl       : controls if a 4x4 block should be processed in ac path
298      * u4_dc_cntrl        : controls is a 4x4 block is to be processed in dc path
299      *                    : dc block must contain only single dc coefficient
300      * u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac
301      *                    : ie not (ac or dc)
302      */
303     UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
304 
305     /* tmp registers for block ids */
306     UWORD32 u4_blk_id;
307 
308     /* Subscrripts */
309     WORD32 i4_offset_x, i4_offset_y;
310 
311     UWORD8 *pu1_cur_prd_blk, *pu1_cur_out_blk;
312 
313     /* Src and stride for dc coeffs */
314     UWORD32 u4_dc_inc;
315     WORD16 *pi2_dc_src;
316 
317     /*
318      * For intra blocks we need to do inverse dc transform
319      * In case if intra blocks, its here that we populate the dc bits in cntrl
320      * as they cannot be populated any earlier
321      */
322     if (u4_dc_trans_flag)
323     {
324         UWORD32 cntr, u4_dc_cntrl;
325         /* Do inv hadamard and place the results at the start of each AC block */
326         ps_codec->pf_ihadamard_scaling_4x4(pi2_src, pi2_src, pu2_iscale_mat,
327                                            pu2_weigh_mat, qp_div, pi4_tmp);
328 
329         /* Update the cntrl flag */
330         u4_dc_cntrl = 0;
331         for (cntr = 0; cntr < DC_COEFF_CNT_LUMA_MB; cntr++)
332         {
333             u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr));
334         }
335         /* Mark dc bits as 1 if corresponding ac bit is 0 */
336         u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
337         /* Combine both ac and dc bits */
338         u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA)
339                         | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_LUMA);
340     }
341 
342     /* Source for dc coeffs
343      * If the block is intra, we have to read dc values from first row of src
344      * then stride for each block is 1, other wise its src stride
345      */
346     pi2_dc_src = (iq_start_idx == 0) ? (pi2_src + src_strd) : pi2_src;
347     u4_dc_inc = (iq_start_idx == 0) ? src_strd : 1;
348 
349     /* The AC blocks starts from 2nd row */
350     pi2_src += src_strd;
351 
352     /* Get the block bits */
353     u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA);
354     u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_LUMA) << 16;
355     u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFFFF0000;
356 
357     /* Get first block to process */
358     DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
359     while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
360     {
361         /* Compute address of src blocks */
362         WORD32 i4_src_offset = u4_dc_inc * u4_blk_id;
363 
364         IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
365 
366         /* Compute address of out and pred blocks */
367         pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
368         pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
369 
370         /* Do inv dc transform */
371         ps_codec->pf_iquant_itrans_recon_4x4_dc(pi2_dc_src + i4_src_offset,
372                                                 pu1_cur_prd_blk,
373                                                 pu1_cur_out_blk, pred_strd,
374                                                 out_strd, pu2_iscale_mat,
375                                                 pu2_weigh_mat, qp_div, NULL,
376                                                 iq_start_idx,
377                                                 pi2_dc_src + i4_src_offset);
378         /* Get next DC block to process */
379         DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
380     }
381 
382     /* now process ac/mixed blocks */
383     DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
384     while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
385     {
386 
387         WORD32 i4_src_offset = src_strd * u4_blk_id;
388 
389         IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
390 
391         pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
392         pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
393 
394         ps_codec->pf_iquant_itrans_recon_4x4(pi2_src + i4_src_offset,
395                                              pu1_cur_prd_blk, pu1_cur_out_blk,
396                                              pred_strd, out_strd,
397                                              pu2_iscale_mat, pu2_weigh_mat,
398                                              qp_div, (WORD16*) pi4_tmp,
399                                              iq_start_idx,
400                                              pi2_dc_src + u4_blk_id);
401 
402         DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
403     }
404 
405     /* Now process empty blocks */
406     DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
407     while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
408     {
409         IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
410 
411         pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
412         pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
413 
414         ps_codec->pf_inter_pred_luma_copy(pu1_cur_prd_blk, pu1_cur_out_blk,
415                                           pred_strd, out_strd, SIZE_4X4_BLK_HRZ,
416                                           SIZE_4X4_BLK_VERT, 0, 0);
417 
418         DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
419     }
420 }
421 
422 /**
423 *******************************************************************************
424 *
425 * @brief
426 *  This function performs does the DCT transform then Hadamard transform
427 *  and quantization for a chroma macroblock
428 *
429 * @par Description:
430 *  First  cf4 is done on all 16 4x4 blocks of the 8x8input block
431 *  Then hadamard transform is done on the DC coefficients
432 *  Quantization is then performed on the 8x8 block, 4x4 wise
433 *
434 * @param[in] pu1_src
435 *  Pointer to source sub-block
436 *  The input is in interleaved format for two chroma planes
437 *
438 * @param[in] pu1_pred
439 *  Pointer to prediction sub-block
440 *  Prediction is in inter leaved format
441 *
442 * @param[in] pi2_out
443 *  Pointer to residual sub-block
444 *  The output will be in linear format
445 *  The first 4 continuous locations will contain the values of DC block for U
446 *  and then next 4 will contain for V.
447 *  After DC block and a stride 1st AC block of U plane will follow
448 *  After one more stride next AC block of V plane will follow
449 *  The blocks will be in raster scan order
450 *
451 *  After all the AC blocks of U plane AC blocks of V plane will follow in exact
452 *  same way
453 *
454 * @param[in] src_strd
455 *  Source stride
456 *
457 * @param[in] pred_strd
458 *  Prediction stride
459 *
460 * @param[in] dst_strd
461 *  Destination stride
462 *
463 * @param[in] pu2_scale_matrix
464 *  The quantization matrix for 4x4 transform
465 *
466 * @param[in] pu2_threshold_matrix
467 *  Threshold matrix
468 *
469 * @param[in] u4_qbits
470 *  15+QP/6
471 *
472 * @param[in] u4_round_factor
473 *  Round factor for quant
474 *
475 * @param[out] pu1_nnz
476 *  Memory to store the non-zeros after transform
477 *  The first byte will be the nnz od DC block for U plane
478 *  From the next byte the AC nnzs will be storerd in raster scan order
479 *  The fifth byte will be nnz of Dc block of V plane
480 *  Then Ac blocks will follow
481 *
482 * @param u4_dc_flag
483 *  Signals if Dc transform is to be done or not
484 *   1 -> Dc transform will be done
485 *   0 -> Dc transform will not be done
486 *
487 * @remarks
488 *
489 *******************************************************************************
490 */
ih264e_chroma_8x8_resi_trans_dctrans_quant(codec_t * ps_codec,UWORD8 * pu1_src,UWORD8 * pu1_pred,WORD16 * pi2_out,WORD32 src_strd,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_scale_matrix,const UWORD16 * pu2_threshold_matrix,UWORD32 u4_qbits,UWORD32 u4_round_factor,UWORD8 * pu1_nnz_c)491 void ih264e_chroma_8x8_resi_trans_dctrans_quant(codec_t *ps_codec,
492                                                 UWORD8 *pu1_src,
493                                                 UWORD8 *pu1_pred,
494                                                 WORD16 *pi2_out,
495                                                 WORD32 src_strd,
496                                                 WORD32 pred_strd,
497                                                 WORD32 out_strd,
498                                                 const UWORD16 *pu2_scale_matrix,
499                                                 const UWORD16 *pu2_threshold_matrix,
500                                                 UWORD32 u4_qbits,
501                                                 UWORD32 u4_round_factor,
502                                                 UWORD8 *pu1_nnz_c)
503 {
504     WORD32 blk_cntr;
505     WORD32 i4_offsetx, i4_offsety;
506     UWORD8 *pu1_curr_src, *pu1_curr_pred;
507 
508     WORD16 pi2_dc_str[8];
509     UWORD8 au1_dcnnz[2];
510 
511     /* Move to the ac addresses */
512     pu1_nnz_c++;
513     pi2_out += out_strd;
514 
515     for (blk_cntr = 0; blk_cntr < NUM_CHROMA4x4_BLOCKS_IN_MB; blk_cntr++)
516     {
517         IND2SUB_CHROMA_MB(blk_cntr, i4_offsetx, i4_offsety);
518 
519         pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd;
520         pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd;
521 
522         /* For chroma, v plane nnz is populated from position 5 */
523         ps_codec->pf_resi_trans_quant_chroma_4x4(
524                         pu1_curr_src, pu1_curr_pred,
525                         pi2_out + blk_cntr * out_strd, src_strd, pred_strd,
526                         pu2_scale_matrix, pu2_threshold_matrix, u4_qbits,
527                         u4_round_factor, &pu1_nnz_c[blk_cntr + (blk_cntr > 3)],
528                         &pi2_dc_str[blk_cntr]);
529     }
530 
531     /* Adjust pointers to point to dc values */
532     pi2_out -= out_strd;
533     pu1_nnz_c--;
534 
535     u4_qbits++;
536     u4_round_factor <<= 1;
537 
538     ps_codec->pf_hadamard_quant_2x2_uv(pi2_dc_str, pi2_out, pu2_scale_matrix,
539                                        pu2_threshold_matrix, u4_qbits,
540                                        u4_round_factor, au1_dcnnz);
541 
542     /* Copy the dc nnzs */
543     pu1_nnz_c[0] = au1_dcnnz[0];
544     pu1_nnz_c[5] = au1_dcnnz[1];
545 
546 }
547 
548 /**
549 *******************************************************************************
550 * @brief
551 *  This function performs the inverse transform with process for chroma MB of H264
552 *
553 * @par Description:
554 *  Does inverse DC transform ,inverse quantization inverse transform
555 *
556 * @param[in] pi2_src
557 *  Input data, 16x16 size
558 *  The input is in the form of, first 4 locations will contain DC coeffs of
559 *  U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane
560 *  in raster scan order will follow, each block as linear array in raster scan order.
561 *  After a stride next AC block will follow. After all AC blocks of U plane
562 *  V plane AC blocks will follow in exact same order.
563 *
564 * @param[in] pu1_pred
565 *  The predicted data, 8x16 size, U and V interleaved
566 *
567 * @param[in] pu1_out
568 *  Output 8x16, U and V interleaved
569 *
570 * @param[in] src_strd
571 *  Source stride
572 *
573 * @param[in] pred_strd
574 *  input stride for prediction buffer
575 *
576 * @param[in] out_strd
577 *  input stride for output buffer
578 *
579 * @param[in] pu2_iscale_mat
580 *  Inverse quantization martix for 4x4 transform
581 *
582 * @param[in] pu2_weigh_mat
583 *  weight matrix of 4x4 transform
584 *
585 * @param[in] qp_div
586 *  QP/6
587 *
588 * @param[in] pi4_tmp
589 *  Input temporary buffer
590 *  needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of Dc cofss for chroma * number of planes
591 *  in size
592 *
593 * @param[in] pu4_cntrl
594 *  Controls the transform path
595 *  the 15 th bit will correspond to DC block of U plane , 14th will indicate the V plane Dc block
596 *  32-28 bits will indicate AC blocks of U plane in raster scan order
597 *  27-23 bits will indicate AC blocks of V plane in rater scan order
598 *  The bit 1 implies that there is at least one non zero coeff in a block
599 *
600 * @returns
601 *  none
602 *
603 * @remarks
604 *******************************************************************************
605 */
ih264e_chroma_8x8_idctrans_iquant_itrans_recon(codec_t * ps_codec,WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 src_strd,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscale_mat,const UWORD16 * pu2_weigh_mat,UWORD32 qp_div,UWORD32 u4_cntrl,WORD32 * pi4_tmp)606 void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(codec_t *ps_codec,
607                                                     WORD16 *pi2_src,
608                                                     UWORD8 *pu1_pred,
609                                                     UWORD8 *pu1_out,
610                                                     WORD32 src_strd,
611                                                     WORD32 pred_strd,
612                                                     WORD32 out_strd,
613                                                     const UWORD16 *pu2_iscale_mat,
614                                                     const UWORD16 *pu2_weigh_mat,
615                                                     UWORD32 qp_div,
616                                                     UWORD32 u4_cntrl,
617                                                     WORD32 *pi4_tmp)
618 {
619     /* Cntrl bits for 4x4 transforms
620      * u4_blk_cntrl       : controls if a 4x4 block should be processed in ac path
621      * u4_dc_cntrl        : controls is a 4x4 block is to be processed in dc path
622      *                    : dc block must contain only single dc coefficient
623      * u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac
624      *                    : ie not (ac or dc)
625      */
626 
627     UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
628 
629     /* tmp registers for block ids */
630     WORD32 u4_blk_id;
631 
632     /* Offsets for pointers */
633     WORD32 i4_offset_x, i4_offset_y;
634 
635     /* Pointer to 4x4 blocks */
636     UWORD8 *pu1_cur_4x4_prd_blk, *pu1_cur_4x4_out_blk;
637 
638     /* Tmp register for pointer to dc coffs */
639     WORD16 *pi2_dc_src;
640 
641     WORD16 i2_zero = 0;
642 
643     /* Increment for dc block */
644     WORD32 i4_dc_inc;
645 
646     /*
647      * Lets do the inverse transform for dc coeffs in chroma
648      */
649     if (u4_cntrl & CNTRL_FLAG_DCBLK_MASK_CHROMA)
650     {
651         UWORD32 cntr, u4_dc_cntrl;
652         /* Do inv hadamard for u an v block */
653 
654         ps_codec->pf_ihadamard_scaling_2x2_uv(pi2_src, pi2_src, pu2_iscale_mat,
655                                               pu2_weigh_mat, qp_div, NULL);
656         /*
657          * Update the cntrl flag
658          * Flag is updated as follows bits 15-11 -> u block dc bits
659          */
660         u4_dc_cntrl = 0;
661         for (cntr = 0; cntr < 8; cntr++)
662         {
663             u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr));
664         }
665 
666         /* Mark dc bits as 1 if corresponding ac bit is 0 */
667         u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
668         /* Combine both ac and dc bits */
669         u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA)
670                         | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_CHROMA);
671 
672         /* Since we populated the dc coffs, we have to read them from there */
673         pi2_dc_src = pi2_src;
674         i4_dc_inc = 1;
675     }
676     else
677     {
678         u4_cntrl = u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA;
679         pi2_dc_src = &i2_zero;
680         i4_dc_inc = 0;
681     }
682 
683     /* Get the block bits */
684     u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA);
685     u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_CHROMA) << 16;
686     u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFF000000;
687 
688     /* The AC blocks starts from 2nd row */
689     pi2_src += src_strd;
690 
691     DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
692     while (u4_blk_id < 8)
693     {
694         WORD32 dc_src_offset = u4_blk_id * i4_dc_inc;
695 
696         IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
697 
698         pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
699         pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
700 
701         ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc(
702                         pi2_dc_src + dc_src_offset, pu1_cur_4x4_prd_blk,
703                         pu1_cur_4x4_out_blk, pred_strd, out_strd, NULL, NULL, 0,
704                         NULL, pi2_dc_src + dc_src_offset);
705         /* Get next DC block to process */
706         DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
707     }
708 
709     /* now process ac/mixed blocks */
710     DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
711     while (u4_blk_id < 8)
712     {
713         WORD32 i4_src_offset = src_strd * u4_blk_id;
714         WORD32 dc_src_offset = i4_dc_inc * u4_blk_id;
715 
716         IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
717 
718         pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
719         pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
720 
721         ps_codec->pf_iquant_itrans_recon_chroma_4x4(pi2_src + i4_src_offset,
722                                                     pu1_cur_4x4_prd_blk,
723                                                     pu1_cur_4x4_out_blk,
724                                                     pred_strd, out_strd,
725                                                     pu2_iscale_mat,
726                                                     pu2_weigh_mat, qp_div,
727                                                     (WORD16 *) pi4_tmp,
728                                                     pi2_dc_src + dc_src_offset);
729 
730         DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
731     }
732 
733     /* Now process empty blocks */
734     DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
735     while (u4_blk_id < 8)
736     {
737         IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
738 
739         pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
740         pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
741 
742         ps_codec->pf_interleave_copy(pu1_cur_4x4_prd_blk, pu1_cur_4x4_out_blk,
743                                      pred_strd, out_strd, SIZE_4X4_BLK_VERT,
744                                      SIZE_4X4_BLK_HRZ);
745 
746         DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
747     }
748 }
749 
750 /**
751 ******************************************************************************
752 *
753 * @brief  This function packs residue of an i16x16 luma mb for entropy coding
754 *
755 * @par   Description
756 *  An i16 macro block contains two classes of units, dc 4x4 block and
757 *  4x4 ac blocks. while packing the mb, the dc block is sent first, and
758 *  the 16 ac blocks are sent next in scan order. Each and every block is
759 *  represented by 3 parameters (nnz, significant coefficient map and the
760 *  residue coefficients itself). If a 4x4 unit does not have any coefficients
761 *  then only nnz is sent. Inside a 4x4 block the individual coefficients are
762 *  sent in scan order.
763 *
764 *  The first byte of each block will be nnz of the block, if it is non zero,
765 *  a 2 byte significance map is sent. This is followed by nonzero coefficients.
766 *  This is repeated for 1 dc + 16 ac blocks.
767 *
768 * @param[in]  pi2_res_mb
769 *  pointer to residue mb
770 *
771 * @param[in, out]  pv_mb_coeff_data
772 *  buffer pointing to packed residue coefficients
773 *
774 * @param[in]  u4_res_strd
775 *  residual block stride
776 *
777 * @param[out]  u1_cbp_l
778 *  coded block pattern luma
779 *
780 * @param[in]   pu1_nnz
781 *  number of non zero coefficients in each 4x4 unit
782 *
783 * @param[out]
784 *  Control signal for inverse transform of 16x16 blocks
785 *
786 * @return none
787 *
788 * @ remarks
789 *
790 ******************************************************************************
791 */
ih264e_pack_l_mb_i16(WORD16 * pi2_res_mb,void ** pv_mb_coeff_data,WORD32 i4_res_strd,UWORD8 * u1_cbp_l,UWORD8 * pu1_nnz,UWORD32 * pu4_cntrl)792 void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb,
793                           void **pv_mb_coeff_data,
794                           WORD32 i4_res_strd,
795                           UWORD8 *u1_cbp_l,
796                           UWORD8 *pu1_nnz,
797                           UWORD32 *pu4_cntrl)
798 {
799     /* pointer to packed sub block buffer space */
800     tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data), *ps_mb_coeff_data_ac;
801 
802     /* no of non zero coefficients in the current sub block */
803     UWORD32 u4_nnz_cnt;
804 
805     /* significant coefficient map */
806     UWORD32 u4_s_map;
807 
808     /* pointer to scanning matrix */
809     const UWORD8 *pu1_scan_order;
810 
811     /* number of non zeros in sub block */
812     UWORD32 u4_nnz;
813 
814     /* coeff scan order */
815     const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
816 
817     /* temp var */
818     UWORD32 coeff_cnt, mask, b4,u4_cntrl=0;
819 
820     /*DC and AC coeff pointers*/
821     WORD16 *pi2_res_mb_ac,*pi2_res_mb_dc;
822 
823     /********************************************************/
824     /*  pack dc coeff data for entropy coding               */
825     /********************************************************/
826 
827     pi2_res_mb_dc = pi2_res_mb;
828     pu1_scan_order = gu1_luma_scan_order_dc;
829 
830     u4_nnz = *pu1_nnz;
831     u4_cntrl = 0;
832 
833     /* write number of non zero coefficients */
834     ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
835 
836     if (u4_nnz)
837     {
838         for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
839         {
840             if (pi2_res_mb_dc[pu1_scan_order[coeff_cnt]])
841             {
842                 /* write residue */
843                 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_dc[pu1_scan_order[coeff_cnt]];
844                 u4_s_map |= mask;
845             }
846             mask <<= 1;
847         }
848         /* write significant coeff map */
849         ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
850         (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
851 
852         u4_cntrl = 0x00008000;// Set DC bit in ctrl code
853     }
854     else
855     {
856         (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
857     }
858 
859     /********************************************************/
860     /*  pack ac coeff data for entropy coding               */
861     /********************************************************/
862 
863     pu1_nnz ++;
864     pu1_scan_order = gu1_luma_scan_order;
865     pi2_res_mb += i4_res_strd; /*Move to AC block*/
866 
867     ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
868 
869     for (b4 = 0; b4 < 16; b4++)
870     {
871         ps_mb_coeff_data = (*pv_mb_coeff_data);
872 
873         u4_nnz = pu1_nnz[u1_scan_order[b4]];
874 
875         /* Jump according to the scan order */
876         pi2_res_mb_ac = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
877 
878         /*
879          * Since this is a i16x16 block, we should not count dc coeff on indi
880          * vidual 4x4 blocks to nnz. But due to the implementation of 16x16
881          * trans function, we add dc's nnz to u4_nnz too. Hence we adjust that
882          * here
883          */
884         u4_nnz -= (pi2_res_mb_ac[0] != 0);
885 
886         /* write number of non zero coefficients */
887         ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
888 
889         if (u4_nnz)
890         {
891             for (u4_nnz_cnt = 0, coeff_cnt = 1, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
892             {
893                 if (pi2_res_mb_ac[pu1_scan_order[coeff_cnt]])
894                 {
895                     /* write residue */
896                     ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_ac[pu1_scan_order[coeff_cnt]];
897                     u4_s_map |= mask;
898                 }
899                 mask <<= 1;
900             }
901             /* write significant coeff map */
902             ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
903             (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
904             *u1_cbp_l = 15;
905 
906             u4_cntrl |= (1 << (31 - u1_scan_order[b4]));
907         }
908         else
909         {
910             (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
911         }
912 
913     }
914 
915     if (!(*u1_cbp_l))
916     {
917         (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
918     }
919 
920     /* Store the cntrl signal */
921     (*pu4_cntrl) = u4_cntrl;
922     return;
923 }
924 
925 /**
926 ******************************************************************************
927 *
928 * @brief  This function packs residue of an p16x16 luma mb for entropy coding
929 *
930 * @par   Description
931 *  A p16x16 macro block contains two classes of units 16  4x4 ac blocks.
932 *  while packing the mb, the dc block is sent first, and
933 *  the 16 ac blocks are sent next in scan order. Each and every block is
934 *  represented by 3 parameters (nnz, significant coefficient map and the
935 *  residue coefficients itself). If a 4x4 unit does not have any coefficients
936 *  then only nnz is sent. Inside a 4x4 block the individual coefficients are
937 *  sent in scan order.
938 *
939 *  The first byte of each block will be nnz of the block, if it is non zero,
940 *  a 2 byte significance map is sent. This is followed by nonzero coefficients.
941 *  This is repeated for 1 dc + 16 ac blocks.
942 *
943 * @param[in]  pi2_res_mb
944 *  pointer to residue mb
945 *
946 * @param[in, out]  pv_mb_coeff_data
947 *  buffer pointing to packed residue coefficients
948 *
949 * @param[in]  i4_res_strd
950 *  residual block stride
951 *
952 * @param[out]  u1_cbp_l
953 *  coded block pattern luma
954 *
955 * @param[in]   pu1_nnz
956 *  number of non zero coefficients in each 4x4 unit
957 *
958 * @param[out] pu4_cntrl
959 *  Control signal for inverse transform
960 *
961 * @return none
962 *
963 * @remarks Killing coffs not yet coded
964 *
965 ******************************************************************************
966 */
ih264e_pack_l_mb(WORD16 * pi2_res_mb,void ** pv_mb_coeff_data,WORD32 i4_res_strd,UWORD8 * u1_cbp_l,UWORD8 * pu1_nnz,UWORD32 u4_thres_resi,UWORD32 * pu4_cntrl)967 void ih264e_pack_l_mb(WORD16 *pi2_res_mb,
968                       void **pv_mb_coeff_data,
969                       WORD32 i4_res_strd,
970                       UWORD8 *u1_cbp_l,
971                       UWORD8 *pu1_nnz,
972                       UWORD32 u4_thres_resi,
973                       UWORD32 *pu4_cntrl)
974 {
975     /* pointer to packed sub block buffer space */
976     tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8, *ps_mb_coeff_data_mb;
977 
978     /* no of non zero coefficients in the current sub block */
979     UWORD32 u4_nnz_cnt;
980 
981     /* significant coefficient map */
982     UWORD32 u4_s_map;
983 
984     /* pointer to scanning matrix */
985     const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
986 
987     /* number of non zeros in sub block */
988     UWORD32 u4_nnz;
989 
990     /* pointer to residual sub block */
991     WORD16  *pi2_res_sb;
992 
993     /* coeff scan order */
994     const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
995 
996     /* coeff cost */
997     const UWORD8  *pu1_coeff_cost = gu1_coeff_cost;
998 
999     /* temp var */
1000     UWORD32 u4_mb_coeff_cost = 0, u4_b8_coeff_cost = 0, coeff_cnt, mask, u4_cntrl = 0, b4, b8;
1001 
1002     /* temp var */
1003     WORD32 i4_res_val, i4_run = -1, dcac_block;
1004 
1005     /* When Hadamard transform is disabled, first row values are dont care, ignore them */
1006     pi2_res_mb += i4_res_strd;
1007 
1008     /* When Hadamard transform is disabled, first unit value is dont care, ignore this */
1009     pu1_nnz ++;
1010 
1011     ps_mb_coeff_data_mb = ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
1012 
1013     /********************************************************/
1014     /*  pack coeff data for entropy coding                  */
1015     /********************************************************/
1016 
1017     for (b4 = 0; b4 < 16; b4++)
1018     {
1019         ps_mb_coeff_data = (*pv_mb_coeff_data);
1020 
1021         b8 = b4 >> 2;
1022 
1023         u4_nnz = pu1_nnz[u1_scan_order[b4]];
1024 
1025         /* Jump according to the scan order */
1026         pi2_res_sb = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
1027 
1028         /* write number of non zero coefficients */
1029         ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
1030 
1031         if (u4_nnz)
1032         {
1033             for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
1034             {
1035                 /* number of runs of zero before, this is used to compute coeff cost */
1036                 i4_run++;
1037 
1038                 i4_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
1039 
1040                 if (i4_res_val)
1041                 {
1042                     /* write residue */
1043                     ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i4_res_val;
1044                     u4_s_map |= mask;
1045 
1046                     if (u4_thres_resi)
1047                     {
1048                         /* compute coeff cost */
1049                         if (i4_res_val == 1 || i4_res_val == -1)
1050                         {
1051                             if (i4_run < 6)
1052                                 u4_b8_coeff_cost += pu1_coeff_cost[i4_run];
1053                         }
1054                         else
1055                             u4_b8_coeff_cost += 9;
1056 
1057                         i4_run = -1;
1058                     }
1059                 }
1060 
1061                 mask <<= 1;
1062             }
1063 
1064             /* write significant coeff map */
1065             ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1066             (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1067 
1068             /* cbp */
1069             *u1_cbp_l |= (1 << b8);
1070 
1071             /* Cntrl map for inverse transform computation
1072              *
1073              * If coeff_cnt is zero, it means that only nonzero was a dc coeff
1074              * Hence we have to set the 16 - u1_scan_order[b4]) position instead
1075              * of 31 - u1_scan_order[b4]
1076              */
1077             dcac_block = (coeff_cnt == 0)?16:31;
1078             u4_cntrl |= (1 << (dcac_block - u1_scan_order[b4]));
1079         }
1080         else
1081         {
1082             (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1083         }
1084 
1085         /* Decide if the 8x8 unit has to be sent for entropy coding? */
1086         if ((b4+1) % 4 == 0)
1087         {
1088             if ( u4_thres_resi && (u4_b8_coeff_cost <= LUMA_SUB_BLOCK_SKIP_THRESHOLD) &&
1089                             (*u1_cbp_l & (1 << b8)) )
1090             {
1091 
1092 
1093                 /*
1094                  * When we want to reset the full 8x8 block, we have to reset
1095                  * both the dc and ac coeff bits hence we have the symmetric
1096                  * arrangement of bits
1097                  */
1098                 const UWORD32 cntrl_mask_map[4] = {0xcc00cc00, 0x33003300, 0x00cc00cc, 0x00330033};
1099 
1100                 /* restore cbp */
1101                 *u1_cbp_l = (*u1_cbp_l & (~(1 << b8)));
1102 
1103                 /* correct cntrl flag */
1104                 u4_cntrl = u4_cntrl & (~cntrl_mask_map[(b4 >> 2)]);
1105 
1106                 /* correct nnz */
1107                 pu1_nnz[u1_scan_order[b4 - 3]] = 0;
1108                 pu1_nnz[u1_scan_order[b4 - 2]] = 0;
1109                 pu1_nnz[u1_scan_order[b4 - 1]] = 0;
1110                 pu1_nnz[u1_scan_order[b4]] = 0;
1111 
1112                 /* reset blk cost */
1113                 u4_b8_coeff_cost = 0;
1114             }
1115 
1116             if (!(*u1_cbp_l & (1 << b8)))
1117             {
1118                 (*pv_mb_coeff_data) = ps_mb_coeff_data_b8;
1119             }
1120 
1121             u4_mb_coeff_cost += u4_b8_coeff_cost;
1122 
1123             u4_b8_coeff_cost = 0;
1124             i4_run = -1;
1125             ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
1126         }
1127     }
1128 
1129     if (u4_thres_resi && (u4_mb_coeff_cost <= LUMA_BLOCK_SKIP_THRESHOLD)
1130                     && (*u1_cbp_l))
1131     {
1132         (*pv_mb_coeff_data) = ps_mb_coeff_data_mb;
1133         *u1_cbp_l = 0;
1134         u4_cntrl = 0;
1135         memset(pu1_nnz, 0, 16);
1136     }
1137 
1138     (*pu4_cntrl) = u4_cntrl;
1139 
1140     return;
1141 }
1142 
1143 /**
1144 ******************************************************************************
1145 *
1146 * @brief  This function packs residue of an i8x8 chroma mb for entropy coding
1147 *
1148 * @par   Description
1149 *  An i8 chroma macro block contains two classes of units, dc 2x2 block and
1150 *  4x4 ac blocks. while packing the mb, the dc block is sent first, and
1151 *  the 4 ac blocks are sent next in scan order. Each and every block is
1152 *  represented by 3 parameters (nnz, significant coefficient map and the
1153 *  residue coefficients itself). If a 4x4 unit does not have any coefficients
1154 *  then only nnz is sent. Inside a 4x4 block the individual coefficients are
1155 *  sent in scan order.
1156 *
1157 *  The first byte of each block will be nnz of the block, if it is non zero,
1158 *  a 2 byte significance map is sent. This is followed by nonzero coefficients.
1159 *  This is repeated for 1 dc + 4 ac blocks.
1160 *
1161 * @param[in]  pi2_res_mb
1162 *  pointer to residue mb
1163 *
1164 * @param[in, out]  pv_mb_coeff_data
1165 *  buffer pointing to packed residue coefficients
1166 *
1167 * @param[in]  u4_res_strd
1168 *  residual block stride
1169 *
1170 * @param[out]  u1_cbp_c
1171 *  coded block pattern chroma
1172 *
1173 * @param[in]   pu1_nnz
1174 *  number of non zero coefficients in each 4x4 unit
1175 *
1176 * @param[out]   pu1_nnz
1177 *  Control signal for inverse transform
1178 *
1179 * @param[in]   u4_swap_uv
1180 *  Swaps the order of U and V planes in entropy bitstream
1181 *
1182 * @return none
1183 *
1184 * @ remarks
1185 *
1186 ******************************************************************************
1187 */
ih264e_pack_c_mb(WORD16 * pi2_res_mb,void ** pv_mb_coeff_data,WORD32 i4_res_strd,UWORD8 * u1_cbp_c,UWORD8 * pu1_nnz,UWORD32 u4_thres_resi,UWORD32 * pu4_cntrl,UWORD32 u4_swap_uv)1188 void ih264e_pack_c_mb(WORD16 *pi2_res_mb,
1189                       void **pv_mb_coeff_data,
1190                       WORD32 i4_res_strd,
1191                       UWORD8 *u1_cbp_c,
1192                       UWORD8 *pu1_nnz,
1193                       UWORD32 u4_thres_resi,
1194                       UWORD32 *pu4_cntrl,
1195                       UWORD32 u4_swap_uv)
1196 {
1197     /* pointer to packed sub block buffer space */
1198     tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data);
1199     tu_sblk_coeff_data_t *ps_mb_coeff_data_dc, *ps_mb_coeff_data_ac;
1200 
1201     /* nnz pointer */
1202     UWORD8 *pu1_nnz_ac, *pu1_nnz_dc;
1203 
1204     /* nnz counter */
1205     UWORD32 u4_nnz_cnt;
1206 
1207     /* significant coefficient map */
1208     UWORD32 u4_s_map;
1209 
1210     /* pointer to scanning matrix */
1211     const UWORD8 *pu1_scan_order;
1212 
1213     /* no of non zero coefficients in the current sub block */
1214     UWORD32 u4_nnz;
1215 
1216     /* pointer to residual sub block, res val */
1217     WORD16 *pi2_res_sb, i2_res_val;
1218 
1219     /* temp var */
1220     UWORD32 coeff_cnt, mask, b4,plane;
1221 
1222     /* temp var */
1223     UWORD32 u4_coeff_cost;
1224     WORD32 i4_run;
1225 
1226     /* coeff cost */
1227     const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;
1228 
1229     /* pointer to packed buffer space */
1230     UWORD32 *pu4_mb_coeff_data = NULL;
1231 
1232     /* ac coded block pattern */
1233     UWORD8 u1_cbp_ac;
1234 
1235     /* Variable to store the current bit pos in cntrl variable*/
1236     UWORD32 cntrl_pos = 0;
1237 
1238     /********************************************************/
1239     /*  pack dc coeff data for entropy coding               */
1240     /********************************************************/
1241     pu1_scan_order = gu1_chroma_scan_order_dc;
1242     pi2_res_sb = pi2_res_mb;
1243     pu1_nnz_dc = pu1_nnz;
1244     (*pu4_cntrl) = 0;
1245     cntrl_pos = 15;
1246     ps_mb_coeff_data_dc = (*pv_mb_coeff_data);
1247 
1248     /* Color space conversion between SP_UV and SP_VU
1249      * We always assume SP_UV for all the processing
1250      * Hence to get proper stream output we need to swap U and V channels here
1251      *
1252      * For that there are two paths we need to look for
1253      * One is the path to bitstream , these variables should have the proper input
1254      * configured UV or VU
1255      * For the other path the inverse transform variables should have what ever ordering the
1256      * input had
1257      */
1258 
1259     if (u4_swap_uv)
1260     {
1261         pu1_nnz_dc += 5;/* Move to NNZ of V planve */
1262         pi2_res_sb += 4;/* Move to DC coff of V plane */
1263 
1264         cntrl_pos = 14; /* Control bit for V plane */
1265     }
1266 
1267     for (plane = 0; plane < 2; plane++)
1268     {
1269         ps_mb_coeff_data = (*pv_mb_coeff_data);
1270 
1271         u4_nnz = *pu1_nnz_dc;
1272         /* write number of non zero coefficients U/V */
1273         ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
1274 
1275         if (u4_nnz)
1276         {
1277             for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
1278             {
1279                 i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
1280                 if (i2_res_val)
1281                 {
1282                     /* write residue U/V */
1283                     ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
1284                     u4_s_map |= mask;
1285                 }
1286                 mask <<= 1;
1287             }
1288             /* write significant coeff map U/V */
1289             ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1290             (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1291             *u1_cbp_c = 1;
1292 
1293             (*pu4_cntrl) |= (1 << cntrl_pos);
1294         }
1295         else
1296         {
1297             (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1298         }
1299 
1300         if (u4_swap_uv)
1301         {
1302             cntrl_pos++; /* Control bit for U plane */
1303             pu1_nnz_dc -= 5; /* Move to NNZ of U plane */
1304             pi2_res_sb -= 4; /* Move to DC coff of U plane */
1305 
1306         }
1307         else
1308         {
1309             cntrl_pos--; /* Control bit for U plane */
1310             pu1_nnz_dc += 5; /* 4 for AC NNZ and 1 for DC */
1311             pi2_res_sb += 4; /* Move to DC coff of V plane */
1312         }
1313     }
1314 
1315     /********************************************************/
1316     /*  pack ac coeff data for entropy coding               */
1317     /********************************************************/
1318 
1319     pu1_scan_order = gu1_chroma_scan_order;
1320     ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
1321 
1322     if (u4_swap_uv)
1323     {
1324         pi2_res_sb = pi2_res_mb + i4_res_strd * 5; /* Move to V plane ,ie 1dc row+ 4 ac row */
1325         cntrl_pos = 27; /* The control bits are to be added for V bloc ie 31-4 th bit */
1326         pu1_nnz_ac = pu1_nnz + 6;/*Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */
1327     }
1328     else
1329     {
1330         pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane ,ie 1dc row */
1331         cntrl_pos = 31;
1332         pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc */
1333     }
1334 
1335     for (plane = 0; plane < 2; plane++)
1336     {
1337         pu4_mb_coeff_data = (*pv_mb_coeff_data);
1338 
1339         u4_coeff_cost = 0;
1340         i4_run = -1;
1341 
1342         /* get the current cbp, so that it automatically
1343          * gets reverted in case of zero ac values */
1344         u1_cbp_ac = *u1_cbp_c;
1345 
1346         for (b4 = 0; b4 < 4; b4++)
1347         {
1348             ps_mb_coeff_data = (*pv_mb_coeff_data);
1349 
1350             u4_nnz = *pu1_nnz_ac;
1351 
1352             /*
1353              * We are scanning only ac coeffs, but the nnz is for the
1354              * complete 4x4 block. Hence we have to discount the nnz contributed
1355              * by the dc coefficient
1356              */
1357             u4_nnz -= (pi2_res_sb[0]!=0);
1358 
1359             /* write number of non zero coefficients U/V */
1360             ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
1361 
1362             if (u4_nnz)
1363             {
1364                 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
1365                 {
1366                     i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
1367 
1368                     i4_run++;
1369 
1370                     if (i2_res_val)
1371                     {
1372                         /* write residue U/V */
1373                         ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
1374                         u4_s_map |= mask;
1375 
1376                         if ( u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD) )
1377                         {
1378                             /* compute coeff cost */
1379                             if (i2_res_val == 1 || i2_res_val == -1)
1380                             {
1381                                 if (i4_run < 6)
1382                                     u4_coeff_cost += pu1_coeff_cost[i4_run];
1383                             }
1384                             else
1385                                 u4_coeff_cost += 9;
1386 
1387                             i4_run = -1;
1388                         }
1389                     }
1390                     mask <<= 1;
1391                 }
1392 
1393                 /* write significant coeff map U/V */
1394                 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1395                 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1396                 u1_cbp_ac = 2;
1397 
1398                 (*pu4_cntrl) |= 1 << cntrl_pos;
1399             }
1400             else
1401             {
1402                 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1403             }
1404 
1405             pu1_nnz_ac++;
1406             pi2_res_sb += i4_res_strd;
1407             cntrl_pos--;
1408         }
1409 
1410         /* reset block */
1411         if (u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD))
1412         {
1413             pu4_mb_coeff_data[0] = 0;
1414             pu4_mb_coeff_data[1] = 0;
1415             pu4_mb_coeff_data[2] = 0;
1416             pu4_mb_coeff_data[3] = 0;
1417             (*pv_mb_coeff_data) = pu4_mb_coeff_data + 4;
1418 
1419             /* Generate the control signal */
1420             /* Zero out the current plane's AC coefficients */
1421             (*pu4_cntrl) &= ((plane == u4_swap_uv) ? 0x0FFFFFFF : 0xF0FFFFFF);
1422 
1423             /* Similarly do for the NNZ also */
1424             *(pu1_nnz_ac - 4) = 0;
1425             *(pu1_nnz_ac - 3) = 0;
1426             *(pu1_nnz_ac - 2) = 0;
1427             *(pu1_nnz_ac - 1) = 0;
1428         }
1429         else
1430         {
1431             *u1_cbp_c = u1_cbp_ac;
1432         }
1433 
1434         if (u4_swap_uv)
1435         {
1436             pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to V plane ,ie 1dc row+ 4 ac row + 1 dc row */
1437             cntrl_pos = 31; /* The control bits are to be added for V bloc ie 31-4 th bit */
1438             pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */
1439 
1440             pu1_nnz_ac = pu1_nnz + 1;
1441         }
1442         else
1443             pu1_nnz_ac = pu1_nnz + 6; /* Go to nnz of V plane */
1444     }
1445 
1446     /* restore the ptr basing on cbp */
1447     if (*u1_cbp_c == 0)
1448     {
1449         (*pv_mb_coeff_data) = ps_mb_coeff_data_dc;
1450     }
1451     else if (*u1_cbp_c == 1)
1452     {
1453         (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
1454     }
1455 
1456     return ;
1457 }
1458 
1459 /**
1460 *******************************************************************************
1461 *
1462 * @brief performs luma core coding when intra mode is i16x16
1463 *
1464 * @par Description:
1465 *  If the current mb is to be coded as intra of mb type i16x16, the mb is first
1466 *  predicted using one of i16x16 prediction filters, basing on the intra mode
1467 *  chosen. Then, error is computed between the input blk and the estimated blk.
1468 *  This error is transformed (hierarchical transform i.e., dct followed by hada-
1469 *  -mard), quantized. The quantized coefficients are packed in scan order for
1470 *  entropy coding.
1471 *
1472 * @param[in] ps_proc_ctxt
1473 *  pointer to the current macro block context
1474 *
1475 * @returns u1_cbp_l
1476 *  coded block pattern luma
1477 *
1478 * @remarks none
1479 *
1480 *******************************************************************************
1481 */
1482 
ih264e_code_luma_intra_macroblock_16x16(process_ctxt_t * ps_proc)1483 UWORD8 ih264e_code_luma_intra_macroblock_16x16(process_ctxt_t *ps_proc)
1484 {
1485     /* Codec Context */
1486     codec_t *ps_codec = ps_proc->ps_codec;
1487 
1488     /* pointer to ref macro block */
1489     UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
1490 
1491     /* pointer to src macro block */
1492     UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
1493 
1494     /* pointer to prediction macro block */
1495     UWORD8 *pu1_pred_mb = NULL;
1496 
1497     /* pointer to residual macro block */
1498     WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
1499 
1500     /* strides */
1501     WORD32 i4_src_strd = ps_proc->i4_src_strd;
1502     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1503     WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1504     WORD32 i4_res_strd = ps_proc->i4_res_strd;
1505 
1506     /* intra mode */
1507     UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
1508 
1509     /* coded block pattern */
1510     UWORD8 u1_cbp_l = 0;
1511 
1512     /* number of non zero coeffs*/
1513     UWORD32 au4_nnz[5];
1514     UWORD8  *pu1_nnz = (UWORD8 *)au4_nnz;
1515 
1516     /*Cntrol signal for itrans*/
1517     UWORD32 u4_cntrl;
1518 
1519     /* quantization parameters */
1520     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1521 
1522     /* pointer to packed mb coeff data */
1523     void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
1524 
1525     /* init nnz */
1526     au4_nnz[0] = 0;
1527     au4_nnz[1] = 0;
1528     au4_nnz[2] = 0;
1529     au4_nnz[3] = 0;
1530     au4_nnz[4] = 0;
1531 
1532     if (u1_intra_mode == PLANE_I16x16)
1533     {
1534         pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16_plane;
1535     }
1536     else
1537     {
1538         pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16;
1539     }
1540 
1541     /********************************************************/
1542     /*  error estimation,                                   */
1543     /*  transform                                           */
1544     /*  quantization                                        */
1545     /********************************************************/
1546     ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
1547                                                pu1_pred_mb, pi2_res_mb,
1548                                                i4_src_strd, i4_pred_strd,
1549                                                i4_res_strd,
1550                                                ps_qp_params->pu2_scale_mat,
1551                                                ps_qp_params->pu2_thres_mat,
1552                                                ps_qp_params->u1_qbits,
1553                                                ps_qp_params->u4_dead_zone,
1554                                                pu1_nnz, ENABLE_DC_TRANSFORM);
1555 
1556     /********************************************************/
1557     /*  pack coeff data for entropy coding                  */
1558     /********************************************************/
1559     ih264e_pack_l_mb_i16(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
1560                          pu1_nnz, &u4_cntrl);
1561 
1562     /********************************************************/
1563     /*  ierror estimation,                                  */
1564     /*  itransform                                          */
1565     /*  iquantization                                       */
1566     /********************************************************/
1567     /*
1568      *if refernce frame is not to be computed
1569      *we only need the right and bottom border 4x4 blocks to predict next intra
1570      *blocks, hence only compute them
1571      */
1572     if (!ps_proc->u4_compute_recon)
1573     {
1574         u4_cntrl &= 0x111F8000;
1575     }
1576 
1577     if (u4_cntrl)
1578     {
1579         ih264e_luma_16x16_idctrans_iquant_itrans_recon(
1580                         ps_codec, pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
1581                         i4_res_strd, i4_pred_strd, i4_rec_strd,
1582                         ps_qp_params->pu2_iscale_mat,
1583                         ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
1584                         u4_cntrl, ENABLE_DC_TRANSFORM,
1585                         ps_proc->pv_scratch_buff);
1586     }
1587     else
1588     {
1589         ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, i4_pred_strd,
1590                                           i4_rec_strd, MB_SIZE, MB_SIZE, NULL,
1591                                           0);
1592     }
1593 
1594     return (u1_cbp_l);
1595 }
1596 
1597 
1598 /**
1599 *******************************************************************************
1600 *
1601 * @brief performs luma core coding when intra mode is i4x4
1602 *
1603 * @par Description:
1604 *  If the current mb is to be coded as intra of mb type i4x4, the mb is first
1605 *  predicted using one of i4x4 prediction filters, basing on the intra mode
1606 *  chosen. Then, error is computed between the input blk and the estimated blk.
1607 *  This error is dct transformed and quantized. The quantized coefficients are
1608 *  packed in scan order for entropy coding.
1609 *
1610 * @param[in] ps_proc_ctxt
1611 *  pointer to the current macro block context
1612 *
1613 * @returns u1_cbp_l
1614 *  coded block pattern luma
1615 *
1616 * @remarks
1617 *  The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
1618 *  mentioned in h.264 specification
1619 *
1620 *******************************************************************************
1621 */
ih264e_code_luma_intra_macroblock_4x4(process_ctxt_t * ps_proc)1622 UWORD8 ih264e_code_luma_intra_macroblock_4x4(process_ctxt_t *ps_proc)
1623 {
1624     /* Codec Context */
1625     codec_t *ps_codec = ps_proc->ps_codec;
1626 
1627     /* pointer to ref macro block */
1628     UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
1629 
1630     /* pointer to src macro block */
1631     UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
1632 
1633     /* pointer to prediction macro block */
1634     UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
1635 
1636     /* pointer to residual macro block */
1637     WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
1638 
1639     /* strides */
1640     WORD32 i4_src_strd = ps_proc->i4_src_strd;
1641     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1642     WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1643 
1644     /* pointer to neighbors: left, top, top-left */
1645     UWORD8 *pu1_mb_a;
1646     UWORD8 *pu1_mb_b;
1647     UWORD8 *pu1_mb_c;
1648     UWORD8 *pu1_mb_d;
1649 
1650     /* intra mode */
1651     UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
1652 
1653     /* neighbor availability */
1654     WORD32 i4_ngbr_avbl;
1655 
1656     /* neighbor pels for intra prediction */
1657     UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
1658 
1659     /* coded block pattern */
1660     UWORD8 u1_cbp_l = 0;
1661 
1662     /* number of non zero coeffs*/
1663     UWORD8  u1_nnz;
1664 
1665     /* quantization parameters */
1666     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1667 
1668     /* pointer to packed mb coeff data */
1669     void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
1670 
1671     /* pointer to packed mb coeff data */
1672     tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8;
1673 
1674     /* no of non zero coefficients in the current sub block */
1675     UWORD32 u4_nnz_cnt;
1676 
1677     /* significant coefficient map */
1678     UWORD32 u4_s_map;
1679 
1680     /* pointer to scanning matrix */
1681     const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
1682 
1683     /*Dummy variable for 4x4 trans fucntion*/
1684     WORD16 i2_dc_dummy;
1685 
1686     /* temp var */
1687     UWORD32 i, b8, b4, u1_blk_x, u1_blk_y, u1_pix_x, u1_pix_y, coeff_cnt, mask;
1688 
1689     /* Process 16 4x4 lum sub-blocks of the MB in scan order */
1690     for (b8 = 0; b8 < 4; b8++)
1691     {
1692         u1_blk_x = GET_BLK_RASTER_POS_X(b8) << 3;
1693         u1_blk_y = GET_BLK_RASTER_POS_Y(b8) << 3;
1694 
1695         /* if in case cbp for the 8x8 block is zero, send no residue */
1696         ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
1697 
1698         for (b4 = 0; b4 < 4; b4++)
1699         {
1700             /* index of pel in MB */
1701             u1_pix_x = u1_blk_x + (GET_SUB_BLK_RASTER_POS_X(b4) << 2);
1702             u1_pix_y = u1_blk_y + (GET_SUB_BLK_RASTER_POS_Y(b4) << 2);
1703 
1704             /* Initialize source and reference pointers */
1705             pu1_curr_mb = ps_proc->pu1_src_buf_luma + u1_pix_x + (u1_pix_y * i4_src_strd);
1706             pu1_ref_mb = ps_proc->pu1_rec_buf_luma + u1_pix_x + (u1_pix_y * i4_rec_strd);
1707 
1708             /* pointer to left of ref macro block */
1709             pu1_mb_a = pu1_ref_mb - 1;
1710             /* pointer to top of ref macro block */
1711             pu1_mb_b = pu1_ref_mb - i4_rec_strd;
1712             /* pointer to topright of ref macro block */
1713             pu1_mb_c = pu1_mb_b + 4;
1714             /* pointer to topleft macro block */
1715             pu1_mb_d = pu1_mb_b - 1;
1716 
1717             /* compute neighbor availability */
1718             i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
1719 
1720             /* sub block intra mode */
1721             u1_intra_mode = ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4];
1722 
1723             /********************************************************/
1724             /* gather prediction pels from neighbors for prediction */
1725             /********************************************************/
1726             /* left pels */
1727             if (i4_ngbr_avbl & LEFT_MB_AVAILABLE_MASK)
1728             {
1729                 for (i = 0; i < 4; i++)
1730                     pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_rec_strd];
1731             }
1732             else
1733             {
1734                 memset(pu1_ngbr_pels_i4, 0, 4);
1735             }
1736 
1737             /* top pels */
1738             if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
1739             {
1740                 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
1741             }
1742             else
1743             {
1744                 memset(pu1_ngbr_pels_i4 + 5, 0, 4);
1745             }
1746             /* top left pels */
1747             if (i4_ngbr_avbl & TOP_LEFT_MB_AVAILABLE_MASK)
1748             {
1749                 pu1_ngbr_pels_i4[4] = *pu1_mb_d;
1750             }
1751             else
1752             {
1753                 pu1_ngbr_pels_i4[4] = 0;
1754             }
1755             /* top right pels */
1756             if (i4_ngbr_avbl & TOP_RIGHT_MB_AVAILABLE_MASK)
1757             {
1758                 memcpy(pu1_ngbr_pels_i4+8+1,pu1_mb_c,4);
1759             }
1760             else if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
1761             {
1762                 memset(pu1_ngbr_pels_i4+8+1,pu1_ngbr_pels_i4[8],4);
1763             }
1764 
1765             /********************************************************/
1766             /*  prediction                                          */
1767             /********************************************************/
1768             (ps_codec->apf_intra_pred_4_l)[u1_intra_mode](pu1_ngbr_pels_i4,
1769                                                           pu1_pred_mb, 0,
1770                                                           i4_pred_strd,
1771                                                           i4_ngbr_avbl);
1772 
1773             /********************************************************/
1774             /*  error estimation,                                   */
1775             /*  transform                                           */
1776             /*  quantization                                        */
1777             /********************************************************/
1778             ps_codec->pf_resi_trans_quant_4x4(pu1_curr_mb, pu1_pred_mb,
1779                                               pi2_res_mb, i4_src_strd,
1780                                               i4_pred_strd,
1781                                               ps_qp_params->pu2_scale_mat,
1782                                               ps_qp_params->pu2_thres_mat,
1783                                               ps_qp_params->u1_qbits,
1784                                               ps_qp_params->u4_dead_zone,
1785                                               &u1_nnz, &i2_dc_dummy);
1786 
1787             /********************************************************/
1788             /*  pack coeff data for entropy coding                  */
1789             /********************************************************/
1790             ps_mb_coeff_data = *pv_mb_coeff_data;
1791 
1792             /* write number of non zero coefficients */
1793             ps_mb_coeff_data->i4_sig_map_nnz = u1_nnz;
1794 
1795             if (u1_nnz)
1796             {
1797                 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u1_nnz; coeff_cnt++)
1798                 {
1799                     if (pi2_res_mb[pu1_scan_order[coeff_cnt]])
1800                     {
1801                         /* write residue */
1802                         ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]];
1803                         u4_s_map |= mask;
1804                     }
1805                     mask <<= 1;
1806                 }
1807                 /* write significant coeff map */
1808                 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1809 
1810                 /* update ptr to coeff data */
1811                 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1812 
1813                 /* cbp */
1814                 u1_cbp_l |= (1 << b8);
1815             }
1816             else
1817             {
1818                 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1819             }
1820 
1821             /********************************************************/
1822             /*  ierror estimation,                                  */
1823             /*  itransform                                          */
1824             /*  iquantization                                       */
1825             /********************************************************/
1826             if (u1_nnz)
1827                 ps_codec->pf_iquant_itrans_recon_4x4(
1828                                 pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
1829                                 /*No input stride,*/i4_pred_strd,
1830                                 i4_rec_strd, ps_qp_params->pu2_iscale_mat,
1831                                 ps_qp_params->pu2_weigh_mat,
1832                                 ps_qp_params->u1_qp_div,
1833                                 ps_proc->pv_scratch_buff, 0, 0);
1834             else
1835                 ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb,
1836                                                   i4_pred_strd, i4_rec_strd,
1837                                                   BLK_SIZE, BLK_SIZE, NULL,
1838                                                   0);
1839 
1840         }
1841 
1842         /* if the 8x8 block has no residue, nothing needs to be sent to entropy */
1843         if (!(u1_cbp_l & (1 << b8)))
1844         {
1845             *pv_mb_coeff_data = ps_mb_coeff_data_b8;
1846         }
1847     }
1848 
1849     return (u1_cbp_l);
1850 }
1851 
/**
*******************************************************************************
*
* @brief packs the luma coefficients of an i4x4 mb and copies its recon when
*  rdopt is on
*
* @par Description:
*  Unlike ih264e_code_luma_intra_macroblock_4x4(), this routine performs no
*  prediction, transform or quantization of its own. It reads the quantized
*  coefficients (pi2_res_buf_intra_4x4) and the non zero coefficient counts
*  (au4_nnz_intra_4x4) of the 16 4x4 sub-blocks from the process context,
*  presumably produced earlier while the i4x4 modes were evaluated. The
*  coefficients are packed in scan order for entropy coding and the
*  reconstructed mb held in pu1_ref_mb_intra_4x4 is copied to the recon buffer.
*
* @param[in] ps_proc
*  pointer to the current macro block context
*
* @returns u1_cbp_l
*  coded block pattern luma
*
* @remarks
*  The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
*  mentioned in h.264 specification
*
*******************************************************************************
*/
ih264e_code_luma_intra_macroblock_4x4_rdopt_on(process_ctxt_t * ps_proc)1876 UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on(process_ctxt_t *ps_proc)
1877 {
1878     /* Codec Context */
1879     codec_t *ps_codec = ps_proc->ps_codec;
1880 
1881     /* pointer to ref macro block */
1882     UWORD8 *pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4;
1883 
1884     /* pointer to recon buffer */
1885     UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma;
1886 
1887     /* pointer to residual macro block */
1888     WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
1889 
1890     /* strides */
1891     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1892 
1893     /* number of non zero coeffs*/
1894     UWORD8  *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
1895 
1896     /* coded block pattern */
1897     UWORD8 u1_cbp_l = 0;
1898 
1899     /* pointer to packed mb coeff data */
1900     void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
1901 
1902     /* pointer to packed mb coeff data */
1903     tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8;
1904 
1905     /* no of non zero coefficients in the current sub block */
1906     UWORD32 u4_nnz_cnt;
1907 
1908     /* significant coefficient map */
1909     UWORD32 u4_s_map;
1910 
1911     /* pointer to scanning matrix */
1912     const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
1913 
1914     /* temp var */
1915     UWORD32 b8, b4, coeff_cnt, mask;
1916 
1917     /* Process 16 4x4 lum sub-blocks of the MB in scan order */
1918     for (b8 = 0; b8 < 4; b8++)
1919     {
1920         /* if in case cbp for the 8x8 block is zero, send no residue */
1921         ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
1922 
1923         for (b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE)
1924         {
1925             /********************************************************/
1926             /*  pack coeff data for entropy coding                  */
1927             /********************************************************/
1928             ps_mb_coeff_data = *pv_mb_coeff_data;
1929 
1930             /* write number of non zero coefficients */
1931             ps_mb_coeff_data->i4_sig_map_nnz = *pu1_nnz;
1932 
1933             if (*pu1_nnz)
1934             {
1935                 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < *pu1_nnz; coeff_cnt++)
1936                 {
1937                     if (pi2_res_mb[pu1_scan_order[coeff_cnt]])
1938                     {
1939                         /* write residue */
1940                         ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]];
1941                         u4_s_map |= mask;
1942                     }
1943                     mask <<= 1;
1944                 }
1945                 /* write significant coeff map */
1946                 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1947 
1948                 /* update ptr to coeff data */
1949                 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1950 
1951                 /* cbp */
1952                 u1_cbp_l |= (1 << b8);
1953             }
1954             else
1955             {
1956                 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1957             }
1958         }
1959 
1960         /* if the 8x8 block has no residue, nothing needs to be sent to entropy */
1961         if (!(u1_cbp_l & (1 << b8)))
1962         {
1963             *pv_mb_coeff_data = ps_mb_coeff_data_b8;
1964         }
1965     }
1966 
1967     /* memcpy recon */
1968     ps_codec->pf_inter_pred_luma_copy(pu1_ref_mb_intra_4x4, pu1_rec_mb, MB_SIZE, i4_rec_strd, MB_SIZE, MB_SIZE, NULL, 0);
1969 
1970     return (u1_cbp_l);
1971 }
1972 
1973 
/**
*******************************************************************************
*
* @brief performs chroma core coding for intra macro blocks
*
* @par Description:
*  If the current MB is to be intra coded with mb type chroma I8x8, the MB is
*  first predicted using intra 8x8 prediction filters. The predicted data is
*  compared with the input for error and the error is transformed. The DC
*  coefficients of each transformed sub block are further transformed using
*  Hadamard transform. The resulting coefficients are quantized, packed and sent
*  for entropy coding.
*
* @param[in] ps_proc
*  pointer to the current macro block context
*
* @returns u1_cbp_c
*  coded block pattern chroma
*
* @remarks
*  The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order
*  mentioned in h.264 specification
*
*******************************************************************************
*/
ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t * ps_proc)1999 UWORD8 ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t *ps_proc)
2000 {
2001     /* Codec Context */
2002     codec_t *ps_codec = ps_proc->ps_codec;
2003 
2004     /* pointer to ref macro block */
2005     UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma;
2006 
2007     /* pointer to src macro block */
2008     UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
2009 
2010     /* pointer to prediction macro block */
2011     UWORD8 *pu1_pred_mb = NULL;
2012 
2013     /* pointer to residual macro block */
2014     WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
2015 
2016     /* strides */
2017     WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd;
2018     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
2019     WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
2020     WORD32 i4_res_strd = ps_proc->i4_res_strd;
2021 
2022     /* intra mode */
2023     UWORD8 u1_intra_mode = ps_proc->u1_c_i8_mode;
2024 
2025     /* coded block pattern */
2026     UWORD8 u1_cbp_c = 0;
2027 
2028     /* number of non zero coeffs*/
2029     UWORD8 au1_nnz[18] = {0};
2030 
2031     /* quantization parameters */
2032     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
2033 
2034     /* Control signal for inverse transform */
2035     UWORD32 u4_cntrl;
2036 
2037     /* pointer to packed mb coeff data */
2038     void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
2039 
2040     /* See if we need to swap U and V plances for entropy */
2041     UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
2042 
2043     if (PLANE_CH_I8x8 == u1_intra_mode)
2044     {
2045         pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma_plane;
2046     }
2047     else
2048     {
2049         pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
2050     }
2051 
2052     /********************************************************/
2053     /*  error estimation,                                   */
2054     /*  transform                                           */
2055     /*  quantization                                        */
2056     /********************************************************/
2057     ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
2058                                                pu1_pred_mb, pi2_res_mb,
2059                                                i4_src_strd, i4_pred_strd,
2060                                                i4_res_strd,
2061                                                ps_qp_params->pu2_scale_mat,
2062                                                ps_qp_params->pu2_thres_mat,
2063                                                ps_qp_params->u1_qbits,
2064                                                ps_qp_params->u4_dead_zone,
2065                                                au1_nnz);
2066 
2067     /********************************************************/
2068     /*  pack coeff data for entropy coding                  */
2069     /********************************************************/
2070     ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
2071                      au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
2072 
2073     /********************************************************/
2074     /*  ierror estimation,                                  */
2075     /*  itransform                                          */
2076     /*  iquantization                                       */
2077     /********************************************************/
2078     ih264e_chroma_8x8_idctrans_iquant_itrans_recon(ps_codec, pi2_res_mb,
2079                                                    pu1_pred_mb, pu1_ref_mb,
2080                                                    i4_res_strd, i4_pred_strd,
2081                                                    i4_rec_strd,
2082                                                    ps_qp_params->pu2_iscale_mat,
2083                                                    ps_qp_params->pu2_weigh_mat,
2084                                                    ps_qp_params->u1_qp_div,
2085                                                    u4_cntrl,
2086                                                    ps_proc->pv_scratch_buff);
2087     return (u1_cbp_c);
2088 }
2089 
2090 
/**
*******************************************************************************
*
* @brief performs luma core coding when mode is inter
*
* @par Description:
*  If the current mb is to be coded as inter, the mb is predicted based on the
*  sub mb partitions and corresponding motion vectors generated by ME. Then,
*  error is computed between the input blk and the estimated blk. This error is
*  transformed and quantized. The quantized coefficients are packed in scan
*  order for entropy coding.
*
* @param[in] ps_proc
*  pointer to the current macro block context
*
* @returns u1_cbp_l
*  coded block pattern luma
*
* @remarks none
*
*******************************************************************************
*/
2113 
ih264e_code_luma_inter_macroblock_16x16(process_ctxt_t * ps_proc)2114 UWORD8 ih264e_code_luma_inter_macroblock_16x16(process_ctxt_t *ps_proc)
2115 {
2116     /* Codec Context */
2117     codec_t *ps_codec = ps_proc->ps_codec;
2118 
2119     /* pointer to ref macro block */
2120     UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma;
2121 
2122     /* pointer to src macro block */
2123     UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
2124 
2125     /* pointer to prediction macro block */
2126     UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
2127 
2128     /* pointer to residual macro block */
2129     WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
2130 
2131     /* strides */
2132     WORD32 i4_src_strd = ps_proc->i4_src_strd;
2133     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
2134     WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
2135     WORD32 i4_res_strd = ps_proc->i4_res_strd;
2136 
2137     /* coded block pattern */
2138     UWORD8 u1_cbp_l = 0;
2139 
2140     /*Control signal of itrans*/
2141     UWORD32 u4_cntrl;
2142 
2143     /* number of non zero coeffs*/
2144     UWORD8  *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz;
2145 
2146     /* quantization parameters */
2147     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
2148 
2149     /* pointer to packed mb coeff data */
2150     void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
2151 
2152     /* pseudo pred buffer */
2153     UWORD8 *pu1_pseudo_pred = pu1_pred_mb;
2154 
2155     /* pseudo pred buffer stride */
2156     WORD32 i4_pseudo_pred_strd = i4_pred_strd;
2157 
2158     /* init nnz */
2159     ps_proc->au4_nnz[0] = 0;
2160     ps_proc->au4_nnz[1] = 0;
2161     ps_proc->au4_nnz[2] = 0;
2162     ps_proc->au4_nnz[3] = 0;
2163     ps_proc->au4_nnz[4] = 0;
2164 
2165     /********************************************************/
2166     /*  prediction                                          */
2167     /********************************************************/
2168     ih264e_motion_comp_luma(ps_proc, &pu1_pseudo_pred, &i4_pseudo_pred_strd);
2169 
2170     /********************************************************/
2171     /*  error estimation,                                   */
2172     /*  transform                                           */
2173     /*  quantization                                        */
2174     /********************************************************/
2175     if (ps_proc->u4_min_sad_reached == 0 || ps_proc->u4_min_sad != 0)
2176     {
2177         ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
2178                                                    pu1_pseudo_pred, pi2_res_mb,
2179                                                    i4_src_strd,
2180                                                    i4_pseudo_pred_strd,
2181                                                    i4_res_strd,
2182                                                    ps_qp_params->pu2_scale_mat,
2183                                                    ps_qp_params->pu2_thres_mat,
2184                                                    ps_qp_params->u1_qbits,
2185                                                    ps_qp_params->u4_dead_zone,
2186                                                    pu1_nnz,
2187                                                    DISABLE_DC_TRANSFORM);
2188 
2189         /********************************************************/
2190         /*  pack coeff data for entropy coding                  */
2191         /********************************************************/
2192         ih264e_pack_l_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
2193                          pu1_nnz, ps_codec->u4_thres_resi, &u4_cntrl);
2194     }
2195     else
2196     {
2197         u1_cbp_l = 0;
2198         u4_cntrl = 0;
2199     }
2200 
2201     /********************************************************/
2202     /*  ierror estimation,                                  */
2203     /*  itransform                                          */
2204     /*  iquantization                                       */
2205     /********************************************************/
2206 
2207     /*If the frame is not to be used for P frame reference or dumping recon
2208      * we only will use the reocn for only predicting intra Mbs
2209      * THis will need only right and bottom edge 4x4 blocks recon
2210      * Hence we selectively enable them using control signal(including DC)
2211      */
2212     if (ps_proc->u4_compute_recon != 1)
2213     {
2214         u4_cntrl &= 0x111F0000;
2215     }
2216 
2217     if (u4_cntrl)
2218     {
2219         ih264e_luma_16x16_idctrans_iquant_itrans_recon(
2220                         ps_codec, pi2_res_mb, pu1_pseudo_pred, pu1_rec_mb,
2221                         i4_res_strd, i4_pseudo_pred_strd, i4_rec_strd,
2222                         ps_qp_params->pu2_iscale_mat,
2223                         ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
2224                         u4_cntrl /*Cntrl*/, DISABLE_DC_TRANSFORM,
2225                         ps_proc->pv_scratch_buff);
2226     }
2227     else
2228     {
2229         ps_codec->pf_inter_pred_luma_copy(pu1_pseudo_pred, pu1_rec_mb,
2230                                           i4_pseudo_pred_strd, i4_rec_strd,
2231                                           MB_SIZE, MB_SIZE, NULL, 0);
2232     }
2233 
2234 
2235     return (u1_cbp_l);
2236 }
2237 
/**
*******************************************************************************
*
* @brief performs chroma core coding for inter macro blocks
*
* @par Description:
*  If the current mb is to be coded as an inter predicted mb, prediction is
*  done based on the sub mb partitions and corresponding motion vectors
*  generated by ME. Then, error is computed between the input blk and the
*  estimated blk. This error is transformed and quantized. The quantized
*  coefficients are packed in scan order for entropy coding.
*
* @param[in] ps_proc
*  pointer to the current macro block context
*
* @returns u1_cbp_c
*  coded block pattern chroma
*
* @remarks none
*
*******************************************************************************
*/
ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t * ps_proc)2261 UWORD8 ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t *ps_proc)
2262 {
2263     /* Codec Context */
2264     codec_t *ps_codec = ps_proc->ps_codec;
2265 
2266     /* pointer to ref macro block */
2267     UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_chroma;
2268 
2269     /* pointer to src macro block */
2270     UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
2271 
2272     /* pointer to prediction macro block */
2273     UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
2274 
2275     /* pointer to residual macro block */
2276     WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
2277 
2278     /* strides */
2279     WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd;
2280     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
2281     WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
2282     WORD32 i4_res_strd = ps_proc->i4_res_strd;
2283 
2284     /* coded block pattern */
2285     UWORD8 u1_cbp_c = 0;
2286 
2287     /*Control signal for inverse transform*/
2288     UWORD32 u4_cntrl;
2289 
2290     /* number of non zero coeffs*/
2291     UWORD8 au1_nnz[10] = {0};
2292 
2293     /* quantization parameters */
2294     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
2295 
2296     /* pointer to packed mb coeff data */
2297     void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
2298 
2299     /*See if we need to swap U and V plances for entropy*/
2300     UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
2301 
2302     /********************************************************/
2303     /*  prediction                                          */
2304     /********************************************************/
2305     ih264e_motion_comp_chroma(ps_proc);
2306 
2307     /********************************************************/
2308     /*  error estimation,                                   */
2309     /*  transform                                           */
2310     /*  quantization                                        */
2311     /********************************************************/
2312     ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
2313                                                pu1_pred_mb, pi2_res_mb,
2314                                                i4_src_strd, i4_pred_strd,
2315                                                i4_res_strd,
2316                                                ps_qp_params->pu2_scale_mat,
2317                                                ps_qp_params->pu2_thres_mat,
2318                                                ps_qp_params->u1_qbits,
2319                                                ps_qp_params->u4_dead_zone,
2320                                                au1_nnz);
2321 
2322     /********************************************************/
2323     /*  pack coeff data for entropy coding                  */
2324     /********************************************************/
2325     ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
2326                      au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
2327 
2328     /********************************************************/
2329     /*  ierror estimation,                                  */
2330     /*  itransform                                          */
2331     /*  iquantization                                       */
2332     /********************************************************/
2333 
2334     /* If the frame is not to be used for P frame reference or dumping recon
2335      * we only will use the reocn for only predicting intra Mbs
2336      * THis will need only right and bottom edge 4x4 blocks recon
2337      * Hence we selectively enable them using control signal(including DC)
2338      */
2339     if (!ps_proc->u4_compute_recon)
2340     {
2341         u4_cntrl &= 0x7700C000;
2342     }
2343 
2344     if (u4_cntrl)
2345     {
2346         ih264e_chroma_8x8_idctrans_iquant_itrans_recon(
2347                         ps_codec, pi2_res_mb, pu1_pred_mb, pu1_rec_mb,
2348                         i4_res_strd, i4_pred_strd, i4_rec_strd,
2349                         ps_qp_params->pu2_iscale_mat,
2350                         ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
2351                         u4_cntrl, ps_proc->pv_scratch_buff);
2352     }
2353     else
2354     {
2355         ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_rec_mb, i4_pred_strd,
2356                                           i4_rec_strd, MB_SIZE >> 1, MB_SIZE,
2357                                           NULL, 0);
2358     }
2359 
2360     return (u1_cbp_c);
2361 }
2362