1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19  *******************************************************************************
20  * @file
21  *  ihevcd_iquant_itrans_recon_ctb.c
22  *
23  * @brief
24  *  Contains functions for inverse quantization, inverse transform and recon
25  *
26  * @author
27  *  Ittiam
28  *
29  * @par List of Functions:
30  * - ihevcd_iquant_itrans_recon_ctb()
31  *
32  * @remarks
33  *  None
34  *
35  *******************************************************************************
36  */
37 /*****************************************************************************/
38 /* File Includes                                                             */
39 /*****************************************************************************/
40 #include <stdio.h>
41 #include <stddef.h>
42 #include <stdlib.h>
43 #include <string.h>
44 
45 #include "ihevc_typedefs.h"
46 #include "iv.h"
47 #include "ivd.h"
48 #include "ihevcd_cxa.h"
49 
50 #include "ihevc_defs.h"
51 #include "ihevc_debug.h"
52 #include "ihevc_structs.h"
53 #include "ihevc_cabac_tables.h"
54 #include "ihevc_macros.h"
55 #include "ihevc_platform_macros.h"
56 
57 #include "ihevcd_defs.h"
58 #include "ihevcd_function_selector.h"
59 #include "ihevcd_structs.h"
60 #include "ihevcd_error.h"
61 #include "ihevcd_bitstream.h"
62 #include "ihevc_common_tables.h"
63 
64 /* Intra pred includes */
65 #include "ihevc_intra_pred.h"
66 
67 /* Inverse transform common module includes */
68 #include "ihevc_trans_tables.h"
69 #include "ihevc_trans_macros.h"
70 #include "ihevc_itrans_recon.h"
71 #include "ihevc_recon.h"
72 #include "ihevc_chroma_itrans_recon.h"
73 #include "ihevc_chroma_recon.h"
74 
75 /* Decoder includes */
76 #include "ihevcd_common_tables.h"
77 #include "ihevcd_iquant_itrans_recon_ctb.h"
78 #include "ihevcd_debug.h"
79 #include "ihevcd_profile.h"
80 #include "ihevcd_statistics.h"
81 #include "ihevcd_itrans_recon_dc.h"
82 
83 static const UWORD32 gau4_ihevcd_4_bit_reverse[] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
84 
85 
86 /* Globals */
87 static const WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES] =
88   { IP_FUNC_MODE_0, /* Mode 0 */
89     IP_FUNC_MODE_1, /* Mode 1 */
90     IP_FUNC_MODE_2, /* Mode 2 */
91     IP_FUNC_MODE_3TO9, /* Mode 3 */
92     IP_FUNC_MODE_3TO9, /* Mode 4 */
93     IP_FUNC_MODE_3TO9, /* Mode 5 */
94     IP_FUNC_MODE_3TO9, /* Mode 6 */
95     IP_FUNC_MODE_3TO9, /* Mode 7 */
96     IP_FUNC_MODE_3TO9, /* Mode 8 */
97     IP_FUNC_MODE_3TO9, /* Mode 9 */
98     IP_FUNC_MODE_10, /* Mode 10 */
99     IP_FUNC_MODE_11TO17, /* Mode 11 */
100     IP_FUNC_MODE_11TO17, /* Mode 12 */
101     IP_FUNC_MODE_11TO17, /* Mode 13 */
102     IP_FUNC_MODE_11TO17, /* Mode 14 */
103     IP_FUNC_MODE_11TO17, /* Mode 15 */
104     IP_FUNC_MODE_11TO17, /* Mode 16 */
105     IP_FUNC_MODE_11TO17, /* Mode 17 */
106     IP_FUNC_MODE_18_34, /* Mode 18 */
107     IP_FUNC_MODE_19TO25, /* Mode 19 */
108     IP_FUNC_MODE_19TO25, /* Mode 20 */
109     IP_FUNC_MODE_19TO25, /* Mode 21 */
110     IP_FUNC_MODE_19TO25, /* Mode 22 */
111     IP_FUNC_MODE_19TO25, /* Mode 23 */
112     IP_FUNC_MODE_19TO25, /* Mode 24 */
113     IP_FUNC_MODE_19TO25, /* Mode 25 */
114     IP_FUNC_MODE_26, /* Mode 26 */
115     IP_FUNC_MODE_27TO33, /* Mode 27 */
116     IP_FUNC_MODE_27TO33, /* Mode 26 */
117     IP_FUNC_MODE_27TO33, /* Mode 29 */
118     IP_FUNC_MODE_27TO33, /* Mode 30 */
119     IP_FUNC_MODE_27TO33, /* Mode 31 */
120     IP_FUNC_MODE_27TO33, /* Mode 32 */
121     IP_FUNC_MODE_27TO33, /* Mode 33 */
122     IP_FUNC_MODE_18_34, /* Mode 34 */
123 };
124 
125 
126 const WORD16 *g_ai2_ihevc_trans_tables[] =
127   { &g_ai2_ihevc_trans_dst_4[0][0],
128     &g_ai2_ihevc_trans_4[0][0],
129     &g_ai2_ihevc_trans_8[0][0],
130     &g_ai2_ihevc_trans_16[0][0],
131     &g_ai2_ihevc_trans_32[0][0]
132 };
133 
134 
135 /*****************************************************************************/
136 /* Function Prototypes                                                       */
137 /*****************************************************************************/
138 /* Returns a pointer to the packed coefficient data that follows the levels consumed for this TU */
/**
 *******************************************************************************
 *
 * @brief
 *  Expands the packed coefficient data of one TU into a 2D coefficient block
 *
 * @par Description:
 *  The packed data starts with one byte holding the number of coded
 *  sub-blocks and one byte holding the scan type (bit 0 of that byte is the
 *  transform skip flag). It is followed by one tu_sblk_coeff_data_t record per
 *  coded sub-block: the sub-block position (x in the low byte, y in the high
 *  byte), a significance map and one level per set bit of the map, stored
 *  from the most significant set bit downwards.
 *
 *  Levels are inverse quantized unless trans_quant_bypass is set; with
 *  transform skip the dequantized value is additionally rounded and shifted
 *  right by 5. Bit depth is fixed to 8 when deriving the dequant shift.
 *
 *  DC-only path: taken when the transform is not DST_4x4 and exactly one
 *  sub-block at position 0 with significance map 1 is coded. In this path the
 *  output block is not cleared as a whole unless transform skip is set; with
 *  trans_quant_bypass only the first column is cleared.
 *
 * @param[out] pi2_tu_coeff
 *  trans_size x trans_size output block (stride trans_size)
 *
 * @param[in] log2_trans_size
 *  log2 of the transform size
 *
 * @param[in] pu1_tu_coeff_data
 *  Packed coefficient data of this TU
 *
 * @param[in] pi2_dequant_matrix
 *  Scaling matrix, indexed in raster order with stride trans_size
 *
 * @param[in] qp_rem
 *  Index into g_ihevc_iquant_scales
 *
 * @param[in] qp_div
 *  Passed to the IQUANT macros together with the dequant shift
 *
 * @param[in] e_trans_type
 *  Transform type; only compared against DST_4x4 here (and passed to the
 *  statistics macros)
 *
 * @param[in] trans_quant_bypass
 *  Non-zero to copy levels without inverse quantization
 *
 * @param[out] pu4_zero_cols
 *  Bit n stays set while column n holds no coefficient
 *
 * @param[out] pu4_zero_rows
 *  Bit n stays set while row n holds no coefficient
 *
 * @param[out] pu4_coeff_type
 *  Set to 1 on the DC-only path, 0 otherwise
 *
 * @param[out] pi2_coeff_value
 *  DC value; written only on the DC-only path
 *
 * @returns  Pointer to the packed data following the last level consumed
 *
 *******************************************************************************
 */
UWORD8* ihevcd_unpack_coeffs(WORD16 *pi2_tu_coeff,
                             WORD32 log2_trans_size,
                             UWORD8 *pu1_tu_coeff_data,
                             WORD16 *pi2_dequant_matrix,
                             WORD32 qp_rem,
                             WORD32 qp_div,
                             TRANSFORM_TYPE e_trans_type,
                             WORD32 trans_quant_bypass,
                             UWORD32 *pu4_zero_cols,
                             UWORD32 *pu4_zero_rows,
                             UWORD32 *pu4_coeff_type,
                             WORD16 *pi2_coeff_value)
{
    /* Generating coeffs from coeff-map */
    WORD32 i;
    WORD16 *pi2_sblk_ptr;
    WORD32 subblk_pos_x, subblk_pos_y;
    WORD32 sblk_scan_idx, coeff_raster_idx;
    WORD32 sblk_non_zero_coeff_idx;
    tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data;
    UWORD8 u1_num_coded_sblks, u1_scan_type;
    UWORD8 *pu1_new_tu_coeff_data;
    WORD32 trans_size;
    WORD32 xs, ys;
    WORD32 trans_skip;
    WORD16 iquant_out;
    WORD32 shift_iq;
    {
        WORD32 bit_depth;

        /* Bit depth is hard-coded to 8 */
        bit_depth = 8 + 0;
        shift_iq = bit_depth + log2_trans_size - 5;
    }
    trans_size = (1 << log2_trans_size);

    /* First byte points to number of coded blocks */
    u1_num_coded_sblks = *pu1_tu_coeff_data++;

    /* Next byte points to scan type */
    u1_scan_type = *pu1_tu_coeff_data++;
    /* 0th bit has trans_skip */
    trans_skip = u1_scan_type & 1;
    u1_scan_type >>= 1;

    pi2_sblk_ptr = pi2_tu_coeff;

    /* Initially all columns are assumed to be zero */
    *pu4_zero_cols = 0xFFFFFFFF;
    /* Initially all rows are assumed to be zero */
    *pu4_zero_rows = 0xFFFFFFFF;

    ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)(pu1_tu_coeff_data);

    if(trans_skip)
        memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16));

    STATS_INIT_SBLK_AND_COEFF_POS();

    /* DC only case */
    if((e_trans_type != DST_4x4) && (1 == u1_num_coded_sblks)
                    && (0 == ps_tu_sblk_coeff_data->u2_subblk_pos)
                    && (1 == ps_tu_sblk_coeff_data->u2_sig_coeff_map))
    {
        *pu4_coeff_type = 1;

        if(!trans_quant_bypass)
        {
            if(4 == trans_size)
            {
                IQUANT_4x4(iquant_out,
                           ps_tu_sblk_coeff_data->ai2_level[0],
                           pi2_dequant_matrix[0]
                                           * g_ihevc_iquant_scales[qp_rem],
                           shift_iq, qp_div);
            }
            else
            {
                IQUANT(iquant_out, ps_tu_sblk_coeff_data->ai2_level[0],
                       pi2_dequant_matrix[0] * g_ihevc_iquant_scales[qp_rem],
                       shift_iq, qp_div);
            }
            if(trans_skip)
                iquant_out = (iquant_out + 16) >> 5;
        }
        else
        {
            /* setting the column to zero */
            for(i = 0; i < trans_size; i++)
                *(pi2_tu_coeff + i * trans_size) = 0;

            iquant_out = ps_tu_sblk_coeff_data->ai2_level[0];
        }
        *pi2_coeff_value = iquant_out;
        *pi2_tu_coeff = iquant_out;
        /* Only column 0 and row 0 are non-zero */
        *pu4_zero_cols &= ~0x1U;
        *pu4_zero_rows &= ~0x1U;
        /* Exactly one level was consumed */
        ps_tu_sblk_coeff_data =
                        (void *)&ps_tu_sblk_coeff_data->ai2_level[1];

        STATS_UPDATE_COEFF_COUNT();
        STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass),  0, 0);
        STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip));
        return ((UWORD8 *)ps_tu_sblk_coeff_data);
    }
    else
    {
        *pu4_coeff_type = 0;
        /* In case of trans skip, memset has already happened */
        if(!trans_skip)
            memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16));
    }

    for(i = 0; i < u1_num_coded_sblks; i++)
    {
        UWORD32 u4_sig_coeff_map;
        /* Sub-block position: x in the low byte, y in the high byte */
        subblk_pos_x = ps_tu_sblk_coeff_data->u2_subblk_pos & 0x00FF;
        subblk_pos_y = (ps_tu_sblk_coeff_data->u2_subblk_pos & 0xFF00) >> 8;

        STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), subblk_pos_x, subblk_pos_y);

        /* Converting sub-block units to coefficient units */
        subblk_pos_x = subblk_pos_x * MIN_TU_SIZE;
        subblk_pos_y = subblk_pos_y * MIN_TU_SIZE;

        pi2_sblk_ptr = pi2_tu_coeff + subblk_pos_y * trans_size
                        + subblk_pos_x;

        sblk_non_zero_coeff_idx = 0;
        u4_sig_coeff_map = ps_tu_sblk_coeff_data->u2_sig_coeff_map;
        /* NOTE(review): the loop below assumes a non-zero significance map */
        /* for every coded sub-block; CLZ(0) is platform dependent - confirm */
        /* that the parser never stores an empty map                         */
        sblk_scan_idx = 31;
        do
        {
            WORD32 clz = CLZ(u4_sig_coeff_map);

            /* Skip to the scan position of the most significant set bit */
            sblk_scan_idx -= clz;
            /* when clz is 31, u4_sig_coeff_map << (clz+1) might result in unknown behaviour in some cases */
            /* Hence either use SHL which takes care of handling these issues based on platform or shift in two stages */
            u4_sig_coeff_map = u4_sig_coeff_map << clz;
            /* Copying coeffs and storing in reverse order */
            {
                STATS_UPDATE_COEFF_COUNT();
                coeff_raster_idx =
                                gau1_ihevc_invscan4x4[u1_scan_type][sblk_scan_idx];

                /* Position of the coefficient inside the 4x4 sub-block */
                xs = coeff_raster_idx & 0x3;
                ys = coeff_raster_idx >> 2;

                if(!trans_quant_bypass)
                {
                    if(4 == trans_size)
                    {
                        IQUANT_4x4(iquant_out,
                                   ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx],
                                   pi2_dequant_matrix[(subblk_pos_x + xs)
                                                   + (subblk_pos_y + ys)
                                                   * trans_size]
                                   * g_ihevc_iquant_scales[qp_rem],
                                   shift_iq, qp_div);
                        sblk_non_zero_coeff_idx++;
                    }
                    else
                    {
                        IQUANT(iquant_out,
                               ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx],
                               pi2_dequant_matrix[(subblk_pos_x + xs)
                                               + (subblk_pos_y + ys)
                                               * trans_size]
                               * g_ihevc_iquant_scales[qp_rem],
                               shift_iq, qp_div);
                        sblk_non_zero_coeff_idx++;
                    }

                    if(trans_skip)
                        iquant_out = (iquant_out + 16) >> 5;
                }
                else
                {
                    iquant_out = ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx++];
                }
                /* Unsigned literal: the shift count reaches 31 for 32x32 */
                /* TUs, which would overflow a signed int                  */
                *pu4_zero_cols &= ~(0x1U << (subblk_pos_x + xs));
                *pu4_zero_rows &= ~(0x1U << (subblk_pos_y + ys));
                *(pi2_sblk_ptr + xs + ys * trans_size) = iquant_out;
            }
            /* Second stage of the two stage shift: drop the bit just handled */
            sblk_scan_idx--;
            u4_sig_coeff_map <<= 1;

        }while(u4_sig_coeff_map);
        /* Updating the sblk pointer */
        ps_tu_sblk_coeff_data =
                        (void *)&ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx];
    }

    STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip));

    pu1_new_tu_coeff_data = (UWORD8 *)ps_tu_sblk_coeff_data;

    return pu1_new_tu_coeff_data;
}
339 
/**
 *******************************************************************************
 *
 * @brief
 *  Builds the packed neighbour flags of a TU from the CTB availability map
 *
 * @par Description:
 *  pu4_intra_nbr_avail holds one word per min-TU row; entry 0 is the row
 *  above the CTB and entry (1 + r) is min-TU row r of the CTB. Within a word
 *  the MSB is the column left of the CTB and min-TU column c sits (1 + c)
 *  bits below the MSB.
 *
 *  Without constrained intra pred every available neighbour gets all of its
 *  flag bits set. With constrained intra pred the bits are read from
 *  pu1_pic_intra_flag, which is addressed as one bit per 8x8 block, one byte
 *  per 64 pixels of width and one row of bytes per 8 pixel rows.
 *
 *  Top-right and bottom-left flags are finally limited to what lies inside
 *  the picture.
 *
 * @param[in] ps_proc
 *  Process context (CTB position, SPS, picture level intra flag map)
 *
 * @param[in] ps_tu
 *  TU whose neighbours are examined
 *
 * @param[in] pu4_intra_nbr_avail
 *  CTB level availability map described above
 *
 * @param[in] i2_pic_width_in_luma_samples
 *  Picture width, used to derive the row stride of the intra flag map
 *
 * @param[in] i1_constrained_intra_pred_flag
 *  Non-zero to derive the flags from the intra flag map
 *
 * @param[in] trans_size
 *  Transform size in pixels
 *
 * @param[in] ctb_size
 *  CTB size in pixels
 *
 * @returns  Neighbour flags, MSB --> LSB:
 *           Top-Left (1) | Top-Right (4) | Top (4) | Left (4) | Bottom-Left (4)
 *           Left and Bottom-Left are stored bit-reversed.
 *
 *******************************************************************************
 */
WORD32 ihevcd_get_intra_nbr_flag(process_ctxt_t *ps_proc,
                                 tu_t *ps_tu,
                                 UWORD32 *pu4_intra_nbr_avail,
                                 WORD16 i2_pic_width_in_luma_samples,
                                 UWORD8 i1_constrained_intra_pred_flag,
                                 WORD32 trans_size,
                                 WORD32 ctb_size)
{
    sps_t *ps_sps = ps_proc->ps_sps;
    UWORD8 *pu1_intra_map = ps_proc->pu1_pic_intra_flag;
    /* Bytes per row of the intra flag map */
    WORD32 map_strd = (i2_pic_width_in_luma_samples + 63) / 64;

    /* TU position and size in min-TU units */
    WORD32 tu_col = ps_tu->b4_pos_x;
    WORD32 tu_row = ps_tu->b4_pos_y;
    WORD32 tu_span = trans_size / MIN_TU_SIZE;

    /* Shift that brings the column left of the TU / the TU's own first */
    /* column down to bit 0                                             */
    WORD32 left_col_shift = 31 - tu_col;
    WORD32 cur_col_shift = 30 - tu_col;

    /* Availability word of the min-TU row just above the TU */
    UWORD32 u4_row_above = pu4_intra_nbr_avail[tu_row];

    UWORD8 u1_bot_lt_ok = (pu4_intra_nbr_avail[1 + tu_row + tu_span]
                    >> left_col_shift) & 1;
    UWORD8 u1_left_ok = (pu4_intra_nbr_avail[1 + tu_row] >> left_col_shift) & 1;
    UWORD8 u1_top_ok = (u4_row_above >> cur_col_shift) & 1;
    UWORD8 u1_top_rt_ok = (u4_row_above >> (cur_col_shift - tu_span)) & 1;
    UWORD8 u1_top_lt_ok = (u4_row_above >> left_col_shift) & 1;

    /* TU position in the picture, in pixels */
    WORD32 x_cur = ps_proc->i4_ctb_x * ctb_size + tu_col * MIN_TU_SIZE;
    WORD32 y_cur = ps_proc->i4_ctb_y * ctb_size + tu_row * MIN_TU_SIZE;

    /* Number of 8x8 units along one TU edge and the matching bit mask */
    WORD32 num_8_blks = (trans_size > 4) ? (trans_size / 8) : 1;
    WORD32 edge_mask = (1 << num_8_blks) - 1;

    UWORD8 bot_left = 0, left = 0, top = 0, top_right = 0, top_left = 0;
    WORD32 blk;
    WORD32 nbr_flags;

    if(!i1_constrained_intra_pred_flag)
    {
        /* Every available neighbour is usable as a whole */
        top = u1_top_ok ? 0xF : 0;
        top_right = u1_top_rt_ok ? 0xF : 0;
        bot_left = u1_bot_lt_ok ? 0xF : 0;
        left = u1_left_ok ? 0xF : 0;
        top_left = u1_top_lt_ok ? 0x1 : 0;
    }
    else
    {
        /* TODO: constrained intra pred not tested */
        if(u1_bot_lt_ok)
        {
            WORD32 x_nbr = x_cur - 1;
            WORD32 y_nbr = y_cur + trans_size;
            UWORD8 *pu1_flag = pu1_intra_map + y_nbr / 8 * map_strd
                            + x_nbr / 64;
            WORD32 bit_pos = (x_nbr / 8) % 8;

            /* One bit per 8 pixel row, walking down the left column */
            for(blk = 0; blk < num_8_blks; blk++)
            {
                bot_left |= ((pu1_flag[blk * map_strd] >> bit_pos) & 1) << blk;
            }
            bot_left &= edge_mask;
        }

        if(u1_left_ok)
        {
            WORD32 x_nbr = x_cur - 1;
            WORD32 y_nbr = y_cur;
            UWORD8 *pu1_flag = pu1_intra_map + y_nbr / 8 * map_strd
                            + x_nbr / 64;
            WORD32 bit_pos = (x_nbr / 8) % 8;

            for(blk = 0; blk < num_8_blks; blk++)
            {
                left |= ((pu1_flag[blk * map_strd] >> bit_pos) & 1) << blk;
            }
            left &= edge_mask;
        }

        if(u1_top_ok)
        {
            WORD32 x_nbr = x_cur;
            WORD32 y_nbr = y_cur - 1;
            UWORD8 *pu1_flag = pu1_intra_map + y_nbr / 8 * map_strd
                            + x_nbr / 64;
            WORD32 bit_pos = (x_nbr / 8) % 8;

            /* Flags of the row above are consecutive bits of a single byte */
            top = (*pu1_flag >> bit_pos);
            top &= edge_mask;
        }

        if(u1_top_rt_ok)
        {
            WORD32 x_nbr = x_cur + trans_size;
            WORD32 y_nbr = y_cur - 1;
            UWORD8 *pu1_flag = pu1_intra_map + y_nbr / 8 * map_strd
                            + x_nbr / 64;
            WORD32 bit_pos = (x_nbr / 8) % 8;

            top_right = (*pu1_flag >> bit_pos);
            top_right &= edge_mask;
        }

        if(u1_top_lt_ok)
        {
            WORD32 x_nbr = x_cur - 1;
            WORD32 y_nbr = y_cur - 1;
            UWORD8 *pu1_flag = pu1_intra_map + y_nbr / 8 * map_strd
                            + x_nbr / 64;
            WORD32 bit_pos = (x_nbr / 8) % 8;

            top_left = (*pu1_flag >> bit_pos) & 1;
        }
    }

    /* Handling incomplete CTBs */
    {
        /* Number of pixels covered by one flag bit */
        WORD32 flag_unit = MIN(trans_size, 8);
        WORD32 tu_luma_size = 1 << (ps_tu->b3_size + 2);
        WORD32 ctb_x_pix = ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size;
        WORD32 ctb_y_pix = ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size;

        /* Picture columns / rows left beyond the far edge of this TU */
        WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples
                        - ctb_x_pix
                        - (tu_col * MIN_TU_SIZE)
                        - tu_luma_size;
        WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples
                        - ctb_y_pix
                        - (tu_row * MIN_TU_SIZE)
                        - tu_luma_size;

        /* Valid pixels to the right / below, capped at one CTB */
        WORD32 valid_right = MIN(ctb_size, cols_remaining);
        WORD32 valid_below = MIN(ctb_size, rows_remaining);

        top_right &= (1 << (valid_right / flag_unit)) - 1;
        bot_left &= (1 << (valid_below / flag_unit)) - 1;
    }

    /*    Top-Left | Top-Right | Top | Left | Bottom-Left
     *      1         4         4     4         4
     * Only Left and Bottom-Left go through the bit reversal table
     */
    nbr_flags = (top_left << 16) | (top_right << 12) | (top << 8)
                    | (gau4_ihevcd_4_bit_reverse[left] << 4)
                    | gau4_ihevcd_4_bit_reverse[bot_left];

    return nbr_flags;
}
529 
ihevcd_iquant_itrans_recon_ctb(process_ctxt_t * ps_proc)530 WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc)
531 {
532     WORD16 *pi2_scaling_mat;
533     UWORD8 *pu1_y_dst_ctb;
534     UWORD8 *pu1_uv_dst_ctb;
535     WORD32 ctb_size;
536     codec_t *ps_codec;
537     slice_header_t *ps_slice_hdr;
538     tu_t *ps_tu;
539     WORD16 *pi2_ctb_coeff;
540     WORD32 tu_cnt;
541     WORD16 *pi2_tu_coeff;
542     WORD16 *pi2_tmp;
543     WORD32 pic_strd;
544     WORD32 luma_nbr_flags;
545     WORD32 chroma_nbr_flags = 0;
546     UWORD8 u1_luma_pred_mode_first_tu = 0;
547     /* Pointers for generating 2d coeffs from coeff-map */
548     UWORD8 *pu1_tu_coeff_data;
549     /* nbr avail map for CTB */
550     /* 1st bit points to neighbor (left/top_left/bot_left) */
551     /* 1Tb starts at 2nd bit from msb of 2nd value in array, followed by number of min_tu's in that ctb */
552     UWORD32 au4_intra_nbr_avail[MAX_CTB_SIZE / MIN_TU_SIZE
553                     + 2 /* Top nbr + bot nbr */]; UWORD32
554                     top_avail_bits;
555     sps_t *ps_sps;
556     pps_t *ps_pps;
557     WORD32 intra_flag;
558     UWORD8 *pu1_pic_intra_flag;
559     /*************************************************************************/
560     /* Contains scaling matrix offset in the following order in a 1D buffer  */
561     /* Intra 4 x 4 Y, 4 x 4 U, 4 x 4 V                                       */
562     /* Inter 4 x 4 Y, 4 x 4 U, 4 x 4 V                                       */
563     /* Intra 8 x 8 Y, 8 x 8 U, 8 x 8 V                                       */
564     /* Inter 8 x 8 Y, 8 x 8 U, 8 x 8 V                                       */
565     /* Intra 16x16 Y, 16x16 U, 16x16 V                                       */
566     /* Inter 16x16 Y, 16x16 U, 16x16 V                                       */
567     /* Intra 32x32 Y                                                         */
568     /* Inter 32x32 Y                                                         */
569     /*************************************************************************/
570     WORD32 scaling_mat_offset[] =
571       { 0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, 480, 736, 992,
572         1248, 1504, 1760, 2016, 3040 };
573 
574     PROFILE_DISABLE_IQ_IT_RECON_INTRA_PRED();
575 
576     ps_sps = ps_proc->ps_sps;
577     ps_pps = ps_proc->ps_pps;
578     ps_slice_hdr = ps_proc->ps_slice_hdr;
579     ps_codec = ps_proc->ps_codec;
580 
581     pu1_y_dst_ctb = ps_proc->pu1_cur_ctb_luma;
582     pu1_uv_dst_ctb = ps_proc->pu1_cur_ctb_chroma;
583 
584     pi2_ctb_coeff = ps_proc->pi2_invscan_out;
585 
586     ctb_size = (1 << ps_sps->i1_log2_ctb_size);
587     pu1_tu_coeff_data = (UWORD8 *)ps_proc->pv_tu_coeff_data;
588 
589     pic_strd = ps_codec->i4_strd;
590 
591     pi2_tmp = ps_proc->pi2_itrans_intrmd_buf;
592 
593     pi2_tu_coeff = pi2_ctb_coeff;
594 
595     ps_tu = ps_proc->ps_tu;
596 
597     if((1 == ps_sps->i1_scaling_list_enable_flag) && (1 == ps_pps->i1_pps_scaling_list_data_present_flag))
598     {
599         pi2_scaling_mat = ps_pps->pi2_scaling_mat;
600     }
601     else
602     {
603         pi2_scaling_mat = ps_sps->pi2_scaling_mat;
604     }
605 
606     {
607         /* Updating the initial availability map */
608         WORD32 i;
609         UWORD8 u1_left_ctb_avail, u1_top_lt_ctb_avail, u1_top_rt_ctb_avail,
610                         u1_top_ctb_avail;
611 
612         u1_left_ctb_avail = ps_proc->u1_left_ctb_avail;
613         u1_top_lt_ctb_avail = ps_proc->u1_top_lt_ctb_avail;
614         u1_top_ctb_avail = ps_proc->u1_top_ctb_avail;
615         u1_top_rt_ctb_avail = ps_proc->u1_top_rt_ctb_avail;
616 
617         /* Initializing the availability array */
618         memset(au4_intra_nbr_avail, 0,
619                (MAX_CTB_SIZE / MIN_TU_SIZE + 2) * sizeof(UWORD32));
620         /* Initializing the availability array with CTB level availability flags */
621         {
622             WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size);
623             WORD32 ctb_size_left = MIN(ctb_size, rows_remaining);
624             for(i = 0; i < ctb_size_left / MIN_TU_SIZE; i++)
625             {
626                 au4_intra_nbr_avail[i + 1] = ((UWORD32)u1_left_ctb_avail << 31);
627             }
628         }
629         au4_intra_nbr_avail[0] |= (((UWORD32)u1_top_rt_ctb_avail << 31)
630                         >> (1 + ctb_size / MIN_TU_SIZE)); /* 1+ctb_size/4 position bit pos from msb */
631 
632         au4_intra_nbr_avail[0] |= ((UWORD32)u1_top_lt_ctb_avail << 31);
633 
634         {
635             WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size);
636             WORD32 ctb_size_top = MIN(ctb_size, cols_remaining);
637             WORD32 shift = (31 - (ctb_size / MIN_TU_SIZE));
638 
639             /* ctb_size_top gives number of valid pixels remaining in the current row */
640             /* Since we need pattern of 1's starting from the MSB, an additional shift */
641             /* is needed */
642             shift += ((ctb_size - ctb_size_top) / MIN_TU_SIZE);
643 
644             top_avail_bits = ((1 << (ctb_size_top / MIN_TU_SIZE)) - 1)
645                             << shift;
646         }
647         au4_intra_nbr_avail[0] |= (
648                         (u1_top_ctb_avail == 1) ? top_avail_bits : 0x0);
649         /* Starting from msb 2nd bit to (1+ctb_size/4) bit, set 1 if top avail,or 0 */
650 
651     }
652 
653     /* Applying Inverse transform on all the TU's in CTB */
654     for(tu_cnt = 0; tu_cnt < ps_proc->i4_ctb_tu_cnt; tu_cnt++, ps_tu++)
655     {
656         WORD32 transform_skip_flag = 0;
657         WORD32 transform_skip_flag_v = 0;
658         WORD32 num_comp, c_idx, func_idx;
659         WORD32 src_strd, pred_strd, dst_strd;
660         WORD32 qp_div = 0, qp_rem = 0;
661         WORD32 qp_div_v = 0, qp_rem_v = 0;
662         UWORD32 zero_cols = 0, zero_cols_v = 0;
663         UWORD32 zero_rows = 0, zero_rows_v = 0;
664         UWORD32 coeff_type = 0, coeff_type_v = 0;
665         WORD16 i2_coeff_value, i2_coeff_value_v;
666         WORD32 trans_size = 0;
667         TRANSFORM_TYPE e_trans_type;
668         WORD32 log2_y_trans_size_minus_2, log2_uv_trans_size_minus_2;
669         WORD32 log2_trans_size;
670         WORD32 chroma_qp_idx;
671         WORD16 *pi2_src = NULL, *pi2_src_v = NULL;
672         UWORD8 *pu1_pred = NULL, *pu1_pred_v = NULL;
673         UWORD8 *pu1_dst = NULL, *pu1_dst_v = NULL;
674         WORD16 *pi2_dequant_matrix = NULL, *pi2_dequant_matrix_v = NULL;
675         WORD32 tu_x, tu_y;
676         WORD32 tu_y_offset, tu_uv_offset;
677         WORD8 i1_chroma_pic_qp_offset, i1_chroma_slice_qp_offset;
678         UWORD8 u1_cbf = 0, u1_cbf_v = 0, u1_luma_pred_mode, u1_chroma_pred_mode;
679         WORD32 luma_nbr_flags_4x4[4];
680         WORD32 offset;
681         WORD32 pcm_flag;
682         WORD32  chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU);
683         /* If 420SP_VU is chroma format, pred and dst pointer   */
684         /* will be added +1 to point to U                       */
685         WORD32 chroma_yuv420sp_vu_u_offset = 1 * chroma_yuv420sp_vu;
686         /* If 420SP_VU is chroma format, pred and dst pointer   */
687         /* will be added U offset of +1 and subtracted 2        */
688         /* to point to V                                        */
689         WORD32 chroma_yuv420sp_vu_v_offset = -2 * chroma_yuv420sp_vu;
690 
691         tu_x = ps_tu->b4_pos_x * 4; /* Converting minTU unit to pixel unit */
692         tu_y = ps_tu->b4_pos_y * 4; /* Converting minTU unit to pixel unit */
693         {
694             WORD32 tu_abs_x = (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) + (tu_x);
695             WORD32 tu_abs_y = (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) + (tu_y);
696 
697             WORD32 numbytes_row =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
698 
699             pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag;
700             pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row;
701             pu1_pic_intra_flag += (tu_abs_x >> 6);
702 
703             intra_flag = *pu1_pic_intra_flag;
704             intra_flag &= (1 << ((tu_abs_x >> 3) % 8));
705         }
706 
707         u1_luma_pred_mode = ps_tu->b6_luma_intra_mode;
708         u1_chroma_pred_mode = ps_tu->b3_chroma_intra_mode_idx;
709 
710         if(u1_chroma_pred_mode != 7)
711             num_comp = 2; /* Y and UV */
712         else
713             num_comp = 1; /* Y */
714 
715 
716         pcm_flag = 0;
717 
718         if((intra_flag) && (u1_luma_pred_mode == INTRA_PRED_NONE))
719         {
720             UWORD8 *pu1_buf;
721             UWORD8 *pu1_y_dst = pu1_y_dst_ctb;
722             UWORD8 *pu1_uv_dst = pu1_uv_dst_ctb;
723             WORD32 i, j;
724             tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data;
725             WORD32 cb_size = 1 << (ps_tu->b3_size + 2);
726 
727             /* trans_size is used to update availability after reconstruction */
728             trans_size = cb_size;
729 
730             pcm_flag = 1;
731 
732             tu_y_offset = tu_x + tu_y * pic_strd;
733             pu1_y_dst += tu_x + tu_y * pic_strd;
734             pu1_uv_dst += tu_x + (tu_y >> 1) * pic_strd;
735 
736             /* First byte points to number of coded blocks */
737             pu1_tu_coeff_data++;
738 
739             /* Next byte points to scan type */
740             pu1_tu_coeff_data++;
741 
742             ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)pu1_tu_coeff_data;
743 
744             pu1_buf = (UWORD8 *)&ps_tu_sblk_coeff_data->ai2_level[0];
745             {
746 
747                 for(i = 0; i < cb_size; i++)
748                 {
749                     //pu1_y_dst[i * pic_strd + j] = *pu1_buf++;
750                     memcpy(&pu1_y_dst[i * pic_strd], pu1_buf, cb_size);
751                     pu1_buf += cb_size;
752                 }
753 
754                 pu1_uv_dst = pu1_uv_dst + chroma_yuv420sp_vu_u_offset;
755 
756                 /* U */
757                 for(i = 0; i < cb_size / 2; i++)
758                 {
759                     for(j = 0; j < cb_size / 2; j++)
760                     {
761                         pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++;
762                     }
763                 }
764 
765                 pu1_uv_dst = pu1_uv_dst + 1 + chroma_yuv420sp_vu_v_offset;
766 
767                 /* V */
768                 for(i = 0; i < cb_size / 2; i++)
769                 {
770                     for(j = 0; j < cb_size / 2; j++)
771                     {
772                         pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++;
773                     }
774                 }
775             }
776 
777             pu1_tu_coeff_data = pu1_buf;
778 
779         }
780 
781 
782 
783 
784 
785         for(c_idx = 0; c_idx < num_comp; c_idx++)
786         {
787             if(0 == pcm_flag)
788             {
789                 /* Initializing variables */
790                 pred_strd = pic_strd;
791                 dst_strd = pic_strd;
792 
793                 if(c_idx == 0) /* Y */
794                 {
795                     log2_y_trans_size_minus_2 = ps_tu->b3_size;
796                     trans_size = 1 << (log2_y_trans_size_minus_2 + 2);
797                     log2_trans_size = log2_y_trans_size_minus_2 + 2;
798 
799                     tu_y_offset = tu_x + tu_y * pic_strd;
800 
801                     pi2_src = pi2_tu_coeff;
802                     pu1_pred = pu1_y_dst_ctb + tu_y_offset;
803                     pu1_dst = pu1_y_dst_ctb + tu_y_offset;
804 
805                     /* Calculating scaling matrix offset */
806                     offset = log2_y_trans_size_minus_2 * 6
807                                     + (!intra_flag)
808                                     * ((log2_y_trans_size_minus_2
809                                                     == 3) ? 1 : 3)
810                                     + c_idx;
811                     pi2_dequant_matrix = pi2_scaling_mat
812                                     + scaling_mat_offset[offset];
813 
814                     src_strd = trans_size;
815 
816                     /* 4x4 transform Luma in INTRA mode is DST */
817                     if(log2_y_trans_size_minus_2 == 0 && intra_flag)
818                     {
819                         func_idx = log2_y_trans_size_minus_2;
820                         e_trans_type = DST_4x4;
821                     }
822                     else
823                     {
824                         func_idx = log2_y_trans_size_minus_2 + 1;
825                         e_trans_type = (TRANSFORM_TYPE)(log2_y_trans_size_minus_2 + 1);
826                     }
827 
828                     qp_div = ps_tu->b7_qp / 6;
829                     qp_rem = ps_tu->b7_qp % 6;
830 
831                     u1_cbf = ps_tu->b1_y_cbf;
832 
833                     transform_skip_flag = pu1_tu_coeff_data[1] & 1;
834                     /* Unpacking coeffs */
835                     if(1 == u1_cbf)
836                     {
837                         pu1_tu_coeff_data = ihevcd_unpack_coeffs(
838                                         pi2_src, log2_y_trans_size_minus_2 + 2,
839                                         pu1_tu_coeff_data, pi2_dequant_matrix,
840                                         qp_rem, qp_div, e_trans_type,
841                                         ps_tu->b1_transquant_bypass, &zero_cols,
842                                         &zero_rows, &coeff_type,
843                                         &i2_coeff_value);
844                     }
845                 }
846                 else /* UV interleaved */
847                 {
848                     /* Chroma :If Transform size is 4x4, keep 4x4 else do transform on (trans_size/2 x trans_size/2) */
849                     if(ps_tu->b3_size == 0)
850                     {
851                         /* Chroma 4x4 is present with 4th luma 4x4 block. For this case chroma position has to be (luma pos x - 4, luma pos y - 4) */
852                         log2_uv_trans_size_minus_2 = ps_tu->b3_size;
853                         tu_uv_offset = (tu_x - 4) + ((tu_y - 4) / 2) * pic_strd;
854                     }
855                     else
856                     {
857                         log2_uv_trans_size_minus_2 = ps_tu->b3_size - 1;
858                         tu_uv_offset = tu_x + (tu_y >> 1) * pic_strd;
859                     }
860                     trans_size = 1 << (log2_uv_trans_size_minus_2 + 2);
861                     log2_trans_size = log2_uv_trans_size_minus_2 + 2;
862 
863                     pi2_src = pi2_tu_coeff;
864                     pi2_src_v = pi2_tu_coeff + trans_size * trans_size;
865                     pu1_pred = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/
866                     pu1_pred_v = pu1_pred + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/
867                     pu1_dst = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/
868                     pu1_dst_v = pu1_dst + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/
869 
870                     /*TODO: Add support for choosing different tables for U and V,
871                      * change this to a single array to handle flat/default/custom, intra/inter, luma/chroma and various sizes
872                      */
873                     /* Calculating scaling matrix offset */
874                     /* ((log2_uv_trans_size_minus_2 == 3) ? 1:3) condition check is not needed, since
875                      * max uv trans size is 16x16
876                      */
877                     offset = log2_uv_trans_size_minus_2 * 6
878                                     + (!intra_flag) * 3 + c_idx;
879                     pi2_dequant_matrix = pi2_scaling_mat
880                                     + scaling_mat_offset[offset];
881                     pi2_dequant_matrix_v = pi2_scaling_mat
882                                     + scaling_mat_offset[offset + 1];
883 
884                     src_strd = trans_size;
885 
886                     func_idx = 1 + 4 + log2_uv_trans_size_minus_2; /* DST func + Y funcs + cur func index*/
887                     e_trans_type = (TRANSFORM_TYPE)(log2_uv_trans_size_minus_2 + 1);
888                     /* QP for U */
889                     i1_chroma_pic_qp_offset = ps_pps->i1_pic_cb_qp_offset;
890                     i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset;
891                     u1_cbf = ps_tu->b1_cb_cbf;
892 
893                     chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset
894                                     + i1_chroma_slice_qp_offset;
895                     chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57);
896                     qp_div = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6;
897                     qp_rem = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6;
898 
899                     /* QP for V */
900                     i1_chroma_pic_qp_offset = ps_pps->i1_pic_cr_qp_offset;
901                     i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cr_qp_offset;
902                     u1_cbf_v = ps_tu->b1_cr_cbf;
903 
904                     chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset
905                                     + i1_chroma_slice_qp_offset;
906                     chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57);
907                     qp_div_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6;
908                     qp_rem_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6;
909 
910                     /* Unpacking coeffs */
911                     transform_skip_flag = pu1_tu_coeff_data[1] & 1;
912                     if(1 == u1_cbf)
913                     {
914                         pu1_tu_coeff_data = ihevcd_unpack_coeffs(
915                                         pi2_src, log2_uv_trans_size_minus_2 + 2,
916                                         pu1_tu_coeff_data, pi2_dequant_matrix,
917                                         qp_rem, qp_div, e_trans_type,
918                                         ps_tu->b1_transquant_bypass, &zero_cols,
919                                         &zero_rows, &coeff_type,
920                                         &i2_coeff_value);
921                     }
922 
923                     transform_skip_flag_v = pu1_tu_coeff_data[1] & 1;
924                     if(1 == u1_cbf_v)
925                     {
926                         pu1_tu_coeff_data = ihevcd_unpack_coeffs(
927                                         pi2_src_v, log2_uv_trans_size_minus_2 + 2,
928                                         pu1_tu_coeff_data, pi2_dequant_matrix_v,
929                                         qp_rem_v, qp_div_v, e_trans_type,
930                                         ps_tu->b1_transquant_bypass, &zero_cols_v,
931                                         &zero_rows_v, &coeff_type_v, &i2_coeff_value_v);
932                     }
933                 }
934                 /***************************************************************/
935                 /******************  Intra Prediction **************************/
936                 /***************************************************************/
937                 if(intra_flag) /* Intra */
938                 {
939                     UWORD8 au1_ref_sub_out[(MAX_TU_SIZE * 2 * 2) + 4];
940                     UWORD8 *pu1_top_left, *pu1_top, *pu1_left;
941                     WORD32 luma_pred_func_idx, chroma_pred_func_idx;
942 
943                     /* Get the neighbour availability flags */
944                     /* Done for only Y */
945                     if(c_idx == 0)
946                     {
947                         /* Get neighbor availability for Y only */
948                         luma_nbr_flags = ihevcd_get_intra_nbr_flag(ps_proc,
949                                                                    ps_tu,
950                                                                    au4_intra_nbr_avail,
951                                                                    ps_sps->i2_pic_width_in_luma_samples,
952                                                                    ps_pps->i1_constrained_intra_pred_flag,
953                                                                    trans_size,
954                                                                    ctb_size);
955 
956                         if(trans_size == 4)
957                             luma_nbr_flags_4x4[(ps_tu->b4_pos_x % 2) + (ps_tu->b4_pos_y % 2) * 2] = luma_nbr_flags;
958 
959                         if((ps_tu->b4_pos_x % 2 == 0) && (ps_tu->b4_pos_y % 2 == 0))
960                         {
961                             chroma_nbr_flags = luma_nbr_flags;
962                         }
963 
964                         /* Initializing nbr pointers */
965                         pu1_top = pu1_pred - pic_strd;
966                         pu1_left = pu1_pred - 1;
967                         pu1_top_left = pu1_pred - pic_strd - 1;
968 
969                         /* call reference array substitution */
970                         if(luma_nbr_flags == 0x1ffff)
971                             ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr(
972                                             pu1_top_left,
973                                             pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, au1_ref_sub_out, 1);
974                         else
975                             ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr(
976                                             pu1_top_left,
977                                             pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, au1_ref_sub_out, 1);
978 
979                         /* call reference filtering */
980                         ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr(
981                                         au1_ref_sub_out, trans_size,
982                                         au1_ref_sub_out,
983                                         u1_luma_pred_mode, ps_sps->i1_strong_intra_smoothing_enable_flag);
984 
985                         /* use the look up to get the function idx */
986                         luma_pred_func_idx = g_i4_ip_funcs[u1_luma_pred_mode];
987 
988                         /* call the intra prediction function */
989                         ps_codec->apf_intra_pred_luma[luma_pred_func_idx](au1_ref_sub_out, 1, pu1_pred, pred_strd, trans_size, u1_luma_pred_mode);
990                     }
991                     else
992                     {
993                         /* In case of yuv420sp_vu, prediction happens as usual.         */
994                         /* So point the pu1_pred pointer to original prediction pointer */
995                         UWORD8 *pu1_pred_orig = pu1_pred - chroma_yuv420sp_vu_u_offset;
996 
997                         /*    Top-Left | Top-Right | Top | Left | Bottom-Left
998                          *      1         4         4     4         4
999                          *
1000                          * Generating chroma_nbr_flags depending upon the transform size */
1001                         if(ps_tu->b3_size == 0)
1002                         {
1003                             /* Take TL,T,L flags of First luma 4x4 block */
1004                             chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0);
1005                             /* Take TR flags of Second luma 4x4 block */
1006                             chroma_nbr_flags |= (luma_nbr_flags_4x4[1] & 0x0F000);
1007                             /* Take BL flags of Third luma 4x4 block */
1008                             chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F);
1009                         }
1010 
1011                         /* Initializing nbr pointers */
1012                         pu1_top = pu1_pred_orig - pic_strd;
1013                         pu1_left = pu1_pred_orig - 2;
1014                         pu1_top_left = pu1_pred_orig - pic_strd - 2;
1015 
1016                         /* Chroma pred  mode derivation from luma pred mode */
1017                         {
1018                             tu_t *ps_tu_tmp = ps_tu;
1019                             while(!ps_tu_tmp->b1_first_tu_in_cu)
1020                             {
1021                                 ps_tu_tmp--;
1022                             }
1023                             u1_luma_pred_mode_first_tu = ps_tu_tmp->b6_luma_intra_mode;
1024                         }
1025                         if(4 == u1_chroma_pred_mode)
1026                             u1_chroma_pred_mode = u1_luma_pred_mode_first_tu;
1027                         else
1028                         {
1029                             u1_chroma_pred_mode = gau1_intra_pred_chroma_modes[u1_chroma_pred_mode];
1030 
1031                             if(u1_chroma_pred_mode ==
1032                                                             u1_luma_pred_mode_first_tu)
1033                             {
1034                                 u1_chroma_pred_mode = INTRA_ANGULAR(34);
1035                             }
1036                         }
1037 
1038                         /* call the chroma reference array substitution */
1039                         ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr(
1040                                         pu1_top_left,
1041                                         pu1_top, pu1_left, pic_strd, trans_size, chroma_nbr_flags, au1_ref_sub_out, 1);
1042 
1043                         /* use the look up to get the function idx */
1044                         chroma_pred_func_idx =
1045                                         g_i4_ip_funcs[u1_chroma_pred_mode];
1046 
1047                         /* call the intra prediction function */
1048                         ps_codec->apf_intra_pred_chroma[chroma_pred_func_idx](au1_ref_sub_out, 1, pu1_pred_orig, pred_strd, trans_size, u1_chroma_pred_mode);
1049                     }
1050                 }
1051 
1052                 /* Updating number of transform types */
1053                 STATS_UPDATE_ALL_TRANS(e_trans_type, c_idx);
1054 
1055                 /* IQ, IT and Recon for Y if c_idx == 0, and U if c_idx !=0 */
1056                 if(1 == u1_cbf)
1057                 {
1058                     if(ps_tu->b1_transquant_bypass || transform_skip_flag)
1059                     {
1060                         /* Recon */
1061                         ps_codec->apf_recon[func_idx](pi2_src, pu1_pred, pu1_dst,
1062                                                       src_strd, pred_strd, dst_strd,
1063                                                       zero_cols);
1064                     }
1065                     else
1066                     {
1067 
1068                         /* Updating coded number of transform types(excluding trans skip and trans quant skip) */
1069                         STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0);
1070 
1071                         /* iQuant , iTrans and Recon */
1072                         if((0 == coeff_type))
1073                         {
1074                             ps_codec->apf_itrans_recon[func_idx](pi2_src, pi2_tmp,
1075                                                                  pu1_pred, pu1_dst,
1076                                                                  src_strd, pred_strd,
1077                                                                  dst_strd, zero_cols,
1078                                                                  zero_rows);
1079                         }
1080                         else /* DC only */
1081                         {
1082                             STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1);
1083                             ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred, pu1_dst,
1084                                                                  pred_strd, dst_strd,
1085                                                                  log2_trans_size,
1086                                                                  i2_coeff_value);
1087                         }
1088                     }
1089                 }
1090                 /* IQ, IT and Recon for V */
1091                 if(c_idx != 0)
1092                 {
1093                     if(1 == u1_cbf_v)
1094                     {
1095                         if(ps_tu->b1_transquant_bypass || transform_skip_flag_v)
1096                         {
1097                             /* Recon */
1098                             ps_codec->apf_recon[func_idx](pi2_src_v, pu1_pred_v,
1099                                                           pu1_dst_v, src_strd,
1100                                                           pred_strd, dst_strd,
1101                                                           zero_cols_v);
1102                         }
1103                         else
1104                         {
1105                             /* Updating number of transform types */
1106                             STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0);
1107 
1108                             /* iQuant , iTrans and Recon */
1109                             if((0 == coeff_type_v))
1110                             {
1111                                 ps_codec->apf_itrans_recon[func_idx](pi2_src_v,
1112                                                                      pi2_tmp,
1113                                                                      pu1_pred_v,
1114                                                                      pu1_dst_v,
1115                                                                      src_strd,
1116                                                                      pred_strd,
1117                                                                      dst_strd,
1118                                                                      zero_cols_v,
1119                                                                      zero_rows_v);
1120                             }
1121                             else  /* DC only */
1122                             {
1123                                 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1);
1124                                 ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred_v, pu1_dst_v,
1125                                                                      pred_strd, dst_strd,
1126                                                                      log2_trans_size,
1127                                                                      i2_coeff_value_v);
1128                             }
1129                         }
1130                     }
1131                 }
1132             }
1133 
1134             /* Neighbor availability inside CTB */
1135             /* 1 bit per 4x4. Indicates whether that 4x4 block has been reconstructed (available) */
1136             /* Used for neighbor availability in intra pred */
1137             if(c_idx == 0)
1138             {
1139                 WORD32 i;
1140                 WORD32 trans_in_min_tu;
1141                 UWORD32 cur_tu_in_bits;
1142                 UWORD32 cur_tu_avail_flag;
1143 
1144                 trans_in_min_tu = trans_size / MIN_TU_SIZE;
1145                 cur_tu_in_bits = (1 << trans_in_min_tu) - 1;
1146                 cur_tu_in_bits = cur_tu_in_bits << (32 - trans_in_min_tu);
1147 
1148                 cur_tu_avail_flag = cur_tu_in_bits >> (ps_tu->b4_pos_x + 1);
1149 
1150                 for(i = 0; i < trans_in_min_tu; i++)
1151                     au4_intra_nbr_avail[1 + ps_tu->b4_pos_y + i] |=
1152                                     cur_tu_avail_flag;
1153             }
1154         }
1155     }
1156     ps_proc->pv_tu_coeff_data = pu1_tu_coeff_data;
1157 
1158     return ps_proc->i4_ctb_tu_cnt;
1159 }
1160 
1161