/******************************************************************************
*
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/**
*******************************************************************************
* @file
*  ihevc_deblk.c
*
* @brief
*  Contains definition for the ctb level deblk function
*
* @author
*  Srinivas T
*
* @par List of Functions:
*   - ihevcd_deblk_ctb()
*
* @remarks
*  None
*
*******************************************************************************
*/
37 
38 #include <stdio.h>
39 #include <stddef.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <assert.h>
43 
44 #include "ihevc_typedefs.h"
45 #include "iv.h"
46 #include "ivd.h"
47 #include "ihevcd_cxa.h"
48 #include "ithread.h"
49 
50 #include "ihevc_defs.h"
51 #include "ihevc_debug.h"
52 #include "ihevc_defs.h"
53 #include "ihevc_structs.h"
54 #include "ihevc_macros.h"
55 #include "ihevc_platform_macros.h"
56 #include "ihevc_cabac_tables.h"
57 
58 #include "ihevc_error.h"
59 #include "ihevc_common_tables.h"
60 
61 #include "ihevcd_trace.h"
62 #include "ihevcd_defs.h"
63 #include "ihevcd_function_selector.h"
64 #include "ihevcd_structs.h"
65 #include "ihevcd_error.h"
66 #include "ihevcd_nal.h"
67 #include "ihevcd_bitstream.h"
68 #include "ihevcd_job_queue.h"
69 #include "ihevcd_utils.h"
70 #include "ihevcd_debug.h"
71 
72 #include "ihevc_deblk.h"
73 #include "ihevc_deblk_tables.h"
74 #include "ihevcd_profile.h"
75 /**
76 *******************************************************************************
77 *
78 * @brief
79 *     Deblock CTB level function.
80 *
81 * @par Description:
82 *     For a given CTB, deblocking on both vertical and
83 *     horizontal edges is done. Both the luma and chroma
84 *     blocks are processed
85 *
86 * @param[in] ps_deblk
87 *  Pointer to the deblock context
88 *
89 * @returns
90 *
91 * @remarks
92 *  None
93 *
94 *******************************************************************************
95 */
96 
ihevcd_deblk_ctb(deblk_ctxt_t * ps_deblk,WORD32 i4_is_last_ctb_x,WORD32 i4_is_last_ctb_y)97 void ihevcd_deblk_ctb(deblk_ctxt_t *ps_deblk,
98                       WORD32 i4_is_last_ctb_x,
99                       WORD32 i4_is_last_ctb_y)
100 {
101     WORD32 ctb_size;
102     WORD32 log2_ctb_size;
103     UWORD32 u4_bs;
104     WORD32 bs_tz; /*Leading zeros in boundary strength*/
105     WORD32 qp_p, qp_q;
106 
107     WORD32 filter_p, filter_q;
108 
109     UWORD8 *pu1_src;
110     WORD32 qp_strd;
111     UWORD32 *pu4_vert_bs, *pu4_horz_bs;
112     UWORD32 *pu4_ctb_vert_bs, *pu4_ctb_horz_bs;
113     WORD32 bs_strd;
114     WORD32 src_strd;
115     UWORD8 *pu1_qp;
116     UWORD16 *pu2_ctb_no_loop_filter_flag;
117     UWORD16 au2_ctb_no_loop_filter_flag[9];
118 
119     WORD32 col, row;
120 
121     /* Flag to indicate if QP is constant in CTB
122      * 0 - top_left, 1 - top, 2 - left, 3 - current */
123     UWORD32 u4_qp_const_in_ctb[4] = { 0, 0, 0, 0 };
124     WORD32 ctb_indx;
125     WORD32  chroma_yuv420sp_vu = ps_deblk->is_chroma_yuv420sp_vu;
126     sps_t *ps_sps;
127     pps_t *ps_pps;
128     codec_t *ps_codec;
129     slice_header_t *ps_slice_hdr;
130 
131     PROFILE_DISABLE_DEBLK();
132 
133     ps_sps = ps_deblk->ps_sps;
134     ps_pps = ps_deblk->ps_pps;
135     ps_codec = ps_deblk->ps_codec;
136     ps_slice_hdr = ps_deblk->ps_slice_hdr;
137 
138     log2_ctb_size = ps_sps->i1_log2_ctb_size;
139     ctb_size = (1 << ps_sps->i1_log2_ctb_size);
140 
141     /* strides are in units of number of bytes */
142     /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
143     bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
144 
145     pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_vert_bs +
146                     (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
147                     ps_deblk->i4_ctb_y * bs_strd);
148     pu4_ctb_vert_bs = pu4_vert_bs;
149 
150     pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_horz_bs +
151                     (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
152                     ps_deblk->i4_ctb_y * bs_strd);
153     pu4_ctb_horz_bs = pu4_horz_bs;
154 
155     qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
156     pu1_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
157 
158     pu2_ctb_no_loop_filter_flag = ps_deblk->au2_ctb_no_loop_filter_flag;
159 
160     ctb_indx = ps_deblk->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_deblk->i4_ctb_y;
161     if(i4_is_last_ctb_y)
162     {
163         pu4_vert_bs = (UWORD32 *)((UWORD8 *)pu4_vert_bs + bs_strd);
164         pu4_ctb_vert_bs = pu4_vert_bs;
165         /* ctb_size/8 is the number of edges per CTB
166          * ctb_size/4 is the number of BS values needed per edge
167          * divided by 8 for the number of bytes
168          * 2 is the number of bits needed for each BS value */
169         memset(pu4_vert_bs, 0, 1 << (2 * log2_ctb_size - 7));
170 
171         pu1_qp += (qp_strd << (log2_ctb_size - 3));
172         pu2_ctb_no_loop_filter_flag += (ctb_size >> 3);
173         ctb_indx += ps_sps->i2_pic_wd_in_ctb;
174     }
175 
176     if(i4_is_last_ctb_x)
177     {
178         pu4_horz_bs = (UWORD32 *)((UWORD8 *)pu4_horz_bs + (1 << (2 * log2_ctb_size - 7)));
179         pu4_ctb_horz_bs = pu4_horz_bs;
180         memset(pu4_horz_bs, 0, 1 << (2 * log2_ctb_size - 7));
181 
182         pu1_qp += (ctb_size >> 3);
183 
184         for(row = 0; row < (ctb_size >> 3) + 1; row++)
185             au2_ctb_no_loop_filter_flag[row] = ps_deblk->au2_ctb_no_loop_filter_flag[row] >> (ctb_size >> 3);
186         pu2_ctb_no_loop_filter_flag = au2_ctb_no_loop_filter_flag;
187         ctb_indx += 1;
188     }
189 
190     u4_qp_const_in_ctb[3] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx) >> 3] & (1 << (ctb_indx & 7));
191 
192     if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
193     {
194         u4_qp_const_in_ctb[2] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - 1) >> 3] & (1 << ((ctb_indx - 1) & 7));
195     }
196 
197     if((ps_deblk->i4_ctb_x || i4_is_last_ctb_x) && (ps_deblk->i4_ctb_y || i4_is_last_ctb_y))
198     {
199         u4_qp_const_in_ctb[0] =
200                         ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) >> 3] &
201                         (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) & 7));
202     }
203 
204 
205 
206     if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
207     {
208         u4_qp_const_in_ctb[1] =
209                         ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb) >> 3] &
210                         (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb) & 7));
211     }
212 
213     src_strd = ps_codec->i4_strd;
214 
215     /* Luma Vertical Edge */
216 
217     if(0 == i4_is_last_ctb_x)
218     {
219         /* Top CTB's slice header */
220         slice_header_t *ps_slice_hdr_top;
221         {
222             WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
223             if(i4_is_last_ctb_y)
224                 cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
225             ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
226         }
227 
228         pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << (log2_ctb_size));
229         pu1_src += i4_is_last_ctb_y ? ps_deblk->ps_codec->i4_strd << log2_ctb_size : 0;
230 
231         /** Deblocking is done on a shifted CTB -
232          *  Vertical edge processing is done by shifting the CTB up by four pixels */
233         pu1_src -= 4 * src_strd;
234 
235         for(col = 0; col < ctb_size / 8; col++)
236         {
237             WORD32 shift = 0;
238 
239             /*  downshift vert_bs by ctb_size/2 for each column
240              *  shift = (col & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1);
241              *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
242              *  and deblocking is done on 8x8 grid
243              */
244             if(6 != log2_ctb_size)
245                 shift = (col & 1) << (log2_ctb_size - 1);
246 
247             /* BS for the column - Last row is excluded and the top row is included*/
248             u4_bs = (pu4_vert_bs[0] >> shift) << 2;
249 
250             if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
251             {
252                 /* Picking the last BS of the previous CTB corresponding to the same column */
253                 UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
254                 UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
255                 u4_bs |= u4_top_bs & 3;
256             }
257 
258             for(row = 0; row < ctb_size / 4;)
259             {
260                 WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
261                 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
262 
263                 /* Trailing zeros are computed and the corresponding rows are not processed */
264                 bs_tz = CTZ(u4_bs) >> 1;
265                 if(0 != bs_tz)
266                 {
267                     u4_bs = u4_bs >> (bs_tz << 1);
268                     if((row + bs_tz) >= (ctb_size / 4))
269                         pu1_src += 4 * (ctb_size / 4 - row) * src_strd;
270                     else
271                         pu1_src += 4 * bs_tz  * src_strd;
272 
273                     row += bs_tz;
274                     continue;
275                 }
276 
277                 if(0 == row)
278                 {
279                     i1_beta_offset_div2 = ps_slice_hdr_top->i1_beta_offset_div2;
280                     i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
281 
282                     if(0 == col)
283                     {
284                         qp_p = u4_qp_const_in_ctb[0] ?
285                                         pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
286                                         pu1_qp[-qp_strd - 1];
287                     }
288                     else
289                     {
290                         qp_p = u4_qp_const_in_ctb[1] ?
291                                         pu1_qp[-ctb_size / 8 * qp_strd] :
292                                         pu1_qp[col - 1 - qp_strd];
293                     }
294 
295                     qp_q = u4_qp_const_in_ctb[1] ?
296                                     pu1_qp[-ctb_size / 8 * qp_strd] :
297                                     pu1_qp[col - qp_strd];
298                 }
299                 else
300                 {
301                     if(0 == col)
302                     {
303                         qp_p = u4_qp_const_in_ctb[2] ?
304                                         pu1_qp[-ctb_size / 8] :
305                                         pu1_qp[((row - 1) >> 1) * qp_strd - 1];
306                     }
307                     else
308                     {
309                         qp_p = u4_qp_const_in_ctb[3] ?
310                                         pu1_qp[0] :
311                                         pu1_qp[((row - 1) >> 1) * qp_strd + col - 1];
312                     }
313 
314                     qp_q = u4_qp_const_in_ctb[3] ?
315                                     pu1_qp[0] :
316                                     pu1_qp[((row - 1) >> 1) * qp_strd + col];
317                 }
318 
319                 filter_p = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 1;
320                 filter_q = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 2;
321                 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
322                 filter_p = !filter_p;
323                 filter_q = !filter_q;
324 
325                 if(filter_p || filter_q)
326                 {
327                     DUMP_DEBLK_LUMA_VERT(pu1_src, src_strd,
328                                          u4_bs & 3, qp_p, qp_q,
329                                          ps_slice_hdr->i1_beta_offset_div2,
330                                          ps_slice_hdr->i1_tc_offset_div2,
331                                          filter_p, filter_q);
332                     ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr(pu1_src, src_strd,
333                                                                          u4_bs & 3, qp_p, qp_q,
334                                                                          i1_beta_offset_div2,
335                                                                          i1_tc_offset_div2,
336                                                                          filter_p, filter_q);
337                 }
338 
339                 pu1_src += 4 * src_strd;
340                 u4_bs = u4_bs >> 2;
341                 row++;
342             }
343 
344             if((64 == ctb_size) ||
345                             ((32 == ctb_size) && (col & 1)))
346             {
347                 pu4_vert_bs++;
348             }
349             pu1_src -= (src_strd << log2_ctb_size);
350             pu1_src += 8;
351         }
352         pu4_vert_bs = pu4_ctb_vert_bs;
353     }
354 
355 
356     /* Luma Horizontal Edge */
357 
358     if(0 == i4_is_last_ctb_y)
359     {
360 
361         /* Left CTB's slice header */
362         slice_header_t *ps_slice_hdr_left;
363         {
364             WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
365             if(i4_is_last_ctb_x)
366                 cur_ctb_indx += 1;
367             ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
368         }
369         pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << log2_ctb_size);
370         pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
371 
372         /** Deblocking is done on a shifted CTB -
373          *  Horizontal edge processing is done by shifting the CTB left by four pixels */
374         pu1_src -= 4;
375         for(row = 0; row < ctb_size / 8; row++)
376         {
377             WORD32 shift = 0;
378 
379             /* downshift vert_bs by ctb_size/2 for each column
380              *  shift = (row & (MAX_CTB_SIZE / ctb_size - 1)) * ctb_size / 2;
381              *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
382              *  and deblocking is done on 8x8 grid
383              */
384             if(6 != log2_ctb_size)
385                 shift = (row & 1) << (log2_ctb_size - 1);
386 
387             /* BS for the row - Last column is excluded and the left column is included*/
388             u4_bs = (pu4_horz_bs[0] >> shift) << 2;
389 
390             if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
391             {
392                 /** Picking the last BS of the previous CTB corresponding to the same row
393                 * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
394                 */
395                 UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
396                 UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
397                 u4_bs |= u4_left_bs & 3;
398             }
399 
400             for(col = 0; col < ctb_size / 4;)
401             {
402                 WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
403                 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
404 
405                 bs_tz = CTZ(u4_bs) >> 1;
406                 if(0 != bs_tz)
407                 {
408                     u4_bs = u4_bs >> (bs_tz << 1);
409 
410                     if((col + bs_tz) >= (ctb_size / 4))
411                         pu1_src += 4 * (ctb_size / 4 - col);
412                     else
413                         pu1_src += 4 * bs_tz;
414 
415                     col += bs_tz;
416                     continue;
417                 }
418 
419                 if(0 == col)
420                 {
421                     i1_beta_offset_div2 = ps_slice_hdr_left->i1_beta_offset_div2;
422                     i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
423 
424                     if(0 == row)
425                     {
426                         qp_p = u4_qp_const_in_ctb[0] ?
427                                         pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
428                                         pu1_qp[-qp_strd - 1];
429                     }
430                     else
431                     {
432                         qp_p = u4_qp_const_in_ctb[2] ?
433                                         pu1_qp[-ctb_size / 8] :
434                                         pu1_qp[(row - 1) * qp_strd - 1];
435                     }
436 
437                     qp_q = u4_qp_const_in_ctb[2] ?
438                                     pu1_qp[-ctb_size / 8] :
439                                     pu1_qp[row * qp_strd - 1];
440                 }
441                 else
442                 {
443                     if(0 == row)
444                     {
445                         qp_p = u4_qp_const_in_ctb[1] ?
446                                         pu1_qp[-ctb_size / 8 * qp_strd] :
447                                         pu1_qp[((col - 1) >> 1) - qp_strd];
448                     }
449                     else
450                     {
451                         qp_p = u4_qp_const_in_ctb[3] ?
452                                         pu1_qp[0] :
453                                         pu1_qp[((col - 1) >> 1) + (row - 1) * qp_strd];
454                     }
455 
456                     qp_q = u4_qp_const_in_ctb[3] ?
457                                     pu1_qp[0] :
458                                     pu1_qp[((col - 1) >> 1) + row * qp_strd];
459                 }
460 
461                 filter_p = (pu2_ctb_no_loop_filter_flag[row] >> ((col + 1) >> 1)) & 1;
462                 filter_q = (pu2_ctb_no_loop_filter_flag[row + 1] >> ((col + 1) >> 1)) & 1;
463                 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
464                 filter_p = !filter_p;
465                 filter_q = !filter_q;
466 
467                 if(filter_p || filter_q)
468                 {
469                     DUMP_DEBLK_LUMA_HORZ(pu1_src, src_strd,
470                                          u4_bs & 3, qp_p, qp_q,
471                                          ps_slice_hdr->i1_beta_offset_div2,
472                                          ps_slice_hdr->i1_tc_offset_div2,
473                                          filter_p, filter_q);
474                     ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr(pu1_src, src_strd,
475                                                                          u4_bs & 3, qp_p, qp_q,
476                                                                          i1_beta_offset_div2,
477                                                                          i1_tc_offset_div2, filter_p, filter_q);
478                 }
479 
480                 pu1_src += 4;
481                 u4_bs = u4_bs >> 2;
482                 col++;
483             }
484 
485             if((64 == ctb_size) ||
486                             ((32 == ctb_size) && (row & 1)))
487             {
488                 pu4_horz_bs++;
489             }
490             pu1_src -= ctb_size;
491             pu1_src += (src_strd << 3);
492         }
493         pu4_horz_bs = pu4_ctb_horz_bs;
494     }
495 
496 
497     /* Chroma Veritcal Edge */
498 
499     if(0 == i4_is_last_ctb_x)
500     {
501 
502         /* Top CTB's slice header */
503         slice_header_t *ps_slice_hdr_top;
504         {
505             WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
506             if(i4_is_last_ctb_y)
507                 cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
508             ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
509         }
510 
511         pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
512         pu1_src += i4_is_last_ctb_y ? (ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size : 0;
513 
514         /** Deblocking is done on a shifted CTB -
515          *  Vertical edge processing is done by shifting the CTB up by four pixels */
516         pu1_src -= 4 * src_strd;
517 
518         for(col = 0; col < ctb_size / 16; col++)
519         {
520 
521             /* BS for the column - Last row is excluded and the top row is included*/
522             u4_bs = pu4_vert_bs[0] << 2;
523 
524             if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
525             {
526                 /* Picking the last BS of the previous CTB corresponding to the same column */
527                 UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
528                 UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> ((1 << (log2_ctb_size - 1)) - 2);
529                 u4_bs |= u4_top_bs & 3;
530             }
531 
532             /* Every alternate boundary strength value is used for chroma */
533             u4_bs &= 0x22222222;
534 
535             for(row = 0; row < ctb_size / 8;)
536             {
537                 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
538 
539                 bs_tz = CTZ(u4_bs) >> 2;
540                 if(0 != bs_tz)
541                 {
542                     if((row + bs_tz) >= (ctb_size / 8))
543                         pu1_src += 4 * (ctb_size / 8 - row) * src_strd;
544                     else
545                         pu1_src += 4 * bs_tz  * src_strd;
546                     row += bs_tz;
547                     u4_bs = u4_bs >> (bs_tz << 2);
548                     continue;
549                 }
550 
551                 if(0 == row)
552                 {
553                     i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
554 
555                     if(0 == col)
556                     {
557                         qp_p = u4_qp_const_in_ctb[0] ?
558                                         pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
559                                         pu1_qp[-qp_strd - 1];
560                     }
561                     else
562                     {
563                         qp_p = u4_qp_const_in_ctb[1] ?
564                                         pu1_qp[-ctb_size / 8 * qp_strd] :
565                                         pu1_qp[2 * col - 1 - qp_strd];
566                     }
567 
568                     qp_q = u4_qp_const_in_ctb[1] ?
569                                     pu1_qp[-ctb_size / 8 * qp_strd] :
570                                     pu1_qp[2 * col - qp_strd];
571                 }
572                 else
573                 {
574                     if(0 == col)
575                     {
576                         qp_p = u4_qp_const_in_ctb[2] ?
577                                         pu1_qp[-ctb_size / 8] :
578                                         pu1_qp[(row - 1) * qp_strd - 1];
579                     }
580                     else
581                     {
582                         qp_p = u4_qp_const_in_ctb[3] ?
583                                         pu1_qp[0] :
584                                         pu1_qp[(row - 1) * qp_strd + 2 * col - 1];
585                     }
586 
587                     qp_q = u4_qp_const_in_ctb[3] ?
588                                     pu1_qp[0] :
589                                     pu1_qp[(row - 1) * qp_strd + 2 * col];
590                 }
591 
592                 filter_p = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 1;
593                 filter_q = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 2;
594                 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
595                 filter_p = !filter_p;
596                 filter_q = !filter_q;
597 
598                 if(filter_p || filter_q)
599                 {
600                     ASSERT(1 == ((u4_bs & 3) >> 1));
601                     DUMP_DEBLK_CHROMA_VERT(pu1_src, src_strd,
602                                            u4_bs & 3, qp_p, qp_q,
603                                            ps_pps->i1_pic_cb_qp_offset,
604                                            ps_pps->i1_pic_cr_qp_offset,
605                                            ps_slice_hdr->i1_tc_offset_div2,
606                                            filter_p, filter_q);
607                     if(chroma_yuv420sp_vu)
608                     {
609                         ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
610                                                                                src_strd,
611                                                                                qp_q,
612                                                                                qp_p,
613                                                                                ps_pps->i1_pic_cr_qp_offset,
614                                                                                ps_pps->i1_pic_cb_qp_offset,
615                                                                                i1_tc_offset_div2,
616                                                                                filter_q,
617                                                                                filter_p);
618                     }
619                     else
620                     {
621                         ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
622                                                                                src_strd,
623                                                                                qp_p,
624                                                                                qp_q,
625                                                                                ps_pps->i1_pic_cb_qp_offset,
626                                                                                ps_pps->i1_pic_cr_qp_offset,
627                                                                                i1_tc_offset_div2,
628                                                                                filter_p,
629                                                                                filter_q);
630                     }
631                 }
632 
633                 pu1_src += 4 * src_strd;
634                 u4_bs = u4_bs >> 4;
635                 row++;
636             }
637 
638             pu4_vert_bs += (64 == ctb_size) ? 2 : 1;
639             pu1_src -= ((src_strd / 2) << log2_ctb_size);
640             pu1_src += 16;
641         }
642     }
643 
644     /* Chroma Horizontal Edge */
645 
646     if(0 == i4_is_last_ctb_y)
647     {
648 
649         /* Left CTB's slice header */
650         slice_header_t *ps_slice_hdr_left;
651         {
652             WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
653             if(i4_is_last_ctb_x)
654                 cur_ctb_indx += 1;
655             ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
656         }
657 
658         pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
659         pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
660 
661         /** Deblocking is done on a shifted CTB -
662          * Vertical edge processing is done by shifting the CTB up by four pixels (8 here beacuse UV are interleaved) */
663         pu1_src -= 8;
664         for(row = 0; row < ctb_size / 16; row++)
665         {
666             /* BS for the row - Last column is excluded and the left column is included*/
667             u4_bs = pu4_horz_bs[0] << 2;
668 
669             if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
670             {
671                 /** Picking the last BS of the previous CTB corresponding to the same row
672                 * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
673                 */
674                 UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
675                 UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> ((1 << (log2_ctb_size - 1)) - 2);
676                 u4_bs |= u4_left_bs & 3;
677             }
678 
679             /* Every alternate boundary strength value is used for chroma */
680             u4_bs &= 0x22222222;
681 
682             for(col = 0; col < ctb_size / 8;)
683             {
684                 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
685 
686                 bs_tz = CTZ(u4_bs) >> 2;
687                 if(0 != bs_tz)
688                 {
689                     u4_bs = u4_bs >> (bs_tz << 2);
690 
691                     if((col + bs_tz) >= (ctb_size / 8))
692                         pu1_src += 8 * (ctb_size / 8 - col);
693                     else
694                         pu1_src += 8 * bs_tz;
695 
696                     col += bs_tz;
697                     continue;
698                 }
699 
700                 if(0 == col)
701                 {
702                     i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
703 
704                     if(0 == row)
705                     {
706                         qp_p = u4_qp_const_in_ctb[0] ?
707                                         pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
708                                         pu1_qp[-qp_strd - 1];
709                     }
710                     else
711                     {
712                         qp_p = u4_qp_const_in_ctb[2] ?
713                                         pu1_qp[-ctb_size / 8] :
714                                         pu1_qp[(2 * row - 1) * qp_strd - 1];
715                     }
716 
717                     qp_q = u4_qp_const_in_ctb[2] ?
718                                     pu1_qp[-ctb_size / 8] :
719                                     pu1_qp[(2 * row) * qp_strd - 1];
720                 }
721                 else
722                 {
723                     if(0 == row)
724                     {
725                         qp_p = u4_qp_const_in_ctb[1] ?
726                                         pu1_qp[-ctb_size / 8 * qp_strd] :
727                                         pu1_qp[col - 1 - qp_strd];
728                     }
729                     else
730                     {
731                         qp_p = u4_qp_const_in_ctb[3] ?
732                                         pu1_qp[0] :
733                                         pu1_qp[(col - 1) +  (2 * row - 1) * qp_strd];
734                     }
735 
736                     qp_q = u4_qp_const_in_ctb[3] ?
737                                     pu1_qp[0] :
738                                     pu1_qp[(col - 1) + 2 * row * qp_strd];
739                 }
740 
741                 filter_p = (pu2_ctb_no_loop_filter_flag[row << 1] >> col) & 1;
742                 filter_q = (pu2_ctb_no_loop_filter_flag[(row << 1) + 1] >> col) & 1;
743                 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
744                 filter_p = !filter_p;
745                 filter_q = !filter_q;
746 
747                 if(filter_p || filter_q)
748                 {
749                     ASSERT(1 == ((u4_bs & 3) >> 1));
750                     DUMP_DEBLK_CHROMA_HORZ(pu1_src, src_strd,
751                                            u4_bs & 3, qp_p, qp_q,
752                                            ps_pps->i1_pic_cb_qp_offset,
753                                            ps_pps->i1_pic_cr_qp_offset,
754                                            ps_slice_hdr->i1_tc_offset_div2,
755                                            filter_p, filter_q);
756                     if(chroma_yuv420sp_vu)
757                     {
758                         ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
759                                                                                src_strd,
760                                                                                qp_q,
761                                                                                qp_p,
762                                                                                ps_pps->i1_pic_cr_qp_offset,
763                                                                                ps_pps->i1_pic_cb_qp_offset,
764                                                                                i1_tc_offset_div2,
765                                                                                filter_q,
766                                                                                filter_p);
767                     }
768                     else
769                     {
770                         ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
771                                                                                src_strd,
772                                                                                qp_p,
773                                                                                qp_q,
774                                                                                ps_pps->i1_pic_cb_qp_offset,
775                                                                                ps_pps->i1_pic_cr_qp_offset,
776                                                                                i1_tc_offset_div2,
777                                                                                filter_p,
778                                                                                filter_q);
779                     }
780                 }
781 
782                 pu1_src += 8;
783                 u4_bs = u4_bs >> 4;
784                 col++;
785             }
786 
787             pu4_horz_bs += (64 == ctb_size) ? 2 : 1;
788             pu1_src -= ctb_size;
789             pu1_src += 8 * src_strd;
790 
791         }
792     }
793 }
794