1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19  *******************************************************************************
20  * @file
21  *  ihevc_boundary_strength.c
22  *
23  * @brief
24  *  Contains functions for computing boundary strength
25  *
26  * @author
27  *  Harish
28  *
29  * @par List of Functions:
30  *
31  * @remarks
32  *  None
33  *
34  *******************************************************************************
35  */
36 /*****************************************************************************/
37 /* File Includes                                                             */
38 /*****************************************************************************/
39 #include <stdio.h>
40 #include <stddef.h>
41 #include <stdlib.h>
42 #include <string.h>
43 
44 #include "ihevc_typedefs.h"
45 #include "iv.h"
46 #include "ivd.h"
47 #include "ihevcd_cxa.h"
48 #include "ithread.h"
49 
50 #include "ihevc_defs.h"
51 #include "ihevc_debug.h"
52 #include "ihevc_defs.h"
53 #include "ihevc_structs.h"
54 #include "ihevc_macros.h"
55 #include "ihevc_platform_macros.h"
56 #include "ihevc_cabac_tables.h"
57 
58 #include "ihevc_error.h"
59 #include "ihevc_common_tables.h"
60 
61 #include "ihevcd_trace.h"
62 #include "ihevcd_defs.h"
63 #include "ihevcd_function_selector.h"
64 #include "ihevcd_structs.h"
65 #include "ihevcd_error.h"
66 #include "ihevcd_nal.h"
67 #include "ihevcd_bitstream.h"
68 #include "ihevcd_job_queue.h"
69 #include "ihevcd_utils.h"
70 #include "ihevcd_profile.h"
71 
72 /*****************************************************************************/
73 /* Function Prototypes                                                       */
74 /*****************************************************************************/
75 
76 
/*
 * Neighbour availability bit-mask helpers.
 *
 * Bit assignment, as fixed by the masks used below (a set bit marks the
 * neighbour as available):
 *   0x10 - bottom left
 *   0x08 - left
 *   0x04 - top left
 *   0x02 - top
 *   0x01 - top right
 *
 * The macro argument must be a modifiable lvalue. It is parenthesised in the
 * expansion so that any lvalue expression can be passed safely.
 *
 * NOTE: every expansion already ends with ';'. That is kept on purpose so
 * that existing call sites (with or without their own ';') keep compiling;
 * as a consequence do not use these as the unbraced body of an if/else.
 */
#define SET_NGBHR_ALL_AVAIL(avail)          (avail) = 0x1F;

#define SET_NGBHR_BOTLEFT_NOTAVAIL(avail)   (avail) &= ~0x10;
#define SET_NGBHR_LEFT_NOTAVAIL(avail)      (avail) &= ~0x8;
#define SET_NGBHR_TOPLEFT_NOTAVAIL(avail)   (avail) &= ~0x4;
#define SET_NGBHR_TOP_NOTAVAIL(avail)       (avail) &= ~0x2;
#define SET_NGBHR_TOPRIGHT_NOTAVAIL(avail)  (avail) &= ~0x1;
84 
ihevcd_pu_boundary_strength(pu_t * ps_pu,pu_t * ps_ngbr_pu)85 WORD32 ihevcd_pu_boundary_strength(pu_t *ps_pu,
86                                    pu_t *ps_ngbr_pu)
87 {
88     WORD32 i4_bs;
89     UWORD32 l0_ref_pic_buf_id, l1_ref_pic_buf_id;
90     UWORD32 ngbr_l0_ref_pic_buf_id, ngbr_l1_ref_pic_buf_id;
91 
92     WORD16 i2_mv_x0, i2_mv_y0, i2_mv_x1, i2_mv_y1;
93     WORD16 i2_ngbr_mv_x0, i2_ngbr_mv_y0, i2_ngbr_mv_x1, i2_ngbr_mv_y1;
94 
95     WORD32 num_mv, ngbr_num_mv;
96 
97     num_mv = (PRED_BI == ps_pu->b2_pred_mode) ? 2 : 1;
98     ngbr_num_mv = (PRED_BI == ps_ngbr_pu->b2_pred_mode) ? 2 : 1;
99 
100     l0_ref_pic_buf_id = ps_pu->mv.i1_l0_ref_pic_buf_id;
101     l1_ref_pic_buf_id = ps_pu->mv.i1_l1_ref_pic_buf_id;
102     ngbr_l0_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l0_ref_pic_buf_id;
103     ngbr_l1_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l1_ref_pic_buf_id;
104 
105 
106     i2_mv_x0 = ps_pu->mv.s_l0_mv.i2_mvx;
107     i2_mv_y0 = ps_pu->mv.s_l0_mv.i2_mvy;
108     i2_mv_x1 = ps_pu->mv.s_l1_mv.i2_mvx;
109     i2_mv_y1 = ps_pu->mv.s_l1_mv.i2_mvy;
110 
111     i2_ngbr_mv_x0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvx;
112     i2_ngbr_mv_y0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvy;
113     i2_ngbr_mv_x1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvx;
114     i2_ngbr_mv_y1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvy;
115 
116 
117     /* If two motion vectors are used */
118     if((2 == num_mv) &&
119             (2 == ngbr_num_mv))
120     {
121         if((l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id) ||
122                 (l0_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id))
123         {
124             if(l0_ref_pic_buf_id != l1_ref_pic_buf_id) /* Different L0 and L1 */
125             {
126                 if(l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id)
127                 {
128                     i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x0) < 4) &&
129                             (ABS(i2_mv_y0 - i2_ngbr_mv_y0) < 4) &&
130                             (ABS(i2_mv_x1 - i2_ngbr_mv_x1) < 4) &&
131                             (ABS(i2_mv_y1 - i2_ngbr_mv_y1) < 4) ? 0 : 1;
132                 }
133                 else
134                 {
135                     i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x1) < 4) &&
136                             (ABS(i2_mv_y0 - i2_ngbr_mv_y1) < 4) &&
137                             (ABS(i2_mv_x1 - i2_ngbr_mv_x0) < 4) &&
138                             (ABS(i2_mv_y1 - i2_ngbr_mv_y0) < 4) ? 0 : 1;
139                 }
140             }
141             else /* Same L0 and L1 */
142             {
143                 i4_bs = ((ABS(i2_mv_x0 - i2_ngbr_mv_x0) >= 4) ||
144                          (ABS(i2_mv_y0 - i2_ngbr_mv_y0) >= 4) ||
145                          (ABS(i2_mv_x1 - i2_ngbr_mv_x1) >= 4) ||
146                          (ABS(i2_mv_y1 - i2_ngbr_mv_y1) >= 4)) &&
147                                 ((ABS(i2_mv_x0 - i2_ngbr_mv_x1) >= 4) ||
148                                  (ABS(i2_mv_y0 - i2_ngbr_mv_y1) >= 4) ||
149                                  (ABS(i2_mv_x1 - i2_ngbr_mv_x0) >= 4) ||
150                                  (ABS(i2_mv_y1 - i2_ngbr_mv_y0) >= 4)) ? 1 : 0;
151             }
152         }
153         else /* If the reference pictures used are different */
154         {
155             i4_bs = 1;
156         }
157     }
158 
159     /* If one motion vector is used in both PUs */
160     else if((1 == num_mv) &&
161             (1 == ngbr_num_mv))
162     {
163         WORD16 i2_mv_x, i2_mv_y;
164         WORD16 i2_ngbr_mv_x, i2_ngbr_mv_y;
165         UWORD32 ref_pic_buf_id, ngbr_ref_pic_buf_id;
166 
167         if(PRED_L0 == ps_pu->b2_pred_mode)
168         {
169             i2_mv_x = i2_mv_x0;
170             i2_mv_y = i2_mv_y0;
171             ref_pic_buf_id = l0_ref_pic_buf_id;
172         }
173         else
174         {
175             i2_mv_x = i2_mv_x1;
176             i2_mv_y = i2_mv_y1;
177             ref_pic_buf_id = l1_ref_pic_buf_id;
178         }
179 
180         if(PRED_L0 == ps_ngbr_pu->b2_pred_mode)
181         {
182             i2_ngbr_mv_x = i2_ngbr_mv_x0;
183             i2_ngbr_mv_y = i2_ngbr_mv_y0;
184             ngbr_ref_pic_buf_id = ngbr_l0_ref_pic_buf_id;
185         }
186         else
187         {
188             i2_ngbr_mv_x = i2_ngbr_mv_x1;
189             i2_ngbr_mv_y = i2_ngbr_mv_y1;
190             ngbr_ref_pic_buf_id = ngbr_l1_ref_pic_buf_id;
191         }
192 
193         i4_bs = (ref_pic_buf_id == ngbr_ref_pic_buf_id) &&
194                 (ABS(i2_mv_x - i2_ngbr_mv_x) < 4)  &&
195                 (ABS(i2_mv_y - i2_ngbr_mv_y) < 4) ? 0 : 1;
196     }
197 
198     /* If the no. of motion vectors is not the same */
199     else
200     {
201         i4_bs = 1;
202     }
203 
204 
205     return i4_bs;
206 }
207 
208 /* QP is also populated in the same function */
ihevcd_ctb_boundary_strength_islice(bs_ctxt_t * ps_bs_ctxt)209 WORD32 ihevcd_ctb_boundary_strength_islice(bs_ctxt_t *ps_bs_ctxt)
210 {
211     pps_t *ps_pps;
212     sps_t *ps_sps;
213     tu_t *ps_tu;
214     UWORD32 *pu4_vert_bs;
215     UWORD32 *pu4_horz_bs;
216     WORD32 bs_strd;
217     WORD32 vert_bs0_tmp;
218     WORD32 horz_bs0_tmp;
219     UWORD8 *pu1_qp;
220     WORD32 qp_strd;
221     UWORD32 u4_qp_const_in_ctb;
222     WORD32 ctb_indx;
223     WORD32 i4_tu_cnt;
224     WORD32 log2_ctb_size;
225     WORD32 ctb_size;
226 
227     WORD8 i1_loop_filter_across_tiles_enabled_flag;
228     WORD8 i1_loop_filter_across_slices_enabled_flag;
229 
230     WORD32 i;
231 
232     PROFILE_DISABLE_BOUNDARY_STRENGTH();
233 
234     ps_pps = ps_bs_ctxt->ps_pps;
235     ps_sps = ps_bs_ctxt->ps_sps;
236     i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
237     i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
238     i4_tu_cnt = ps_bs_ctxt->i4_ctb_tu_cnt;
239 
240     log2_ctb_size = ps_sps->i1_log2_ctb_size;
241     ctb_size = (1 << log2_ctb_size);
242 
243     /* strides are in units of number of bytes */
244     /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
245     bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
246 
247     pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
248                     (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
249                     ps_bs_ctxt->i4_ctb_y * bs_strd);
250     pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
251                     (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
252                     ps_bs_ctxt->i4_ctb_y * bs_strd);
253 
254     /* ctb_size/8 elements per CTB */
255     qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
256     pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
257 
258     ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
259     u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));
260 
261     vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
262     horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
263 
264     /* ctb_size/8 is the number of edges per CTB
265      * ctb_size/4 is the number of BS values needed per edge
266      * divided by 8 for the number of bytes
267      * 2 is the number of bits needed for each BS value */
268 /*
269     memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) / 8 * 2 );
270     memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) / 8 * 2 );
271 */
272     memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + ((ctb_size >> 5) << 1));
273     memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));
274 
275     /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
276     if(0 != ps_bs_ctxt->i4_ctb_x)
277     {
278         pu4_vert_bs[0] |= vert_bs0_tmp;
279     }
280 
281     /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
282     if(0 != ps_bs_ctxt->i4_ctb_y)
283     {
284         pu4_horz_bs[0] |= horz_bs0_tmp;
285     }
286 
287     ps_tu = ps_bs_ctxt->ps_tu;
288 
289     /* Populating the QP array - if const_qp_in_ctb flag is one, set only the first element */
290     if(u4_qp_const_in_ctb)
291         pu1_qp[0] = ps_tu->b7_qp;
292 
293     for(i = 0; i < i4_tu_cnt; i++)
294     {
295         WORD32 start_pos_x;
296         WORD32 start_pos_y;
297         WORD32 tu_size;
298 
299 
300         UWORD32 u4_bs;
301         ps_tu = ps_bs_ctxt->ps_tu + i;
302 
303         /* start_pos_x and start_pos_y are in units of min TU size (4x4) */
304         start_pos_x = ps_tu->b4_pos_x;
305         start_pos_y = ps_tu->b4_pos_y;
306 
307         tu_size = 1 << (ps_tu->b3_size + 2);
308         tu_size >>= 2; /* TU size divided by 4 */
309 
310         u4_bs = DUP_LSB_10(tu_size);
311 
312         /* Only if the current edge falls on 8 pixel grid set BS */
313         if(0 == (start_pos_x & 1))
314         {
315             WORD32 shift;
316             shift = start_pos_y * 2;
317             /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
318              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
319              *  and deblocking is done on 8x8 grid
320              */
321             if(6 != log2_ctb_size)
322                 shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
323             pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
324         }
325         /* Only if the current edge falls on 8 pixel grid set BS */
326         if(0 == (start_pos_y & 1))
327         {
328             WORD32 shift;
329             shift = start_pos_x * 2;
330             /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
331              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
332              *  and deblocking is done on 8x8 grid
333              */
334             if(6 != log2_ctb_size)
335                 shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
336             pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
337         }
338 
339         /* Populating the QP array */
340         if(0 == u4_qp_const_in_ctb)
341         {
342             if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
343             {
344                 WORD32 row, col;
345                 for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
346                 {
347                     for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
348                     {
349                         pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
350                     }
351                 }
352             }
353         }
354 
355     }
356     {
357         /*Determine if the slice is dependent, and is its left neighbor belongs to the same slice, in a different tile*/
358         UWORD32 ctb_addr;
359         WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
360         /* If left neighbor is not available, then set BS for entire first column to zero */
361         if(!ps_pps->i1_tiles_enabled_flag)
362         {
363             if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
364                             (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
365                             (0 == ps_bs_ctxt->i4_ctb_x))
366             {
367                 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
368             }
369         }
370         else
371         {
372             //If across-tiles is disabled
373             if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
374             {
375                 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
376             }
377             else
378             {
379                 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
380                 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
381                 if(ps_bs_ctxt->i4_ctb_x)
382                 {
383                     ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
384                     left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
385                 }
386                 /*If the 1st slice in a new tile is a dependent slice*/
387                 if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
388                 {
389                     /* Removed reduntant checks */
390                     if((0 == i1_loop_filter_across_slices_enabled_flag && (
391                                     ((slice_idx != left_slice_idx) && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
392                                     ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) ||
393                                     (0 == ps_bs_ctxt->i4_ctb_x))
394                     {
395                         pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
396                     }
397                 }
398             }
399         }
400 
401         ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
402         slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
403         if(ps_bs_ctxt->i4_ctb_y)
404         {
405             ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
406             top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
407         }
408 
409         /* If top neighbor is not available, then set BS for entire first row to zero */
410         /* Removed reduntant checks */
411         if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
412                         || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx)))
413                         || (0 == ps_bs_ctxt->i4_ctb_y))
414         {
415             pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
416         }
417     }
418 
419     /**
420      *  Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
421      *   (They might have been set to  non zero values because of CBF of the current CTB)
422      *   This block might not be needed for I slices*/
423     {
424         WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
425         WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
426         if(num_rows_remaining < (ctb_size >> 3))
427         {
428             /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
429              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
430              *  and deblocking is done on 8x8 grid
431              */
432             WORD32 offset;
433             offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
434             if(6 != log2_ctb_size)
435                 offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);
436 
437             memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
438         }
439 
440         if(num_cols_remaining < (ctb_size >> 3))
441         {
442             /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
443              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
444              *  and deblocking is done on 8x8 grid
445              */
446 
447             WORD32 offset;
448             offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
449             if(6 != log2_ctb_size)
450                 offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);
451 
452             memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
453         }
454     }
455 
456     return 0;
457 }
ihevcd_ctb_boundary_strength_pbslice(bs_ctxt_t * ps_bs_ctxt)458 WORD32 ihevcd_ctb_boundary_strength_pbslice(bs_ctxt_t *ps_bs_ctxt)
459 {
460     sps_t *ps_sps;
461     pps_t *ps_pps;
462     WORD32 cur_ctb_idx, next_ctb_idx = 0;
463     WORD32 i4_tu_cnt;
464     WORD32 i4_pu_cnt;
465     tu_t *ps_tu;
466 
467     UWORD32 *pu4_vert_bs;
468     UWORD32 *pu4_horz_bs;
469     WORD32 bs_strd;
470     WORD32 vert_bs0_tmp;
471     WORD32 horz_bs0_tmp;
472     UWORD8 *pu1_qp;
473     WORD32 qp_strd;
474     UWORD32 u4_qp_const_in_ctb;
475     WORD32 ctb_indx;
476     WORD32 log2_ctb_size;
477     WORD32 ctb_size;
478 
479     WORD32 i;
480     WORD8 i1_loop_filter_across_tiles_enabled_flag;
481     WORD8 i1_loop_filter_across_slices_enabled_flag;
482 
483     PROFILE_DISABLE_BOUNDARY_STRENGTH();
484 
485     ps_sps = ps_bs_ctxt->ps_sps;
486     ps_pps = ps_bs_ctxt->ps_pps;
487 
488     log2_ctb_size = ps_sps->i1_log2_ctb_size;
489     ctb_size = (1 << log2_ctb_size);
490 
491     /* strides are in units of number of bytes */
492     /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
493     bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
494 
495     pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
496                     (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
497                     ps_bs_ctxt->i4_ctb_y * bs_strd);
498     pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
499                     (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
500                     ps_bs_ctxt->i4_ctb_y * bs_strd);
501 
502     vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
503     horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
504 
505     ps_tu = ps_bs_ctxt->ps_tu;
506 
507     /* ctb_size/8 elements per CTB */
508     qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
509     pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
510 
511     ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
512     u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));
513 
514     i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
515     i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
516 
517     /* ctb_size/8 is the number of edges per CTB
518      * ctb_size/4 is the number of BS values needed per edge
519      * divided by 8 for the number of bytes
520      * 2 is the number of bits needed for each BS value */
521 /*
522     memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) * 2 / 8 );
523     memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) * 2 / 8 );
524 */
525     memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + (ctb_size >> 4));
526     memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));
527 
528     /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
529     if(0 != ps_bs_ctxt->i4_ctb_x)
530     {
531         pu4_vert_bs[0] |= vert_bs0_tmp;
532     }
533 
534     /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
535     if(0 != ps_bs_ctxt->i4_ctb_y)
536     {
537         pu4_horz_bs[0] |= horz_bs0_tmp;
538     }
539     /* pu4_horz_bs[bs_strd / 4] corresponds to pu4_horz_bs[0] of the bottom CTB */
540     *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) = 0;
541 
542     cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
543                     + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
544     next_ctb_idx = ps_bs_ctxt->i4_next_tu_ctb_cnt;
545     if(1 == ps_bs_ctxt->ps_codec->i4_num_cores)
546     {
547         i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx % RESET_TU_BUF_NCTB];
548     }
549     else
550     {
551         i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx];
552     }
553 
554     ps_tu = ps_bs_ctxt->ps_tu;
555     if(u4_qp_const_in_ctb)
556         pu1_qp[0] = ps_tu->b7_qp;
557 
558     /* For all TUs in the CTB For left and top edges, check if there are coded coefficients on either sides of the edge */
559     for(i = 0; i < i4_tu_cnt; i++)
560     {
561         WORD32 start_pos_x;
562         WORD32 start_pos_y;
563         WORD32 end_pos_x;
564         WORD32 end_pos_y;
565         WORD32 tu_size;
566         UWORD32 u4_bs;
567         WORD32 intra_flag;
568         UWORD8 *pu1_pic_intra_flag;
569 
570         ps_tu = ps_bs_ctxt->ps_tu + i;
571 
572         start_pos_x = ps_tu->b4_pos_x;
573         start_pos_y = ps_tu->b4_pos_y;
574 
575         tu_size = 1 << (ps_tu->b3_size + 2);
576         tu_size >>= 2;
577 
578         end_pos_x = start_pos_x + tu_size;
579         end_pos_y = start_pos_y + tu_size;
580 
581         {
582             WORD32 tu_abs_x = (ps_bs_ctxt->i4_ctb_x << log2_ctb_size) + (start_pos_x << 2);
583             WORD32 tu_abs_y = (ps_bs_ctxt->i4_ctb_y << log2_ctb_size) + (start_pos_y << 2);
584 
585             WORD32 numbytes_row =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
586 
587             pu1_pic_intra_flag = ps_bs_ctxt->ps_codec->pu1_pic_intra_flag;
588             pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row;
589             pu1_pic_intra_flag += (tu_abs_x >> 6);
590 
591             intra_flag = *pu1_pic_intra_flag;
592             intra_flag &= (1 << ((tu_abs_x >> 3) % 8));
593         }
594         if(intra_flag)
595         {
596             u4_bs = DUP_LSB_10(tu_size);
597 
598             /* Only if the current edge falls on 8 pixel grid set BS */
599             if(0 == (start_pos_x & 1))
600             {
601                 WORD32 shift;
602                 shift = start_pos_y * 2;
603                 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
604                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
605                  *  and deblocking is done on 8x8 grid
606                  */
607                 if(6 != log2_ctb_size)
608                     shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
609                 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
610             }
611             /* Only if the current edge falls on 8 pixel grid set BS */
612             if(0 == (start_pos_y & 1))
613             {
614                 WORD32 shift;
615                 shift = start_pos_x * 2;
616                 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
617                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
618                  *  and deblocking is done on 8x8 grid
619                  */
620                 if(6 != log2_ctb_size)
621                     shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
622                 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
623             }
624         }
625 
626 
627         /* If the current TU is coded then set both top edge and left edge BS to 1 and go to next TU */
628         if(ps_tu->b1_y_cbf)
629         {
630             u4_bs = DUP_LSB_01(tu_size);
631 
632             /* Only if the current edge falls on 8 pixel grid set BS */
633             if(0 == (start_pos_x & 1))
634             {
635                 WORD32 shift;
636                 shift = start_pos_y * 2;
637                 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
638                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
639                  *  and deblocking is done on 8x8 grid
640                  */
641                 if(6 != log2_ctb_size)
642                     shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
643                 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
644             }
645             /* Only if the current edge falls on 8 pixel grid set BS */
646             if(0 == (start_pos_y & 1))
647             {
648                 WORD32 shift;
649                 shift = start_pos_x * 2;
650                 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
651                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
652                  *  and deblocking is done on 8x8 grid
653                  */
654                 if(6 != log2_ctb_size)
655                     shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
656                 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
657             }
658             /* Only if the current edge falls on 8 pixel grid set BS */
659             if(0 == (end_pos_x & 1))
660             {
661                 if(!(ctb_size / 8 == (end_pos_x >> 1) && ps_bs_ctxt->i4_ctb_x == ps_sps->i2_pic_wd_in_ctb - 1))
662                 {
663                     WORD32 shift;
664                     shift = start_pos_y * 2;
665                     shift += (((end_pos_x >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
666                     pu4_vert_bs[end_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
667                 }
668             }
669             /* Only if the current edge falls on 8 pixel grid set BS */
670             if(0 == (end_pos_y & 1))
671             {
672                 /* If end_pos_y corresponds to the bottom of the CTB, write to pu4_horz_bs[0] of the bottom CTB */
673                 if(ctb_size / 8 == (end_pos_y >> 1))
674                 {
675                     *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) |= (u4_bs << (start_pos_x * 2));
676                 }
677                 else
678                 {
679                     WORD32 shift;
680                     shift = start_pos_x * 2;
681                     shift += (((end_pos_y >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
682                     pu4_horz_bs[end_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
683                 }
684             }
685         }
686 
687         if(0 == u4_qp_const_in_ctb)
688         {
689             if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
690             {
691                 WORD32 row, col;
692                 for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
693                 {
694                     for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
695                     {
696                         pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
697                     }
698                 }
699             }
700         }
701     }
702 
703     /* For all PUs in the CTB,
704     For left and top edges, compute BS */
705 
706     cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
707                     + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
708 
709     {
710         WORD32 next_ctb_idx;
711         next_ctb_idx = ps_bs_ctxt->i4_next_pu_ctb_cnt;
712         i4_pu_cnt = ps_bs_ctxt->pu4_pic_pu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_pu_idx[cur_ctb_idx];
713     }
714 
715     for(i = 0; i < i4_pu_cnt; i++)
716     {
717         WORD32 start_pos_x;
718         WORD32 start_pos_y;
719         WORD32 end_pos_x;
720         WORD32 end_pos_y;
721         WORD32 pu_wd, pu_ht;
722         UWORD32 u4_bs;
723         pu_t *ps_pu = ps_bs_ctxt->ps_pu + i;
724         pu_t *ps_ngbr_pu;
725         UWORD32 u4_ngbr_pu_indx;
726 
727         start_pos_x = ps_pu->b4_pos_x;
728         start_pos_y = ps_pu->b4_pos_y;
729 
730         pu_wd = (ps_pu->b4_wd + 1);
731         pu_ht = (ps_pu->b4_ht + 1);
732 
733         end_pos_x = start_pos_x + pu_wd;
734         end_pos_y = start_pos_y + pu_ht;
735 
736         /* If the current PU is intra, set Boundary strength as 2 for both top and left edge */
737         /* Need not mask the BS to zero even if it was set to 1 already since BS 2 and 3 are assumed to be the same in leaf level functions */
738         if(ps_pu->b1_intra_flag)
739         {
740             u4_bs = DUP_LSB_10(pu_ht);
741 
742             /* Only if the current edge falls on 8 pixel grid set BS */
743             if(0 == (start_pos_x & 1))
744             {
745                 WORD32 shift;
746                 shift = start_pos_y * 2;
747                 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
748                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
749                  *  and deblocking is done on 8x8 grid
750                  */
751                 if(6 != log2_ctb_size)
752                     shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
753                 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
754             }
755 
756             u4_bs = DUP_LSB_10(pu_wd);
757 
758             /* Only if the current edge falls on 8 pixel grid set BS */
759             if(0 == (start_pos_y & 1))
760             {
761                 WORD32 shift;
762                 shift = start_pos_x * 2;
763                 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
764                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
765                  *  and deblocking is done on 8x8 grid
766                  */
767                 if(6 != log2_ctb_size)
768                     shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
769                 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
770             }
771         }
772 
773         else
774         {
775             /* Vertical edge */
776             /* Process only if the edge is not a frame edge */
777             if(0 != ps_bs_ctxt->i4_ctb_x + start_pos_x)
778             {
779                 do
780                 {
781                     WORD32 pu_ngbr_ht;
782                     WORD32 min_pu_ht;
783                     WORD32 ngbr_end_pos_y;
784                     UWORD32 ngbr_pu_idx_strd;
785                     ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
786                     u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y + 1) * ngbr_pu_idx_strd + (start_pos_x)];
787                     ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
788 
789                     pu_ngbr_ht = ps_ngbr_pu->b4_ht + 1;
790                     ngbr_end_pos_y = ps_ngbr_pu->b4_pos_y + pu_ngbr_ht;
791 
792                     min_pu_ht = MIN(ngbr_end_pos_y, end_pos_y) - start_pos_y;
793 
794                     if(ps_ngbr_pu->b1_intra_flag)
795                     {
796                         u4_bs = DUP_LSB_10(min_pu_ht);
797 
798                         /* Only if the current edge falls on 8 pixel grid set BS */
799                         if(0 == (start_pos_x & 1))
800                         {
801                             WORD32 shift;
802                             shift = start_pos_y * 2;
803                             /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
804                              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
805                              *  and deblocking is done on 8x8 grid
806                              */
807                             if(6 != log2_ctb_size)
808                                 shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
809                             pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
810                         }
811                     }
812                     else
813                     {
814                         u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
815                         if(u4_bs)
816                         {
817                             u4_bs = DUP_LSB_01(min_pu_ht);
818                             if(0 == (start_pos_x & 1))
819                             {
820                                 WORD32 shift;
821                                 shift = start_pos_y * 2;
822                                 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
823                                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
824                                  *  and deblocking is done on 8x8 grid
825                                  */
826                                 if(6 != log2_ctb_size)
827                                     shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
828                                 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
829                             }
830                         }
831                     }
832 
833                     pu_ht -= min_pu_ht;
834                     start_pos_y += min_pu_ht;
835                 }while(pu_ht > 0);
836 
837                 /* Reinitialising since the values are updated in the previous loop */
838                 pu_ht = ps_pu->b4_ht + 1;
839                 start_pos_y = ps_pu->b4_pos_y;
840             }
841 
842             /* Horizontal edge */
843             /* Process only if the edge is not a frame edge */
844             if(0 != ps_bs_ctxt->i4_ctb_y + start_pos_y)
845             {
846                 do
847                 {
848                     WORD32 pu_ngbr_wd;
849                     WORD32 min_pu_wd;
850                     WORD32 ngbr_end_pos_x;
851                     UWORD32 ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
852                     u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y)*ngbr_pu_idx_strd + (start_pos_x + 1)];
853                     ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
854 
855                     pu_ngbr_wd = ps_ngbr_pu->b4_wd + 1;
856                     ngbr_end_pos_x = ps_ngbr_pu->b4_pos_x + pu_ngbr_wd;
857 
858                     min_pu_wd = MIN(ngbr_end_pos_x, end_pos_x) - start_pos_x;
859 
860                     if(ps_ngbr_pu->b1_intra_flag)
861                     {
862                         u4_bs = DUP_LSB_10(min_pu_wd);
863 
864                         /* Only if the current edge falls on 8 pixel grid set BS */
865                         if(0 == (start_pos_y & 1))
866                         {
867                             WORD32 shift;
868                             shift = start_pos_x * 2;
869                             /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
870                              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
871                              *  and deblocking is done on 8x8 grid
872                              */
873                             if(6 != log2_ctb_size)
874                                 shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
875                             pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
876                         }
877                     }
878                     else
879                     {
880                         u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
881                         if(u4_bs)
882                         {
883                             u4_bs = DUP_LSB_01(min_pu_wd);
884 
885                             /* Only if the current edge falls on 8 pixel grid set BS */
886                             if(0 == (start_pos_y & 1))
887                             {
888                                 WORD32 shift;
889                                 shift = start_pos_x * 2;
890                                 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
891                                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
892                                  *  and deblocking is done on 8x8 grid
893                                  */
894                                 if(6 != log2_ctb_size)
895                                     shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
896                                 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
897                             }
898                         }
899                     }
900 
901                     pu_wd -= min_pu_wd;
902                     start_pos_x += min_pu_wd;
903                 }while(pu_wd > 0);
904 
905                 /* Reinitialising since the values are updated in the previous loop */
906                 pu_wd = ps_pu->b4_wd + 1;
907                 start_pos_x = ps_pu->b4_pos_x;
908             }
909         }
910     }
911 
912     {
913         /* If left neighbor is not available, then set BS for entire first column to zero */
914         UWORD32 ctb_addr;
915         WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
916 
917         if(!ps_pps->i1_tiles_enabled_flag)
918         {
919             if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
920                             (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
921                             (0 == ps_bs_ctxt->i4_ctb_x))
922             {
923                 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
924             }
925         }
926         else
927         {
928             if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
929             {
930                 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
931             }
932             else
933             {
934 
935                 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
936                 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
937 
938                 if(ps_bs_ctxt->i4_ctb_x)
939                 {
940                     ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
941                     left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
942                 }
943 
944                 if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
945                 {
946                     /* Removed redundant checks */
947                     if((0 == i1_loop_filter_across_slices_enabled_flag && (
948                                     (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
949                                     ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) || (0 == ps_bs_ctxt->i4_ctb_x))
950                     {
951                         pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
952                     }
953                 }
954             }
955         }
956 
957         ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
958         slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
959         if(ps_bs_ctxt->i4_ctb_y)
960         {
961             ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
962             top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
963         }
964         /* If top neighbor is not available, then set BS for entire first row to zero */
965         /* Removed redundant checks */
966         if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
967                         || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx)))
968                         || (0 == ps_bs_ctxt->i4_ctb_y))
969         {
970             pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
971         }
972     }
973 
974     /**
975      *  Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
976      *   (They might have been set to non-zero values because of CBF of the current CTB)*/
977     {
978         WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
979         WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
980         if(num_rows_remaining < (ctb_size >> 3))
981         {
982             /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
983              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
984              *  and deblocking is done on 8x8 grid
985              */
986             WORD32 offset;
987             offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
988             if(6 != log2_ctb_size)
989                 offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);
990 
991             memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
992         }
993 
994         if(num_cols_remaining < (ctb_size >> 3))
995         {
996             /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
997              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
998              *  and deblocking is done on 8x8 grid
999              */
1000 
1001             WORD32 offset;
1002             offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
1003             if(6 != log2_ctb_size)
1004                 offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);
1005 
1006             memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
1007         }
1008     }
1009     return 0;
1010 }
1011