1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19  *******************************************************************************
20  * @file
21  *  ihevc_sao.c
22  *
23  * @brief
24  *  Contains function definitions for sample adaptive offset process
25  *
26  * @author
27  *  Srinivas T
28  *
29  * @par List of Functions:
30  *
31  * @remarks
32  *  None
33  *
34  *******************************************************************************
35  */
36 
37 #include <stdio.h>
38 #include <stddef.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <assert.h>
42 
43 #include "ihevc_typedefs.h"
44 #include "iv.h"
45 #include "ivd.h"
46 #include "ihevcd_cxa.h"
47 #include "ithread.h"
48 
49 #include "ihevc_defs.h"
50 #include "ihevc_debug.h"
51 #include "ihevc_defs.h"
52 #include "ihevc_structs.h"
53 #include "ihevc_macros.h"
54 #include "ihevc_platform_macros.h"
55 #include "ihevc_cabac_tables.h"
56 #include "ihevc_sao.h"
57 #include "ihevc_mem_fns.h"
58 
59 #include "ihevc_error.h"
60 #include "ihevc_common_tables.h"
61 
62 #include "ihevcd_trace.h"
63 #include "ihevcd_defs.h"
64 #include "ihevcd_function_selector.h"
65 #include "ihevcd_structs.h"
66 #include "ihevcd_error.h"
67 #include "ihevcd_nal.h"
68 #include "ihevcd_bitstream.h"
69 #include "ihevcd_job_queue.h"
70 #include "ihevcd_utils.h"
71 
72 #include "ihevc_deblk.h"
73 #include "ihevc_deblk_tables.h"
74 #include "ihevcd_profile.h"
75 #include "ihevcd_sao.h"
76 #include "ihevcd_debug.h"
77 
78 #define SAO_SHIFT_CTB    8
79 
80 /**
81  * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions)
82  */
ihevcd_sao_ctb(sao_ctxt_t * ps_sao_ctxt)83 void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
84 {
85     codec_t *ps_codec = ps_sao_ctxt->ps_codec;
86     UWORD8 *pu1_src_luma;
87     UWORD8 *pu1_src_chroma;
88     WORD32 src_strd;
89     WORD32 ctb_size;
90     WORD32 log2_ctb_size;
91     sps_t *ps_sps;
92     sao_t *ps_sao;
93     WORD32 row, col;
94     UWORD8 au1_avail_luma[8];
95     UWORD8 au1_avail_chroma[8];
96     WORD32 i;
97     UWORD8 *pu1_src_top_luma;
98     UWORD8 *pu1_src_top_chroma;
99     UWORD8 *pu1_src_left_luma;
100     UWORD8 *pu1_src_left_chroma;
101     UWORD8 au1_src_top_right[2];
102     UWORD8 au1_src_bot_left[2];
103     UWORD8 *pu1_no_loop_filter_flag;
104     WORD32 loop_filter_strd;
105 
106     WORD8 ai1_offset_y[5];
107     WORD8 ai1_offset_cb[5];
108     WORD8 ai1_offset_cr[5];
109 
110     PROFILE_DISABLE_SAO();
111 
112     ai1_offset_y[0] = 0;
113     ai1_offset_cb[0] = 0;
114     ai1_offset_cr[0] = 0;
115 
116     ps_sps = ps_sao_ctxt->ps_sps;
117     log2_ctb_size = ps_sps->i1_log2_ctb_size;
118     ctb_size = (1 << log2_ctb_size);
119     src_strd = ps_sao_ctxt->ps_codec->i4_strd;
120     pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
121     pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
122 
123     ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
124     loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
125 
126     /* Current CTB */
127     {
128         WORD32 sao_wd_luma;
129         WORD32 sao_wd_chroma;
130         WORD32 sao_ht_luma;
131         WORD32 sao_ht_chroma;
132 
133         WORD32 remaining_rows;
134         WORD32 remaining_cols;
135 
136         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
137         sao_wd_luma = MIN(ctb_size, remaining_cols);
138         sao_wd_chroma = MIN(ctb_size, remaining_cols);
139 
140         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
141         sao_ht_luma = MIN(ctb_size, remaining_rows);
142         sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
143 
144         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
145         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
146         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
147         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
148 
149         pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
150                         ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
151                         ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
152 
153         ai1_offset_y[1] = ps_sao->b4_y_offset_1;
154         ai1_offset_y[2] = ps_sao->b4_y_offset_2;
155         ai1_offset_y[3] = ps_sao->b4_y_offset_3;
156         ai1_offset_y[4] = ps_sao->b4_y_offset_4;
157 
158         ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
159         ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
160         ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
161         ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
162 
163         ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
164         ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
165         ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
166         ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
167 
168         for(i = 0; i < 8; i++)
169         {
170             au1_avail_luma[i] = 255;
171             au1_avail_chroma[i] = 255;
172         }
173 
174 
175         if(0 == ps_sao_ctxt->i4_ctb_x)
176         {
177             au1_avail_luma[0] = 0;
178             au1_avail_luma[4] = 0;
179             au1_avail_luma[6] = 0;
180 
181             au1_avail_chroma[0] = 0;
182             au1_avail_chroma[4] = 0;
183             au1_avail_chroma[6] = 0;
184         }
185 
186         if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
187         {
188             au1_avail_luma[1] = 0;
189             au1_avail_luma[5] = 0;
190             au1_avail_luma[7] = 0;
191 
192             au1_avail_chroma[1] = 0;
193             au1_avail_chroma[5] = 0;
194             au1_avail_chroma[7] = 0;
195         }
196 
197         if(0 == ps_sao_ctxt->i4_ctb_y)
198         {
199             au1_avail_luma[2] = 0;
200             au1_avail_luma[4] = 0;
201             au1_avail_luma[5] = 0;
202 
203             au1_avail_chroma[2] = 0;
204             au1_avail_chroma[4] = 0;
205             au1_avail_chroma[5] = 0;
206         }
207 
208         if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
209         {
210             au1_avail_luma[3] = 0;
211             au1_avail_luma[6] = 0;
212             au1_avail_luma[7] = 0;
213 
214             au1_avail_chroma[3] = 0;
215             au1_avail_chroma[6] = 0;
216             au1_avail_chroma[7] = 0;
217         }
218 
219 
220         if(0 == ps_sao->b3_y_type_idx)
221         {
222             /* Update left, top and top-left */
223             for(row = 0; row < sao_ht_luma; row++)
224             {
225                 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
226             }
227             ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
228 
229             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
230 
231         }
232         else
233         {
234             UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
235             UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
236             WORD32 tmp_strd = MAX_CTB_SIZE + 2;
237             WORD32 no_loop_filter_enabled = 0;
238 
239             /* Check the loop filter flags and copy the original values for back up */
240             {
241                 UWORD32 u4_no_loop_filter_flag;
242                 WORD32 min_cu = 8;
243                 UWORD8 *pu1_src_tmp = pu1_src_luma;
244 
245                 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
246                 {
247                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
248                                     ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
249                     u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
250 
251                     if(u4_no_loop_filter_flag)
252                     {
253                         WORD32 tmp_wd = sao_wd_luma;
254                         no_loop_filter_enabled = 1;
255                         while(tmp_wd > 0)
256                         {
257                             if(CTZ(u4_no_loop_filter_flag))
258                             {
259                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
260                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
261                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
262                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
263                             }
264                             else
265                             {
266                                 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
267                                 {
268                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
269                                     {
270                                         pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
271                                     }
272                                 }
273 
274                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
275                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
276                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
277                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
278                             }
279                         }
280 
281                         pu1_src_tmp -= sao_wd_luma;
282                     }
283 
284                     pu1_src_tmp += min_cu * src_strd;
285                     pu1_src_copy += min_cu * tmp_strd;
286                 }
287             }
288 
289             if(1 == ps_sao->b3_y_type_idx)
290             {
291                 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
292                                                                           src_strd,
293                                                                           pu1_src_left_luma,
294                                                                           pu1_src_top_luma,
295                                                                           ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
296                                                                           ps_sao->b5_y_band_pos,
297                                                                           ai1_offset_y,
298                                                                           sao_wd_luma,
299                                                                           sao_ht_luma);
300             }
301             else // if(2 <= ps_sao->b3_y_type_idx)
302             {
303                 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
304                 au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
305                 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
306                                                                   src_strd,
307                                                                   pu1_src_left_luma,
308                                                                   pu1_src_top_luma,
309                                                                   ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
310                                                                   au1_src_top_right,
311                                                                   au1_src_bot_left,
312                                                                   au1_avail_luma,
313                                                                   ai1_offset_y,
314                                                                   sao_wd_luma,
315                                                                   sao_ht_luma);
316             }
317 
318             /* Check the loop filter flags and copy the original values back if they are set */
319             if(no_loop_filter_enabled)
320             {
321                 UWORD32 u4_no_loop_filter_flag;
322                 WORD32 min_cu = 8;
323                 UWORD8 *pu1_src_tmp = pu1_src_luma;
324 
325                 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
326                 {
327                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
328                     u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
329 
330                     if(u4_no_loop_filter_flag)
331                     {
332                         WORD32 tmp_wd = sao_wd_luma;
333                         while(tmp_wd > 0)
334                         {
335                             if(CTZ(u4_no_loop_filter_flag))
336                             {
337                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
338                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
339                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
340                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
341                             }
342                             else
343                             {
344                                 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
345                                 {
346                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
347                                     {
348                                         pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
349                                     }
350                                 }
351 
352                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
353                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
354                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
355                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
356                             }
357                         }
358 
359                         pu1_src_tmp -= sao_wd_luma;
360                     }
361 
362                     pu1_src_tmp += min_cu * src_strd;
363                     pu1_src_copy += min_cu * tmp_strd;
364                 }
365             }
366 
367         }
368 
369         if(0 == ps_sao->b3_cb_type_idx)
370         {
371             for(row = 0; row < sao_ht_chroma; row++)
372             {
373                 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
374                 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
375             }
376             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
377             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
378 
379             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
380         }
381         else
382         {
383             UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
384             UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
385             WORD32 tmp_strd = MAX_CTB_SIZE + 4;
386             WORD32 no_loop_filter_enabled = 0;
387 
388             /* Check the loop filter flags and copy the original values for back up */
389             {
390                 UWORD32 u4_no_loop_filter_flag;
391                 WORD32 min_cu = 4;
392                 UWORD8 *pu1_src_tmp = pu1_src_chroma;
393 
394                 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
395                 {
396                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
397                     u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
398 
399                     if(u4_no_loop_filter_flag)
400                     {
401                         WORD32 tmp_wd = sao_wd_chroma;
402                         no_loop_filter_enabled = 1;
403                         while(tmp_wd > 0)
404                         {
405                             if(CTZ(u4_no_loop_filter_flag))
406                             {
407                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
408                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
409                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
410                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
411                             }
412                             else
413                             {
414                                 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
415                                 {
416                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
417                                     {
418                                         pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
419                                     }
420                                 }
421 
422                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
423                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
424                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
425                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
426                             }
427                         }
428 
429                         pu1_src_tmp -= sao_wd_chroma;
430                     }
431 
432                     pu1_src_tmp += min_cu * src_strd;
433                     pu1_src_copy += min_cu * tmp_strd;
434                 }
435             }
436 
437             if(1 == ps_sao->b3_cb_type_idx)
438             {
439                 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
440                                                                             src_strd,
441                                                                             pu1_src_left_chroma,
442                                                                             pu1_src_top_chroma,
443                                                                             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
444                                                                             ps_sao->b5_cb_band_pos,
445                                                                             ps_sao->b5_cr_band_pos,
446                                                                             ai1_offset_cb,
447                                                                             ai1_offset_cr,
448                                                                             sao_wd_chroma,
449                                                                             sao_ht_chroma
450                                                                            );
451             }
452             else // if(2 <= ps_sao->b3_cb_type_idx)
453             {
454                 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
455                 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
456                 au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
457                 au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
458                 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
459                                                                      src_strd,
460                                                                      pu1_src_left_chroma,
461                                                                      pu1_src_top_chroma,
462                                                                      ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
463                                                                      au1_src_top_right,
464                                                                      au1_src_bot_left,
465                                                                      au1_avail_chroma,
466                                                                      ai1_offset_cb,
467                                                                      ai1_offset_cr,
468                                                                      sao_wd_chroma,
469                                                                      sao_ht_chroma);
470             }
471 
472             /* Check the loop filter flags and copy the original values back if they are set */
473             if(no_loop_filter_enabled)
474             {
475                 UWORD32 u4_no_loop_filter_flag;
476                 WORD32 min_cu = 4;
477                 UWORD8 *pu1_src_tmp = pu1_src_chroma;
478 
479                 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
480                 {
481                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
482                     u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
483 
484                     if(u4_no_loop_filter_flag)
485                     {
486                         WORD32 tmp_wd = sao_wd_chroma;
487                         while(tmp_wd > 0)
488                         {
489                             if(CTZ(u4_no_loop_filter_flag))
490                             {
491                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
492                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
493                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
494                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
495                             }
496                             else
497                             {
498                                 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
499                                 {
500                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
501                                     {
502                                         pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
503                                     }
504                                 }
505 
506                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
507                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
508                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
509                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
510                             }
511                         }
512 
513                         pu1_src_tmp -= sao_wd_chroma;
514                     }
515 
516                     pu1_src_tmp += min_cu * src_strd;
517                     pu1_src_copy += min_cu * tmp_strd;
518                 }
519             }
520 
521         }
522 
523     }
524 }
525 
ihevcd_sao_shift_ctb(sao_ctxt_t * ps_sao_ctxt)526 void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
527 {
528     codec_t *ps_codec = ps_sao_ctxt->ps_codec;
529     UWORD8 *pu1_src_luma;
530     UWORD8 *pu1_src_chroma;
531     WORD32 src_strd;
532     WORD32 ctb_size;
533     WORD32 log2_ctb_size;
534     sps_t *ps_sps;
535     sao_t *ps_sao;
536     pps_t *ps_pps;
537     slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
538     tile_t *ps_tile;
539     UWORD16 *pu1_slice_idx;
540     UWORD16 *pu1_tile_idx;
541     WORD32 row, col;
542     UWORD8 au1_avail_luma[8];
543     UWORD8 au1_avail_chroma[8];
544     UWORD8 au1_tile_slice_boundary[8];
545     UWORD8 au4_ilf_across_tile_slice_enable[8];
546     WORD32 i;
547     UWORD8 *pu1_src_top_luma;
548     UWORD8 *pu1_src_top_chroma;
549     UWORD8 *pu1_src_left_luma;
550     UWORD8 *pu1_src_left_chroma;
551     UWORD8 au1_src_top_right[2];
552     UWORD8 au1_src_bot_left[2];
553     UWORD8 *pu1_no_loop_filter_flag;
554     UWORD8 *pu1_src_backup_luma;
555     UWORD8 *pu1_src_backup_chroma;
556     WORD32 backup_strd;
557     WORD32 loop_filter_strd;
558 
559     WORD32 no_loop_filter_enabled_luma = 0;
560     WORD32 no_loop_filter_enabled_chroma = 0;
561     UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
562     UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
563     UWORD8 *pu1_sao_src_luma_top_left_ctb;
564     UWORD8 *pu1_sao_src_chroma_top_left_ctb;
565     UWORD8 *pu1_sao_src_top_left_luma_top_right;
566     UWORD8 *pu1_sao_src_top_left_chroma_top_right;
567     UWORD8  u1_sao_src_top_left_luma_bot_left;
568     UWORD8  *pu1_sao_src_top_left_luma_bot_left;
569     UWORD8 *au1_sao_src_top_left_chroma_bot_left;
570     UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
571 
572     WORD8 ai1_offset_y[5];
573     WORD8 ai1_offset_cb[5];
574     WORD8 ai1_offset_cr[5];
575     WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
576 
577     PROFILE_DISABLE_SAO();
578 
579     ai1_offset_y[0] = 0;
580     ai1_offset_cb[0] = 0;
581     ai1_offset_cr[0] = 0;
582 
583     ps_sps = ps_sao_ctxt->ps_sps;
584     ps_pps = ps_sao_ctxt->ps_pps;
585     ps_tile = ps_sao_ctxt->ps_tile;
586 
587     log2_ctb_size = ps_sps->i1_log2_ctb_size;
588     ctb_size = (1 << log2_ctb_size);
589     src_strd = ps_sao_ctxt->ps_codec->i4_strd;
590     ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
591     ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
592 
593     pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
594     pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
595     pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
596     pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
597 
598     /*Stores the left value for each row ctbs- Needed for column tiles*/
599     pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
600     pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
601     pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
602     pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
603     u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
604     pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
605     au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
606     pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
607     pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
608     pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
609 
610     ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
611     loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
612     backup_strd = 2 * MAX_CTB_SIZE;
613 
614     DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
615 
616     {
617         /* Check the loop filter flags and copy the original values for back up */
618         /* Luma */
619 
620         /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs
621          * can belong to different slice with their own sao_enable flag */
622         {
623             UWORD32 u4_no_loop_filter_flag;
624             WORD32 loop_filter_bit_pos;
625             WORD32 log2_min_cu = 3;
626             WORD32 min_cu = (1 << log2_min_cu);
627             UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
628             WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
629             WORD32 sao_blk_wd = ctb_size;
630             WORD32 remaining_rows;
631             WORD32 remaining_cols;
632 
633             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
634             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
635             if(remaining_rows <= SAO_SHIFT_CTB)
636                 sao_blk_ht += remaining_rows;
637             if(remaining_cols <= SAO_SHIFT_CTB)
638                 sao_blk_wd += remaining_cols;
639 
640             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
641             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
642 
643             pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
644 
645             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
646                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
647             if(ps_sao_ctxt->i4_ctb_x > 0)
648                 loop_filter_bit_pos -= 1;
649 
650             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
651                             (loop_filter_bit_pos >> 3);
652 
653             for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
654                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
655             {
656                 WORD32 tmp_wd = sao_blk_wd;
657 
658                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
659                                 (loop_filter_bit_pos & 7);
660                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
661 
662                 if(u4_no_loop_filter_flag)
663                 {
664                     no_loop_filter_enabled_luma = 1;
665                     while(tmp_wd > 0)
666                     {
667                         if(CTZ(u4_no_loop_filter_flag))
668                         {
669                             pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
670                             pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
671                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
672                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
673                         }
674                         else
675                         {
676                             for(row = 0; row < min_cu; row++)
677                             {
678                                 for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
679                                 {
680                                     pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
681                                 }
682                             }
683                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
684                             pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
685                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
686                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
687                         }
688                     }
689 
690                     pu1_src_tmp_luma -= sao_blk_wd;
691                     pu1_src_backup_luma -= sao_blk_wd;
692                 }
693 
694                 pu1_src_tmp_luma += (src_strd << log2_min_cu);
695                 pu1_src_backup_luma += (backup_strd << log2_min_cu);
696             }
697         }
698 
699         /* Chroma */
700 
701         {
702             UWORD32 u4_no_loop_filter_flag;
703             WORD32 loop_filter_bit_pos;
704             WORD32 log2_min_cu = 3;
705             WORD32 min_cu = (1 << log2_min_cu);
706             UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
707             WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
708             WORD32 sao_blk_wd = ctb_size;
709             WORD32 remaining_rows;
710             WORD32 remaining_cols;
711 
712             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
713             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
714             if(remaining_rows <= 2 * SAO_SHIFT_CTB)
715                 sao_blk_ht += remaining_rows;
716             if(remaining_cols <= 2 * SAO_SHIFT_CTB)
717                 sao_blk_wd += remaining_cols;
718 
719             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
720             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
721 
722             pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
723 
724             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
725                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
726             if(ps_sao_ctxt->i4_ctb_x > 0)
727                 loop_filter_bit_pos -= 2;
728 
729             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
730                             (loop_filter_bit_pos >> 3);
731 
732             for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
733                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
734             {
735                 WORD32 tmp_wd = sao_blk_wd;
736 
737                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
738                                 (loop_filter_bit_pos & 7);
739                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
740 
741                 if(u4_no_loop_filter_flag)
742                 {
743                     no_loop_filter_enabled_chroma = 1;
744                     while(tmp_wd > 0)
745                     {
746                         if(CTZ(u4_no_loop_filter_flag))
747                         {
748                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
749                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
750                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
751                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
752                         }
753                         else
754                         {
755                             for(row = 0; row < min_cu / 2; row++)
756                             {
757                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
758                                 {
759                                     pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
760                                 }
761                             }
762 
763                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
764                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
765                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
766                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
767                         }
768                     }
769 
770                     pu1_src_tmp_chroma -= sao_blk_wd;
771                     pu1_src_backup_chroma -= sao_blk_wd;
772                 }
773 
774                 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
775                 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
776             }
777         }
778     }
779 
780     DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
781 
782     /* Top-left CTB */
783     if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
784     {
785         WORD32 sao_wd_luma = SAO_SHIFT_CTB;
786         WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
787         WORD32 sao_ht_luma = SAO_SHIFT_CTB;
788         WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
789 
790         WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
791         WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
792         WORD32 au4_idx_tl[8], idx_tl;
793 
794         slice_header_t *ps_slice_hdr_top_left;
795         {
796             WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
797                                         (ps_sao_ctxt->i4_ctb_x - 1);
798             ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx];
799         }
800 
801 
802         pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
803         pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
804         ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
805         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
806         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
807         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
808         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
809 
810         if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag)
811         {
812             if(0 == ps_sao->b3_y_type_idx)
813             {
814                 /* Update left, top and top-left */
815                 for(row = 0; row < sao_ht_luma; row++)
816                 {
817                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
818                 }
819                 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
820 
821                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
822 
823 
824             }
825 
826             else if(1 == ps_sao->b3_y_type_idx)
827             {
828                 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
829                 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
830                 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
831                 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
832 
833                 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
834                                                                           src_strd,
835                                                                           pu1_src_left_luma,
836                                                                           pu1_src_top_luma,
837                                                                           pu1_sao_src_luma_top_left_ctb,
838                                                                           ps_sao->b5_y_band_pos,
839                                                                           ai1_offset_y,
840                                                                           sao_wd_luma,
841                                                                           sao_ht_luma
842                                                                          );
843             }
844 
845             else // if(2 <= ps_sao->b3_y_type_idx)
846             {
847                 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
848                 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
849                 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
850                 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
851 
852                 for(i = 0; i < 8; i++)
853                 {
854                     au1_avail_luma[i] = 255;
855                     au1_tile_slice_boundary[i] = 0;
856                     au4_idx_tl[i] = 0;
857                     au4_ilf_across_tile_slice_enable[i] = 1;
858                 }
859 
860                 /******************************************************************
861                  * Derive the  Top-left CTB's neighbor pixel's slice indices.
862                  *
863                  *          TL_T
864                  *       4  _2__5________
865                  *     0   |    |       |
866                  *    TL_L | TL | 1 TL_R|
867                  *         |____|_______|____
868                  *        6|TL_D|7      |    |
869                  *         | 3  |       |    |
870                  *         |____|_______|    |
871                  *              |            |
872                  *              |            |
873                  *              |____________|
874                  *
875                  *****************************************************************/
876 
877                 /* Entered only when the current slice is not the first slice in the picture or when tiles are enabled */
878                 {
879                     if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
880                     {
881                         {
882                             /*Assuming that sao shift is uniform along x and y directions*/
883                             if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
884                             {
885                                 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
886                                 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
887                             }
888                             else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
889                             {
890                                 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
891                                 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
892                             }
893                             ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
894                             ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
895 
896                             ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
897                             ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
898 
899                             ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
900                             ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
901 
902                             ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
903                             ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
904                         }
905 
906                         if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
907                         {
908                             /*Calculate slice indices for neighbor pixels*/
909                             idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
910                             au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
911                             au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
912                             au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
913                             au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
914                             au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
915 
916                             if((0 == (1 << log2_ctb_size) - sao_wd_luma))
917                             {
918                                 if(ps_sao_ctxt->i4_ctb_x == 1)
919                                 {
920                                     au4_idx_tl[6] = -1;
921                                     au4_idx_tl[4] = -1;
922                                 }
923                                 else
924                                 {
925                                     au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
926                                 }
927                                 if(ps_sao_ctxt->i4_ctb_y == 1)
928                                 {
929                                     au4_idx_tl[5] = -1;
930                                     au4_idx_tl[4] = -1;
931                                 }
932                                 else
933                                 {
934                                     au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
935                                     au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
936                                 }
937                                 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
938                             }
939 
940                             /* Verify that the neighbor CTBs do not cross the picture boundary.
941                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
942                              * of the pixel having a greater address is checked. Accordingly, set the availability flags.
943                              * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
944                              * the respective pixel's flags are checked
945                              */
946 
947                             if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
948                             {
949                                 au4_ilf_across_tile_slice_enable[4] = 0;
950                                 au4_ilf_across_tile_slice_enable[6] = 0;
951                             }
952                             else
953                             {
954                                 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
955                             }
956                             if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
957                             {
958                                 au4_ilf_across_tile_slice_enable[5] = 0;
959                                 au4_ilf_across_tile_slice_enable[4] = 0;
960                             }
961                             else
962                             {
963                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
964                                 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
965                             }
966                             au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
967                             au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
968                             au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
969                             au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
970                             au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
971 
972                             if(au4_idx_tl[5] > idx_tl)
973                             {
974                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
975                             }
976 
977                             /*
978                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
979                              * of the pixel having a greater address is checked. Accordingly, set the availability flags.
980                              * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
981                              * the respective pixel's flags are checked
982                              */
983                             for(i = 0; i < 8; i++)
984                             {
985                                 /*Sets the edges that lie on the slice/tile boundary*/
986                                 if(au4_idx_tl[i] != idx_tl)
987                                 {
988                                     au1_tile_slice_boundary[i] = 1;
989                                 }
990                                 else
991                                 {
992                                     au4_ilf_across_tile_slice_enable[i] = 1;
993                                 }
994                             }
995 
996                             ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
997                         }
998 
999                         if(ps_pps->i1_tiles_enabled_flag)
1000                         {
1001                             /* Calculate availability flags at tile boundary */
1002                             if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1003                             {
1004                                 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1005                                 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1006                                 {
1007                                     /*Set the boundary arrays*/
1008                                     /*Calculate tile indices for neighbor pixels*/
1009                                     idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1010                                     au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1011                                     au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1012                                     au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1013                                     au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1014                                     au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1015 
1016                                     if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1017                                     {
1018                                         if(ps_sao_ctxt->i4_ctb_x == 1)
1019                                         {
1020                                             au4_idx_tl[6] = -1;
1021                                             au4_idx_tl[4] = -1;
1022                                         }
1023                                         else
1024                                         {
1025                                             au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1026                                         }
1027                                         if(ps_sao_ctxt->i4_ctb_y == 1)
1028                                         {
1029                                             au4_idx_tl[5] = -1;
1030                                             au4_idx_tl[4] = -1;
1031                                         }
1032                                         else
1033                                         {
1034                                             au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1035                                             au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1036                                         }
1037                                         au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1038                                     }
1039                                     for(i = 0; i < 8; i++)
1040                                     {
1041                                         /*Sets the edges that lie on the tile boundary*/
1042                                         if(au4_idx_tl[i] != idx_tl)
1043                                         {
1044                                             au1_tile_slice_boundary[i] |= 1;
1045                                             au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1046                                         }
1047                                     }
1048                                 }
1049                             }
1050                         }
1051 
1052 
1053                         /*Set availability flags based on tile and slice boundaries*/
1054                         for(i = 0; i < 8; i++)
1055                         {
1056                             /*Sets the edges that lie on the slice/tile boundary*/
1057                             if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1058                             {
1059                                 au1_avail_luma[i] = 0;
1060                             }
1061                         }
1062                     }
1063                 }
1064 
1065                 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1066                 {
1067                     au1_avail_luma[0] = 0;
1068                     au1_avail_luma[4] = 0;
1069                     au1_avail_luma[6] = 0;
1070                 }
1071 
1072                 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1073                 {
1074                     au1_avail_luma[1] = 0;
1075                     au1_avail_luma[5] = 0;
1076                     au1_avail_luma[7] = 0;
1077                 }
1078                 //y==1 case
1079                 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
1080                 {
1081                     au1_avail_luma[2] = 0;
1082                     au1_avail_luma[4] = 0;
1083                     au1_avail_luma[5] = 0;
1084                 }
1085                 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1086                 {
1087                     au1_avail_luma[3] = 0;
1088                     au1_avail_luma[6] = 0;
1089                     au1_avail_luma[7] = 0;
1090                 }
1091 
1092                 {
1093                     au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1094                     u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
1095                     ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1096                                                                       src_strd,
1097                                                                       pu1_src_left_luma,
1098                                                                       pu1_src_top_luma,
1099                                                                       pu1_sao_src_luma_top_left_ctb,
1100                                                                       au1_src_top_right,
1101                                                                       &u1_sao_src_top_left_luma_bot_left,
1102                                                                       au1_avail_luma,
1103                                                                       ai1_offset_y,
1104                                                                       sao_wd_luma,
1105                                                                       sao_ht_luma);
1106                 }
1107             }
1108 
1109         }
1110         else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1111         {
1112             /* Update left, top and top-left */
1113             for(row = 0; row < sao_ht_luma; row++)
1114             {
1115                 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1116             }
1117             pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1118 
1119             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1120         }
1121 
1122         if(ps_slice_hdr_top_left->i1_slice_sao_chroma_flag)
1123         {
1124             if(0 == ps_sao->b3_cb_type_idx)
1125             {
1126                 for(row = 0; row < sao_ht_chroma; row++)
1127                 {
1128                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1129                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1130                 }
1131                 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1132                 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1133 
1134                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1135 
1136             }
1137 
1138             else if(1 == ps_sao->b3_cb_type_idx)
1139             {
1140                 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1141                 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1142                 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1143                 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1144 
1145                 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1146                 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1147                 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1148                 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1149 
1150                 if(chroma_yuv420sp_vu)
1151                 {
1152                     ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1153                                                                                 src_strd,
1154                                                                                 pu1_src_left_chroma,
1155                                                                                 pu1_src_top_chroma,
1156                                                                                 pu1_sao_src_chroma_top_left_ctb,
1157                                                                                 ps_sao->b5_cr_band_pos,
1158                                                                                 ps_sao->b5_cb_band_pos,
1159                                                                                 ai1_offset_cr,
1160                                                                                 ai1_offset_cb,
1161                                                                                 sao_wd_chroma,
1162                                                                                 sao_ht_chroma
1163                                                                                );
1164                 }
1165                 else
1166                 {
1167                     ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1168                                                                                 src_strd,
1169                                                                                 pu1_src_left_chroma,
1170                                                                                 pu1_src_top_chroma,
1171                                                                                 pu1_sao_src_chroma_top_left_ctb,
1172                                                                                 ps_sao->b5_cb_band_pos,
1173                                                                                 ps_sao->b5_cr_band_pos,
1174                                                                                 ai1_offset_cb,
1175                                                                                 ai1_offset_cr,
1176                                                                                 sao_wd_chroma,
1177                                                                                 sao_ht_chroma
1178                                                                                );
1179                 }
1180             }
1181 
1182             else // if(2 <= ps_sao->b3_cb_type_idx)
1183             {
1184                 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1185                 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1186                 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1187                 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1188 
1189                 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1190                 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1191                 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1192                 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1193                 for(i = 0; i < 8; i++)
1194                 {
1195                     au1_avail_chroma[i] = 255;
1196                     au1_tile_slice_boundary[i] = 0;
1197                     au4_idx_tl[i] = 0;
1198                     au4_ilf_across_tile_slice_enable[i] = 1;
1199                 }
1200                 /*In case of slices*/
1201                 {
1202                     if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1203                     {
1204                         if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
1205                         {
1206                             ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
1207                             ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
1208                         }
1209                         else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
1210                         {
1211                             ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
1212                             ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
1213                         }
1214                         ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
1215                         ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
1216 
1217                         ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
1218                         ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
1219 
1220                         ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
1221                         ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
1222 
1223                         ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
1224                         ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
1225 
1226                         if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1227                         {
1228 
1229                             idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1230                             au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1231                             au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1232                             au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1233                             au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1234                             au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1235 
1236                             if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
1237                             {
1238                                 if(ps_sao_ctxt->i4_ctb_x == 1)
1239                                 {
1240                                     au4_idx_tl[6] = -1;
1241                                     au4_idx_tl[4] = -1;
1242                                 }
1243                                 else
1244                                 {
1245                                     au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1246                                 }
1247                                 if(ps_sao_ctxt->i4_ctb_y == 1)
1248                                 {
1249                                     au4_idx_tl[5] = -1;
1250                                     au4_idx_tl[4] = -1;
1251                                 }
1252                                 else
1253                                 {
1254                                     au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1255                                     au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1256                                 }
1257                                 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1258                             }
1259 
1260                             /* Verify that the neighbor ctbs don't cross pic boundary
1261                              * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
1262                             if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
1263                             {
1264                                 au4_ilf_across_tile_slice_enable[4] = 0;
1265                                 au4_ilf_across_tile_slice_enable[6] = 0;
1266                             }
1267                             else
1268                             {
1269                                 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1270                             }
1271                             if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
1272                             {
1273                                 au4_ilf_across_tile_slice_enable[5] = 0;
1274                                 au4_ilf_across_tile_slice_enable[4] = 0;
1275                             }
1276                             else
1277                             {
1278                                 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1279                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1280                             }
1281                             au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1282                             au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1283                             au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1284                             au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1285                             au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1286                             /*
1287                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1288                              * of the pixel having a greater address is checked. Accordingly, set the availability flags
1289                              */
1290                             for(i = 0; i < 8; i++)
1291                             {
1292                                 /*Sets the edges that lie on the slice/tile boundary*/
1293                                 if(au4_idx_tl[i] != idx_tl)
1294                                 {
1295                                     au1_tile_slice_boundary[i] = 1;
1296                                 }
1297                                 else
1298                                 {
1299                                     au4_ilf_across_tile_slice_enable[i] = 1;
1300                                 }
1301                             }
1302 
1303                             /*Reset indices*/
1304                             for(i = 0; i < 8; i++)
1305                             {
1306                                 au4_idx_tl[i] = 0;
1307                             }
1308                         }
1309                         if(ps_pps->i1_tiles_enabled_flag)
1310                         {
1311                             /* Calculate availability flags at tile boundary */
1312                             if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1313                             {
1314                                 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1315                                 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1316                                 {
1317                                     /*Set the boundary arrays*/
1318                                     /*Calculate tile indices for neighbor pixels*/
1319                                     idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1320                                     au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1321                                     au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1322                                     au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1323                                     au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1324                                     au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1325 
1326                                     if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1327                                     {
1328                                         if(ps_sao_ctxt->i4_ctb_x == 1)
1329                                         {
1330                                             au4_idx_tl[6] = -1;
1331                                             au4_idx_tl[4] = -1;
1332                                         }
1333                                         else
1334                                         {
1335                                             au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1336                                         }
1337                                         if(ps_sao_ctxt->i4_ctb_y == 1)
1338                                         {
1339                                             au4_idx_tl[5] = -1;
1340                                             au4_idx_tl[4] = -1;
1341                                         }
1342                                         else
1343                                         {
1344                                             au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1345                                             au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1346                                         }
1347                                         au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1348                                     }
1349                                     for(i = 0; i < 8; i++)
1350                                     {
1351                                         /*Sets the edges that lie on the tile boundary*/
1352                                         if(au4_idx_tl[i] != idx_tl)
1353                                         {
1354                                             au1_tile_slice_boundary[i] |= 1;
1355                                             au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1356                                         }
1357                                     }
1358                                 }
1359                             }
1360                         }
1361 
1362                         for(i = 0; i < 8; i++)
1363                         {
1364                             /*Sets the edges that lie on the slice/tile boundary*/
1365                             if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1366                             {
1367                                 au1_avail_chroma[i] = 0;
1368                             }
1369                         }
1370                     }
1371                 }
1372 
1373                 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
1374                 {
1375                     au1_avail_chroma[0] = 0;
1376                     au1_avail_chroma[4] = 0;
1377                     au1_avail_chroma[6] = 0;
1378                 }
1379                 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1380                 {
1381                     au1_avail_chroma[1] = 0;
1382                     au1_avail_chroma[5] = 0;
1383                     au1_avail_chroma[7] = 0;
1384                 }
1385 
1386                 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1387                 {
1388                     au1_avail_chroma[2] = 0;
1389                     au1_avail_chroma[4] = 0;
1390                     au1_avail_chroma[5] = 0;
1391                 }
1392                 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1393                 {
1394                     au1_avail_chroma[3] = 0;
1395                     au1_avail_chroma[6] = 0;
1396                     au1_avail_chroma[7] = 0;
1397                 }
1398 
1399                 {
1400                     au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
1401                     au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
1402                     au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
1403                     au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
1404                     if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
1405                     {
1406                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1407                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1408                     }
1409 
1410                     if(chroma_yuv420sp_vu)
1411                     {
1412                         ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1413                                                                              src_strd,
1414                                                                              pu1_src_left_chroma,
1415                                                                              pu1_src_top_chroma,
1416                                                                              pu1_sao_src_chroma_top_left_ctb,
1417                                                                              au1_src_top_right,
1418                                                                              au1_sao_src_top_left_chroma_bot_left,
1419                                                                              au1_avail_chroma,
1420                                                                              ai1_offset_cr,
1421                                                                              ai1_offset_cb,
1422                                                                              sao_wd_chroma,
1423                                                                              sao_ht_chroma);
1424                     }
1425                     else
1426                     {
1427                         ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1428                                                                              src_strd,
1429                                                                              pu1_src_left_chroma,
1430                                                                              pu1_src_top_chroma,
1431                                                                              pu1_sao_src_chroma_top_left_ctb,
1432                                                                              au1_src_top_right,
1433                                                                              au1_sao_src_top_left_chroma_bot_left,
1434                                                                              au1_avail_chroma,
1435                                                                              ai1_offset_cb,
1436                                                                              ai1_offset_cr,
1437                                                                              sao_wd_chroma,
1438                                                                              sao_ht_chroma);
1439                     }
1440                 }
1441             }
1442         }
1443         else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1444         {
1445             for(row = 0; row < sao_ht_chroma; row++)
1446             {
1447                 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1448                 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1449             }
1450             pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1451             pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1452 
1453             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1454         }
1455 
1456         pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1457         pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
1458         ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1459     }
1460 
1461 
1462     /* Top CTB */
1463     if((ps_sao_ctxt->i4_ctb_y > 0))
1464     {
1465         WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1466         WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
1467         WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1468         WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1469 
1470         WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1471         WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1472         WORD32 au4_idx_t[8], idx_t;
1473 
1474         WORD32 remaining_cols;
1475 
1476         slice_header_t *ps_slice_hdr_top;
1477         {
1478             WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
1479                                         (ps_sao_ctxt->i4_ctb_x);
1480             ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx];
1481         }
1482 
1483         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1484         if(remaining_cols <= SAO_SHIFT_CTB)
1485         {
1486             sao_wd_luma += remaining_cols;
1487         }
1488         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
1489         if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1490         {
1491             sao_wd_chroma += remaining_cols;
1492         }
1493 
1494         pu1_src_luma -= (sao_ht_luma * src_strd);
1495         pu1_src_chroma -= (sao_ht_chroma * src_strd);
1496         ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1497         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1498         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1499         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1500         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
1501 
1502         if(0 != sao_wd_luma)
1503         {
1504             if(ps_slice_hdr_top->i1_slice_sao_luma_flag)
1505             {
1506                 if(0 == ps_sao->b3_y_type_idx)
1507                 {
1508                     /* Update left, top and top-left */
1509                     for(row = 0; row < sao_ht_luma; row++)
1510                     {
1511                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1512                     }
1513                     pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1514 
1515                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1516 
1517                 }
1518 
1519                 else if(1 == ps_sao->b3_y_type_idx)
1520                 {
1521                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1522                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1523                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1524                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1525 
1526                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1527                                                                               src_strd,
1528                                                                               pu1_src_left_luma,
1529                                                                               pu1_src_top_luma,
1530                                                                               pu1_sao_src_luma_top_left_ctb,
1531                                                                               ps_sao->b5_y_band_pos,
1532                                                                               ai1_offset_y,
1533                                                                               sao_wd_luma,
1534                                                                               sao_ht_luma
1535                                                                              );
1536                 }
1537 
1538                 else // if(2 <= ps_sao->b3_y_type_idx)
1539                 {
1540                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1541                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1542                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1543                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1544 
1545                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1546                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1547                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1548 
1549                     for(i = 0; i < 8; i++)
1550                     {
1551 
1552                         au4_ilf_across_tile_slice_enable[i] = 1;
1553                     }
1554                     /******************************************************************
1555                      * Derive the Top CTB's neighbor pixels' slice indices.
1556                      *
1557                      *               T_T
1558                      *          ____________
1559                      *         |    |       |
1560                      *         | T_L|  T    |T_R
1561                      *         |    | ______|____
1562                      *         |    |  T_D  |    |
1563                      *         |    |       |    |
1564                      *         |____|_______|    |
1565                      *              |            |
1566                      *              |            |
1567                      *              |____________|
1568                      *
1569                      *****************************************************************/
1570 
1571                     /*In case of slices*/
1572                     {
1573                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1574                         {
1575 
1576                             ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1577                             ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1578 
1579                             ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1580                             ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1581 
1582                             ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1583                             ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1584 
1585                             ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1586                             ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1587 
1588                             ctbx_t = ps_sao_ctxt->i4_ctb_x;
1589                             ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1590 
1591                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1592                             {
1593                                 /*Calculate neighbor ctb slice indices*/
1594                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1595                                 {
1596                                     au4_idx_t[0] = -1;
1597                                     au4_idx_t[6] = -1;
1598                                     au4_idx_t[4] = -1;
1599                                 }
1600                                 else
1601                                 {
1602                                     au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1603                                     au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1604                                 }
1605                                 idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1606                                 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1607                                 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1608                                 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1609 
1610                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1611                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1612                                 {
1613                                     au4_ilf_across_tile_slice_enable[4] = 0;
1614                                     au4_ilf_across_tile_slice_enable[6] = 0;
1615                                     au4_ilf_across_tile_slice_enable[0] = 0;
1616                                 }
1617                                 else
1618                                 {
1619                                     au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1620                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1621                                 }
1622 
1623 
1624 
1625                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1626                                 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1627                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1628                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1629                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1630 
1631                                 if(au4_idx_t[6] < idx_t)
1632                                 {
1633                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1634                                 }
1635 
1636                                 /*
1637                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1638                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
1639                                  */
1640 
1641                                 for(i = 0; i < 8; i++)
1642                                 {
1643                                     /*Sets the edges that lie on the slice/tile boundary*/
1644                                     if(au4_idx_t[i] != idx_t)
1645                                     {
1646                                         au1_tile_slice_boundary[i] = 1;
1647                                         /*Check for slice flag at such boundaries*/
1648                                     }
1649                                     else
1650                                     {
1651                                         au4_ilf_across_tile_slice_enable[i] = 1;
1652                                     }
1653                                 }
1654                                 /*Reset indices*/
1655                                 for(i = 0; i < 8; i++)
1656                                 {
1657                                     au4_idx_t[i] = 0;
1658                                 }
1659                             }
1660 
1661                             if(ps_pps->i1_tiles_enabled_flag)
1662                             {
1663                                 /* Calculate availability flags at tile boundary */
1664                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1665                                 {
1666                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1667                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1668                                     {
1669                                         /*Calculate neighbor ctb tile indices*/
1670                                         if(0 == ps_sao_ctxt->i4_ctb_x)
1671                                         {
1672                                             au4_idx_t[0] = -1;
1673                                             au4_idx_t[6] = -1;
1674                                             au4_idx_t[4] = -1;
1675                                         }
1676                                         else
1677                                         {
1678                                             au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1679                                             au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1680                                         }
1681                                         idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1682                                         au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1683                                         au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1684                                         au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1685 
1686                                         for(i = 0; i < 8; i++)
1687                                         {
1688                                             /*Sets the edges that lie on the tile boundary*/
1689                                             if(au4_idx_t[i] != idx_t)
1690                                             {
1691                                                 au1_tile_slice_boundary[i] |= 1;
1692                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1693                                             }
1694                                         }
1695                                     }
1696                                 }
1697                             }
1698 
1699                             for(i = 0; i < 8; i++)
1700                             {
1701                                 /*Sets the edges that lie on the slice/tile boundary*/
1702                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1703                                 {
1704                                     au1_avail_luma[i] = 0;
1705                                 }
1706                             }
1707                         }
1708                     }
1709 
1710 
1711                     if(0 == ps_sao_ctxt->i4_ctb_x)
1712                     {
1713                         au1_avail_luma[0] = 0;
1714                         au1_avail_luma[4] = 0;
1715                         au1_avail_luma[6] = 0;
1716                     }
1717 
1718                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1719                     {
1720                         au1_avail_luma[1] = 0;
1721                         au1_avail_luma[5] = 0;
1722                         au1_avail_luma[7] = 0;
1723                     }
1724 
1725                     if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1726                     {
1727                         au1_avail_luma[2] = 0;
1728                         au1_avail_luma[4] = 0;
1729                         au1_avail_luma[5] = 0;
1730                     }
1731 
1732                     if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1733                     {
1734                         au1_avail_luma[3] = 0;
1735                         au1_avail_luma[6] = 0;
1736                         au1_avail_luma[7] = 0;
1737                     }
1738 
1739                     {
1740                         au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1741                         u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1742                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1743                                                                           src_strd,
1744                                                                           pu1_src_left_luma,
1745                                                                           pu1_src_top_luma,
1746                                                                           pu1_sao_src_luma_top_left_ctb,
1747                                                                           au1_src_top_right,
1748                                                                           &u1_sao_src_top_left_luma_bot_left,
1749                                                                           au1_avail_luma,
1750                                                                           ai1_offset_y,
1751                                                                           sao_wd_luma,
1752                                                                           sao_ht_luma);
1753                     }
1754                 }
1755             }
1756             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1757             {
1758                 /* Update left, top and top-left */
1759                 for(row = 0; row < sao_ht_luma; row++)
1760                 {
1761                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1762                 }
1763                 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1764 
1765                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1766             }
1767         }
1768 
1769         if(0 != sao_wd_chroma)
1770         {
1771             if(ps_slice_hdr_top->i1_slice_sao_chroma_flag)
1772             {
1773                 if(0 == ps_sao->b3_cb_type_idx)
1774                 {
1775 
1776                     for(row = 0; row < sao_ht_chroma; row++)
1777                     {
1778                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1779                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1780                     }
1781                     pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1782                     pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1783 
1784                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1785 
1786                 }
1787 
1788                 else if(1 == ps_sao->b3_cb_type_idx)
1789                 {
1790                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1791                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1792                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1793                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1794 
1795                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1796                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1797                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1798                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1799 
1800                     if(chroma_yuv420sp_vu)
1801                     {
1802                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1803                                                                                     src_strd,
1804                                                                                     pu1_src_left_chroma,
1805                                                                                     pu1_src_top_chroma,
1806                                                                                     pu1_sao_src_chroma_top_left_ctb,
1807                                                                                     ps_sao->b5_cr_band_pos,
1808                                                                                     ps_sao->b5_cb_band_pos,
1809                                                                                     ai1_offset_cr,
1810                                                                                     ai1_offset_cb,
1811                                                                                     sao_wd_chroma,
1812                                                                                     sao_ht_chroma
1813                                                                                    );
1814                     }
1815                     else
1816                     {
1817                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1818                                                                                     src_strd,
1819                                                                                     pu1_src_left_chroma,
1820                                                                                     pu1_src_top_chroma,
1821                                                                                     pu1_sao_src_chroma_top_left_ctb,
1822                                                                                     ps_sao->b5_cb_band_pos,
1823                                                                                     ps_sao->b5_cr_band_pos,
1824                                                                                     ai1_offset_cb,
1825                                                                                     ai1_offset_cr,
1826                                                                                     sao_wd_chroma,
1827                                                                                     sao_ht_chroma
1828                                                                                    );
1829                     }
1830                 }
1831                 else // if(2 <= ps_sao->b3_cb_type_idx)
1832                 {
1833                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1834                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1835                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1836                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1837 
1838                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1839                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1840                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1841                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1842 
1843                     for(i = 0; i < 8; i++)
1844                     {
1845                         au1_avail_chroma[i] = 255;
1846                         au1_tile_slice_boundary[i] = 0;
1847                         au4_idx_t[i] = 0;
1848                         au4_ilf_across_tile_slice_enable[i] = 1;
1849                     }
1850 
1851                     {
1852                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1853                         {
1854                             ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1855                             ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1856 
1857                             ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1858                             ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1859 
1860                             ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1861                             ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1862 
1863                             ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1864                             ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1865 
1866                             ctbx_t = ps_sao_ctxt->i4_ctb_x;
1867                             ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1868 
1869                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1870                             {
1871                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1872                                 {
1873                                     au4_idx_t[0] = -1;
1874                                     au4_idx_t[6] = -1;
1875                                     au4_idx_t[4] = -1;
1876                                 }
1877                                 else
1878                                 {
1879                                     au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1880                                     au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1881                                 }
1882                                 idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1883                                 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1884                                 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1885                                 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1886 
1887                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1888 
1889                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1890                                 {
1891                                     au4_ilf_across_tile_slice_enable[4] = 0;
1892                                     au4_ilf_across_tile_slice_enable[6] = 0;
1893                                     au4_ilf_across_tile_slice_enable[0] = 0;
1894                                 }
1895                                 else
1896                                 {
1897                                     au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1898                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1899                                 }
1900 
1901                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1902                                 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1903                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1904                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1905                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1906 
1907                                 if(idx_t > au4_idx_t[6])
1908                                 {
1909                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1910                                 }
1911 
1912                                 /*
1913                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1914                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
1915                                  */
1916                                 for(i = 0; i < 8; i++)
1917                                 {
1918                                     /*Sets the edges that lie on the slice/tile boundary*/
1919                                     if(au4_idx_t[i] != idx_t)
1920                                     {
1921                                         au1_tile_slice_boundary[i] = 1;
1922                                     }
1923                                     else
1924                                     {
1925                                         /*Indicates that the neighbour belongs to same/dependent slice*/
1926                                         au4_ilf_across_tile_slice_enable[i] = 1;
1927                                     }
1928                                 }
1929                                 /*Reset indices*/
1930                                 for(i = 0; i < 8; i++)
1931                                 {
1932                                     au4_idx_t[i] = 0;
1933                                 }
1934                             }
1935                             if(ps_pps->i1_tiles_enabled_flag)
1936                             {
1937                                 /* Calculate availability flags at tile boundary */
1938                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1939                                 {
1940                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1941                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1942                                     {
1943                                         /*Calculate neighbor ctb tile indices*/
1944                                         if(0 == ps_sao_ctxt->i4_ctb_x)
1945                                         {
1946                                             au4_idx_t[0] = -1;
1947                                             au4_idx_t[6] = -1;
1948                                             au4_idx_t[4] = -1;
1949                                         }
1950                                         else
1951                                         {
1952                                             au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1953                                             au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1954                                         }
1955                                         idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1956                                         au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1957                                         au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1958                                         au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1959 
1960                                         for(i = 0; i < 8; i++)
1961                                         {
1962                                             /*Sets the edges that lie on the tile boundary*/
1963                                             if(au4_idx_t[i] != idx_t)
1964                                             {
1965                                                 au1_tile_slice_boundary[i] |= 1;
1966                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1967                                             }
1968                                         }
1969                                     }
1970                                 }
1971                             }
1972                             for(i = 0; i < 8; i++)
1973                             {
1974                                 /*Sets the edges that lie on the slice/tile boundary*/
1975                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1976                                 {
1977                                     au1_avail_chroma[i] = 0;
1978                                 }
1979                             }
1980 
1981                         }
1982                     }
1983                     if(0 == ps_sao_ctxt->i4_ctb_x)
1984                     {
1985                         au1_avail_chroma[0] = 0;
1986                         au1_avail_chroma[4] = 0;
1987                         au1_avail_chroma[6] = 0;
1988                     }
1989 
1990                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
1991                     {
1992                         au1_avail_chroma[1] = 0;
1993                         au1_avail_chroma[5] = 0;
1994                         au1_avail_chroma[7] = 0;
1995                     }
1996 
1997                     if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1998                     {
1999                         au1_avail_chroma[2] = 0;
2000                         au1_avail_chroma[4] = 0;
2001                         au1_avail_chroma[5] = 0;
2002                     }
2003 
2004                     if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
2005                     {
2006                         au1_avail_chroma[3] = 0;
2007                         au1_avail_chroma[6] = 0;
2008                         au1_avail_chroma[7] = 0;
2009                     }
2010 
2011                     {
2012                         au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
2013                         au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
2014                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2015                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2016 
2017                         if(chroma_yuv420sp_vu)
2018                         {
2019                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2020                                                                                  src_strd,
2021                                                                                  pu1_src_left_chroma,
2022                                                                                  pu1_src_top_chroma,
2023                                                                                  pu1_sao_src_chroma_top_left_ctb,
2024                                                                                  au1_src_top_right,
2025                                                                                  au1_sao_src_top_left_chroma_bot_left,
2026                                                                                  au1_avail_chroma,
2027                                                                                  ai1_offset_cr,
2028                                                                                  ai1_offset_cb,
2029                                                                                  sao_wd_chroma,
2030                                                                                  sao_ht_chroma);
2031                         }
2032                         else
2033                         {
2034                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2035                                                                                  src_strd,
2036                                                                                  pu1_src_left_chroma,
2037                                                                                  pu1_src_top_chroma,
2038                                                                                  pu1_sao_src_chroma_top_left_ctb,
2039                                                                                  au1_src_top_right,
2040                                                                                  au1_sao_src_top_left_chroma_bot_left,
2041                                                                                  au1_avail_chroma,
2042                                                                                  ai1_offset_cb,
2043                                                                                  ai1_offset_cr,
2044                                                                                  sao_wd_chroma,
2045                                                                                  sao_ht_chroma);
2046                         }
2047                     }
2048 
2049                 }
2050             }
2051             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2052             {
2053                 for(row = 0; row < sao_ht_chroma; row++)
2054                 {
2055                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2056                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2057                 }
2058                 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2059                 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2060 
2061                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2062             }
2063         }
2064 
2065         pu1_src_luma += sao_ht_luma * src_strd;
2066         pu1_src_chroma += sao_ht_chroma * src_strd;
2067         ps_sao += (ps_sps->i2_pic_wd_in_ctb);
2068     }
2069 
2070     /* Left CTB */
2071     if(ps_sao_ctxt->i4_ctb_x > 0)
2072     {
2073         WORD32 sao_wd_luma = SAO_SHIFT_CTB;
2074         WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
2075         WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2076         WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2077 
2078         WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
2079         WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
2080         WORD32 au4_idx_l[8], idx_l;
2081 
2082         WORD32 remaining_rows;
2083         slice_header_t *ps_slice_hdr_left;
2084         {
2085             WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb +
2086                                         (ps_sao_ctxt->i4_ctb_x - 1);
2087             ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx];
2088         }
2089 
2090         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2091         if(remaining_rows <= SAO_SHIFT_CTB)
2092         {
2093             sao_ht_luma += remaining_rows;
2094         }
2095         remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2096         if(remaining_rows <= SAO_SHIFT_CTB)
2097         {
2098             sao_ht_chroma += remaining_rows;
2099         }
2100 
2101         pu1_src_luma -= sao_wd_luma;
2102         pu1_src_chroma -= sao_wd_chroma;
2103         ps_sao -= 1;
2104         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
2105         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
2106         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2107         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2108 
2109 
2110         if(0 != sao_ht_luma)
2111         {
2112             if(ps_slice_hdr_left->i1_slice_sao_luma_flag)
2113             {
2114                 if(0 == ps_sao->b3_y_type_idx)
2115                 {
2116                     /* Update left, top and top-left */
2117                     for(row = 0; row < sao_ht_luma; row++)
2118                     {
2119                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2120                     }
2121                     /*Update in next location*/
2122                     pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2123 
2124                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2125 
2126                 }
2127 
2128                 else if(1 == ps_sao->b3_y_type_idx)
2129                 {
2130                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2131                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2132                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2133                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2134 
2135                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2136                                                                               src_strd,
2137                                                                               pu1_src_left_luma,
2138                                                                               pu1_src_top_luma,
2139                                                                               pu1_sao_src_top_left_luma_curr_ctb,
2140                                                                               ps_sao->b5_y_band_pos,
2141                                                                               ai1_offset_y,
2142                                                                               sao_wd_luma,
2143                                                                               sao_ht_luma
2144                                                                              );
2145                 }
2146 
2147                 else // if(2 <= ps_sao->b3_y_type_idx)
2148                 {
2149                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2150                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2151                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2152                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2153 
2154                     for(i = 0; i < 8; i++)
2155                     {
2156                         au1_avail_luma[i] = 255;
2157                         au1_tile_slice_boundary[i] = 0;
2158                         au4_idx_l[i] = 0;
2159                         au4_ilf_across_tile_slice_enable[i] = 1;
2160                     }
2161                     /******************************************************************
2162                      * Derive the Left CTB's neighbour pixel's slice indices.
2163                      *
2164                      *
2165                      *          ____________
2166                      *         |    |       |
2167                      *         | L_T|       |
2168                      *         |____|_______|____
2169                      *         |    |       |    |
2170                      *     L_L |  L |  L_R  |    |
2171                      *         |____|_______|    |
2172                      *              |            |
2173                      *          L_D |            |
2174                      *              |____________|
2175                      *
2176                      *****************************************************************/
2177 
2178                     /*In case of slices or tiles*/
2179                     {
2180                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2181                         {
2182                             ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2183                             ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2184 
2185                             ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2186                             ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2187 
2188                             ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2189                             ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2190 
2191                             ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2192                             ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2193 
2194                             ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2195                             ctby_l = ps_sao_ctxt->i4_ctb_y;
2196 
2197                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2198                             {
2199                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2200                                 {
2201                                     au4_idx_l[2] = -1;
2202                                     au4_idx_l[4] = -1;
2203                                     au4_idx_l[5] = -1;
2204                                 }
2205                                 else
2206                                 {
2207                                     au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2208                                     au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2209                                 }
2210                                 idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2211                                 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2212                                 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2213                                 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2214 
2215                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
2216                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2217                                 {
2218                                     au4_ilf_across_tile_slice_enable[2] = 0;
2219                                     au4_ilf_across_tile_slice_enable[4] = 0;
2220                                     au4_ilf_across_tile_slice_enable[5] = 0;
2221                                 }
2222                                 else
2223                                 {
2224                                     au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2225                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2226 
2227                                 }
2228                                 //TODO: ILF flag checks for [0] and [6] are missing.
2229                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2230                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2231                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2232 
2233                                 if(idx_l < au4_idx_l[5])
2234                                 {
2235                                     au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2236                                 }
2237 
2238                                 /*
2239                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2240                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
2241                                  */
2242                                 for(i = 0; i < 8; i++)
2243                                 {
2244                                     /*Sets the edges that lie on the slice/tile boundary*/
2245                                     if(au4_idx_l[i] != idx_l)
2246                                     {
2247                                         au1_tile_slice_boundary[i] = 1;
2248                                     }
2249                                     else
2250                                     {
2251                                         au4_ilf_across_tile_slice_enable[i] = 1;
2252                                     }
2253                                 }
2254                                 /*Reset indices*/
2255                                 for(i = 0; i < 8; i++)
2256                                 {
2257                                     au4_idx_l[i] = 0;
2258                                 }
2259                             }
2260 
2261                             if(ps_pps->i1_tiles_enabled_flag)
2262                             {
2263                                 /* Calculate availability flags at tile boundary */
2264                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2265                                 {
2266                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2267                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2268                                     {
2269                                         if(0 == ps_sao_ctxt->i4_ctb_y)
2270                                         {
2271                                             au4_idx_l[2] = -1;
2272                                             au4_idx_l[4] = -1;
2273                                             au4_idx_l[5] = -1;
2274                                         }
2275                                         else
2276                                         {
2277                                             au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2278                                             au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2279                                         }
2280 
2281                                         idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2282                                         au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2283                                         au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2284                                         au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2285 
2286                                         for(i = 0; i < 8; i++)
2287                                         {
2288                                             /*Sets the edges that lie on the slice/tile boundary*/
2289                                             if(au4_idx_l[i] != idx_l)
2290                                             {
2291                                                 au1_tile_slice_boundary[i] |= 1;
2292                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
2293                                             }
2294                                         }
2295                                     }
2296                                 }
2297                             }
2298 
2299                             for(i = 0; i < 8; i++)
2300                             {
2301                                 /*Mark a neighbor unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
2302                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2303                                 {
2304                                     au1_avail_luma[i] = 0;
2305                                 }
2306                             }
2307                         }
2308                     }
2309                     if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
2310                     {
2311                         au1_avail_luma[0] = 0;
2312                         au1_avail_luma[4] = 0;
2313                         au1_avail_luma[6] = 0;
2314                     }
2315                     if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2316                     {
2317                         au1_avail_luma[1] = 0;
2318                         au1_avail_luma[5] = 0;
2319                         au1_avail_luma[7] = 0;
2320                     }
2321 
2322                     if(0 == ps_sao_ctxt->i4_ctb_y)
2323                     {
2324                         au1_avail_luma[2] = 0;
2325                         au1_avail_luma[4] = 0;
2326                         au1_avail_luma[5] = 0;
2327                     }
2328 
2329                     if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2330                     {
2331                         au1_avail_luma[3] = 0;
2332                         au1_avail_luma[6] = 0;
2333                         au1_avail_luma[7] = 0;
2334                     }
2335 
2336                     {
2337                         au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
2338                         u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
2339                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2340                                                                           src_strd,
2341                                                                           pu1_src_left_luma,
2342                                                                           pu1_src_top_luma,
2343                                                                           pu1_sao_src_top_left_luma_curr_ctb,
2344                                                                           au1_src_top_right,
2345                                                                           &u1_sao_src_top_left_luma_bot_left,
2346                                                                           au1_avail_luma,
2347                                                                           ai1_offset_y,
2348                                                                           sao_wd_luma,
2349                                                                           sao_ht_luma);
2350                     }
2351 
2352                 }
2353             }
2354             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2355             {
2356                 /* Update left, top and top-left */
2357                 for(row = 0; row < sao_ht_luma; row++)
2358                 {
2359                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2360                 }
2361                 /*Update in next location*/
2362                 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2363 
2364                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2365             }
2366         }
2367 
2368         if(0 != sao_ht_chroma)
2369         {
2370             if(ps_slice_hdr_left->i1_slice_sao_chroma_flag)
2371             {
2372                 if(0 == ps_sao->b3_cb_type_idx)
2373                 {
2374                     for(row = 0; row < sao_ht_chroma; row++)
2375                     {
2376                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2377                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2378                     }
2379                     pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2380                     pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2381 
2382                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2383                 }
2384 
2385                 else if(1 == ps_sao->b3_cb_type_idx)
2386                 {
2387                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2388                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2389                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2390                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2391 
2392                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2393                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2394                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2395                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2396 
2397                     if(chroma_yuv420sp_vu)
2398                     {
2399                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2400                                                                                     src_strd,
2401                                                                                     pu1_src_left_chroma,
2402                                                                                     pu1_src_top_chroma,
2403                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
2404                                                                                     ps_sao->b5_cr_band_pos,
2405                                                                                     ps_sao->b5_cb_band_pos,
2406                                                                                     ai1_offset_cr,
2407                                                                                     ai1_offset_cb,
2408                                                                                     sao_wd_chroma,
2409                                                                                     sao_ht_chroma
2410                                                                                    );
2411                     }
2412                     else
2413                     {
2414                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2415                                                                                     src_strd,
2416                                                                                     pu1_src_left_chroma,
2417                                                                                     pu1_src_top_chroma,
2418                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
2419                                                                                     ps_sao->b5_cb_band_pos,
2420                                                                                     ps_sao->b5_cr_band_pos,
2421                                                                                     ai1_offset_cb,
2422                                                                                     ai1_offset_cr,
2423                                                                                     sao_wd_chroma,
2424                                                                                     sao_ht_chroma
2425                                                                                    );
2426                     }
2427                 }
2428 
2429                 else // if(2 <= ps_sao->b3_cb_type_idx)
2430                 {
2431                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2432                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2433                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2434                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2435 
2436                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2437                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2438                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2439                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2440 
2441                     for(i = 0; i < 8; i++)
2442                     {
2443                         au1_avail_chroma[i] = 255;
2444                         au1_tile_slice_boundary[i] = 0;
2445                         au4_idx_l[i] = 0;
2446                         au4_ilf_across_tile_slice_enable[i] = 1;
2447                     }
2448                     /*In case of slices or tiles*/
2449                     {
2450                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2451                         {
2452                             ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2453                             ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2454 
2455                             ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2456                             ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2457 
2458                             ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2459                             ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2460 
2461                             ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2462                             ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2463 
2464                             ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2465                             ctby_l = ps_sao_ctxt->i4_ctb_y;
2466 
2467                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2468                             {
2469                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2470                                 {
2471                                     au4_idx_l[2] = -1;
2472                                     au4_idx_l[4] = -1;
2473                                     au4_idx_l[5] = -1;
2474                                 }
2475                                 else
2476                                 {
2477                                     au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2478                                     au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2479                                 }
2480                                 idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2481                                 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2482                                 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2483                                 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2484 
2485                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
2486                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2487                                 {
2488                                     au4_ilf_across_tile_slice_enable[2] = 0;
2489                                     au4_ilf_across_tile_slice_enable[4] = 0;
2490                                     au4_ilf_across_tile_slice_enable[5] = 0;
2491                                 }
2492                                 else
2493                                 {
2494                                     au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2495                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2496                                 }
2497 
2498                                 if(au4_idx_l[5] > idx_l)
2499                                 {
2500                                     au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2501                                 }
2502 
2503                                 //  au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2504                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2505                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2506                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2507                                 /*
2508                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2509                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
2510                                  */
2511                                 for(i = 0; i < 8; i++)
2512                                 {
2513                                     /*Sets the edges that lie on the slice/tile boundary*/
2514                                     if(au4_idx_l[i] != idx_l)
2515                                     {
2516                                         au1_tile_slice_boundary[i] = 1;
2517                                     }
2518                                     else
2519                                     {
2520                                         au4_ilf_across_tile_slice_enable[i] = 1;
2521                                     }
2522                                 }
2523                                 /*Reset indices*/
2524                                 for(i = 0; i < 8; i++)
2525                                 {
2526                                     au4_idx_l[i] = 0;
2527                                 }
2528                             }
2529                             if(ps_pps->i1_tiles_enabled_flag)
2530                             {
2531                                 /* Calculate availability flags at tile boundary */
2532                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2533                                 {
2534                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2535                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2536                                     {
2537                                         if(0 == ps_sao_ctxt->i4_ctb_y)
2538                                         {
2539                                             au4_idx_l[2] = -1;
2540                                             au4_idx_l[4] = -1;
2541                                             au4_idx_l[5] = -1;
2542                                         }
2543                                         else
2544                                         {
2545                                             au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2546                                             au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2547                                         }
2548 
2549                                         idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2550                                         au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2551                                         au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2552                                         au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2553 
2554                                         for(i = 0; i < 8; i++)
2555                                         {
2556                                             /*Sets the edges that lie on the slice/tile boundary*/
2557                                             if(au4_idx_l[i] != idx_l)
2558                                             {
2559                                                 au1_tile_slice_boundary[i] |= 1;
2560                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2561                                             }
2562                                         }
2563                                     }
2564                                 }
2565                             }
2566                             for(i = 0; i < 8; i++)
2567                             {
2568                                 /*Mark a neighbor unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
2569                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2570                                 {
2571                                     au1_avail_chroma[i] = 0;
2572                                 }
2573                             }
2574                         }
2575                     }
2576                     if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
2577                     {
2578                         au1_avail_chroma[0] = 0;
2579                         au1_avail_chroma[4] = 0;
2580                         au1_avail_chroma[6] = 0;
2581                     }
2582 
2583                     if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2584                     {
2585                         au1_avail_chroma[1] = 0;
2586                         au1_avail_chroma[5] = 0;
2587                         au1_avail_chroma[7] = 0;
2588                     }
2589 
2590                     if(0 == ps_sao_ctxt->i4_ctb_y)
2591                     {
2592                         au1_avail_chroma[2] = 0;
2593                         au1_avail_chroma[4] = 0;
2594                         au1_avail_chroma[5] = 0;
2595                     }
2596 
2597                     if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
2598                     {
2599                         au1_avail_chroma[3] = 0;
2600                         au1_avail_chroma[6] = 0;
2601                         au1_avail_chroma[7] = 0;
2602                     }
2603 
2604                     {
2605                         au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2606                         au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2607                         au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2608                         au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2609                         //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2610                         //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2611                         if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2612                         {
2613                             au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
2614                             au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
2615                         }
2616 
2617 
2618                         if(chroma_yuv420sp_vu)
2619                         {
2620                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2621                                                                                  src_strd,
2622                                                                                  pu1_src_left_chroma,
2623                                                                                  pu1_src_top_chroma,
2624                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
2625                                                                                  au1_src_top_right,
2626                                                                                  au1_src_bot_left,
2627                                                                                  au1_avail_chroma,
2628                                                                                  ai1_offset_cr,
2629                                                                                  ai1_offset_cb,
2630                                                                                  sao_wd_chroma,
2631                                                                                  sao_ht_chroma);
2632                         }
2633                         else
2634                         {
2635                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2636                                                                                  src_strd,
2637                                                                                  pu1_src_left_chroma,
2638                                                                                  pu1_src_top_chroma,
2639                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
2640                                                                                  au1_src_top_right,
2641                                                                                  au1_src_bot_left,
2642                                                                                  au1_avail_chroma,
2643                                                                                  ai1_offset_cb,
2644                                                                                  ai1_offset_cr,
2645                                                                                  sao_wd_chroma,
2646                                                                                  sao_ht_chroma);
2647                         }
2648                     }
2649 
2650                 }
2651             }
2652             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2653             {
2654                 for(row = 0; row < sao_ht_chroma; row++)
2655                 {
2656                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2657                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2658                 }
2659                 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2660                 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2661 
2662                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2663             }
2664 
2665         }
2666         pu1_src_luma += sao_wd_luma;
2667         pu1_src_chroma += sao_wd_chroma;
2668         ps_sao += 1;
2669     }
2670 
2671 
2672     /* Current CTB */
2673     {
2674         WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2675         WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
2676         WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2677         WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2678         WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2679         WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2680         WORD32 au4_idx_c[8], idx_c;
2681 
2682         WORD32 remaining_rows;
2683         WORD32 remaining_cols;
2684 
2685         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2686         if(remaining_cols <= SAO_SHIFT_CTB)
2687         {
2688             sao_wd_luma += remaining_cols;
2689         }
2690         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
2691         if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2692         {
2693             sao_wd_chroma += remaining_cols;
2694         }
2695 
2696         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2697         if(remaining_rows <= SAO_SHIFT_CTB)
2698         {
2699             sao_ht_luma += remaining_rows;
2700         }
2701         remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2702         if(remaining_rows <= SAO_SHIFT_CTB)
2703         {
2704             sao_ht_chroma += remaining_rows;
2705         }
2706 
2707         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2708         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2709         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2710         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2711 
2712         if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2713         {
2714             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2715             {
2716                 if(0 == ps_sao->b3_y_type_idx)
2717                 {
2718                     /* Update left, top and top-left */
2719                     for(row = 0; row < sao_ht_luma; row++)
2720                     {
2721                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2722                     }
2723                     pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2724 
2725                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2726 
2727                     pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2728 
2729                 }
2730 
2731                 else if(1 == ps_sao->b3_y_type_idx)
2732                 {
2733                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2734                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2735                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2736                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2737 
2738                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2739                                                                               src_strd,
2740                                                                               pu1_src_left_luma,
2741                                                                               pu1_src_top_luma,
2742                                                                               pu1_sao_src_top_left_luma_curr_ctb,
2743                                                                               ps_sao->b5_y_band_pos,
2744                                                                               ai1_offset_y,
2745                                                                               sao_wd_luma,
2746                                                                               sao_ht_luma
2747                                                                              );
2748                 }
2749 
2750                 else // if(2 <= ps_sao->b3_y_type_idx)
2751                 {
2752                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2753                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2754                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2755                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2756 
2757                     for(i = 0; i < 8; i++)
2758                     {
2759                         au1_avail_luma[i] = 255;
2760                         au1_tile_slice_boundary[i] = 0;
2761                         au4_idx_c[i] = 0;
2762                         au4_ilf_across_tile_slice_enable[i] = 1;
2763                     }
2764                     /******************************************************************
                     * Derive the slice indices of the current CTB's neighbouring pixels.
2766                      *
2767                      *
2768                      *          ____________
2769                      *         |    |       |
2770                      *         |    | C_T   |
2771                      *         |____|_______|____
2772                      *         |    |       |    |
2773                      *         | C_L|   C   | C_R|
2774                      *         |____|_______|    |
2775                      *              |  C_D       |
2776                      *              |            |
2777                      *              |____________|
2778                      *
2779                      *****************************************************************/
2780 
2781                     /*In case of slices*/
2782                     {
2783                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2784                         {
2785                             ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2786                             ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2787 
2788                             ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2789                             ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2790 
2791                             ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2792                             ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2793 
2794                             ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
2795                             ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
2796 
2797                             ctbx_c = ps_sao_ctxt->i4_ctb_x;
2798                             ctby_c = ps_sao_ctxt->i4_ctb_y;
2799 
2800                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2801                             {
2802                                 if(0 == ps_sao_ctxt->i4_ctb_x)
2803                                 {
2804                                     au4_idx_c[6] = -1;
2805                                     au4_idx_c[0] = -1;
2806                                     au4_idx_c[4] = -1;
2807                                 }
2808                                 else
2809                                 {
2810                                     au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2811                                 }
2812 
2813                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2814                                 {
2815                                     au4_idx_c[2] = -1;
2816                                     au4_idx_c[5] = -1;
2817                                     au4_idx_c[4] = -1;
2818                                 }
2819                                 else
2820                                 {
2821                                     au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2822                                     au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2823                                 }
2824                                 idx_c   = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2825                                 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2826                                 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2827 
2828                                 if(0 == ps_sao_ctxt->i4_ctb_x)
2829                                 {
2830                                     au4_ilf_across_tile_slice_enable[6] = 0;
2831                                     au4_ilf_across_tile_slice_enable[0] = 0;
2832                                     au4_ilf_across_tile_slice_enable[4] = 0;
2833                                 }
2834                                 else
2835                                 {
2836                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2837                                     au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2838                                 }
2839                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2840                                 {
2841                                     au4_ilf_across_tile_slice_enable[2] = 0;
2842                                     au4_ilf_across_tile_slice_enable[4] = 0;
2843                                     au4_ilf_across_tile_slice_enable[5] = 0;
2844                                 }
2845                                 else
2846                                 {
2847                                     au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2848                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2849                                 }
2850                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2851                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2852                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2853 
2854                                 if(au4_idx_c[6] < idx_c)
2855                                 {
2856                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2857                                 }
2858 
2859                                 /*
2860                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2861                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
2862                                  */
2863                                 for(i = 0; i < 8; i++)
2864                                 {
2865                                     /*Sets the edges that lie on the slice/tile boundary*/
2866                                     if(au4_idx_c[i] != idx_c)
2867                                     {
2868                                         au1_tile_slice_boundary[i] = 1;
2869                                     }
2870                                     else
2871                                     {
2872                                         au4_ilf_across_tile_slice_enable[i] = 1;
2873                                     }
2874                                 }
2875                                 /*Reset indices*/
2876                                 for(i = 0; i < 8; i++)
2877                                 {
2878                                     au4_idx_c[i] = 0;
2879                                 }
2880                             }
2881 
2882                             if(ps_pps->i1_tiles_enabled_flag)
2883                             {
                                /* Calculate availability flags at tile boundary */
2885                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2886                                 {
2887                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2888                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2889                                     {
2890                                         if(0 == ps_sao_ctxt->i4_ctb_x)
2891                                         {
2892                                             au4_idx_c[6] = -1;
2893                                             au4_idx_c[0] = -1;
2894                                             au4_idx_c[4] = -1;
2895                                         }
2896                                         else
2897                                         {
2898                                             au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2899                                         }
2900 
2901                                         if(0 == ps_sao_ctxt->i4_ctb_y)
2902                                         {
2903                                             au4_idx_c[2] = -1;
2904                                             au4_idx_c[5] = -1;
2905                                             au4_idx_c[4] = -1;
2906                                         }
2907                                         else
2908                                         {
2909                                             au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2910                                             au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2911                                         }
2912                                         idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2913                                         au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2914                                         au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2915 
2916                                         for(i = 0; i < 8; i++)
2917                                         {
2918                                             /*Sets the edges that lie on the slice/tile boundary*/
2919                                             if(au4_idx_c[i] != idx_c)
2920                                             {
2921                                                 au1_tile_slice_boundary[i] |= 1;
2922                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2923                                             }
2924                                         }
2925                                     }
2926                                 }
2927                             }
2928 
2929                             for(i = 0; i < 8; i++)
2930                             {
                                /* Mark a neighbour unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled */
2932                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2933                                 {
2934                                     au1_avail_luma[i] = 0;
2935                                 }
2936                             }
2937 
2938                         }
2939                     }
2940                     if(0 == ps_sao_ctxt->i4_ctb_x)
2941                     {
2942                         au1_avail_luma[0] = 0;
2943                         au1_avail_luma[4] = 0;
2944                         au1_avail_luma[6] = 0;
2945                     }
2946 
2947                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2948                     {
2949                         au1_avail_luma[1] = 0;
2950                         au1_avail_luma[5] = 0;
2951                         au1_avail_luma[7] = 0;
2952                     }
2953 
2954                     if(0 == ps_sao_ctxt->i4_ctb_y)
2955                     {
2956                         au1_avail_luma[2] = 0;
2957                         au1_avail_luma[4] = 0;
2958                         au1_avail_luma[5] = 0;
2959                     }
2960 
2961                     if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2962                     {
2963                         au1_avail_luma[3] = 0;
2964                         au1_avail_luma[6] = 0;
2965                         au1_avail_luma[7] = 0;
2966                     }
2967 
2968                     {
2969                         au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2970                         u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2971 
2972                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2973                                                                           src_strd,
2974                                                                           pu1_src_left_luma,
2975                                                                           pu1_src_top_luma,
2976                                                                           pu1_sao_src_top_left_luma_curr_ctb,
2977                                                                           au1_src_top_right,
2978                                                                           &u1_sao_src_top_left_luma_bot_left,
2979                                                                           au1_avail_luma,
2980                                                                           ai1_offset_y,
2981                                                                           sao_wd_luma,
2982                                                                           sao_ht_luma);
2983                     }
2984                     pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2985                     pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2986                 }
2987             }
2988             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2989             {
2990                 /* Update left, top and top-left */
2991                 for(row = 0; row < sao_ht_luma; row++)
2992                 {
2993                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2994                 }
2995                 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2996 
2997                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2998 
2999                 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
3000             }
3001         }
3002 
3003         if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
3004         {
3005             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
3006             {
3007                 if(0 == ps_sao->b3_cb_type_idx)
3008                 {
3009                     for(row = 0; row < sao_ht_chroma; row++)
3010                     {
3011                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3012                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3013                     }
3014                     pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3015                     pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3016 
3017                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3018 
3019                     pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3020                     pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3021                 }
3022 
3023                 else if(1 == ps_sao->b3_cb_type_idx)
3024                 {
3025                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3026                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3027                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3028                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3029 
3030                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3031                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3032                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3033                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3034 
3035                     if(chroma_yuv420sp_vu)
3036                     {
3037                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3038                                                                                     src_strd,
3039                                                                                     pu1_src_left_chroma,
3040                                                                                     pu1_src_top_chroma,
3041                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
3042                                                                                     ps_sao->b5_cr_band_pos,
3043                                                                                     ps_sao->b5_cb_band_pos,
3044                                                                                     ai1_offset_cr,
3045                                                                                     ai1_offset_cb,
3046                                                                                     sao_wd_chroma,
3047                                                                                     sao_ht_chroma
3048                                                                                    );
3049                     }
3050                     else
3051                     {
3052                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3053                                                                                     src_strd,
3054                                                                                     pu1_src_left_chroma,
3055                                                                                     pu1_src_top_chroma,
3056                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
3057                                                                                     ps_sao->b5_cb_band_pos,
3058                                                                                     ps_sao->b5_cr_band_pos,
3059                                                                                     ai1_offset_cb,
3060                                                                                     ai1_offset_cr,
3061                                                                                     sao_wd_chroma,
3062                                                                                     sao_ht_chroma
3063                                                                                    );
3064                     }
3065                 }
3066 
3067                 else // if(2 <= ps_sao->b3_cb_type_idx)
3068                 {
3069                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3070                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3071                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3072                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3073 
3074                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3075                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3076                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3077                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3078 
3079                     for(i = 0; i < 8; i++)
3080                     {
3081                         au1_avail_chroma[i] = 255;
3082                         au1_tile_slice_boundary[i] = 0;
3083                         au4_idx_c[i] = 0;
3084                         au4_ilf_across_tile_slice_enable[i] = 1;
3085                     }
3086                     {
3087                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3088                         {
3089                             ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
3090                             ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
3091 
3092                             ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
3093                             ctby_c_l = ps_sao_ctxt->i4_ctb_y;
3094 
3095                             ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
3096                             ctby_c_r = ps_sao_ctxt->i4_ctb_y;
3097 
3098                             ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
3099                             ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
3100 
3101                             ctbx_c = ps_sao_ctxt->i4_ctb_x;
3102                             ctby_c = ps_sao_ctxt->i4_ctb_y;
3103 
3104                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
3105                             {
3106                                 if(0 == ps_sao_ctxt->i4_ctb_x)
3107                                 {
3108                                     au4_idx_c[0] = -1;
3109                                     au4_idx_c[4] = -1;
3110                                     au4_idx_c[6] = -1;
3111                                 }
3112                                 else
3113                                 {
3114                                     au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3115                                 }
3116 
3117                                 if(0 == ps_sao_ctxt->i4_ctb_y)
3118                                 {
3119                                     au4_idx_c[2] = -1;
3120                                     au4_idx_c[4] = -1;
3121                                     au4_idx_c[5] = -1;
3122                                 }
3123                                 else
3124                                 {
3125                                     au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3126                                     au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3127                                 }
3128                                 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3129                                 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3130                                 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3131 
3132                                 if(0 == ps_sao_ctxt->i4_ctb_x)
3133                                 {
3134                                     au4_ilf_across_tile_slice_enable[0] = 0;
3135                                     au4_ilf_across_tile_slice_enable[4] = 0;
3136                                     au4_ilf_across_tile_slice_enable[6] = 0;
3137                                 }
3138                                 else
3139                                 {
3140                                     au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
3141                                     au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3142                                 }
3143 
3144                                 if(0 == ps_sao_ctxt->i4_ctb_y)
3145                                 {
3146                                     au4_ilf_across_tile_slice_enable[2] = 0;
3147                                     au4_ilf_across_tile_slice_enable[4] = 0;
3148                                     au4_ilf_across_tile_slice_enable[5] = 0;
3149                                 }
3150                                 else
3151                                 {
3152                                     au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3153                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
3154                                 }
3155 
3156                                 au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
3157                                 au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
3158                                 au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
3159 
3160                                 if(idx_c > au4_idx_c[6])
3161                                 {
3162                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3163                                 }
3164 
3165                                 /*
3166                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
3167                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
3168                                  */
3169                                 for(i = 0; i < 8; i++)
3170                                 {
3171                                     /*Sets the edges that lie on the slice/tile boundary*/
3172                                     if(au4_idx_c[i] != idx_c)
3173                                     {
3174                                         au1_tile_slice_boundary[i] = 1;
3175                                     }
3176                                     else
3177                                     {
3178                                         au4_ilf_across_tile_slice_enable[i] = 1;
3179                                     }
3180                                 }
3181                                 /*Reset indices*/
3182                                 for(i = 0; i < 8; i++)
3183                                 {
3184                                     au4_idx_c[i] = 0;
3185                                 }
3186                             }
3187 
3188                             if(ps_pps->i1_tiles_enabled_flag)
3189                             {
                                /* Calculate availability flags at tile boundary */
3191                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
3192                                 {
3193                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
3194                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
3195                                     {
3196                                         if(0 == ps_sao_ctxt->i4_ctb_x)
3197                                         {
3198                                             au4_idx_c[6] = -1;
3199                                             au4_idx_c[0] = -1;
3200                                             au4_idx_c[4] = -1;
3201                                         }
3202                                         else
3203                                         {
3204                                             au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3205                                         }
3206 
3207                                         if(0 == ps_sao_ctxt->i4_ctb_y)
3208                                         {
3209                                             au4_idx_c[2] = -1;
3210                                             au4_idx_c[5] = -1;
3211                                             au4_idx_c[4] = -1;
3212                                         }
3213                                         else
3214                                         {
3215                                             au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3216                                             au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3217                                         }
3218                                         idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3219                                         au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3220                                         au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3221 
3222                                         for(i = 0; i < 8; i++)
3223                                         {
3224                                             /*Sets the edges that lie on the slice/tile boundary*/
3225                                             if(au4_idx_c[i] != idx_c)
3226                                             {
3227                                                 au1_tile_slice_boundary[i] |= 1;
3228                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
3229                                             }
3230                                         }
3231                                     }
3232                                 }
3233                             }
3234 
3235                             for(i = 0; i < 8; i++)
3236                             {
3237                                 /*Sets the edges that lie on the slice/tile boundary*/
3238                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
3239                                 {
3240                                     au1_avail_chroma[i] = 0;
3241                                 }
3242                             }
3243                         }
3244                     }
3245 
3246                     if(0 == ps_sao_ctxt->i4_ctb_x)
3247                     {
3248                         au1_avail_chroma[0] = 0;
3249                         au1_avail_chroma[4] = 0;
3250                         au1_avail_chroma[6] = 0;
3251                     }
3252 
3253                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
3254                     {
3255                         au1_avail_chroma[1] = 0;
3256                         au1_avail_chroma[5] = 0;
3257                         au1_avail_chroma[7] = 0;
3258                     }
3259 
3260                     if(0 == ps_sao_ctxt->i4_ctb_y)
3261                     {
3262                         au1_avail_chroma[2] = 0;
3263                         au1_avail_chroma[4] = 0;
3264                         au1_avail_chroma[5] = 0;
3265                     }
3266 
3267                     if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
3268                     {
3269                         au1_avail_chroma[3] = 0;
3270                         au1_avail_chroma[6] = 0;
3271                         au1_avail_chroma[7] = 0;
3272                     }
3273 
3274                     {
3275                         au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
3276                         au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
3277 
3278                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
3279                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
3280 
3281                         if(chroma_yuv420sp_vu)
3282                         {
3283                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3284                                                                                  src_strd,
3285                                                                                  pu1_src_left_chroma,
3286                                                                                  pu1_src_top_chroma,
3287                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
3288                                                                                  au1_src_top_right,
3289                                                                                  au1_sao_src_top_left_chroma_bot_left,
3290                                                                                  au1_avail_chroma,
3291                                                                                  ai1_offset_cr,
3292                                                                                  ai1_offset_cb,
3293                                                                                  sao_wd_chroma,
3294                                                                                  sao_ht_chroma);
3295                         }
3296                         else
3297                         {
3298                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3299                                                                                  src_strd,
3300                                                                                  pu1_src_left_chroma,
3301                                                                                  pu1_src_top_chroma,
3302                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
3303                                                                                  au1_src_top_right,
3304                                                                                  au1_sao_src_top_left_chroma_bot_left,
3305                                                                                  au1_avail_chroma,
3306                                                                                  ai1_offset_cb,
3307                                                                                  ai1_offset_cr,
3308                                                                                  sao_wd_chroma,
3309                                                                                  sao_ht_chroma);
3310                         }
3311                     }
3312 
3313                 }
3314                 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3315                 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3316 
3317                 pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
3318                 pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
3319             }
3320             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3321             {
3322                 for(row = 0; row < sao_ht_chroma; row++)
3323                 {
3324                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3325                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3326                 }
3327                 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3328                 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3329 
3330                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3331 
3332                 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3333                 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3334             }
3335 
3336         }
3337     }
3338 
3339 
3340 
3341 
3342 /* If no loop filter is enabled copy the backed up values */
3343     {
3344         /* Luma */
3345         if(no_loop_filter_enabled_luma)
3346         {
3347             UWORD32 u4_no_loop_filter_flag;
3348             WORD32 loop_filter_bit_pos;
3349             WORD32 log2_min_cu = 3;
3350             WORD32 min_cu = (1 << log2_min_cu);
3351             UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
3352             WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
3353             WORD32 sao_blk_wd = ctb_size;
3354             WORD32 remaining_rows;
3355             WORD32 remaining_cols;
3356 
3357             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3358             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3359             if(remaining_rows <= SAO_SHIFT_CTB)
3360                 sao_blk_ht += remaining_rows;
3361             if(remaining_cols <= SAO_SHIFT_CTB)
3362                 sao_blk_wd += remaining_cols;
3363 
3364             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
3365             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3366 
3367             pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
3368 
3369             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3370                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3371             if(ps_sao_ctxt->i4_ctb_x > 0)
3372                 loop_filter_bit_pos -= 1;
3373 
3374             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3375                             (loop_filter_bit_pos >> 3);
3376 
3377             for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
3378                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3379             {
3380                 WORD32 tmp_wd = sao_blk_wd;
3381 
3382                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3383                                 (loop_filter_bit_pos & 7);
3384                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3385 
3386                 if(u4_no_loop_filter_flag)
3387                 {
3388                     while(tmp_wd > 0)
3389                     {
3390                         if(CTZ(u4_no_loop_filter_flag))
3391                         {
3392                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3393                             pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3394                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
3395                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3396                         }
3397                         else
3398                         {
3399                             for(row = 0; row < min_cu; row++)
3400                             {
3401                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3402                                 {
3403                                     pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
3404                                 }
3405                             }
3406                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3407                             pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3408                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
3409                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3410                         }
3411                     }
3412 
3413                     pu1_src_tmp_luma -= sao_blk_wd;
3414                     pu1_src_backup_luma -= sao_blk_wd;
3415                 }
3416 
3417                 pu1_src_tmp_luma += (src_strd << log2_min_cu);
3418                 pu1_src_backup_luma += (backup_strd << log2_min_cu);
3419             }
3420         }
3421 
3422         /* Chroma */
3423         if(no_loop_filter_enabled_chroma)
3424         {
3425             UWORD32 u4_no_loop_filter_flag;
3426             WORD32 loop_filter_bit_pos;
3427             WORD32 log2_min_cu = 3;
3428             WORD32 min_cu = (1 << log2_min_cu);
3429             UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
3430             WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
3431             WORD32 sao_blk_wd = ctb_size;
3432             WORD32 remaining_rows;
3433             WORD32 remaining_cols;
3434 
3435             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3436             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3437             if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3438                 sao_blk_ht += remaining_rows;
3439             if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3440                 sao_blk_wd += remaining_cols;
3441 
3442             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3443             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3444 
3445             pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3446 
3447             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3448                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3449             if(ps_sao_ctxt->i4_ctb_x > 0)
3450                 loop_filter_bit_pos -= 2;
3451 
3452             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3453                             (loop_filter_bit_pos >> 3);
3454 
3455             for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3456                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3457             {
3458                 WORD32 tmp_wd = sao_blk_wd;
3459 
3460                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3461                                 (loop_filter_bit_pos & 7);
3462                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3463 
3464                 if(u4_no_loop_filter_flag)
3465                 {
3466                     while(tmp_wd > 0)
3467                     {
3468                         if(CTZ(u4_no_loop_filter_flag))
3469                         {
3470                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3471                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3472                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
3473                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3474                         }
3475                         else
3476                         {
3477                             for(row = 0; row < min_cu / 2; row++)
3478                             {
3479                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3480                                 {
3481                                     pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
3482                                 }
3483                             }
3484 
3485                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3486                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3487                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
3488                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3489                         }
3490                     }
3491 
3492                     pu1_src_tmp_chroma -= sao_blk_wd;
3493                     pu1_src_backup_chroma -= sao_blk_wd;
3494                 }
3495 
3496                 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
3497                 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
3498             }
3499         }
3500     }
3501 
3502 }
3503 
3504