1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19  *******************************************************************************
20  * @file
21  *  ihevc_sao.c
22  *
23  * @brief
24  *  Contains function definitions for sample adaptive offset process
25  *
26  * @author
27  *  Srinivas T
28  *
29  * @par List of Functions:
30  *
31  * @remarks
32  *  None
33  *
34  *******************************************************************************
35  */
36 
37 #include <stdio.h>
38 #include <stddef.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <assert.h>
42 
43 #include "ihevc_typedefs.h"
44 #include "iv.h"
45 #include "ivd.h"
46 #include "ihevcd_cxa.h"
47 #include "ithread.h"
48 
49 #include "ihevc_defs.h"
50 #include "ihevc_debug.h"
51 #include "ihevc_defs.h"
52 #include "ihevc_structs.h"
53 #include "ihevc_macros.h"
54 #include "ihevc_platform_macros.h"
55 #include "ihevc_cabac_tables.h"
56 #include "ihevc_sao.h"
57 #include "ihevc_mem_fns.h"
58 
59 #include "ihevc_error.h"
60 #include "ihevc_common_tables.h"
61 
62 #include "ihevcd_trace.h"
63 #include "ihevcd_defs.h"
64 #include "ihevcd_function_selector.h"
65 #include "ihevcd_structs.h"
66 #include "ihevcd_error.h"
67 #include "ihevcd_nal.h"
68 #include "ihevcd_bitstream.h"
69 #include "ihevcd_job_queue.h"
70 #include "ihevcd_utils.h"
71 
72 #include "ihevc_deblk.h"
73 #include "ihevc_deblk_tables.h"
74 #include "ihevcd_profile.h"
75 #include "ihevcd_sao.h"
76 #include "ihevcd_debug.h"
77 
78 #define SAO_SHIFT_CTB    8
79 
80 /**
81  * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions)
82  */
ihevcd_sao_ctb(sao_ctxt_t * ps_sao_ctxt)83 void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
84 {
85     codec_t *ps_codec = ps_sao_ctxt->ps_codec;
86     UWORD8 *pu1_src_luma;
87     UWORD8 *pu1_src_chroma;
88     WORD32 src_strd;
89     WORD32 ctb_size;
90     WORD32 log2_ctb_size;
91     sps_t *ps_sps;
92     sao_t *ps_sao;
93     WORD32 row, col;
94     UWORD8 au1_avail_luma[8];
95     UWORD8 au1_avail_chroma[8];
96     WORD32 i;
97     UWORD8 *pu1_src_top_luma;
98     UWORD8 *pu1_src_top_chroma;
99     UWORD8 *pu1_src_left_luma;
100     UWORD8 *pu1_src_left_chroma;
101     UWORD8 au1_src_top_right[2];
102     UWORD8 au1_src_bot_left[2];
103     UWORD8 *pu1_no_loop_filter_flag;
104     WORD32 loop_filter_strd;
105 
106     /* Only first 5 values are used, but arrays are large
107      enough so that SIMD functions can read 64 bits at a time */
108     WORD8 ai1_offset_y[8] = {0};
109     WORD8 ai1_offset_cb[8] = {0};
110     WORD8 ai1_offset_cr[8] = {0};
111 
112     PROFILE_DISABLE_SAO();
113 
114     ps_sps = ps_sao_ctxt->ps_sps;
115     log2_ctb_size = ps_sps->i1_log2_ctb_size;
116     ctb_size = (1 << log2_ctb_size);
117     src_strd = ps_sao_ctxt->ps_codec->i4_strd;
118     pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
119     pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
120 
121     ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
122     loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
123 
124     /* Current CTB */
125     {
126         WORD32 sao_wd_luma;
127         WORD32 sao_wd_chroma;
128         WORD32 sao_ht_luma;
129         WORD32 sao_ht_chroma;
130 
131         WORD32 remaining_rows;
132         WORD32 remaining_cols;
133 
134         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
135         sao_wd_luma = MIN(ctb_size, remaining_cols);
136         sao_wd_chroma = MIN(ctb_size, remaining_cols);
137 
138         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
139         sao_ht_luma = MIN(ctb_size, remaining_rows);
140         sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
141 
142         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
143         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
144         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
145         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
146 
147         pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
148                         ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
149                         ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
150 
151         ai1_offset_y[1] = ps_sao->b4_y_offset_1;
152         ai1_offset_y[2] = ps_sao->b4_y_offset_2;
153         ai1_offset_y[3] = ps_sao->b4_y_offset_3;
154         ai1_offset_y[4] = ps_sao->b4_y_offset_4;
155 
156         ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
157         ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
158         ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
159         ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
160 
161         ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
162         ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
163         ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
164         ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
165 
166         for(i = 0; i < 8; i++)
167         {
168             au1_avail_luma[i] = 255;
169             au1_avail_chroma[i] = 255;
170         }
171 
172 
173         if(0 == ps_sao_ctxt->i4_ctb_x)
174         {
175             au1_avail_luma[0] = 0;
176             au1_avail_luma[4] = 0;
177             au1_avail_luma[6] = 0;
178 
179             au1_avail_chroma[0] = 0;
180             au1_avail_chroma[4] = 0;
181             au1_avail_chroma[6] = 0;
182         }
183 
184         if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
185         {
186             au1_avail_luma[1] = 0;
187             au1_avail_luma[5] = 0;
188             au1_avail_luma[7] = 0;
189 
190             au1_avail_chroma[1] = 0;
191             au1_avail_chroma[5] = 0;
192             au1_avail_chroma[7] = 0;
193         }
194 
195         if(0 == ps_sao_ctxt->i4_ctb_y)
196         {
197             au1_avail_luma[2] = 0;
198             au1_avail_luma[4] = 0;
199             au1_avail_luma[5] = 0;
200 
201             au1_avail_chroma[2] = 0;
202             au1_avail_chroma[4] = 0;
203             au1_avail_chroma[5] = 0;
204         }
205 
206         if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
207         {
208             au1_avail_luma[3] = 0;
209             au1_avail_luma[6] = 0;
210             au1_avail_luma[7] = 0;
211 
212             au1_avail_chroma[3] = 0;
213             au1_avail_chroma[6] = 0;
214             au1_avail_chroma[7] = 0;
215         }
216 
217 
218         if(0 == ps_sao->b3_y_type_idx)
219         {
220             /* Update left, top and top-left */
221             for(row = 0; row < sao_ht_luma; row++)
222             {
223                 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
224             }
225             ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
226 
227             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
228 
229         }
230         else
231         {
232             UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
233             UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
234             WORD32 tmp_strd = MAX_CTB_SIZE + 2;
235             WORD32 no_loop_filter_enabled = 0;
236 
237             /* Check the loop filter flags and copy the original values for back up */
238             {
239                 UWORD32 u4_no_loop_filter_flag;
240                 WORD32 min_cu = 8;
241                 UWORD8 *pu1_src_tmp = pu1_src_luma;
242 
243                 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
244                 {
245                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
246                                     ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
247                     u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
248 
249                     if(u4_no_loop_filter_flag)
250                     {
251                         WORD32 tmp_wd = sao_wd_luma;
252                         no_loop_filter_enabled = 1;
253                         while(tmp_wd > 0)
254                         {
255                             if(CTZ(u4_no_loop_filter_flag))
256                             {
257                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
258                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
259                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
260                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
261                             }
262                             else
263                             {
264                                 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
265                                 {
266                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
267                                     {
268                                         pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
269                                     }
270                                 }
271 
272                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
273                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
274                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
275                                 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
276                             }
277                         }
278 
279                         pu1_src_tmp -= sao_wd_luma;
280                     }
281 
282                     pu1_src_tmp += min_cu * src_strd;
283                     pu1_src_copy += min_cu * tmp_strd;
284                 }
285             }
286 
287             if(1 == ps_sao->b3_y_type_idx)
288             {
289                 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
290                                                                           src_strd,
291                                                                           pu1_src_left_luma,
292                                                                           pu1_src_top_luma,
293                                                                           ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
294                                                                           ps_sao->b5_y_band_pos,
295                                                                           ai1_offset_y,
296                                                                           sao_wd_luma,
297                                                                           sao_ht_luma);
298             }
299             else // if(2 <= ps_sao->b3_y_type_idx)
300             {
301                 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
302                 au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
303                 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
304                                                                   src_strd,
305                                                                   pu1_src_left_luma,
306                                                                   pu1_src_top_luma,
307                                                                   ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
308                                                                   au1_src_top_right,
309                                                                   au1_src_bot_left,
310                                                                   au1_avail_luma,
311                                                                   ai1_offset_y,
312                                                                   sao_wd_luma,
313                                                                   sao_ht_luma);
314             }
315 
316             /* Check the loop filter flags and copy the original values back if they are set */
317             if(no_loop_filter_enabled)
318             {
319                 UWORD32 u4_no_loop_filter_flag;
320                 WORD32 min_cu = 8;
321                 UWORD8 *pu1_src_tmp = pu1_src_luma;
322 
323                 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
324                 {
325                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
326                     u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
327 
328                     if(u4_no_loop_filter_flag)
329                     {
330                         WORD32 tmp_wd = sao_wd_luma;
331                         while(tmp_wd > 0)
332                         {
333                             if(CTZ(u4_no_loop_filter_flag))
334                             {
335                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
336                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
337                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
338                                 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
339                             }
340                             else
341                             {
342                                 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
343                                 {
344                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
345                                     {
346                                         pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
347                                     }
348                                 }
349 
350                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
351                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
352                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
353                                 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
354                             }
355                         }
356 
357                         pu1_src_tmp -= sao_wd_luma;
358                     }
359 
360                     pu1_src_tmp += min_cu * src_strd;
361                     pu1_src_copy += min_cu * tmp_strd;
362                 }
363             }
364 
365         }
366 
367         if(0 == ps_sao->b3_cb_type_idx)
368         {
369             for(row = 0; row < sao_ht_chroma; row++)
370             {
371                 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
372                 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
373             }
374             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
375             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
376 
377             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
378         }
379         else
380         {
381             UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
382             UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
383             WORD32 tmp_strd = MAX_CTB_SIZE + 4;
384             WORD32 no_loop_filter_enabled = 0;
385 
386             /* Check the loop filter flags and copy the original values for back up */
387             {
388                 UWORD32 u4_no_loop_filter_flag;
389                 WORD32 min_cu = 4;
390                 UWORD8 *pu1_src_tmp = pu1_src_chroma;
391 
392                 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
393                 {
394                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
395                     u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
396 
397                     if(u4_no_loop_filter_flag)
398                     {
399                         WORD32 tmp_wd = sao_wd_chroma;
400                         no_loop_filter_enabled = 1;
401                         while(tmp_wd > 0)
402                         {
403                             if(CTZ(u4_no_loop_filter_flag))
404                             {
405                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
406                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
407                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
408                                 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
409                             }
410                             else
411                             {
412                                 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
413                                 {
414                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
415                                     {
416                                         pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
417                                     }
418                                 }
419 
420                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
421                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
422                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
423                                 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
424                             }
425                         }
426 
427                         pu1_src_tmp -= sao_wd_chroma;
428                     }
429 
430                     pu1_src_tmp += min_cu * src_strd;
431                     pu1_src_copy += min_cu * tmp_strd;
432                 }
433             }
434 
435             if(1 == ps_sao->b3_cb_type_idx)
436             {
437                 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
438                                                                             src_strd,
439                                                                             pu1_src_left_chroma,
440                                                                             pu1_src_top_chroma,
441                                                                             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
442                                                                             ps_sao->b5_cb_band_pos,
443                                                                             ps_sao->b5_cr_band_pos,
444                                                                             ai1_offset_cb,
445                                                                             ai1_offset_cr,
446                                                                             sao_wd_chroma,
447                                                                             sao_ht_chroma
448                                                                            );
449             }
450             else // if(2 <= ps_sao->b3_cb_type_idx)
451             {
452                 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
453                 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
454                 au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
455                 au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
456                 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
457                                                                      src_strd,
458                                                                      pu1_src_left_chroma,
459                                                                      pu1_src_top_chroma,
460                                                                      ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
461                                                                      au1_src_top_right,
462                                                                      au1_src_bot_left,
463                                                                      au1_avail_chroma,
464                                                                      ai1_offset_cb,
465                                                                      ai1_offset_cr,
466                                                                      sao_wd_chroma,
467                                                                      sao_ht_chroma);
468             }
469 
470             /* Check the loop filter flags and copy the original values back if they are set */
471             if(no_loop_filter_enabled)
472             {
473                 UWORD32 u4_no_loop_filter_flag;
474                 WORD32 min_cu = 4;
475                 UWORD8 *pu1_src_tmp = pu1_src_chroma;
476 
477                 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
478                 {
479                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
480                     u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
481 
482                     if(u4_no_loop_filter_flag)
483                     {
484                         WORD32 tmp_wd = sao_wd_chroma;
485                         while(tmp_wd > 0)
486                         {
487                             if(CTZ(u4_no_loop_filter_flag))
488                             {
489                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
490                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
491                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
492                                 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
493                             }
494                             else
495                             {
496                                 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
497                                 {
498                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
499                                     {
500                                         pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
501                                     }
502                                 }
503 
504                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
505                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
506                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
507                                 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
508                             }
509                         }
510 
511                         pu1_src_tmp -= sao_wd_chroma;
512                     }
513 
514                     pu1_src_tmp += min_cu * src_strd;
515                     pu1_src_copy += min_cu * tmp_strd;
516                 }
517             }
518 
519         }
520 
521     }
522 }
523 
ihevcd_sao_shift_ctb(sao_ctxt_t * ps_sao_ctxt)524 void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
525 {
526     codec_t *ps_codec = ps_sao_ctxt->ps_codec;
527     UWORD8 *pu1_src_luma;
528     UWORD8 *pu1_src_chroma;
529     WORD32 src_strd;
530     WORD32 ctb_size;
531     WORD32 log2_ctb_size;
532     sps_t *ps_sps;
533     sao_t *ps_sao;
534     pps_t *ps_pps;
535     slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
536     tile_t *ps_tile;
537     UWORD16 *pu1_slice_idx;
538     UWORD16 *pu1_tile_idx;
539     WORD32 row, col;
540     UWORD8 au1_avail_luma[8];
541     UWORD8 au1_avail_chroma[8];
542     UWORD8 au1_tile_slice_boundary[8];
543     UWORD8 au4_ilf_across_tile_slice_enable[8];
544     WORD32 i;
545     UWORD8 *pu1_src_top_luma;
546     UWORD8 *pu1_src_top_chroma;
547     UWORD8 *pu1_src_left_luma;
548     UWORD8 *pu1_src_left_chroma;
549     UWORD8 au1_src_top_right[2];
550     UWORD8 au1_src_bot_left[2];
551     UWORD8 *pu1_no_loop_filter_flag;
552     UWORD8 *pu1_src_backup_luma;
553     UWORD8 *pu1_src_backup_chroma;
554     WORD32 backup_strd;
555     WORD32 loop_filter_strd;
556 
557     WORD32 no_loop_filter_enabled_luma = 0;
558     WORD32 no_loop_filter_enabled_chroma = 0;
559     UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
560     UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
561     UWORD8 *pu1_sao_src_luma_top_left_ctb;
562     UWORD8 *pu1_sao_src_chroma_top_left_ctb;
563     UWORD8 *pu1_sao_src_top_left_luma_top_right;
564     UWORD8 *pu1_sao_src_top_left_chroma_top_right;
565     UWORD8  u1_sao_src_top_left_luma_bot_left;
566     UWORD8  *pu1_sao_src_top_left_luma_bot_left;
567     UWORD8 *au1_sao_src_top_left_chroma_bot_left;
568     UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
569     /* Only first 5 values are used, but arrays are large
570      enough so that SIMD functions can read 64 bits at a time */
571     WORD8 ai1_offset_y[8] = {0};
572     WORD8 ai1_offset_cb[8] = {0};
573     WORD8 ai1_offset_cr[8] = {0};
574     WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
575 
576     PROFILE_DISABLE_SAO();
577 
578     ps_sps = ps_sao_ctxt->ps_sps;
579     ps_pps = ps_sao_ctxt->ps_pps;
580     ps_tile = ps_sao_ctxt->ps_tile;
581 
582     log2_ctb_size = ps_sps->i1_log2_ctb_size;
583     ctb_size = (1 << log2_ctb_size);
584     src_strd = ps_sao_ctxt->ps_codec->i4_strd;
585     ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
586     ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
587 
588     pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
589     pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
590     pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
591     pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
592 
593     /*Stores the left value for each row ctbs- Needed for column tiles*/
594     pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
595     pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
596     pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
597     pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
598     u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
599     pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
600     au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
601     pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
602     pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
603     pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
604 
605     ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
606     loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
607     backup_strd = 2 * MAX_CTB_SIZE;
608 
609     DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
610 
611     {
612         /* Check the loop filter flags and copy the original values for back up */
613         /* Luma */
614 
615         /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs
616          * can belong to different slice with their own sao_enable flag */
617         {
618             UWORD32 u4_no_loop_filter_flag;
619             WORD32 loop_filter_bit_pos;
620             WORD32 log2_min_cu = 3;
621             WORD32 min_cu = (1 << log2_min_cu);
622             UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
623             WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
624             WORD32 sao_blk_wd = ctb_size;
625             WORD32 remaining_rows;
626             WORD32 remaining_cols;
627 
628             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
629             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
630             if(remaining_rows <= SAO_SHIFT_CTB)
631                 sao_blk_ht += remaining_rows;
632             if(remaining_cols <= SAO_SHIFT_CTB)
633                 sao_blk_wd += remaining_cols;
634 
635             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
636             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
637 
638             pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
639 
640             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
641                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
642             if(ps_sao_ctxt->i4_ctb_x > 0)
643                 loop_filter_bit_pos -= 1;
644 
645             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
646                             (loop_filter_bit_pos >> 3);
647 
648             for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
649                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
650             {
651                 WORD32 tmp_wd = sao_blk_wd;
652 
653                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
654                                 (loop_filter_bit_pos & 7);
655                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
656 
657                 if(u4_no_loop_filter_flag)
658                 {
659                     no_loop_filter_enabled_luma = 1;
660                     while(tmp_wd > 0)
661                     {
662                         if(CTZ(u4_no_loop_filter_flag))
663                         {
664                             pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
665                             pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
666                             tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
667                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
668                         }
669                         else
670                         {
671                             for(row = 0; row < min_cu; row++)
672                             {
673                                 for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
674                                 {
675                                     pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
676                                 }
677                             }
678                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
679                             pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
680                             tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
681                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
682                         }
683                     }
684 
685                     pu1_src_tmp_luma -= sao_blk_wd;
686                     pu1_src_backup_luma -= sao_blk_wd;
687                 }
688 
689                 pu1_src_tmp_luma += (src_strd << log2_min_cu);
690                 pu1_src_backup_luma += (backup_strd << log2_min_cu);
691             }
692         }
693 
694         /* Chroma */
695 
696         {
697             UWORD32 u4_no_loop_filter_flag;
698             WORD32 loop_filter_bit_pos;
699             WORD32 log2_min_cu = 3;
700             WORD32 min_cu = (1 << log2_min_cu);
701             UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
702             WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
703             WORD32 sao_blk_wd = ctb_size;
704             WORD32 remaining_rows;
705             WORD32 remaining_cols;
706 
707             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
708             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
709             if(remaining_rows <= 2 * SAO_SHIFT_CTB)
710                 sao_blk_ht += remaining_rows;
711             if(remaining_cols <= 2 * SAO_SHIFT_CTB)
712                 sao_blk_wd += remaining_cols;
713 
714             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
715             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
716 
717             pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
718 
719             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
720                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
721             if(ps_sao_ctxt->i4_ctb_x > 0)
722                 loop_filter_bit_pos -= 2;
723 
724             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
725                             (loop_filter_bit_pos >> 3);
726 
727             for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
728                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
729             {
730                 WORD32 tmp_wd = sao_blk_wd;
731 
732                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
733                                 (loop_filter_bit_pos & 7);
734                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
735 
736                 if(u4_no_loop_filter_flag)
737                 {
738                     no_loop_filter_enabled_chroma = 1;
739                     while(tmp_wd > 0)
740                     {
741                         if(CTZ(u4_no_loop_filter_flag))
742                         {
743                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
744                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
745                             tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
746                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
747                         }
748                         else
749                         {
750                             for(row = 0; row < min_cu / 2; row++)
751                             {
752                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
753                                 {
754                                     pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
755                                 }
756                             }
757 
758                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
759                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
760                             tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
761                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
762                         }
763                     }
764 
765                     pu1_src_tmp_chroma -= sao_blk_wd;
766                     pu1_src_backup_chroma -= sao_blk_wd;
767                 }
768 
769                 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
770                 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
771             }
772         }
773     }
774 
775     DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
776 
777     /* Top-left CTB */
778     if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
779     {
780         WORD32 sao_wd_luma = SAO_SHIFT_CTB;
781         WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
782         WORD32 sao_ht_luma = SAO_SHIFT_CTB;
783         WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
784 
785         WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
786         WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
787         WORD32 au4_idx_tl[8], idx_tl;
788 
789         slice_header_t *ps_slice_hdr_top_left;
790         {
791             WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
792                                         (ps_sao_ctxt->i4_ctb_x - 1);
793             ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx];
794         }
795 
796 
797         pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
798         pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
799         ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
800         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
801         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
802         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
803         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
804 
805         if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag)
806         {
807             if(0 == ps_sao->b3_y_type_idx)
808             {
809                 /* Update left, top and top-left */
810                 for(row = 0; row < sao_ht_luma; row++)
811                 {
812                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
813                 }
814                 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
815 
816                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
817 
818 
819             }
820 
821             else if(1 == ps_sao->b3_y_type_idx)
822             {
823                 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
824                 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
825                 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
826                 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
827 
828                 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
829                                                                           src_strd,
830                                                                           pu1_src_left_luma,
831                                                                           pu1_src_top_luma,
832                                                                           pu1_sao_src_luma_top_left_ctb,
833                                                                           ps_sao->b5_y_band_pos,
834                                                                           ai1_offset_y,
835                                                                           sao_wd_luma,
836                                                                           sao_ht_luma
837                                                                          );
838             }
839 
840             else // if(2 <= ps_sao->b3_y_type_idx)
841             {
842                 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
843                 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
844                 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
845                 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
846 
847                 for(i = 0; i < 8; i++)
848                 {
849                     au1_avail_luma[i] = 255;
850                     au1_tile_slice_boundary[i] = 0;
851                     au4_idx_tl[i] = 0;
852                     au4_ilf_across_tile_slice_enable[i] = 1;
853                 }
854 
855                 /******************************************************************
856                  * Derive the  Top-left CTB's neighbor pixel's slice indices.
857                  *
858                  *          TL_T
859                  *       4  _2__5________
860                  *     0   |    |       |
861                  *    TL_L | TL | 1 TL_R|
862                  *         |____|_______|____
863                  *        6|TL_D|7      |    |
864                  *         | 3  |       |    |
865                  *         |____|_______|    |
866                  *              |            |
867                  *              |            |
868                  *              |____________|
869                  *
870                  *****************************************************************/
871 
872                 /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
873                 {
874                     if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
875                     {
876                         {
877                             /*Assuming that sao shift is uniform along x and y directions*/
878                             if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
879                             {
880                                 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
881                                 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
882                             }
883                             else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
884                             {
885                                 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
886                                 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
887                             }
888                             ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
889                             ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
890 
891                             ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
892                             ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
893 
894                             ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
895                             ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
896 
897                             ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
898                             ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
899                         }
900 
901                         if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
902                         {
903                             /*Calculate slice indices for neighbor pixels*/
904                             idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
905                             au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
906                             au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
907                             au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
908                             au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
909                             au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
910 
911                             if((0 == (1 << log2_ctb_size) - sao_wd_luma))
912                             {
913                                 if(ps_sao_ctxt->i4_ctb_x == 1)
914                                 {
915                                     au4_idx_tl[6] = -1;
916                                     au4_idx_tl[4] = -1;
917                                 }
918                                 else
919                                 {
920                                     au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
921                                 }
922                                 if(ps_sao_ctxt->i4_ctb_y == 1)
923                                 {
924                                     au4_idx_tl[5] = -1;
925                                     au4_idx_tl[4] = -1;
926                                 }
927                                 else
928                                 {
929                                     au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
930                                     au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
931                                 }
932                                 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
933                             }
934 
935                             /* Verify that the neighbor CTBs don't cross the picture boundary.
936                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
937                              * of the pixel having a greater address is checked. Accordingly, set the availability flags.
938                              * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
939                              * the respective pixel's flags are checked
940                              */
941 
942                             if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
943                             {
944                                 au4_ilf_across_tile_slice_enable[4] = 0;
945                                 au4_ilf_across_tile_slice_enable[6] = 0;
946                             }
947                             else
948                             {
949                                 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
950                             }
951                             if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
952                             {
953                                 au4_ilf_across_tile_slice_enable[5] = 0;
954                                 au4_ilf_across_tile_slice_enable[4] = 0;
955                             }
956                             else
957                             {
958                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
959                                 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
960                             }
961                             au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
962                             au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
963                             au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
964                             au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
965                             au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
966 
967                             if(au4_idx_tl[5] > idx_tl)
968                             {
969                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
970                             }
971 
972                             /*
973                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
974                              * of the pixel having a greater address is checked. Accordingly, set the availability flags.
975                              * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
976                              * the respective pixel's flags are checked
977                              */
978                             for(i = 0; i < 8; i++)
979                             {
980                                 /*Sets the edges that lie on the slice/tile boundary*/
981                                 if(au4_idx_tl[i] != idx_tl)
982                                 {
983                                     au1_tile_slice_boundary[i] = 1;
984                                 }
985                                 else
986                                 {
987                                     au4_ilf_across_tile_slice_enable[i] = 1;
988                                 }
989                             }
990 
991                             ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
992                         }
993 
994                         if(ps_pps->i1_tiles_enabled_flag)
995                         {
996                             /* Calculate availability flags at tile boundary */
997                             if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
998                             {
999                                 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1000                                 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1001                                 {
1002                                     /*Set the boundary arrays*/
1003                                     /*Calculate tile indices for neighbor pixels*/
1004                                     idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1005                                     au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1006                                     au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1007                                     au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1008                                     au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1009                                     au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1010 
1011                                     if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1012                                     {
1013                                         if(ps_sao_ctxt->i4_ctb_x == 1)
1014                                         {
1015                                             au4_idx_tl[6] = -1;
1016                                             au4_idx_tl[4] = -1;
1017                                         }
1018                                         else
1019                                         {
1020                                             au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1021                                         }
1022                                         if(ps_sao_ctxt->i4_ctb_y == 1)
1023                                         {
1024                                             au4_idx_tl[5] = -1;
1025                                             au4_idx_tl[4] = -1;
1026                                         }
1027                                         else
1028                                         {
1029                                             au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1030                                             au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1031                                         }
1032                                         au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1033                                     }
1034                                     for(i = 0; i < 8; i++)
1035                                     {
1036                                         /*Sets the edges that lie on the tile boundary*/
1037                                         if(au4_idx_tl[i] != idx_tl)
1038                                         {
1039                                             au1_tile_slice_boundary[i] |= 1;
1040                                             au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1041                                         }
1042                                     }
1043                                 }
1044                             }
1045                         }
1046 
1047 
1048                         /*Set availability flags based on tile and slice boundaries*/
1049                         for(i = 0; i < 8; i++)
1050                         {
1051                             /*Sets the edges that lie on the slice/tile boundary*/
1052                             if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1053                             {
1054                                 au1_avail_luma[i] = 0;
1055                             }
1056                         }
1057                     }
1058                 }
1059 
1060                 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1061                 {
1062                     au1_avail_luma[0] = 0;
1063                     au1_avail_luma[4] = 0;
1064                     au1_avail_luma[6] = 0;
1065                 }
1066 
1067                 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1068                 {
1069                     au1_avail_luma[1] = 0;
1070                     au1_avail_luma[5] = 0;
1071                     au1_avail_luma[7] = 0;
1072                 }
1073                 //y==1 case
1074                 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
1075                 {
1076                     au1_avail_luma[2] = 0;
1077                     au1_avail_luma[4] = 0;
1078                     au1_avail_luma[5] = 0;
1079                 }
1080                 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1081                 {
1082                     au1_avail_luma[3] = 0;
1083                     au1_avail_luma[6] = 0;
1084                     au1_avail_luma[7] = 0;
1085                 }
1086 
1087                 {
1088                     au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1089                     u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
1090                     ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1091                                                                       src_strd,
1092                                                                       pu1_src_left_luma,
1093                                                                       pu1_src_top_luma,
1094                                                                       pu1_sao_src_luma_top_left_ctb,
1095                                                                       au1_src_top_right,
1096                                                                       &u1_sao_src_top_left_luma_bot_left,
1097                                                                       au1_avail_luma,
1098                                                                       ai1_offset_y,
1099                                                                       sao_wd_luma,
1100                                                                       sao_ht_luma);
1101                 }
1102             }
1103 
1104         }
1105         else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1106         {
1107             /* Update left, top and top-left */
1108             for(row = 0; row < sao_ht_luma; row++)
1109             {
1110                 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1111             }
1112             pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1113 
1114             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1115         }
1116 
1117         if(ps_slice_hdr_top_left->i1_slice_sao_chroma_flag)
1118         {
1119             if(0 == ps_sao->b3_cb_type_idx)
1120             {
1121                 for(row = 0; row < sao_ht_chroma; row++)
1122                 {
1123                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1124                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1125                 }
1126                 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1127                 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1128 
1129                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1130 
1131             }
1132 
1133             else if(1 == ps_sao->b3_cb_type_idx)
1134             {
1135                 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1136                 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1137                 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1138                 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1139 
1140                 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1141                 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1142                 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1143                 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1144 
1145                 if(chroma_yuv420sp_vu)
1146                 {
1147                     ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1148                                                                                 src_strd,
1149                                                                                 pu1_src_left_chroma,
1150                                                                                 pu1_src_top_chroma,
1151                                                                                 pu1_sao_src_chroma_top_left_ctb,
1152                                                                                 ps_sao->b5_cr_band_pos,
1153                                                                                 ps_sao->b5_cb_band_pos,
1154                                                                                 ai1_offset_cr,
1155                                                                                 ai1_offset_cb,
1156                                                                                 sao_wd_chroma,
1157                                                                                 sao_ht_chroma
1158                                                                                );
1159                 }
1160                 else
1161                 {
1162                     ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1163                                                                                 src_strd,
1164                                                                                 pu1_src_left_chroma,
1165                                                                                 pu1_src_top_chroma,
1166                                                                                 pu1_sao_src_chroma_top_left_ctb,
1167                                                                                 ps_sao->b5_cb_band_pos,
1168                                                                                 ps_sao->b5_cr_band_pos,
1169                                                                                 ai1_offset_cb,
1170                                                                                 ai1_offset_cr,
1171                                                                                 sao_wd_chroma,
1172                                                                                 sao_ht_chroma
1173                                                                                );
1174                 }
1175             }
1176 
1177             else // if(2 <= ps_sao->b3_cb_type_idx)
1178             {
1179                 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1180                 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1181                 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1182                 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1183 
1184                 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1185                 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1186                 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1187                 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1188                 for(i = 0; i < 8; i++)
1189                 {
1190                     au1_avail_chroma[i] = 255;
1191                     au1_tile_slice_boundary[i] = 0;
1192                     au4_idx_tl[i] = 0;
1193                     au4_ilf_across_tile_slice_enable[i] = 1;
1194                 }
1195                 /*In case of slices*/
1196                 {
1197                     if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1198                     {
1199                         if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
1200                         {
1201                             ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
1202                             ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
1203                         }
1204                         else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
1205                         {
1206                             ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
1207                             ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
1208                         }
1209                         ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
1210                         ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
1211 
1212                         ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
1213                         ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
1214 
1215                         ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
1216                         ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
1217 
1218                         ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
1219                         ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
1220 
1221                         if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1222                         {
1223 
1224                             idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1225                             au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1226                             au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1227                             au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1228                             au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1229                             au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1230 
1231                             if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
1232                             {
1233                                 if(ps_sao_ctxt->i4_ctb_x == 1)
1234                                 {
1235                                     au4_idx_tl[6] = -1;
1236                                     au4_idx_tl[4] = -1;
1237                                 }
1238                                 else
1239                                 {
1240                                     au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1241                                 }
1242                                 if(ps_sao_ctxt->i4_ctb_y == 1)
1243                                 {
1244                                     au4_idx_tl[5] = -1;
1245                                     au4_idx_tl[4] = -1;
1246                                 }
1247                                 else
1248                                 {
1249                                     au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1250                                     au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1251                                 }
1252                                 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1253                             }
1254 
1255                             /* Verify that the neighbor ctbs don't cross pic boundary
1256                              * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
1257                             if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
1258                             {
1259                                 au4_ilf_across_tile_slice_enable[4] = 0;
1260                                 au4_ilf_across_tile_slice_enable[6] = 0;
1261                             }
1262                             else
1263                             {
1264                                 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1265                             }
1266                             if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
1267                             {
1268                                 au4_ilf_across_tile_slice_enable[5] = 0;
1269                                 au4_ilf_across_tile_slice_enable[4] = 0;
1270                             }
1271                             else
1272                             {
1273                                 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1274                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1275                             }
1276                             au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1277                             au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1278                             au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1279                             au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1280                             au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1281                             /*
1282                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1283                              * of the pixel having a greater address is checked. Accordingly, set the availability flags
1284                              */
1285                             for(i = 0; i < 8; i++)
1286                             {
1287                                 /*Sets the edges that lie on the slice/tile boundary*/
1288                                 if(au4_idx_tl[i] != idx_tl)
1289                                 {
1290                                     au1_tile_slice_boundary[i] = 1;
1291                                 }
1292                                 else
1293                                 {
1294                                     au4_ilf_across_tile_slice_enable[i] = 1;
1295                                 }
1296                             }
1297 
1298                             /*Reset indices*/
1299                             for(i = 0; i < 8; i++)
1300                             {
1301                                 au4_idx_tl[i] = 0;
1302                             }
1303                         }
1304                         if(ps_pps->i1_tiles_enabled_flag)
1305                         {
                            /* Calculate availability flags at tile boundary */
1307                             if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1308                             {
1309                                 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1310                                 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1311                                 {
1312                                     /*Set the boundary arrays*/
1313                                     /*Calculate tile indices for neighbor pixels*/
1314                                     idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1315                                     au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1316                                     au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1317                                     au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1318                                     au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1319                                     au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1320 
1321                                     if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1322                                     {
1323                                         if(ps_sao_ctxt->i4_ctb_x == 1)
1324                                         {
1325                                             au4_idx_tl[6] = -1;
1326                                             au4_idx_tl[4] = -1;
1327                                         }
1328                                         else
1329                                         {
1330                                             au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1331                                         }
1332                                         if(ps_sao_ctxt->i4_ctb_y == 1)
1333                                         {
1334                                             au4_idx_tl[5] = -1;
1335                                             au4_idx_tl[4] = -1;
1336                                         }
1337                                         else
1338                                         {
1339                                             au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1340                                             au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1341                                         }
1342                                         au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1343                                     }
1344                                     for(i = 0; i < 8; i++)
1345                                     {
1346                                         /*Sets the edges that lie on the tile boundary*/
1347                                         if(au4_idx_tl[i] != idx_tl)
1348                                         {
1349                                             au1_tile_slice_boundary[i] |= 1;
1350                                             au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1351                                         }
1352                                     }
1353                                 }
1354                             }
1355                         }
1356 
1357                         for(i = 0; i < 8; i++)
1358                         {
1359                             /*Sets the edges that lie on the slice/tile boundary*/
1360                             if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1361                             {
1362                                 au1_avail_chroma[i] = 0;
1363                             }
1364                         }
1365                     }
1366                 }
1367 
1368                 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
1369                 {
1370                     au1_avail_chroma[0] = 0;
1371                     au1_avail_chroma[4] = 0;
1372                     au1_avail_chroma[6] = 0;
1373                 }
1374                 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1375                 {
1376                     au1_avail_chroma[1] = 0;
1377                     au1_avail_chroma[5] = 0;
1378                     au1_avail_chroma[7] = 0;
1379                 }
1380 
1381                 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1382                 {
1383                     au1_avail_chroma[2] = 0;
1384                     au1_avail_chroma[4] = 0;
1385                     au1_avail_chroma[5] = 0;
1386                 }
1387                 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1388                 {
1389                     au1_avail_chroma[3] = 0;
1390                     au1_avail_chroma[6] = 0;
1391                     au1_avail_chroma[7] = 0;
1392                 }
1393 
1394                 {
1395                     au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
1396                     au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
1397                     au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
1398                     au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
1399                     if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
1400                     {
1401                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1402                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1403                     }
1404 
1405                     if(chroma_yuv420sp_vu)
1406                     {
1407                         ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1408                                                                              src_strd,
1409                                                                              pu1_src_left_chroma,
1410                                                                              pu1_src_top_chroma,
1411                                                                              pu1_sao_src_chroma_top_left_ctb,
1412                                                                              au1_src_top_right,
1413                                                                              au1_sao_src_top_left_chroma_bot_left,
1414                                                                              au1_avail_chroma,
1415                                                                              ai1_offset_cr,
1416                                                                              ai1_offset_cb,
1417                                                                              sao_wd_chroma,
1418                                                                              sao_ht_chroma);
1419                     }
1420                     else
1421                     {
1422                         ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1423                                                                              src_strd,
1424                                                                              pu1_src_left_chroma,
1425                                                                              pu1_src_top_chroma,
1426                                                                              pu1_sao_src_chroma_top_left_ctb,
1427                                                                              au1_src_top_right,
1428                                                                              au1_sao_src_top_left_chroma_bot_left,
1429                                                                              au1_avail_chroma,
1430                                                                              ai1_offset_cb,
1431                                                                              ai1_offset_cr,
1432                                                                              sao_wd_chroma,
1433                                                                              sao_ht_chroma);
1434                     }
1435                 }
1436             }
1437         }
1438         else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1439         {
1440             for(row = 0; row < sao_ht_chroma; row++)
1441             {
1442                 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1443                 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1444             }
1445             pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1446             pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1447 
1448             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1449         }
1450 
1451         pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1452         pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
1453         ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1454     }
1455 
1456 
1457     /* Top CTB */
1458     if((ps_sao_ctxt->i4_ctb_y > 0))
1459     {
1460         WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1461         WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
1462         WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1463         WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1464 
1465         WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1466         WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1467         WORD32 au4_idx_t[8], idx_t;
1468 
1469         WORD32 remaining_cols;
1470 
1471         slice_header_t *ps_slice_hdr_top;
1472         {
1473             WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
1474                                         (ps_sao_ctxt->i4_ctb_x);
1475             ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx];
1476         }
1477 
1478         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1479         if(remaining_cols <= SAO_SHIFT_CTB)
1480         {
1481             sao_wd_luma += remaining_cols;
1482         }
1483         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
1484         if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1485         {
1486             sao_wd_chroma += remaining_cols;
1487         }
1488 
1489         pu1_src_luma -= (sao_ht_luma * src_strd);
1490         pu1_src_chroma -= (sao_ht_chroma * src_strd);
1491         ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1492         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1493         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1494         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1495         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
1496 
1497         if(0 != sao_wd_luma)
1498         {
1499             if(ps_slice_hdr_top->i1_slice_sao_luma_flag)
1500             {
1501                 if(0 == ps_sao->b3_y_type_idx)
1502                 {
1503                     /* Update left, top and top-left */
1504                     for(row = 0; row < sao_ht_luma; row++)
1505                     {
1506                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1507                     }
1508                     pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1509 
1510                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1511 
1512                 }
1513 
1514                 else if(1 == ps_sao->b3_y_type_idx)
1515                 {
1516                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1517                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1518                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1519                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1520 
1521                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1522                                                                               src_strd,
1523                                                                               pu1_src_left_luma,
1524                                                                               pu1_src_top_luma,
1525                                                                               pu1_sao_src_luma_top_left_ctb,
1526                                                                               ps_sao->b5_y_band_pos,
1527                                                                               ai1_offset_y,
1528                                                                               sao_wd_luma,
1529                                                                               sao_ht_luma
1530                                                                              );
1531                 }
1532 
1533                 else // if(2 <= ps_sao->b3_y_type_idx)
1534                 {
1535                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1536                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1537                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1538                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1539 
1540                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1541                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1542                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1543 
1544                     for(i = 0; i < 8; i++)
1545                     {
1546 
1547                         au4_ilf_across_tile_slice_enable[i] = 1;
1548                     }
1549                     /******************************************************************
                     * Derive the Top CTB's neighbor pixel's slice indices.
1551                      *
1552                      *               T_T
1553                      *          ____________
1554                      *         |    |       |
1555                      *         | T_L|  T    |T_R
1556                      *         |    | ______|____
1557                      *         |    |  T_D  |    |
1558                      *         |    |       |    |
1559                      *         |____|_______|    |
1560                      *              |            |
1561                      *              |            |
1562                      *              |____________|
1563                      *
1564                      *****************************************************************/
1565 
1566                     /*In case of slices*/
1567                     {
1568                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1569                         {
1570 
1571                             ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1572                             ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1573 
1574                             ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1575                             ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1576 
1577                             ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1578                             ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1579 
1580                             ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1581                             ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1582 
1583                             ctbx_t = ps_sao_ctxt->i4_ctb_x;
1584                             ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1585 
1586                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1587                             {
1588                                 /*Calculate neighbor ctb slice indices*/
1589                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1590                                 {
1591                                     au4_idx_t[0] = -1;
1592                                     au4_idx_t[6] = -1;
1593                                     au4_idx_t[4] = -1;
1594                                 }
1595                                 else
1596                                 {
1597                                     au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1598                                     au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1599                                 }
1600                                 idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1601                                 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1602                                 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1603                                 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1604 
1605                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1606                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1607                                 {
1608                                     au4_ilf_across_tile_slice_enable[4] = 0;
1609                                     au4_ilf_across_tile_slice_enable[6] = 0;
1610                                     au4_ilf_across_tile_slice_enable[0] = 0;
1611                                 }
1612                                 else
1613                                 {
1614                                     au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1615                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1616                                 }
1617 
1618 
1619 
1620                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1621                                 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1622                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1623                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1624                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1625 
1626                                 if(au4_idx_t[6] < idx_t)
1627                                 {
1628                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1629                                 }
1630 
1631                                 /*
1632                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1633                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
1634                                  */
1635 
1636                                 for(i = 0; i < 8; i++)
1637                                 {
1638                                     /*Sets the edges that lie on the slice/tile boundary*/
1639                                     if(au4_idx_t[i] != idx_t)
1640                                     {
1641                                         au1_tile_slice_boundary[i] = 1;
1642                                         /*Check for slice flag at such boundaries*/
1643                                     }
1644                                     else
1645                                     {
1646                                         au4_ilf_across_tile_slice_enable[i] = 1;
1647                                     }
1648                                 }
1649                                 /*Reset indices*/
1650                                 for(i = 0; i < 8; i++)
1651                                 {
1652                                     au4_idx_t[i] = 0;
1653                                 }
1654                             }
1655 
1656                             if(ps_pps->i1_tiles_enabled_flag)
1657                             {
                                /* Calculate availability flags at tile boundary */
1659                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1660                                 {
1661                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1662                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1663                                     {
                                        /*Calculate neighbor ctb tile indices*/
1665                                         if(0 == ps_sao_ctxt->i4_ctb_x)
1666                                         {
1667                                             au4_idx_t[0] = -1;
1668                                             au4_idx_t[6] = -1;
1669                                             au4_idx_t[4] = -1;
1670                                         }
1671                                         else
1672                                         {
1673                                             au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1674                                             au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1675                                         }
1676                                         idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1677                                         au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1678                                         au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1679                                         au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1680 
1681                                         for(i = 0; i < 8; i++)
1682                                         {
1683                                             /*Sets the edges that lie on the tile boundary*/
1684                                             if(au4_idx_t[i] != idx_t)
1685                                             {
1686                                                 au1_tile_slice_boundary[i] |= 1;
1687                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1688                                             }
1689                                         }
1690                                     }
1691                                 }
1692                             }
1693 
1694                             for(i = 0; i < 8; i++)
1695                             {
1696                                 /*Sets the edges that lie on the slice/tile boundary*/
1697                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1698                                 {
1699                                     au1_avail_luma[i] = 0;
1700                                 }
1701                             }
1702                         }
1703                     }
1704 
1705 
1706                     if(0 == ps_sao_ctxt->i4_ctb_x)
1707                     {
1708                         au1_avail_luma[0] = 0;
1709                         au1_avail_luma[4] = 0;
1710                         au1_avail_luma[6] = 0;
1711                     }
1712 
1713                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1714                     {
1715                         au1_avail_luma[1] = 0;
1716                         au1_avail_luma[5] = 0;
1717                         au1_avail_luma[7] = 0;
1718                     }
1719 
1720                     if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1721                     {
1722                         au1_avail_luma[2] = 0;
1723                         au1_avail_luma[4] = 0;
1724                         au1_avail_luma[5] = 0;
1725                     }
1726 
1727                     if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1728                     {
1729                         au1_avail_luma[3] = 0;
1730                         au1_avail_luma[6] = 0;
1731                         au1_avail_luma[7] = 0;
1732                     }
1733 
1734                     {
1735                         au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1736                         u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1737                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1738                                                                           src_strd,
1739                                                                           pu1_src_left_luma,
1740                                                                           pu1_src_top_luma,
1741                                                                           pu1_sao_src_luma_top_left_ctb,
1742                                                                           au1_src_top_right,
1743                                                                           &u1_sao_src_top_left_luma_bot_left,
1744                                                                           au1_avail_luma,
1745                                                                           ai1_offset_y,
1746                                                                           sao_wd_luma,
1747                                                                           sao_ht_luma);
1748                     }
1749                 }
1750             }
1751             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1752             {
1753                 /* Update left, top and top-left */
1754                 for(row = 0; row < sao_ht_luma; row++)
1755                 {
1756                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1757                 }
1758                 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1759 
1760                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1761             }
1762         }
1763 
1764         if(0 != sao_wd_chroma)
1765         {
1766             if(ps_slice_hdr_top->i1_slice_sao_chroma_flag)
1767             {
1768                 if(0 == ps_sao->b3_cb_type_idx)
1769                 {
1770 
1771                     for(row = 0; row < sao_ht_chroma; row++)
1772                     {
1773                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1774                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1775                     }
1776                     pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1777                     pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1778 
1779                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1780 
1781                 }
1782 
1783                 else if(1 == ps_sao->b3_cb_type_idx)
1784                 {
1785                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1786                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1787                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1788                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1789 
1790                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1791                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1792                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1793                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1794 
1795                     if(chroma_yuv420sp_vu)
1796                     {
1797                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1798                                                                                     src_strd,
1799                                                                                     pu1_src_left_chroma,
1800                                                                                     pu1_src_top_chroma,
1801                                                                                     pu1_sao_src_chroma_top_left_ctb,
1802                                                                                     ps_sao->b5_cr_band_pos,
1803                                                                                     ps_sao->b5_cb_band_pos,
1804                                                                                     ai1_offset_cr,
1805                                                                                     ai1_offset_cb,
1806                                                                                     sao_wd_chroma,
1807                                                                                     sao_ht_chroma
1808                                                                                    );
1809                     }
1810                     else
1811                     {
1812                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1813                                                                                     src_strd,
1814                                                                                     pu1_src_left_chroma,
1815                                                                                     pu1_src_top_chroma,
1816                                                                                     pu1_sao_src_chroma_top_left_ctb,
1817                                                                                     ps_sao->b5_cb_band_pos,
1818                                                                                     ps_sao->b5_cr_band_pos,
1819                                                                                     ai1_offset_cb,
1820                                                                                     ai1_offset_cr,
1821                                                                                     sao_wd_chroma,
1822                                                                                     sao_ht_chroma
1823                                                                                    );
1824                     }
1825                 }
1826                 else // if(2 <= ps_sao->b3_cb_type_idx)
1827                 {
1828                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1829                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1830                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1831                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1832 
1833                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1834                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1835                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1836                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1837 
1838                     for(i = 0; i < 8; i++)
1839                     {
1840                         au1_avail_chroma[i] = 255;
1841                         au1_tile_slice_boundary[i] = 0;
1842                         au4_idx_t[i] = 0;
1843                         au4_ilf_across_tile_slice_enable[i] = 1;
1844                     }
1845 
1846                     {
1847                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1848                         {
1849                             ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1850                             ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1851 
1852                             ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1853                             ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1854 
1855                             ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1856                             ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1857 
1858                             ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1859                             ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1860 
1861                             ctbx_t = ps_sao_ctxt->i4_ctb_x;
1862                             ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1863 
1864                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1865                             {
1866                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1867                                 {
1868                                     au4_idx_t[0] = -1;
1869                                     au4_idx_t[6] = -1;
1870                                     au4_idx_t[4] = -1;
1871                                 }
1872                                 else
1873                                 {
1874                                     au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1875                                     au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1876                                 }
1877                                 idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1878                                 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1879                                 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1880                                 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1881 
1882                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1883 
1884                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1885                                 {
1886                                     au4_ilf_across_tile_slice_enable[4] = 0;
1887                                     au4_ilf_across_tile_slice_enable[6] = 0;
1888                                     au4_ilf_across_tile_slice_enable[0] = 0;
1889                                 }
1890                                 else
1891                                 {
1892                                     au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1893                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1894                                 }
1895 
1896                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1897                                 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1898                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1899                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1900                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1901 
1902                                 if(idx_t > au4_idx_t[6])
1903                                 {
1904                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1905                                 }
1906 
1907                                 /*
1908                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1909                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
1910                                  */
1911                                 for(i = 0; i < 8; i++)
1912                                 {
1913                                     /*Sets the edges that lie on the slice/tile boundary*/
1914                                     if(au4_idx_t[i] != idx_t)
1915                                     {
1916                                         au1_tile_slice_boundary[i] = 1;
1917                                     }
1918                                     else
1919                                     {
1920                                         /*Indicates that the neighbour belongs to same/dependent slice*/
1921                                         au4_ilf_across_tile_slice_enable[i] = 1;
1922                                     }
1923                                 }
1924                                 /*Reset indices*/
1925                                 for(i = 0; i < 8; i++)
1926                                 {
1927                                     au4_idx_t[i] = 0;
1928                                 }
1929                             }
1930                             if(ps_pps->i1_tiles_enabled_flag)
1931                             {
                                /* Calculate availability flags at tile boundary */
1933                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1934                                 {
1935                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1936                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1937                                     {
                                        /*Calculate neighbor ctb tile indices*/
1939                                         if(0 == ps_sao_ctxt->i4_ctb_x)
1940                                         {
1941                                             au4_idx_t[0] = -1;
1942                                             au4_idx_t[6] = -1;
1943                                             au4_idx_t[4] = -1;
1944                                         }
1945                                         else
1946                                         {
1947                                             au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1948                                             au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1949                                         }
1950                                         idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1951                                         au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1952                                         au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1953                                         au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1954 
1955                                         for(i = 0; i < 8; i++)
1956                                         {
1957                                             /*Sets the edges that lie on the tile boundary*/
1958                                             if(au4_idx_t[i] != idx_t)
1959                                             {
1960                                                 au1_tile_slice_boundary[i] |= 1;
1961                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1962                                             }
1963                                         }
1964                                     }
1965                                 }
1966                             }
1967                             for(i = 0; i < 8; i++)
1968                             {
1969                                 /*Sets the edges that lie on the slice/tile boundary*/
1970                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1971                                 {
1972                                     au1_avail_chroma[i] = 0;
1973                                 }
1974                             }
1975 
1976                         }
1977                     }
1978                     if(0 == ps_sao_ctxt->i4_ctb_x)
1979                     {
1980                         au1_avail_chroma[0] = 0;
1981                         au1_avail_chroma[4] = 0;
1982                         au1_avail_chroma[6] = 0;
1983                     }
1984 
1985                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
1986                     {
1987                         au1_avail_chroma[1] = 0;
1988                         au1_avail_chroma[5] = 0;
1989                         au1_avail_chroma[7] = 0;
1990                     }
1991 
1992                     if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1993                     {
1994                         au1_avail_chroma[2] = 0;
1995                         au1_avail_chroma[4] = 0;
1996                         au1_avail_chroma[5] = 0;
1997                     }
1998 
1999                     if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
2000                     {
2001                         au1_avail_chroma[3] = 0;
2002                         au1_avail_chroma[6] = 0;
2003                         au1_avail_chroma[7] = 0;
2004                     }
2005 
2006                     {
2007                         au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
2008                         au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
2009                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2010                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2011 
2012                         if(chroma_yuv420sp_vu)
2013                         {
2014                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2015                                                                                  src_strd,
2016                                                                                  pu1_src_left_chroma,
2017                                                                                  pu1_src_top_chroma,
2018                                                                                  pu1_sao_src_chroma_top_left_ctb,
2019                                                                                  au1_src_top_right,
2020                                                                                  au1_sao_src_top_left_chroma_bot_left,
2021                                                                                  au1_avail_chroma,
2022                                                                                  ai1_offset_cr,
2023                                                                                  ai1_offset_cb,
2024                                                                                  sao_wd_chroma,
2025                                                                                  sao_ht_chroma);
2026                         }
2027                         else
2028                         {
2029                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2030                                                                                  src_strd,
2031                                                                                  pu1_src_left_chroma,
2032                                                                                  pu1_src_top_chroma,
2033                                                                                  pu1_sao_src_chroma_top_left_ctb,
2034                                                                                  au1_src_top_right,
2035                                                                                  au1_sao_src_top_left_chroma_bot_left,
2036                                                                                  au1_avail_chroma,
2037                                                                                  ai1_offset_cb,
2038                                                                                  ai1_offset_cr,
2039                                                                                  sao_wd_chroma,
2040                                                                                  sao_ht_chroma);
2041                         }
2042                     }
2043 
2044                 }
2045             }
2046             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2047             {
2048                 for(row = 0; row < sao_ht_chroma; row++)
2049                 {
2050                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2051                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2052                 }
2053                 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2054                 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2055 
2056                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2057             }
2058         }
2059 
2060         pu1_src_luma += sao_ht_luma * src_strd;
2061         pu1_src_chroma += sao_ht_chroma * src_strd;
2062         ps_sao += (ps_sps->i2_pic_wd_in_ctb);
2063     }
2064 
2065     /* Left CTB */
2066     if(ps_sao_ctxt->i4_ctb_x > 0)
2067     {
2068         WORD32 sao_wd_luma = SAO_SHIFT_CTB;
2069         WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
2070         WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2071         WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2072 
2073         WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
2074         WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
2075         WORD32 au4_idx_l[8], idx_l;
2076 
2077         WORD32 remaining_rows;
2078         slice_header_t *ps_slice_hdr_left;
2079         {
2080             WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb +
2081                                         (ps_sao_ctxt->i4_ctb_x - 1);
2082             ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx];
2083         }
2084 
2085         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2086         if(remaining_rows <= SAO_SHIFT_CTB)
2087         {
2088             sao_ht_luma += remaining_rows;
2089         }
2090         remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2091         if(remaining_rows <= SAO_SHIFT_CTB)
2092         {
2093             sao_ht_chroma += remaining_rows;
2094         }
2095 
2096         pu1_src_luma -= sao_wd_luma;
2097         pu1_src_chroma -= sao_wd_chroma;
2098         ps_sao -= 1;
2099         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
2100         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
2101         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2102         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2103 
2104 
2105         if(0 != sao_ht_luma)
2106         {
2107             if(ps_slice_hdr_left->i1_slice_sao_luma_flag)
2108             {
2109                 if(0 == ps_sao->b3_y_type_idx)
2110                 {
2111                     /* Update left, top and top-left */
2112                     for(row = 0; row < sao_ht_luma; row++)
2113                     {
2114                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2115                     }
2116                     /*Update in next location*/
2117                     pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2118 
2119                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2120 
2121                 }
2122 
2123                 else if(1 == ps_sao->b3_y_type_idx)
2124                 {
2125                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2126                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2127                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2128                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2129 
2130                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2131                                                                               src_strd,
2132                                                                               pu1_src_left_luma,
2133                                                                               pu1_src_top_luma,
2134                                                                               pu1_sao_src_top_left_luma_curr_ctb,
2135                                                                               ps_sao->b5_y_band_pos,
2136                                                                               ai1_offset_y,
2137                                                                               sao_wd_luma,
2138                                                                               sao_ht_luma
2139                                                                              );
2140                 }
2141 
2142                 else // if(2 <= ps_sao->b3_y_type_idx)
2143                 {
2144                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2145                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2146                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2147                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2148 
2149                     for(i = 0; i < 8; i++)
2150                     {
2151                         au1_avail_luma[i] = 255;
2152                         au1_tile_slice_boundary[i] = 0;
2153                         au4_idx_l[i] = 0;
2154                         au4_ilf_across_tile_slice_enable[i] = 1;
2155                     }
2156                     /******************************************************************
                     * Derive the Left CTB's neighbour pixel's slice indices.
2158                      *
2159                      *
2160                      *          ____________
2161                      *         |    |       |
2162                      *         | L_T|       |
2163                      *         |____|_______|____
2164                      *         |    |       |    |
2165                      *     L_L |  L |  L_R  |    |
2166                      *         |____|_______|    |
2167                      *              |            |
2168                      *          L_D |            |
2169                      *              |____________|
2170                      *
2171                      *****************************************************************/
2172 
2173                     /*In case of slices or tiles*/
2174                     {
2175                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2176                         {
2177                             ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2178                             ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2179 
2180                             ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2181                             ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2182 
2183                             ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2184                             ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2185 
2186                             ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2187                             ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2188 
2189                             ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2190                             ctby_l = ps_sao_ctxt->i4_ctb_y;
2191 
2192                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2193                             {
2194                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2195                                 {
2196                                     au4_idx_l[2] = -1;
2197                                     au4_idx_l[4] = -1;
2198                                     au4_idx_l[5] = -1;
2199                                 }
2200                                 else
2201                                 {
2202                                     au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2203                                     au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2204                                 }
2205                                 idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2206                                 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2207                                 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2208                                 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2209 
2210                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
2211                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2212                                 {
2213                                     au4_ilf_across_tile_slice_enable[2] = 0;
2214                                     au4_ilf_across_tile_slice_enable[4] = 0;
2215                                     au4_ilf_across_tile_slice_enable[5] = 0;
2216                                 }
2217                                 else
2218                                 {
2219                                     au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2220                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2221 
2222                                 }
2223                                 //TODO: ILF flag checks for [0] and [6] is missing.
2224                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2225                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2226                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2227 
2228                                 if(idx_l < au4_idx_l[5])
2229                                 {
2230                                     au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2231                                 }
2232 
2233                                 /*
2234                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2235                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
2236                                  */
2237                                 for(i = 0; i < 8; i++)
2238                                 {
2239                                     /*Sets the edges that lie on the slice/tile boundary*/
2240                                     if(au4_idx_l[i] != idx_l)
2241                                     {
2242                                         au1_tile_slice_boundary[i] = 1;
2243                                     }
2244                                     else
2245                                     {
2246                                         au4_ilf_across_tile_slice_enable[i] = 1;
2247                                     }
2248                                 }
2249                                 /*Reset indices*/
2250                                 for(i = 0; i < 8; i++)
2251                                 {
2252                                     au4_idx_l[i] = 0;
2253                                 }
2254                             }
2255 
2256                             if(ps_pps->i1_tiles_enabled_flag)
2257                             {
2258                                 /* Calculate availability flags at tile boundary */
2259                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2260                                 {
2261                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2262                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2263                                     {
2264                                         if(0 == ps_sao_ctxt->i4_ctb_y)
2265                                         {
2266                                             au4_idx_l[2] = -1;
2267                                             au4_idx_l[4] = -1;
2268                                             au4_idx_l[5] = -1;
2269                                         }
2270                                         else
2271                                         {
2272                                             au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2273                                             au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2274                                         }
2275 
2276                                         idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2277                                         au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2278                                         au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2279                                         au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2280 
2281                                         for(i = 0; i < 8; i++)
2282                                         {
2283                                             /*Sets the edges that lie on the slice/tile boundary*/
2284                                             if(au4_idx_l[i] != idx_l)
2285                                             {
2286                                                 au1_tile_slice_boundary[i] |= 1;
2287                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
2288                                             }
2289                                         }
2290                                     }
2291                                 }
2292                             }
2293 
2294                             for(i = 0; i < 8; i++)
2295                             {
2296                                 /*Mark a neighbour unavailable when it lies across a slice/tile boundary and in-loop filtering across that boundary is disabled*/
2297                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2298                                 {
2299                                     au1_avail_luma[i] = 0;
2300                                 }
2301                             }
2302                         }
2303                     }
2304                     if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
2305                     {
2306                         au1_avail_luma[0] = 0;
2307                         au1_avail_luma[4] = 0;
2308                         au1_avail_luma[6] = 0;
2309                     }
2310                     if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2311                     {
2312                         au1_avail_luma[1] = 0;
2313                         au1_avail_luma[5] = 0;
2314                         au1_avail_luma[7] = 0;
2315                     }
2316 
2317                     if(0 == ps_sao_ctxt->i4_ctb_y)
2318                     {
2319                         au1_avail_luma[2] = 0;
2320                         au1_avail_luma[4] = 0;
2321                         au1_avail_luma[5] = 0;
2322                     }
2323 
2324                     if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2325                     {
2326                         au1_avail_luma[3] = 0;
2327                         au1_avail_luma[6] = 0;
2328                         au1_avail_luma[7] = 0;
2329                     }
2330 
2331                     {
2332                         au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
2333                         u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
2334                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2335                                                                           src_strd,
2336                                                                           pu1_src_left_luma,
2337                                                                           pu1_src_top_luma,
2338                                                                           pu1_sao_src_top_left_luma_curr_ctb,
2339                                                                           au1_src_top_right,
2340                                                                           &u1_sao_src_top_left_luma_bot_left,
2341                                                                           au1_avail_luma,
2342                                                                           ai1_offset_y,
2343                                                                           sao_wd_luma,
2344                                                                           sao_ht_luma);
2345                     }
2346 
2347                 }
2348             }
2349             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2350             {
2351                 /* Update left, top and top-left */
2352                 for(row = 0; row < sao_ht_luma; row++)
2353                 {
2354                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2355                 }
2356                 /*Update in next location*/
2357                 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2358 
2359                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2360             }
2361         }
2362 
2363         if(0 != sao_ht_chroma)
2364         {
2365             if(ps_slice_hdr_left->i1_slice_sao_chroma_flag)
2366             {
2367                 if(0 == ps_sao->b3_cb_type_idx)
2368                 {
2369                     for(row = 0; row < sao_ht_chroma; row++)
2370                     {
2371                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2372                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2373                     }
2374                     pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2375                     pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2376 
2377                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2378                 }
2379 
2380                 else if(1 == ps_sao->b3_cb_type_idx)
2381                 {
2382                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2383                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2384                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2385                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2386 
2387                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2388                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2389                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2390                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2391 
2392                     if(chroma_yuv420sp_vu)
2393                     {
2394                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2395                                                                                     src_strd,
2396                                                                                     pu1_src_left_chroma,
2397                                                                                     pu1_src_top_chroma,
2398                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
2399                                                                                     ps_sao->b5_cr_band_pos,
2400                                                                                     ps_sao->b5_cb_band_pos,
2401                                                                                     ai1_offset_cr,
2402                                                                                     ai1_offset_cb,
2403                                                                                     sao_wd_chroma,
2404                                                                                     sao_ht_chroma
2405                                                                                    );
2406                     }
2407                     else
2408                     {
2409                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2410                                                                                     src_strd,
2411                                                                                     pu1_src_left_chroma,
2412                                                                                     pu1_src_top_chroma,
2413                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
2414                                                                                     ps_sao->b5_cb_band_pos,
2415                                                                                     ps_sao->b5_cr_band_pos,
2416                                                                                     ai1_offset_cb,
2417                                                                                     ai1_offset_cr,
2418                                                                                     sao_wd_chroma,
2419                                                                                     sao_ht_chroma
2420                                                                                    );
2421                     }
2422                 }
2423 
2424                 else // if(2 <= ps_sao->b3_cb_type_idx)
2425                 {
2426                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2427                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2428                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2429                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2430 
2431                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2432                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2433                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2434                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2435 
2436                     for(i = 0; i < 8; i++)
2437                     {
2438                         au1_avail_chroma[i] = 255;
2439                         au1_tile_slice_boundary[i] = 0;
2440                         au4_idx_l[i] = 0;
2441                         au4_ilf_across_tile_slice_enable[i] = 1;
2442                     }
2443                     /*In case of slices or tiles*/
2444                     {
2445                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2446                         {
2447                             ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2448                             ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2449 
2450                             ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2451                             ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2452 
2453                             ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2454                             ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2455 
2456                             ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2457                             ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2458 
2459                             ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2460                             ctby_l = ps_sao_ctxt->i4_ctb_y;
2461 
2462                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2463                             {
2464                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2465                                 {
2466                                     au4_idx_l[2] = -1;
2467                                     au4_idx_l[4] = -1;
2468                                     au4_idx_l[5] = -1;
2469                                 }
2470                                 else
2471                                 {
2472                                     au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2473                                     au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2474                                 }
2475                                 idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2476                                 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2477                                 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2478                                 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2479 
2480                                 /*Verify that the neighbour ctbs don't cross pic boundary.*/
2481                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2482                                 {
2483                                     au4_ilf_across_tile_slice_enable[2] = 0;
2484                                     au4_ilf_across_tile_slice_enable[4] = 0;
2485                                     au4_ilf_across_tile_slice_enable[5] = 0;
2486                                 }
2487                                 else
2488                                 {
2489                                     au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2490                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2491                                 }
2492 
2493                                 if(au4_idx_l[5] > idx_l)
2494                                 {
2495                                     au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2496                                 }
2497 
2498                                 //  au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2499                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2500                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2501                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2502                                 /*
2503                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2504                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
2505                                  */
2506                                 for(i = 0; i < 8; i++)
2507                                 {
2508                                     /*Sets the edges that lie on the slice/tile boundary*/
2509                                     if(au4_idx_l[i] != idx_l)
2510                                     {
2511                                         au1_tile_slice_boundary[i] = 1;
2512                                     }
2513                                     else
2514                                     {
2515                                         au4_ilf_across_tile_slice_enable[i] = 1;
2516                                     }
2517                                 }
2518                                 /*Reset indices*/
2519                                 for(i = 0; i < 8; i++)
2520                                 {
2521                                     au4_idx_l[i] = 0;
2522                                 }
2523                             }
2524                             if(ps_pps->i1_tiles_enabled_flag)
2525                             {
2526                                 /* Calculate availability flags at tile boundary */
2527                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2528                                 {
2529                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2530                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2531                                     {
2532                                         if(0 == ps_sao_ctxt->i4_ctb_y)
2533                                         {
2534                                             au4_idx_l[2] = -1;
2535                                             au4_idx_l[4] = -1;
2536                                             au4_idx_l[5] = -1;
2537                                         }
2538                                         else
2539                                         {
2540                                             au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2541                                             au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2542                                         }
2543 
2544                                         idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2545                                         au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2546                                         au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2547                                         au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2548 
2549                                         for(i = 0; i < 8; i++)
2550                                         {
2551                                             /*Sets the edges that lie on the slice/tile boundary*/
2552                                             if(au4_idx_l[i] != idx_l)
2553                                             {
2554                                                 au1_tile_slice_boundary[i] |= 1;
2555                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2556                                             }
2557                                         }
2558                                     }
2559                                 }
2560                             }
2561                             for(i = 0; i < 8; i++)
2562                             {
2563                                 /*Mark a neighbour unavailable when it lies across a slice/tile boundary and in-loop filtering across that boundary is disabled*/
2564                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2565                                 {
2566                                     au1_avail_chroma[i] = 0;
2567                                 }
2568                             }
2569                         }
2570                     }
2571                     if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
2572                     {
2573                         au1_avail_chroma[0] = 0;
2574                         au1_avail_chroma[4] = 0;
2575                         au1_avail_chroma[6] = 0;
2576                     }
2577 
2578                     if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2579                     {
2580                         au1_avail_chroma[1] = 0;
2581                         au1_avail_chroma[5] = 0;
2582                         au1_avail_chroma[7] = 0;
2583                     }
2584 
2585                     if(0 == ps_sao_ctxt->i4_ctb_y)
2586                     {
2587                         au1_avail_chroma[2] = 0;
2588                         au1_avail_chroma[4] = 0;
2589                         au1_avail_chroma[5] = 0;
2590                     }
2591 
2592                     if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
2593                     {
2594                         au1_avail_chroma[3] = 0;
2595                         au1_avail_chroma[6] = 0;
2596                         au1_avail_chroma[7] = 0;
2597                     }
2598 
2599                     {
2600                         au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2601                         au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2602                         au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2603                         au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2604                         //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2605                         //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2606                         if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2607                         {
2608                             au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
2609                             au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
2610                         }
2611 
2612 
2613                         if(chroma_yuv420sp_vu)
2614                         {
2615                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2616                                                                                  src_strd,
2617                                                                                  pu1_src_left_chroma,
2618                                                                                  pu1_src_top_chroma,
2619                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
2620                                                                                  au1_src_top_right,
2621                                                                                  au1_src_bot_left,
2622                                                                                  au1_avail_chroma,
2623                                                                                  ai1_offset_cr,
2624                                                                                  ai1_offset_cb,
2625                                                                                  sao_wd_chroma,
2626                                                                                  sao_ht_chroma);
2627                         }
2628                         else
2629                         {
2630                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2631                                                                                  src_strd,
2632                                                                                  pu1_src_left_chroma,
2633                                                                                  pu1_src_top_chroma,
2634                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
2635                                                                                  au1_src_top_right,
2636                                                                                  au1_src_bot_left,
2637                                                                                  au1_avail_chroma,
2638                                                                                  ai1_offset_cb,
2639                                                                                  ai1_offset_cr,
2640                                                                                  sao_wd_chroma,
2641                                                                                  sao_ht_chroma);
2642                         }
2643                     }
2644 
2645                 }
2646             }
2647             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2648             {
2649                 for(row = 0; row < sao_ht_chroma; row++)
2650                 {
2651                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2652                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2653                 }
2654                 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2655                 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2656 
2657                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2658             }
2659 
2660         }
2661         pu1_src_luma += sao_wd_luma;
2662         pu1_src_chroma += sao_wd_chroma;
2663         ps_sao += 1;
2664     }
2665 
2666 
2667     /* Current CTB */
2668     {
2669         WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2670         WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
2671         WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2672         WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2673         WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2674         WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2675         WORD32 au4_idx_c[8], idx_c;
2676 
2677         WORD32 remaining_rows;
2678         WORD32 remaining_cols;
2679 
2680         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2681         if(remaining_cols <= SAO_SHIFT_CTB)
2682         {
2683             sao_wd_luma += remaining_cols;
2684         }
2685         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
2686         if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2687         {
2688             sao_wd_chroma += remaining_cols;
2689         }
2690 
2691         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2692         if(remaining_rows <= SAO_SHIFT_CTB)
2693         {
2694             sao_ht_luma += remaining_rows;
2695         }
2696         remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2697         if(remaining_rows <= SAO_SHIFT_CTB)
2698         {
2699             sao_ht_chroma += remaining_rows;
2700         }
2701 
2702         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2703         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2704         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2705         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2706 
2707         if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2708         {
2709             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2710             {
2711                 if(0 == ps_sao->b3_y_type_idx)
2712                 {
2713                     /* Update left, top and top-left */
2714                     for(row = 0; row < sao_ht_luma; row++)
2715                     {
2716                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2717                     }
2718                     pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2719 
2720                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2721 
2722                     pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2723 
2724                 }
2725 
2726                 else if(1 == ps_sao->b3_y_type_idx)
2727                 {
2728                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2729                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2730                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2731                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2732 
2733                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2734                                                                               src_strd,
2735                                                                               pu1_src_left_luma,
2736                                                                               pu1_src_top_luma,
2737                                                                               pu1_sao_src_top_left_luma_curr_ctb,
2738                                                                               ps_sao->b5_y_band_pos,
2739                                                                               ai1_offset_y,
2740                                                                               sao_wd_luma,
2741                                                                               sao_ht_luma
2742                                                                              );
2743                 }
2744 
2745                 else // if(2 <= ps_sao->b3_y_type_idx)
2746                 {
2747                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2748                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2749                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2750                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2751 
2752                     for(i = 0; i < 8; i++)
2753                     {
2754                         au1_avail_luma[i] = 255;
2755                         au1_tile_slice_boundary[i] = 0;
2756                         au4_idx_c[i] = 0;
2757                         au4_ilf_across_tile_slice_enable[i] = 1;
2758                     }
2759                     /******************************************************************
2760                      * Derive the current CTB's neighbour pixels' slice indices.
2761                      *
2762                      *
2763                      *          ____________
2764                      *         |    |       |
2765                      *         |    | C_T   |
2766                      *         |____|_______|____
2767                      *         |    |       |    |
2768                      *         | C_L|   C   | C_R|
2769                      *         |____|_______|    |
2770                      *              |  C_D       |
2771                      *              |            |
2772                      *              |____________|
2773                      *
2774                      *****************************************************************/
2775 
2776                     /*In case of slices*/
2777                     {
2778                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2779                         {
2780                             ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2781                             ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2782 
2783                             ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2784                             ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2785 
2786                             ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2787                             ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2788 
2789                             ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
2790                             ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
2791 
2792                             ctbx_c = ps_sao_ctxt->i4_ctb_x;
2793                             ctby_c = ps_sao_ctxt->i4_ctb_y;
2794 
2795                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2796                             {
2797                                 if(0 == ps_sao_ctxt->i4_ctb_x)
2798                                 {
2799                                     au4_idx_c[6] = -1;
2800                                     au4_idx_c[0] = -1;
2801                                     au4_idx_c[4] = -1;
2802                                 }
2803                                 else
2804                                 {
2805                                     au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2806                                 }
2807 
2808                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2809                                 {
2810                                     au4_idx_c[2] = -1;
2811                                     au4_idx_c[5] = -1;
2812                                     au4_idx_c[4] = -1;
2813                                 }
2814                                 else
2815                                 {
2816                                     au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2817                                     au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2818                                 }
2819                                 idx_c   = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2820                                 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2821                                 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2822 
2823                                 if(0 == ps_sao_ctxt->i4_ctb_x)
2824                                 {
2825                                     au4_ilf_across_tile_slice_enable[6] = 0;
2826                                     au4_ilf_across_tile_slice_enable[0] = 0;
2827                                     au4_ilf_across_tile_slice_enable[4] = 0;
2828                                 }
2829                                 else
2830                                 {
2831                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2832                                     au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2833                                 }
2834                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2835                                 {
2836                                     au4_ilf_across_tile_slice_enable[2] = 0;
2837                                     au4_ilf_across_tile_slice_enable[4] = 0;
2838                                     au4_ilf_across_tile_slice_enable[5] = 0;
2839                                 }
2840                                 else
2841                                 {
2842                                     au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2843                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2844                                 }
2845                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2846                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2847                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2848 
2849                                 if(au4_idx_c[6] < idx_c)
2850                                 {
2851                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2852                                 }
2853 
2854                                 /*
2855                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2856                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
2857                                  */
2858                                 for(i = 0; i < 8; i++)
2859                                 {
2860                                     /*Sets the edges that lie on the slice/tile boundary*/
2861                                     if(au4_idx_c[i] != idx_c)
2862                                     {
2863                                         au1_tile_slice_boundary[i] = 1;
2864                                     }
2865                                     else
2866                                     {
2867                                         au4_ilf_across_tile_slice_enable[i] = 1;
2868                                     }
2869                                 }
2870                                 /*Reset indices*/
2871                                 for(i = 0; i < 8; i++)
2872                                 {
2873                                     au4_idx_c[i] = 0;
2874                                 }
2875                             }
2876 
2877                             if(ps_pps->i1_tiles_enabled_flag)
2878                             {
2879                                 /* Calculate availability flags at tile boundary */
2880                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2881                                 {
2882                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2883                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2884                                     {
2885                                         if(0 == ps_sao_ctxt->i4_ctb_x)
2886                                         {
2887                                             au4_idx_c[6] = -1;
2888                                             au4_idx_c[0] = -1;
2889                                             au4_idx_c[4] = -1;
2890                                         }
2891                                         else
2892                                         {
2893                                             au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2894                                         }
2895 
2896                                         if(0 == ps_sao_ctxt->i4_ctb_y)
2897                                         {
2898                                             au4_idx_c[2] = -1;
2899                                             au4_idx_c[5] = -1;
2900                                             au4_idx_c[4] = -1;
2901                                         }
2902                                         else
2903                                         {
2904                                             au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2905                                             au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2906                                         }
2907                                         idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2908                                         au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2909                                         au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2910 
2911                                         for(i = 0; i < 8; i++)
2912                                         {
2913                                             /*Sets the edges that lie on the slice/tile boundary*/
2914                                             if(au4_idx_c[i] != idx_c)
2915                                             {
2916                                                 au1_tile_slice_boundary[i] |= 1;
2917                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2918                                             }
2919                                         }
2920                                     }
2921                                 }
2922                             }
2923 
2924                             for(i = 0; i < 8; i++)
2925                             {
2926                                 /*Sets the edges that lie on the slice/tile boundary*/
2927                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2928                                 {
2929                                     au1_avail_luma[i] = 0;
2930                                 }
2931                             }
2932 
2933                         }
2934                     }
2935                     if(0 == ps_sao_ctxt->i4_ctb_x)
2936                     {
2937                         au1_avail_luma[0] = 0;
2938                         au1_avail_luma[4] = 0;
2939                         au1_avail_luma[6] = 0;
2940                     }
2941 
2942                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2943                     {
2944                         au1_avail_luma[1] = 0;
2945                         au1_avail_luma[5] = 0;
2946                         au1_avail_luma[7] = 0;
2947                     }
2948 
2949                     if(0 == ps_sao_ctxt->i4_ctb_y)
2950                     {
2951                         au1_avail_luma[2] = 0;
2952                         au1_avail_luma[4] = 0;
2953                         au1_avail_luma[5] = 0;
2954                     }
2955 
2956                     if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2957                     {
2958                         au1_avail_luma[3] = 0;
2959                         au1_avail_luma[6] = 0;
2960                         au1_avail_luma[7] = 0;
2961                     }
2962 
2963                     {
2964                         au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2965                         u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2966 
2967                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2968                                                                           src_strd,
2969                                                                           pu1_src_left_luma,
2970                                                                           pu1_src_top_luma,
2971                                                                           pu1_sao_src_top_left_luma_curr_ctb,
2972                                                                           au1_src_top_right,
2973                                                                           &u1_sao_src_top_left_luma_bot_left,
2974                                                                           au1_avail_luma,
2975                                                                           ai1_offset_y,
2976                                                                           sao_wd_luma,
2977                                                                           sao_ht_luma);
2978                     }
2979                     pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2980                     pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2981                 }
2982             }
2983             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2984             {
2985                 /* Update left, top and top-left */
2986                 for(row = 0; row < sao_ht_luma; row++)
2987                 {
2988                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2989                 }
2990                 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2991 
2992                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2993 
2994                 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2995             }
2996         }
2997 
2998         if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
2999         {
3000             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
3001             {
3002                 if(0 == ps_sao->b3_cb_type_idx)
3003                 {
3004                     for(row = 0; row < sao_ht_chroma; row++)
3005                     {
3006                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3007                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3008                     }
3009                     pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3010                     pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3011 
3012                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3013 
3014                     pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3015                     pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3016                 }
3017 
3018                 else if(1 == ps_sao->b3_cb_type_idx)
3019                 {
3020                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3021                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3022                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3023                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3024 
3025                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3026                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3027                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3028                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3029 
3030                     if(chroma_yuv420sp_vu)
3031                     {
3032                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3033                                                                                     src_strd,
3034                                                                                     pu1_src_left_chroma,
3035                                                                                     pu1_src_top_chroma,
3036                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
3037                                                                                     ps_sao->b5_cr_band_pos,
3038                                                                                     ps_sao->b5_cb_band_pos,
3039                                                                                     ai1_offset_cr,
3040                                                                                     ai1_offset_cb,
3041                                                                                     sao_wd_chroma,
3042                                                                                     sao_ht_chroma
3043                                                                                    );
3044                     }
3045                     else
3046                     {
3047                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3048                                                                                     src_strd,
3049                                                                                     pu1_src_left_chroma,
3050                                                                                     pu1_src_top_chroma,
3051                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
3052                                                                                     ps_sao->b5_cb_band_pos,
3053                                                                                     ps_sao->b5_cr_band_pos,
3054                                                                                     ai1_offset_cb,
3055                                                                                     ai1_offset_cr,
3056                                                                                     sao_wd_chroma,
3057                                                                                     sao_ht_chroma
3058                                                                                    );
3059                     }
3060                 }
3061 
3062                 else // if(2 <= ps_sao->b3_cb_type_idx)
3063                 {
3064                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3065                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3066                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3067                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3068 
3069                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3070                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3071                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3072                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3073 
3074                     for(i = 0; i < 8; i++)
3075                     {
3076                         au1_avail_chroma[i] = 255;
3077                         au1_tile_slice_boundary[i] = 0;
3078                         au4_idx_c[i] = 0;
3079                         au4_ilf_across_tile_slice_enable[i] = 1;
3080                     }
3081                     {
3082                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3083                         {
3084                             ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
3085                             ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
3086 
3087                             ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
3088                             ctby_c_l = ps_sao_ctxt->i4_ctb_y;
3089 
3090                             ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
3091                             ctby_c_r = ps_sao_ctxt->i4_ctb_y;
3092 
3093                             ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
3094                             ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
3095 
3096                             ctbx_c = ps_sao_ctxt->i4_ctb_x;
3097                             ctby_c = ps_sao_ctxt->i4_ctb_y;
3098 
3099                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
3100                             {
3101                                 if(0 == ps_sao_ctxt->i4_ctb_x)
3102                                 {
3103                                     au4_idx_c[0] = -1;
3104                                     au4_idx_c[4] = -1;
3105                                     au4_idx_c[6] = -1;
3106                                 }
3107                                 else
3108                                 {
3109                                     au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3110                                 }
3111 
3112                                 if(0 == ps_sao_ctxt->i4_ctb_y)
3113                                 {
3114                                     au4_idx_c[2] = -1;
3115                                     au4_idx_c[4] = -1;
3116                                     au4_idx_c[5] = -1;
3117                                 }
3118                                 else
3119                                 {
3120                                     au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3121                                     au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3122                                 }
3123                                 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3124                                 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3125                                 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3126 
3127                                 if(0 == ps_sao_ctxt->i4_ctb_x)
3128                                 {
3129                                     au4_ilf_across_tile_slice_enable[0] = 0;
3130                                     au4_ilf_across_tile_slice_enable[4] = 0;
3131                                     au4_ilf_across_tile_slice_enable[6] = 0;
3132                                 }
3133                                 else
3134                                 {
3135                                     au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
3136                                     au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3137                                 }
3138 
3139                                 if(0 == ps_sao_ctxt->i4_ctb_y)
3140                                 {
3141                                     au4_ilf_across_tile_slice_enable[2] = 0;
3142                                     au4_ilf_across_tile_slice_enable[4] = 0;
3143                                     au4_ilf_across_tile_slice_enable[5] = 0;
3144                                 }
3145                                 else
3146                                 {
3147                                     au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3148                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
3149                                 }
3150 
3151                                 au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
3152                                 au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
3153                                 au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
3154 
3155                                 if(idx_c > au4_idx_c[6])
3156                                 {
3157                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3158                                 }
3159 
3160                                 /*
3161                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
3162                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
3163                                  */
3164                                 for(i = 0; i < 8; i++)
3165                                 {
3166                                     /*Sets the edges that lie on the slice/tile boundary*/
3167                                     if(au4_idx_c[i] != idx_c)
3168                                     {
3169                                         au1_tile_slice_boundary[i] = 1;
3170                                     }
3171                                     else
3172                                     {
3173                                         au4_ilf_across_tile_slice_enable[i] = 1;
3174                                     }
3175                                 }
3176                                 /*Reset indices*/
3177                                 for(i = 0; i < 8; i++)
3178                                 {
3179                                     au4_idx_c[i] = 0;
3180                                 }
3181                             }
3182 
3183                             if(ps_pps->i1_tiles_enabled_flag)
3184                             {
3185                                 /* Calculate availability flags at tile boundary */
3186                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
3187                                 {
3188                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
3189                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
3190                                     {
3191                                         if(0 == ps_sao_ctxt->i4_ctb_x)
3192                                         {
3193                                             au4_idx_c[6] = -1;
3194                                             au4_idx_c[0] = -1;
3195                                             au4_idx_c[4] = -1;
3196                                         }
3197                                         else
3198                                         {
3199                                             au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3200                                         }
3201 
3202                                         if(0 == ps_sao_ctxt->i4_ctb_y)
3203                                         {
3204                                             au4_idx_c[2] = -1;
3205                                             au4_idx_c[5] = -1;
3206                                             au4_idx_c[4] = -1;
3207                                         }
3208                                         else
3209                                         {
3210                                             au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3211                                             au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3212                                         }
3213                                         idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3214                                         au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3215                                         au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3216 
3217                                         for(i = 0; i < 8; i++)
3218                                         {
3219                                             /*Sets the edges that lie on the slice/tile boundary*/
3220                                             if(au4_idx_c[i] != idx_c)
3221                                             {
3222                                                 au1_tile_slice_boundary[i] |= 1;
3223                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
3224                                             }
3225                                         }
3226                                     }
3227                                 }
3228                             }
3229 
3230                             for(i = 0; i < 8; i++)
3231                             {
3232                                 /* Mark a neighbour as unavailable when it lies across a slice/tile boundary and filtering across that boundary is not enabled */
3233                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
3234                                 {
3235                                     au1_avail_chroma[i] = 0;
3236                                 }
3237                             }
3238                         }
3239                     }
3240 
3241                     if(0 == ps_sao_ctxt->i4_ctb_x)
3242                     {
3243                         au1_avail_chroma[0] = 0;
3244                         au1_avail_chroma[4] = 0;
3245                         au1_avail_chroma[6] = 0;
3246                     }
3247 
3248                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
3249                     {
3250                         au1_avail_chroma[1] = 0;
3251                         au1_avail_chroma[5] = 0;
3252                         au1_avail_chroma[7] = 0;
3253                     }
3254 
3255                     if(0 == ps_sao_ctxt->i4_ctb_y)
3256                     {
3257                         au1_avail_chroma[2] = 0;
3258                         au1_avail_chroma[4] = 0;
3259                         au1_avail_chroma[5] = 0;
3260                     }
3261 
3262                     if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
3263                     {
3264                         au1_avail_chroma[3] = 0;
3265                         au1_avail_chroma[6] = 0;
3266                         au1_avail_chroma[7] = 0;
3267                     }
3268 
3269                     {
3270                         au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
3271                         au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
3272 
3273                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
3274                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
3275 
3276                         if(chroma_yuv420sp_vu)
3277                         {
3278                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3279                                                                                  src_strd,
3280                                                                                  pu1_src_left_chroma,
3281                                                                                  pu1_src_top_chroma,
3282                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
3283                                                                                  au1_src_top_right,
3284                                                                                  au1_sao_src_top_left_chroma_bot_left,
3285                                                                                  au1_avail_chroma,
3286                                                                                  ai1_offset_cr,
3287                                                                                  ai1_offset_cb,
3288                                                                                  sao_wd_chroma,
3289                                                                                  sao_ht_chroma);
3290                         }
3291                         else
3292                         {
3293                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3294                                                                                  src_strd,
3295                                                                                  pu1_src_left_chroma,
3296                                                                                  pu1_src_top_chroma,
3297                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
3298                                                                                  au1_src_top_right,
3299                                                                                  au1_sao_src_top_left_chroma_bot_left,
3300                                                                                  au1_avail_chroma,
3301                                                                                  ai1_offset_cb,
3302                                                                                  ai1_offset_cr,
3303                                                                                  sao_wd_chroma,
3304                                                                                  sao_ht_chroma);
3305                         }
3306                     }
3307 
3308                 }
3309                 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3310                 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3311 
3312                 pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
3313                 pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
3314             }
3315             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3316             {
3317                 for(row = 0; row < sao_ht_chroma; row++)
3318                 {
3319                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3320                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3321                 }
3322                 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3323                 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3324 
3325                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3326 
3327                 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3328                 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3329             }
3330 
3331         }
3332     }
3333 
3334 
3335 
3336 
3337 /* For the 8-pixel units whose no-loop-filter flag bit is set, copy the backed-up values back */
3338     {
3339         /* Luma */
3340         if(no_loop_filter_enabled_luma)
3341         {
3342             UWORD32 u4_no_loop_filter_flag;
3343             WORD32 loop_filter_bit_pos;
3344             WORD32 log2_min_cu = 3;
3345             WORD32 min_cu = (1 << log2_min_cu);
3346             UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
3347             WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
3348             WORD32 sao_blk_wd = ctb_size;
3349             WORD32 remaining_rows;
3350             WORD32 remaining_cols;
3351 
3352             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3353             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3354             if(remaining_rows <= SAO_SHIFT_CTB)
3355                 sao_blk_ht += remaining_rows;
3356             if(remaining_cols <= SAO_SHIFT_CTB)
3357                 sao_blk_wd += remaining_cols;
3358 
3359             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
3360             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3361 
3362             pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
3363 
3364             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3365                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3366             if(ps_sao_ctxt->i4_ctb_x > 0)
3367                 loop_filter_bit_pos -= 1;
3368 
3369             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3370                             (loop_filter_bit_pos >> 3);
3371 
3372             for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
3373                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3374             {
3375                 WORD32 tmp_wd = sao_blk_wd;
3376 
3377                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3378                                 (loop_filter_bit_pos & 7);
3379                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3380 
3381                 if(u4_no_loop_filter_flag)
3382                 {
3383                     while(tmp_wd > 0)
3384                     {
3385                         if(CTZ(u4_no_loop_filter_flag))
3386                         {
3387                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3388                             pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3389                             tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3390                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3391                         }
3392                         else
3393                         {
3394                             for(row = 0; row < min_cu; row++)
3395                             {
3396                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3397                                 {
3398                                     pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
3399                                 }
3400                             }
3401                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3402                             pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3403                             tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3404                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3405                         }
3406                     }
3407 
3408                     pu1_src_tmp_luma -= sao_blk_wd;
3409                     pu1_src_backup_luma -= sao_blk_wd;
3410                 }
3411 
3412                 pu1_src_tmp_luma += (src_strd << log2_min_cu);
3413                 pu1_src_backup_luma += (backup_strd << log2_min_cu);
3414             }
3415         }
3416 
3417         /* Chroma */
3418         if(no_loop_filter_enabled_chroma)
3419         {
3420             UWORD32 u4_no_loop_filter_flag;
3421             WORD32 loop_filter_bit_pos;
3422             WORD32 log2_min_cu = 3;
3423             WORD32 min_cu = (1 << log2_min_cu);
3424             UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
3425             WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
3426             WORD32 sao_blk_wd = ctb_size;
3427             WORD32 remaining_rows;
3428             WORD32 remaining_cols;
3429 
3430             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3431             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3432             if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3433                 sao_blk_ht += remaining_rows;
3434             if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3435                 sao_blk_wd += remaining_cols;
3436 
3437             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3438             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3439 
3440             pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3441 
3442             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3443                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3444             if(ps_sao_ctxt->i4_ctb_x > 0)
3445                 loop_filter_bit_pos -= 2;
3446 
3447             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3448                             (loop_filter_bit_pos >> 3);
3449 
3450             for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3451                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3452             {
3453                 WORD32 tmp_wd = sao_blk_wd;
3454 
3455                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3456                                 (loop_filter_bit_pos & 7);
3457                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3458 
3459                 if(u4_no_loop_filter_flag)
3460                 {
3461                     while(tmp_wd > 0)
3462                     {
3463                         if(CTZ(u4_no_loop_filter_flag))
3464                         {
3465                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3466                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3467                             tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3468                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3469                         }
3470                         else
3471                         {
3472                             for(row = 0; row < min_cu / 2; row++)
3473                             {
3474                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3475                                 {
3476                                     pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
3477                                 }
3478                             }
3479 
3480                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3481                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3482                             tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3483                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3484                         }
3485                     }
3486 
3487                     pu1_src_tmp_chroma -= sao_blk_wd;
3488                     pu1_src_backup_chroma -= sao_blk_wd;
3489                 }
3490 
3491                 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
3492                 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
3493             }
3494         }
3495     }
3496 
3497 }
3498 
3499