1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 * ihevc_sao.c
22 *
23 * @brief
24 * Contains function definitions for sample adaptive offset process
25 *
26 * @author
27 * Srinivas T
28 *
29 * @par List of Functions:
30 *
31 * @remarks
32 * None
33 *
34 *******************************************************************************
35 */
36
37 #include <stdio.h>
38 #include <stddef.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <assert.h>
42
43 #include "ihevc_typedefs.h"
44 #include "iv.h"
45 #include "ivd.h"
46 #include "ihevcd_cxa.h"
47 #include "ithread.h"
48
49 #include "ihevc_defs.h"
50 #include "ihevc_debug.h"
51 #include "ihevc_defs.h"
52 #include "ihevc_structs.h"
53 #include "ihevc_macros.h"
54 #include "ihevc_platform_macros.h"
55 #include "ihevc_cabac_tables.h"
56 #include "ihevc_sao.h"
57 #include "ihevc_mem_fns.h"
58
59 #include "ihevc_error.h"
60 #include "ihevc_common_tables.h"
61
62 #include "ihevcd_trace.h"
63 #include "ihevcd_defs.h"
64 #include "ihevcd_function_selector.h"
65 #include "ihevcd_structs.h"
66 #include "ihevcd_error.h"
67 #include "ihevcd_nal.h"
68 #include "ihevcd_bitstream.h"
69 #include "ihevcd_job_queue.h"
70 #include "ihevcd_utils.h"
71
72 #include "ihevc_deblk.h"
73 #include "ihevc_deblk_tables.h"
74 #include "ihevcd_profile.h"
75 #include "ihevcd_sao.h"
76 #include "ihevcd_debug.h"
77
78 #define SAO_SHIFT_CTB 8
79
/**
 * SAO at CTB level is implemented for a shifted CTB (shifted by SAO_SHIFT_CTB,
 * i.e. 8 pixels, in both the x and y directions)
 */
ihevcd_sao_ctb(sao_ctxt_t * ps_sao_ctxt)83 void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
84 {
85 codec_t *ps_codec = ps_sao_ctxt->ps_codec;
86 UWORD8 *pu1_src_luma;
87 UWORD8 *pu1_src_chroma;
88 WORD32 src_strd;
89 WORD32 ctb_size;
90 WORD32 log2_ctb_size;
91 sps_t *ps_sps;
92 sao_t *ps_sao;
93 WORD32 row, col;
94 UWORD8 au1_avail_luma[8];
95 UWORD8 au1_avail_chroma[8];
96 WORD32 i;
97 UWORD8 *pu1_src_top_luma;
98 UWORD8 *pu1_src_top_chroma;
99 UWORD8 *pu1_src_left_luma;
100 UWORD8 *pu1_src_left_chroma;
101 UWORD8 au1_src_top_right[2];
102 UWORD8 au1_src_bot_left[2];
103 UWORD8 *pu1_no_loop_filter_flag;
104 WORD32 loop_filter_strd;
105
106 /* Only first 5 values are used, but arrays are large
107 enough so that SIMD functions can read 64 bits at a time */
108 WORD8 ai1_offset_y[8] = {0};
109 WORD8 ai1_offset_cb[8] = {0};
110 WORD8 ai1_offset_cr[8] = {0};
111
112 PROFILE_DISABLE_SAO();
113
114 ps_sps = ps_sao_ctxt->ps_sps;
115 log2_ctb_size = ps_sps->i1_log2_ctb_size;
116 ctb_size = (1 << log2_ctb_size);
117 src_strd = ps_sao_ctxt->ps_codec->i4_strd;
118 pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
119 pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
120
121 ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
122 loop_filter_strd = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
123
124 /* Current CTB */
125 {
126 WORD32 sao_wd_luma;
127 WORD32 sao_wd_chroma;
128 WORD32 sao_ht_luma;
129 WORD32 sao_ht_chroma;
130
131 WORD32 remaining_rows;
132 WORD32 remaining_cols;
133
134 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
135 sao_wd_luma = MIN(ctb_size, remaining_cols);
136 sao_wd_chroma = MIN(ctb_size, remaining_cols);
137
138 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
139 sao_ht_luma = MIN(ctb_size, remaining_rows);
140 sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
141
142 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
143 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
144 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
145 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
146
147 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
148 ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
149 ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
150
151 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
152 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
153 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
154 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
155
156 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
157 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
158 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
159 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
160
161 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
162 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
163 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
164 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
165
166 for(i = 0; i < 8; i++)
167 {
168 au1_avail_luma[i] = 255;
169 au1_avail_chroma[i] = 255;
170 }
171
172
173 if(0 == ps_sao_ctxt->i4_ctb_x)
174 {
175 au1_avail_luma[0] = 0;
176 au1_avail_luma[4] = 0;
177 au1_avail_luma[6] = 0;
178
179 au1_avail_chroma[0] = 0;
180 au1_avail_chroma[4] = 0;
181 au1_avail_chroma[6] = 0;
182 }
183
184 if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
185 {
186 au1_avail_luma[1] = 0;
187 au1_avail_luma[5] = 0;
188 au1_avail_luma[7] = 0;
189
190 au1_avail_chroma[1] = 0;
191 au1_avail_chroma[5] = 0;
192 au1_avail_chroma[7] = 0;
193 }
194
195 if(0 == ps_sao_ctxt->i4_ctb_y)
196 {
197 au1_avail_luma[2] = 0;
198 au1_avail_luma[4] = 0;
199 au1_avail_luma[5] = 0;
200
201 au1_avail_chroma[2] = 0;
202 au1_avail_chroma[4] = 0;
203 au1_avail_chroma[5] = 0;
204 }
205
206 if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
207 {
208 au1_avail_luma[3] = 0;
209 au1_avail_luma[6] = 0;
210 au1_avail_luma[7] = 0;
211
212 au1_avail_chroma[3] = 0;
213 au1_avail_chroma[6] = 0;
214 au1_avail_chroma[7] = 0;
215 }
216
217
218 if(0 == ps_sao->b3_y_type_idx)
219 {
220 /* Update left, top and top-left */
221 for(row = 0; row < sao_ht_luma; row++)
222 {
223 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
224 }
225 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
226
227 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
228
229 }
230 else
231 {
232 UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
233 UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
234 WORD32 tmp_strd = MAX_CTB_SIZE + 2;
235 WORD32 no_loop_filter_enabled = 0;
236
237 /* Check the loop filter flags and copy the original values for back up */
238 {
239 UWORD32 u4_no_loop_filter_flag;
240 WORD32 min_cu = 8;
241 UWORD8 *pu1_src_tmp = pu1_src_luma;
242
243 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
244 {
245 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
246 ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
247 u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
248
249 if(u4_no_loop_filter_flag)
250 {
251 WORD32 tmp_wd = sao_wd_luma;
252 no_loop_filter_enabled = 1;
253 while(tmp_wd > 0)
254 {
255 if(CTZ(u4_no_loop_filter_flag))
256 {
257 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
258 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
259 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
260 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
261 }
262 else
263 {
264 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
265 {
266 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
267 {
268 pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
269 }
270 }
271
272 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
273 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
274 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
275 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
276 }
277 }
278
279 pu1_src_tmp -= sao_wd_luma;
280 }
281
282 pu1_src_tmp += min_cu * src_strd;
283 pu1_src_copy += min_cu * tmp_strd;
284 }
285 }
286
287 if(1 == ps_sao->b3_y_type_idx)
288 {
289 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
290 src_strd,
291 pu1_src_left_luma,
292 pu1_src_top_luma,
293 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
294 ps_sao->b5_y_band_pos,
295 ai1_offset_y,
296 sao_wd_luma,
297 sao_ht_luma);
298 }
299 else // if(2 <= ps_sao->b3_y_type_idx)
300 {
301 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
302 au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
303 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
304 src_strd,
305 pu1_src_left_luma,
306 pu1_src_top_luma,
307 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
308 au1_src_top_right,
309 au1_src_bot_left,
310 au1_avail_luma,
311 ai1_offset_y,
312 sao_wd_luma,
313 sao_ht_luma);
314 }
315
316 /* Check the loop filter flags and copy the original values back if they are set */
317 if(no_loop_filter_enabled)
318 {
319 UWORD32 u4_no_loop_filter_flag;
320 WORD32 min_cu = 8;
321 UWORD8 *pu1_src_tmp = pu1_src_luma;
322
323 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
324 {
325 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
326 u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
327
328 if(u4_no_loop_filter_flag)
329 {
330 WORD32 tmp_wd = sao_wd_luma;
331 while(tmp_wd > 0)
332 {
333 if(CTZ(u4_no_loop_filter_flag))
334 {
335 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
336 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
337 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
338 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
339 }
340 else
341 {
342 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
343 {
344 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
345 {
346 pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
347 }
348 }
349
350 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
351 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
352 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
353 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
354 }
355 }
356
357 pu1_src_tmp -= sao_wd_luma;
358 }
359
360 pu1_src_tmp += min_cu * src_strd;
361 pu1_src_copy += min_cu * tmp_strd;
362 }
363 }
364
365 }
366
367 if(0 == ps_sao->b3_cb_type_idx)
368 {
369 for(row = 0; row < sao_ht_chroma; row++)
370 {
371 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
372 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
373 }
374 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
375 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
376
377 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
378 }
379 else
380 {
381 UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
382 UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
383 WORD32 tmp_strd = MAX_CTB_SIZE + 4;
384 WORD32 no_loop_filter_enabled = 0;
385
386 /* Check the loop filter flags and copy the original values for back up */
387 {
388 UWORD32 u4_no_loop_filter_flag;
389 WORD32 min_cu = 4;
390 UWORD8 *pu1_src_tmp = pu1_src_chroma;
391
392 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
393 {
394 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
395 u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
396
397 if(u4_no_loop_filter_flag)
398 {
399 WORD32 tmp_wd = sao_wd_chroma;
400 no_loop_filter_enabled = 1;
401 while(tmp_wd > 0)
402 {
403 if(CTZ(u4_no_loop_filter_flag))
404 {
405 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
406 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
407 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
408 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
409 }
410 else
411 {
412 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
413 {
414 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
415 {
416 pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
417 }
418 }
419
420 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
421 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
422 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
423 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
424 }
425 }
426
427 pu1_src_tmp -= sao_wd_chroma;
428 }
429
430 pu1_src_tmp += min_cu * src_strd;
431 pu1_src_copy += min_cu * tmp_strd;
432 }
433 }
434
435 if(1 == ps_sao->b3_cb_type_idx)
436 {
437 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
438 src_strd,
439 pu1_src_left_chroma,
440 pu1_src_top_chroma,
441 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
442 ps_sao->b5_cb_band_pos,
443 ps_sao->b5_cr_band_pos,
444 ai1_offset_cb,
445 ai1_offset_cr,
446 sao_wd_chroma,
447 sao_ht_chroma
448 );
449 }
450 else // if(2 <= ps_sao->b3_cb_type_idx)
451 {
452 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
453 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
454 au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
455 au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
456 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
457 src_strd,
458 pu1_src_left_chroma,
459 pu1_src_top_chroma,
460 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
461 au1_src_top_right,
462 au1_src_bot_left,
463 au1_avail_chroma,
464 ai1_offset_cb,
465 ai1_offset_cr,
466 sao_wd_chroma,
467 sao_ht_chroma);
468 }
469
470 /* Check the loop filter flags and copy the original values back if they are set */
471 if(no_loop_filter_enabled)
472 {
473 UWORD32 u4_no_loop_filter_flag;
474 WORD32 min_cu = 4;
475 UWORD8 *pu1_src_tmp = pu1_src_chroma;
476
477 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
478 {
479 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
480 u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
481
482 if(u4_no_loop_filter_flag)
483 {
484 WORD32 tmp_wd = sao_wd_chroma;
485 while(tmp_wd > 0)
486 {
487 if(CTZ(u4_no_loop_filter_flag))
488 {
489 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
490 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
491 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
492 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
493 }
494 else
495 {
496 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
497 {
498 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
499 {
500 pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
501 }
502 }
503
504 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
505 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
506 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
507 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
508 }
509 }
510
511 pu1_src_tmp -= sao_wd_chroma;
512 }
513
514 pu1_src_tmp += min_cu * src_strd;
515 pu1_src_copy += min_cu * tmp_strd;
516 }
517 }
518
519 }
520
521 }
522 }
523
ihevcd_sao_shift_ctb(sao_ctxt_t * ps_sao_ctxt)524 void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
525 {
526 codec_t *ps_codec = ps_sao_ctxt->ps_codec;
527 UWORD8 *pu1_src_luma;
528 UWORD8 *pu1_src_chroma;
529 WORD32 src_strd;
530 WORD32 ctb_size;
531 WORD32 log2_ctb_size;
532 sps_t *ps_sps;
533 sao_t *ps_sao;
534 pps_t *ps_pps;
535 slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
536 tile_t *ps_tile;
537 UWORD16 *pu1_slice_idx;
538 UWORD16 *pu1_tile_idx;
539 WORD32 row, col;
540 UWORD8 au1_avail_luma[8];
541 UWORD8 au1_avail_chroma[8];
542 UWORD8 au1_tile_slice_boundary[8];
543 UWORD8 au4_ilf_across_tile_slice_enable[8];
544 WORD32 i;
545 UWORD8 *pu1_src_top_luma;
546 UWORD8 *pu1_src_top_chroma;
547 UWORD8 *pu1_src_left_luma;
548 UWORD8 *pu1_src_left_chroma;
549 UWORD8 au1_src_top_right[2];
550 UWORD8 au1_src_bot_left[2];
551 UWORD8 *pu1_no_loop_filter_flag;
552 UWORD8 *pu1_src_backup_luma;
553 UWORD8 *pu1_src_backup_chroma;
554 WORD32 backup_strd;
555 WORD32 loop_filter_strd;
556
557 WORD32 no_loop_filter_enabled_luma = 0;
558 WORD32 no_loop_filter_enabled_chroma = 0;
559 UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
560 UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
561 UWORD8 *pu1_sao_src_luma_top_left_ctb;
562 UWORD8 *pu1_sao_src_chroma_top_left_ctb;
563 UWORD8 *pu1_sao_src_top_left_luma_top_right;
564 UWORD8 *pu1_sao_src_top_left_chroma_top_right;
565 UWORD8 u1_sao_src_top_left_luma_bot_left;
566 UWORD8 *pu1_sao_src_top_left_luma_bot_left;
567 UWORD8 *au1_sao_src_top_left_chroma_bot_left;
568 UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
569 /* Only first 5 values are used, but arrays are large
570 enough so that SIMD functions can read 64 bits at a time */
571 WORD8 ai1_offset_y[8] = {0};
572 WORD8 ai1_offset_cb[8] = {0};
573 WORD8 ai1_offset_cr[8] = {0};
574 WORD32 chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
575
576 PROFILE_DISABLE_SAO();
577
578 ps_sps = ps_sao_ctxt->ps_sps;
579 ps_pps = ps_sao_ctxt->ps_pps;
580 ps_tile = ps_sao_ctxt->ps_tile;
581
582 log2_ctb_size = ps_sps->i1_log2_ctb_size;
583 ctb_size = (1 << log2_ctb_size);
584 src_strd = ps_sao_ctxt->ps_codec->i4_strd;
585 ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
586 ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
587
588 pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
589 pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
590 pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
591 pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
592
593 /*Stores the left value for each row ctbs- Needed for column tiles*/
594 pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
595 pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
596 pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
597 pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
598 u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
599 pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
600 au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
601 pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
602 pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
603 pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
604
605 ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
606 loop_filter_strd = (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
607 backup_strd = 2 * MAX_CTB_SIZE;
608
609 DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
610
611 {
612 /* Check the loop filter flags and copy the original values for back up */
613 /* Luma */
614
615 /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs
616 * can belong to different slice with their own sao_enable flag */
617 {
618 UWORD32 u4_no_loop_filter_flag;
619 WORD32 loop_filter_bit_pos;
620 WORD32 log2_min_cu = 3;
621 WORD32 min_cu = (1 << log2_min_cu);
622 UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
623 WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
624 WORD32 sao_blk_wd = ctb_size;
625 WORD32 remaining_rows;
626 WORD32 remaining_cols;
627
628 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
629 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
630 if(remaining_rows <= SAO_SHIFT_CTB)
631 sao_blk_ht += remaining_rows;
632 if(remaining_cols <= SAO_SHIFT_CTB)
633 sao_blk_wd += remaining_cols;
634
635 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
636 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
637
638 pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
639
640 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
641 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
642 if(ps_sao_ctxt->i4_ctb_x > 0)
643 loop_filter_bit_pos -= 1;
644
645 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
646 (loop_filter_bit_pos >> 3);
647
648 for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
649 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
650 {
651 WORD32 tmp_wd = sao_blk_wd;
652
653 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
654 (loop_filter_bit_pos & 7);
655 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
656
657 if(u4_no_loop_filter_flag)
658 {
659 no_loop_filter_enabled_luma = 1;
660 while(tmp_wd > 0)
661 {
662 if(CTZ(u4_no_loop_filter_flag))
663 {
664 pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
665 pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
666 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
667 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
668 }
669 else
670 {
671 for(row = 0; row < min_cu; row++)
672 {
673 for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
674 {
675 pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
676 }
677 }
678 pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
679 pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
680 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
681 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
682 }
683 }
684
685 pu1_src_tmp_luma -= sao_blk_wd;
686 pu1_src_backup_luma -= sao_blk_wd;
687 }
688
689 pu1_src_tmp_luma += (src_strd << log2_min_cu);
690 pu1_src_backup_luma += (backup_strd << log2_min_cu);
691 }
692 }
693
694 /* Chroma */
695
696 {
697 UWORD32 u4_no_loop_filter_flag;
698 WORD32 loop_filter_bit_pos;
699 WORD32 log2_min_cu = 3;
700 WORD32 min_cu = (1 << log2_min_cu);
701 UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
702 WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
703 WORD32 sao_blk_wd = ctb_size;
704 WORD32 remaining_rows;
705 WORD32 remaining_cols;
706
707 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
708 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
709 if(remaining_rows <= 2 * SAO_SHIFT_CTB)
710 sao_blk_ht += remaining_rows;
711 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
712 sao_blk_wd += remaining_cols;
713
714 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
715 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
716
717 pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
718
719 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
720 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
721 if(ps_sao_ctxt->i4_ctb_x > 0)
722 loop_filter_bit_pos -= 2;
723
724 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
725 (loop_filter_bit_pos >> 3);
726
727 for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
728 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
729 {
730 WORD32 tmp_wd = sao_blk_wd;
731
732 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
733 (loop_filter_bit_pos & 7);
734 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
735
736 if(u4_no_loop_filter_flag)
737 {
738 no_loop_filter_enabled_chroma = 1;
739 while(tmp_wd > 0)
740 {
741 if(CTZ(u4_no_loop_filter_flag))
742 {
743 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
744 pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
745 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
746 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
747 }
748 else
749 {
750 for(row = 0; row < min_cu / 2; row++)
751 {
752 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
753 {
754 pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
755 }
756 }
757
758 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
759 pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
760 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
761 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
762 }
763 }
764
765 pu1_src_tmp_chroma -= sao_blk_wd;
766 pu1_src_backup_chroma -= sao_blk_wd;
767 }
768
769 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
770 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
771 }
772 }
773 }
774
775 DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
776
777 /* Top-left CTB */
778 if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
779 {
780 WORD32 sao_wd_luma = SAO_SHIFT_CTB;
781 WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
782 WORD32 sao_ht_luma = SAO_SHIFT_CTB;
783 WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
784
785 WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
786 WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
787 WORD32 au4_idx_tl[8], idx_tl;
788
789 slice_header_t *ps_slice_hdr_top_left;
790 {
791 WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
792 (ps_sao_ctxt->i4_ctb_x - 1);
793 ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx];
794 }
795
796
797 pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
798 pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
799 ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
800 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
801 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
802 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
803 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
804
805 if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag)
806 {
807 if(0 == ps_sao->b3_y_type_idx)
808 {
809 /* Update left, top and top-left */
810 for(row = 0; row < sao_ht_luma; row++)
811 {
812 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
813 }
814 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
815
816 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
817
818
819 }
820
821 else if(1 == ps_sao->b3_y_type_idx)
822 {
823 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
824 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
825 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
826 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
827
828 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
829 src_strd,
830 pu1_src_left_luma,
831 pu1_src_top_luma,
832 pu1_sao_src_luma_top_left_ctb,
833 ps_sao->b5_y_band_pos,
834 ai1_offset_y,
835 sao_wd_luma,
836 sao_ht_luma
837 );
838 }
839
840 else // if(2 <= ps_sao->b3_y_type_idx)
841 {
842 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
843 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
844 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
845 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
846
847 for(i = 0; i < 8; i++)
848 {
849 au1_avail_luma[i] = 255;
850 au1_tile_slice_boundary[i] = 0;
851 au4_idx_tl[i] = 0;
852 au4_ilf_across_tile_slice_enable[i] = 1;
853 }
854
855 /******************************************************************
856 * Derive the Top-left CTB's neighbor pixel's slice indices.
857 *
858 * TL_T
859 * 4 _2__5________
860 * 0 | | |
861 * TL_L | TL | 1 TL_R|
862 * |____|_______|____
863 * 6|TL_D|7 | |
864 * | 3 | | |
865 * |____|_______| |
866 * | |
867 * | |
868 * |____________|
869 *
870 *****************************************************************/
871
872 /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
873 {
874 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
875 {
876 {
877 /*Assuming that sao shift is uniform along x and y directions*/
878 if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
879 {
880 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
881 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
882 }
883 else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
884 {
885 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
886 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
887 }
888 ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
889 ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
890
891 ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
892 ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
893
894 ctbx_tl_d = ps_sao_ctxt->i4_ctb_x - 1;
895 ctby_tl_d = ps_sao_ctxt->i4_ctb_y;
896
897 ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
898 ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
899 }
900
901 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
902 {
903 /*Calculate slice indices for neighbor pixels*/
904 idx_tl = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
905 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
906 au4_idx_tl[0] = pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
907 au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
908 au4_idx_tl[3] = au4_idx_tl[6] = pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
909 au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
910
911 if((0 == (1 << log2_ctb_size) - sao_wd_luma))
912 {
913 if(ps_sao_ctxt->i4_ctb_x == 1)
914 {
915 au4_idx_tl[6] = -1;
916 au4_idx_tl[4] = -1;
917 }
918 else
919 {
920 au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
921 }
922 if(ps_sao_ctxt->i4_ctb_y == 1)
923 {
924 au4_idx_tl[5] = -1;
925 au4_idx_tl[4] = -1;
926 }
927 else
928 {
929 au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
930 au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
931 }
932 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
933 }
934
935 /* Verify that the neighbor ctbs don't cross pic boundary.
936 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
937 * of the pixel having a greater address is checked. Accordingly, set the availability flags.
938 * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
939 * the respective pixel's flags are checked
940 */
941
942 if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
943 {
944 au4_ilf_across_tile_slice_enable[4] = 0;
945 au4_ilf_across_tile_slice_enable[6] = 0;
946 }
947 else
948 {
949 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
950 }
951 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
952 {
953 au4_ilf_across_tile_slice_enable[5] = 0;
954 au4_ilf_across_tile_slice_enable[4] = 0;
955 }
956 else
957 {
958 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
959 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
960 }
961 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
962 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
963 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
964 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
965 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
966
967 if(au4_idx_tl[5] > idx_tl)
968 {
969 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
970 }
971
972 /*
973 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
974 * of the pixel having a greater address is checked. Accordingly, set the availability flags.
975 * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
976 * the respective pixel's flags are checked
977 */
978 for(i = 0; i < 8; i++)
979 {
980 /*Sets the edges that lie on the slice/tile boundary*/
981 if(au4_idx_tl[i] != idx_tl)
982 {
983 au1_tile_slice_boundary[i] = 1;
984 }
985 else
986 {
987 au4_ilf_across_tile_slice_enable[i] = 1;
988 }
989 }
990
991 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
992 }
993
994 if(ps_pps->i1_tiles_enabled_flag)
995 {
996 /* Calculate availability flags at tile boundary */
997 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
998 {
999 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1000 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1001 {
1002 /*Set the boundary arrays*/
1003 /*Calculate tile indices for neighbor pixels*/
1004 idx_tl = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1005 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1006 au4_idx_tl[0] = pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1007 au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1008 au4_idx_tl[3] = au4_idx_tl[6] = pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1009 au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1010
1011 if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1012 {
1013 if(ps_sao_ctxt->i4_ctb_x == 1)
1014 {
1015 au4_idx_tl[6] = -1;
1016 au4_idx_tl[4] = -1;
1017 }
1018 else
1019 {
1020 au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1021 }
1022 if(ps_sao_ctxt->i4_ctb_y == 1)
1023 {
1024 au4_idx_tl[5] = -1;
1025 au4_idx_tl[4] = -1;
1026 }
1027 else
1028 {
1029 au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1030 au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1031 }
1032 au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1033 }
1034 for(i = 0; i < 8; i++)
1035 {
1036 /*Sets the edges that lie on the tile boundary*/
1037 if(au4_idx_tl[i] != idx_tl)
1038 {
1039 au1_tile_slice_boundary[i] |= 1;
1040 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1041 }
1042 }
1043 }
1044 }
1045 }
1046
1047
1048 /*Set availability flags based on tile and slice boundaries*/
1049 for(i = 0; i < 8; i++)
1050 {
1051 /*Sets the edges that lie on the slice/tile boundary*/
1052 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1053 {
1054 au1_avail_luma[i] = 0;
1055 }
1056 }
1057 }
1058 }
1059
1060 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1061 {
1062 au1_avail_luma[0] = 0;
1063 au1_avail_luma[4] = 0;
1064 au1_avail_luma[6] = 0;
1065 }
1066
1067 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1068 {
1069 au1_avail_luma[1] = 0;
1070 au1_avail_luma[5] = 0;
1071 au1_avail_luma[7] = 0;
1072 }
1073 //y==1 case
1074 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
1075 {
1076 au1_avail_luma[2] = 0;
1077 au1_avail_luma[4] = 0;
1078 au1_avail_luma[5] = 0;
1079 }
1080 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1081 {
1082 au1_avail_luma[3] = 0;
1083 au1_avail_luma[6] = 0;
1084 au1_avail_luma[7] = 0;
1085 }
1086
1087 {
1088 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1089 u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
1090 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1091 src_strd,
1092 pu1_src_left_luma,
1093 pu1_src_top_luma,
1094 pu1_sao_src_luma_top_left_ctb,
1095 au1_src_top_right,
1096 &u1_sao_src_top_left_luma_bot_left,
1097 au1_avail_luma,
1098 ai1_offset_y,
1099 sao_wd_luma,
1100 sao_ht_luma);
1101 }
1102 }
1103
1104 }
1105 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1106 {
1107 /* Update left, top and top-left */
1108 for(row = 0; row < sao_ht_luma; row++)
1109 {
1110 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1111 }
1112 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1113
1114 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1115 }
1116
1117 if(ps_slice_hdr_top_left->i1_slice_sao_chroma_flag)
1118 {
1119 if(0 == ps_sao->b3_cb_type_idx)
1120 {
1121 for(row = 0; row < sao_ht_chroma; row++)
1122 {
1123 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1124 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1125 }
1126 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1127 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1128
1129 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1130
1131 }
1132
1133 else if(1 == ps_sao->b3_cb_type_idx)
1134 {
1135 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1136 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1137 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1138 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1139
1140 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1141 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1142 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1143 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1144
1145 if(chroma_yuv420sp_vu)
1146 {
1147 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1148 src_strd,
1149 pu1_src_left_chroma,
1150 pu1_src_top_chroma,
1151 pu1_sao_src_chroma_top_left_ctb,
1152 ps_sao->b5_cr_band_pos,
1153 ps_sao->b5_cb_band_pos,
1154 ai1_offset_cr,
1155 ai1_offset_cb,
1156 sao_wd_chroma,
1157 sao_ht_chroma
1158 );
1159 }
1160 else
1161 {
1162 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1163 src_strd,
1164 pu1_src_left_chroma,
1165 pu1_src_top_chroma,
1166 pu1_sao_src_chroma_top_left_ctb,
1167 ps_sao->b5_cb_band_pos,
1168 ps_sao->b5_cr_band_pos,
1169 ai1_offset_cb,
1170 ai1_offset_cr,
1171 sao_wd_chroma,
1172 sao_ht_chroma
1173 );
1174 }
1175 }
1176
1177 else // if(2 <= ps_sao->b3_cb_type_idx)
1178 {
1179 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1180 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1181 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1182 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1183
1184 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1185 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1186 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1187 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1188 for(i = 0; i < 8; i++)
1189 {
1190 au1_avail_chroma[i] = 255;
1191 au1_tile_slice_boundary[i] = 0;
1192 au4_idx_tl[i] = 0;
1193 au4_ilf_across_tile_slice_enable[i] = 1;
1194 }
1195 /*In case of slices*/
1196 {
1197 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1198 {
1199 if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
1200 {
1201 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
1202 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
1203 }
1204 else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
1205 {
1206 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
1207 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
1208 }
1209 ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
1210 ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
1211
1212 ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
1213 ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
1214
1215 ctbx_tl_d = ps_sao_ctxt->i4_ctb_x - 1;
1216 ctby_tl_d = ps_sao_ctxt->i4_ctb_y;
1217
1218 ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
1219 ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
1220
1221 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1222 {
1223
1224 idx_tl = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1225 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1226 au4_idx_tl[0] = pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1227 au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1228 au4_idx_tl[3] = au4_idx_tl[6] = pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1229 au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1230
1231 if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
1232 {
1233 if(ps_sao_ctxt->i4_ctb_x == 1)
1234 {
1235 au4_idx_tl[6] = -1;
1236 au4_idx_tl[4] = -1;
1237 }
1238 else
1239 {
1240 au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1241 }
1242 if(ps_sao_ctxt->i4_ctb_y == 1)
1243 {
1244 au4_idx_tl[5] = -1;
1245 au4_idx_tl[4] = -1;
1246 }
1247 else
1248 {
1249 au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1250 au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1251 }
1252 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1253 }
1254
1255 /* Verify that the neighbor ctbs don't cross pic boundary
1256 * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
1257 if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
1258 {
1259 au4_ilf_across_tile_slice_enable[4] = 0;
1260 au4_ilf_across_tile_slice_enable[6] = 0;
1261 }
1262 else
1263 {
1264 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1265 }
1266 if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
1267 {
1268 au4_ilf_across_tile_slice_enable[5] = 0;
1269 au4_ilf_across_tile_slice_enable[4] = 0;
1270 }
1271 else
1272 {
1273 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1274 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1275 }
1276 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1277 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1278 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1279 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1280 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1281 /*
1282 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1283 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1284 */
1285 for(i = 0; i < 8; i++)
1286 {
1287 /*Sets the edges that lie on the slice/tile boundary*/
1288 if(au4_idx_tl[i] != idx_tl)
1289 {
1290 au1_tile_slice_boundary[i] = 1;
1291 }
1292 else
1293 {
1294 au4_ilf_across_tile_slice_enable[i] = 1;
1295 }
1296 }
1297
1298 /*Reset indices*/
1299 for(i = 0; i < 8; i++)
1300 {
1301 au4_idx_tl[i] = 0;
1302 }
1303 }
1304 if(ps_pps->i1_tiles_enabled_flag)
1305 {
1306 /* Calculate availability flags at tile boundary */
1307 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1308 {
1309 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1310 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1311 {
1312 /*Set the boundary arrays*/
1313 /*Calculate tile indices for neighbor pixels*/
1314 idx_tl = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1315 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1316 au4_idx_tl[0] = pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1317 au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1318 au4_idx_tl[3] = au4_idx_tl[6] = pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1319 au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1320
1321 if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1322 {
1323 if(ps_sao_ctxt->i4_ctb_x == 1)
1324 {
1325 au4_idx_tl[6] = -1;
1326 au4_idx_tl[4] = -1;
1327 }
1328 else
1329 {
1330 au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1331 }
1332 if(ps_sao_ctxt->i4_ctb_y == 1)
1333 {
1334 au4_idx_tl[5] = -1;
1335 au4_idx_tl[4] = -1;
1336 }
1337 else
1338 {
1339 au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1340 au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1341 }
1342 au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1343 }
1344 for(i = 0; i < 8; i++)
1345 {
1346 /*Sets the edges that lie on the tile boundary*/
1347 if(au4_idx_tl[i] != idx_tl)
1348 {
1349 au1_tile_slice_boundary[i] |= 1;
1350 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1351 }
1352 }
1353 }
1354 }
1355 }
1356
1357 for(i = 0; i < 8; i++)
1358 {
1359 /*Sets the edges that lie on the slice/tile boundary*/
1360 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1361 {
1362 au1_avail_chroma[i] = 0;
1363 }
1364 }
1365 }
1366 }
1367
1368 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
1369 {
1370 au1_avail_chroma[0] = 0;
1371 au1_avail_chroma[4] = 0;
1372 au1_avail_chroma[6] = 0;
1373 }
1374 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1375 {
1376 au1_avail_chroma[1] = 0;
1377 au1_avail_chroma[5] = 0;
1378 au1_avail_chroma[7] = 0;
1379 }
1380
1381 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1382 {
1383 au1_avail_chroma[2] = 0;
1384 au1_avail_chroma[4] = 0;
1385 au1_avail_chroma[5] = 0;
1386 }
1387 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1388 {
1389 au1_avail_chroma[3] = 0;
1390 au1_avail_chroma[6] = 0;
1391 au1_avail_chroma[7] = 0;
1392 }
1393
1394 {
1395 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
1396 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
1397 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
1398 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
1399 if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
1400 {
1401 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1402 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1403 }
1404
1405 if(chroma_yuv420sp_vu)
1406 {
1407 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1408 src_strd,
1409 pu1_src_left_chroma,
1410 pu1_src_top_chroma,
1411 pu1_sao_src_chroma_top_left_ctb,
1412 au1_src_top_right,
1413 au1_sao_src_top_left_chroma_bot_left,
1414 au1_avail_chroma,
1415 ai1_offset_cr,
1416 ai1_offset_cb,
1417 sao_wd_chroma,
1418 sao_ht_chroma);
1419 }
1420 else
1421 {
1422 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1423 src_strd,
1424 pu1_src_left_chroma,
1425 pu1_src_top_chroma,
1426 pu1_sao_src_chroma_top_left_ctb,
1427 au1_src_top_right,
1428 au1_sao_src_top_left_chroma_bot_left,
1429 au1_avail_chroma,
1430 ai1_offset_cb,
1431 ai1_offset_cr,
1432 sao_wd_chroma,
1433 sao_ht_chroma);
1434 }
1435 }
1436 }
1437 }
1438 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1439 {
1440 for(row = 0; row < sao_ht_chroma; row++)
1441 {
1442 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1443 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1444 }
1445 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1446 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1447
1448 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1449 }
1450
1451 pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1452 pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
1453 ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1454 }
1455
1456
1457 /* Top CTB */
1458 if((ps_sao_ctxt->i4_ctb_y > 0))
1459 {
1460 WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1461 WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
1462 WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1463 WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1464
1465 WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1466 WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1467 WORD32 au4_idx_t[8], idx_t;
1468
1469 WORD32 remaining_cols;
1470
1471 slice_header_t *ps_slice_hdr_top;
1472 {
1473 WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
1474 (ps_sao_ctxt->i4_ctb_x);
1475 ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx];
1476 }
1477
1478 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1479 if(remaining_cols <= SAO_SHIFT_CTB)
1480 {
1481 sao_wd_luma += remaining_cols;
1482 }
1483 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
1484 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1485 {
1486 sao_wd_chroma += remaining_cols;
1487 }
1488
1489 pu1_src_luma -= (sao_ht_luma * src_strd);
1490 pu1_src_chroma -= (sao_ht_chroma * src_strd);
1491 ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1492 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1493 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1494 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1495 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
1496
1497 if(0 != sao_wd_luma)
1498 {
1499 if(ps_slice_hdr_top->i1_slice_sao_luma_flag)
1500 {
1501 if(0 == ps_sao->b3_y_type_idx)
1502 {
1503 /* Update left, top and top-left */
1504 for(row = 0; row < sao_ht_luma; row++)
1505 {
1506 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1507 }
1508 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1509
1510 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1511
1512 }
1513
1514 else if(1 == ps_sao->b3_y_type_idx)
1515 {
1516 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1517 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1518 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1519 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1520
1521 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1522 src_strd,
1523 pu1_src_left_luma,
1524 pu1_src_top_luma,
1525 pu1_sao_src_luma_top_left_ctb,
1526 ps_sao->b5_y_band_pos,
1527 ai1_offset_y,
1528 sao_wd_luma,
1529 sao_ht_luma
1530 );
1531 }
1532
1533 else // if(2 <= ps_sao->b3_y_type_idx)
1534 {
1535 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1536 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1537 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1538 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1539
1540 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1541 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1542 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1543
1544 for(i = 0; i < 8; i++)
1545 {
1546
1547 au4_ilf_across_tile_slice_enable[i] = 1;
1548 }
1549 /******************************************************************
1550 * Derive the Top CTB's neighbor pixels' slice indices.
1551 *
1552 * T_T
1553 * ____________
1554 * | | |
1555 * | T_L| T |T_R
1556 * | | ______|____
1557 * | | T_D | |
1558 * | | | |
1559 * |____|_______| |
1560 * | |
1561 * | |
1562 * |____________|
1563 *
1564 *****************************************************************/
1565
1566 /*In case of slices*/
1567 {
1568 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1569 {
1570
1571 ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1572 ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1573
1574 ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1575 ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1576
1577 ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1578 ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1579
1580 ctbx_t_d = ps_sao_ctxt->i4_ctb_x;
1581 ctby_t_d = ps_sao_ctxt->i4_ctb_y;
1582
1583 ctbx_t = ps_sao_ctxt->i4_ctb_x;
1584 ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1585
1586 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1587 {
1588 /*Calculate neighbor ctb slice indices*/
1589 if(0 == ps_sao_ctxt->i4_ctb_x)
1590 {
1591 au4_idx_t[0] = -1;
1592 au4_idx_t[6] = -1;
1593 au4_idx_t[4] = -1;
1594 }
1595 else
1596 {
1597 au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1598 au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1599 }
1600 idx_t = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1601 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1602 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1603 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1604
1605 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1606 if(0 == ps_sao_ctxt->i4_ctb_x)
1607 {
1608 au4_ilf_across_tile_slice_enable[4] = 0;
1609 au4_ilf_across_tile_slice_enable[6] = 0;
1610 au4_ilf_across_tile_slice_enable[0] = 0;
1611 }
1612 else
1613 {
1614 au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1615 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1616 }
1617
1618
1619
1620 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1621 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1622 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1623 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1624 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1625
1626 if(au4_idx_t[6] < idx_t)
1627 {
1628 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1629 }
1630
1631 /*
1632 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1633 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1634 */
1635
1636 for(i = 0; i < 8; i++)
1637 {
1638 /*Sets the edges that lie on the slice/tile boundary*/
1639 if(au4_idx_t[i] != idx_t)
1640 {
1641 au1_tile_slice_boundary[i] = 1;
1642 /*Check for slice flag at such boundaries*/
1643 }
1644 else
1645 {
1646 au4_ilf_across_tile_slice_enable[i] = 1;
1647 }
1648 }
1649 /*Reset indices*/
1650 for(i = 0; i < 8; i++)
1651 {
1652 au4_idx_t[i] = 0;
1653 }
1654 }
1655
1656 if(ps_pps->i1_tiles_enabled_flag)
1657 {
1658 /* Calculate availability flags at tile boundary */
1659 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1660 {
1661 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1662 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1663 {
1664 /*Calculate neighbor ctb tile indices*/
1665 if(0 == ps_sao_ctxt->i4_ctb_x)
1666 {
1667 au4_idx_t[0] = -1;
1668 au4_idx_t[6] = -1;
1669 au4_idx_t[4] = -1;
1670 }
1671 else
1672 {
1673 au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1674 au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1675 }
1676 idx_t = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1677 au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1678 au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1679 au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1680
1681 for(i = 0; i < 8; i++)
1682 {
1683 /*Sets the edges that lie on the tile boundary*/
1684 if(au4_idx_t[i] != idx_t)
1685 {
1686 au1_tile_slice_boundary[i] |= 1;
1687 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1688 }
1689 }
1690 }
1691 }
1692 }
1693
1694 for(i = 0; i < 8; i++)
1695 {
1696 /*Sets the edges that lie on the slice/tile boundary*/
1697 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1698 {
1699 au1_avail_luma[i] = 0;
1700 }
1701 }
1702 }
1703 }
1704
1705
1706 if(0 == ps_sao_ctxt->i4_ctb_x)
1707 {
1708 au1_avail_luma[0] = 0;
1709 au1_avail_luma[4] = 0;
1710 au1_avail_luma[6] = 0;
1711 }
1712
1713 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1714 {
1715 au1_avail_luma[1] = 0;
1716 au1_avail_luma[5] = 0;
1717 au1_avail_luma[7] = 0;
1718 }
1719
1720 if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1721 {
1722 au1_avail_luma[2] = 0;
1723 au1_avail_luma[4] = 0;
1724 au1_avail_luma[5] = 0;
1725 }
1726
1727 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1728 {
1729 au1_avail_luma[3] = 0;
1730 au1_avail_luma[6] = 0;
1731 au1_avail_luma[7] = 0;
1732 }
1733
1734 {
1735 au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1736 u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1737 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1738 src_strd,
1739 pu1_src_left_luma,
1740 pu1_src_top_luma,
1741 pu1_sao_src_luma_top_left_ctb,
1742 au1_src_top_right,
1743 &u1_sao_src_top_left_luma_bot_left,
1744 au1_avail_luma,
1745 ai1_offset_y,
1746 sao_wd_luma,
1747 sao_ht_luma);
1748 }
1749 }
1750 }
1751 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1752 {
1753 /* Update left, top and top-left */
1754 for(row = 0; row < sao_ht_luma; row++)
1755 {
1756 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1757 }
1758 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1759
1760 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1761 }
1762 }
1763
1764 if(0 != sao_wd_chroma)
1765 {
1766 if(ps_slice_hdr_top->i1_slice_sao_chroma_flag)
1767 {
1768 if(0 == ps_sao->b3_cb_type_idx)
1769 {
1770
1771 for(row = 0; row < sao_ht_chroma; row++)
1772 {
1773 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1774 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1775 }
1776 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1777 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1778
1779 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1780
1781 }
1782
1783 else if(1 == ps_sao->b3_cb_type_idx)
1784 {
1785 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1786 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1787 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1788 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1789
1790 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1791 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1792 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1793 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1794
1795 if(chroma_yuv420sp_vu)
1796 {
1797 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1798 src_strd,
1799 pu1_src_left_chroma,
1800 pu1_src_top_chroma,
1801 pu1_sao_src_chroma_top_left_ctb,
1802 ps_sao->b5_cr_band_pos,
1803 ps_sao->b5_cb_band_pos,
1804 ai1_offset_cr,
1805 ai1_offset_cb,
1806 sao_wd_chroma,
1807 sao_ht_chroma
1808 );
1809 }
1810 else
1811 {
1812 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1813 src_strd,
1814 pu1_src_left_chroma,
1815 pu1_src_top_chroma,
1816 pu1_sao_src_chroma_top_left_ctb,
1817 ps_sao->b5_cb_band_pos,
1818 ps_sao->b5_cr_band_pos,
1819 ai1_offset_cb,
1820 ai1_offset_cr,
1821 sao_wd_chroma,
1822 sao_ht_chroma
1823 );
1824 }
1825 }
1826 else // if(2 <= ps_sao->b3_cb_type_idx)
1827 {
1828 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1829 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1830 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1831 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1832
1833 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1834 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1835 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1836 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1837
1838 for(i = 0; i < 8; i++)
1839 {
1840 au1_avail_chroma[i] = 255;
1841 au1_tile_slice_boundary[i] = 0;
1842 au4_idx_t[i] = 0;
1843 au4_ilf_across_tile_slice_enable[i] = 1;
1844 }
1845
1846 {
1847 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1848 {
1849 ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1850 ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1851
1852 ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1853 ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1854
1855 ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1856 ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1857
1858 ctbx_t_d = ps_sao_ctxt->i4_ctb_x;
1859 ctby_t_d = ps_sao_ctxt->i4_ctb_y;
1860
1861 ctbx_t = ps_sao_ctxt->i4_ctb_x;
1862 ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1863
1864 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1865 {
1866 if(0 == ps_sao_ctxt->i4_ctb_x)
1867 {
1868 au4_idx_t[0] = -1;
1869 au4_idx_t[6] = -1;
1870 au4_idx_t[4] = -1;
1871 }
1872 else
1873 {
1874 au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1875 au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1876 }
1877 idx_t = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1878 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1879 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1880 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1881
1882 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1883
1884 if(0 == ps_sao_ctxt->i4_ctb_x)
1885 {
1886 au4_ilf_across_tile_slice_enable[4] = 0;
1887 au4_ilf_across_tile_slice_enable[6] = 0;
1888 au4_ilf_across_tile_slice_enable[0] = 0;
1889 }
1890 else
1891 {
1892 au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1893 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1894 }
1895
1896 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1897 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1898 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1899 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1900 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1901
1902 if(idx_t > au4_idx_t[6])
1903 {
1904 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1905 }
1906
1907 /*
1908 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1909 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1910 */
1911 for(i = 0; i < 8; i++)
1912 {
1913 /*Sets the edges that lie on the slice/tile boundary*/
1914 if(au4_idx_t[i] != idx_t)
1915 {
1916 au1_tile_slice_boundary[i] = 1;
1917 }
1918 else
1919 {
1920 /*Indicates that the neighbour belongs to same/dependent slice*/
1921 au4_ilf_across_tile_slice_enable[i] = 1;
1922 }
1923 }
1924 /*Reset indices*/
1925 for(i = 0; i < 8; i++)
1926 {
1927 au4_idx_t[i] = 0;
1928 }
1929 }
1930 if(ps_pps->i1_tiles_enabled_flag)
1931 {
1932 /* Calculate availability flags at tile boundary */
1933 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1934 {
1935 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1936 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1937 {
1938 /*Calculate neighbor ctb slice indices*/
1939 if(0 == ps_sao_ctxt->i4_ctb_x)
1940 {
1941 au4_idx_t[0] = -1;
1942 au4_idx_t[6] = -1;
1943 au4_idx_t[4] = -1;
1944 }
1945 else
1946 {
1947 au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1948 au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1949 }
1950 idx_t = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1951 au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1952 au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1953 au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1954
1955 for(i = 0; i < 8; i++)
1956 {
1957 /*Sets the edges that lie on the tile boundary*/
1958 if(au4_idx_t[i] != idx_t)
1959 {
1960 au1_tile_slice_boundary[i] |= 1;
1961 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1962 }
1963 }
1964 }
1965 }
1966 }
1967 for(i = 0; i < 8; i++)
1968 {
1969 /*Sets the edges that lie on the slice/tile boundary*/
1970 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1971 {
1972 au1_avail_chroma[i] = 0;
1973 }
1974 }
1975
1976 }
1977 }
1978 if(0 == ps_sao_ctxt->i4_ctb_x)
1979 {
1980 au1_avail_chroma[0] = 0;
1981 au1_avail_chroma[4] = 0;
1982 au1_avail_chroma[6] = 0;
1983 }
1984
1985 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
1986 {
1987 au1_avail_chroma[1] = 0;
1988 au1_avail_chroma[5] = 0;
1989 au1_avail_chroma[7] = 0;
1990 }
1991
1992 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1993 {
1994 au1_avail_chroma[2] = 0;
1995 au1_avail_chroma[4] = 0;
1996 au1_avail_chroma[5] = 0;
1997 }
1998
1999 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
2000 {
2001 au1_avail_chroma[3] = 0;
2002 au1_avail_chroma[6] = 0;
2003 au1_avail_chroma[7] = 0;
2004 }
2005
2006 {
2007 au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
2008 au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
2009 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2010 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2011
2012 if(chroma_yuv420sp_vu)
2013 {
2014 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2015 src_strd,
2016 pu1_src_left_chroma,
2017 pu1_src_top_chroma,
2018 pu1_sao_src_chroma_top_left_ctb,
2019 au1_src_top_right,
2020 au1_sao_src_top_left_chroma_bot_left,
2021 au1_avail_chroma,
2022 ai1_offset_cr,
2023 ai1_offset_cb,
2024 sao_wd_chroma,
2025 sao_ht_chroma);
2026 }
2027 else
2028 {
2029 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2030 src_strd,
2031 pu1_src_left_chroma,
2032 pu1_src_top_chroma,
2033 pu1_sao_src_chroma_top_left_ctb,
2034 au1_src_top_right,
2035 au1_sao_src_top_left_chroma_bot_left,
2036 au1_avail_chroma,
2037 ai1_offset_cb,
2038 ai1_offset_cr,
2039 sao_wd_chroma,
2040 sao_ht_chroma);
2041 }
2042 }
2043
2044 }
2045 }
2046 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2047 {
2048 for(row = 0; row < sao_ht_chroma; row++)
2049 {
2050 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2051 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2052 }
2053 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2054 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2055
2056 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2057 }
2058 }
2059
2060 pu1_src_luma += sao_ht_luma * src_strd;
2061 pu1_src_chroma += sao_ht_chroma * src_strd;
2062 ps_sao += (ps_sps->i2_pic_wd_in_ctb);
2063 }
2064
2065 /* Left CTB */
2066 if(ps_sao_ctxt->i4_ctb_x > 0)
2067 {
2068 WORD32 sao_wd_luma = SAO_SHIFT_CTB;
2069 WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
2070 WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2071 WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2072
2073 WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
2074 WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
2075 WORD32 au4_idx_l[8], idx_l;
2076
2077 WORD32 remaining_rows;
2078 slice_header_t *ps_slice_hdr_left;
2079 {
2080 WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb +
2081 (ps_sao_ctxt->i4_ctb_x - 1);
2082 ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx];
2083 }
2084
2085 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2086 if(remaining_rows <= SAO_SHIFT_CTB)
2087 {
2088 sao_ht_luma += remaining_rows;
2089 }
2090 remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2091 if(remaining_rows <= SAO_SHIFT_CTB)
2092 {
2093 sao_ht_chroma += remaining_rows;
2094 }
2095
2096 pu1_src_luma -= sao_wd_luma;
2097 pu1_src_chroma -= sao_wd_chroma;
2098 ps_sao -= 1;
2099 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
2100 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
2101 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2102 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2103
2104
2105 if(0 != sao_ht_luma)
2106 {
2107 if(ps_slice_hdr_left->i1_slice_sao_luma_flag)
2108 {
2109 if(0 == ps_sao->b3_y_type_idx)
2110 {
2111 /* Update left, top and top-left */
2112 for(row = 0; row < sao_ht_luma; row++)
2113 {
2114 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2115 }
2116 /*Update in next location*/
2117 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2118
2119 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2120
2121 }
2122
2123 else if(1 == ps_sao->b3_y_type_idx)
2124 {
2125 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2126 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2127 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2128 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2129
2130 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2131 src_strd,
2132 pu1_src_left_luma,
2133 pu1_src_top_luma,
2134 pu1_sao_src_top_left_luma_curr_ctb,
2135 ps_sao->b5_y_band_pos,
2136 ai1_offset_y,
2137 sao_wd_luma,
2138 sao_ht_luma
2139 );
2140 }
2141
2142 else // if(2 <= ps_sao->b3_y_type_idx)
2143 {
2144 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2145 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2146 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2147 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2148
2149 for(i = 0; i < 8; i++)
2150 {
2151 au1_avail_luma[i] = 255;
2152 au1_tile_slice_boundary[i] = 0;
2153 au4_idx_l[i] = 0;
2154 au4_ilf_across_tile_slice_enable[i] = 1;
2155 }
2156 /******************************************************************
2157 * Derive the Left CTB's neighbour pixel's slice indices.
2158 *
2159 *
2160 * ____________
2161 * | | |
2162 * | L_T| |
2163 * |____|_______|____
2164 * | | | |
2165 * L_L | L | L_R | |
2166 * |____|_______| |
2167 * | |
2168 * L_D | |
2169 * |____________|
2170 *
2171 *****************************************************************/
2172
2173 /*In case of slices or tiles*/
2174 {
2175 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2176 {
2177 ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2178 ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2179
2180 ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2181 ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2182
2183 ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2184 ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2185
2186 ctbx_l_d = ps_sao_ctxt->i4_ctb_x - 1;
2187 ctby_l_d = ps_sao_ctxt->i4_ctb_y;
2188
2189 ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2190 ctby_l = ps_sao_ctxt->i4_ctb_y;
2191
2192 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2193 {
2194 if(0 == ps_sao_ctxt->i4_ctb_y)
2195 {
2196 au4_idx_l[2] = -1;
2197 au4_idx_l[4] = -1;
2198 au4_idx_l[5] = -1;
2199 }
2200 else
2201 {
2202 au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2203 au4_idx_l[5] = pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2204 }
2205 idx_l = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2206 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2207 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2208 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2209
2210 /*Verify that the neighbor ctbs don't cross pic boundary.*/
2211 if(0 == ps_sao_ctxt->i4_ctb_y)
2212 {
2213 au4_ilf_across_tile_slice_enable[2] = 0;
2214 au4_ilf_across_tile_slice_enable[4] = 0;
2215 au4_ilf_across_tile_slice_enable[5] = 0;
2216 }
2217 else
2218 {
2219 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2220 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2221
2222 }
2223 //TODO: ILF flag checks for [0] and [6] is missing.
2224 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2225 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2226 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2227
2228 if(idx_l < au4_idx_l[5])
2229 {
2230 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2231 }
2232
2233 /*
2234 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2235 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2236 */
2237 for(i = 0; i < 8; i++)
2238 {
2239 /*Sets the edges that lie on the slice/tile boundary*/
2240 if(au4_idx_l[i] != idx_l)
2241 {
2242 au1_tile_slice_boundary[i] = 1;
2243 }
2244 else
2245 {
2246 au4_ilf_across_tile_slice_enable[i] = 1;
2247 }
2248 }
2249 /*Reset indices*/
2250 for(i = 0; i < 8; i++)
2251 {
2252 au4_idx_l[i] = 0;
2253 }
2254 }
2255
2256 if(ps_pps->i1_tiles_enabled_flag)
2257 {
2258 /* Calculate availability flags at tile boundary */
2259 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2260 {
2261 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2262 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2263 {
2264 if(0 == ps_sao_ctxt->i4_ctb_y)
2265 {
2266 au4_idx_l[2] = -1;
2267 au4_idx_l[4] = -1;
2268 au4_idx_l[5] = -1;
2269 }
2270 else
2271 {
2272 au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2273 au4_idx_l[5] = pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2274 }
2275
2276 idx_l = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2277 au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2278 au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2279 au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2280
2281 for(i = 0; i < 8; i++)
2282 {
2283 /*Sets the edges that lie on the slice/tile boundary*/
2284 if(au4_idx_l[i] != idx_l)
2285 {
2286 au1_tile_slice_boundary[i] |= 1;
2287 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
2288 }
2289 }
2290 }
2291 }
2292 }
2293
2294 for(i = 0; i < 8; i++)
2295 {
2296 /*Sets the edges that lie on the slice/tile boundary*/
2297 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2298 {
2299 au1_avail_luma[i] = 0;
2300 }
2301 }
2302 }
2303 }
2304 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
2305 {
2306 au1_avail_luma[0] = 0;
2307 au1_avail_luma[4] = 0;
2308 au1_avail_luma[6] = 0;
2309 }
2310 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2311 {
2312 au1_avail_luma[1] = 0;
2313 au1_avail_luma[5] = 0;
2314 au1_avail_luma[7] = 0;
2315 }
2316
2317 if(0 == ps_sao_ctxt->i4_ctb_y)
2318 {
2319 au1_avail_luma[2] = 0;
2320 au1_avail_luma[4] = 0;
2321 au1_avail_luma[5] = 0;
2322 }
2323
2324 if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) <= sao_ht_luma)
2325 {
2326 au1_avail_luma[3] = 0;
2327 au1_avail_luma[6] = 0;
2328 au1_avail_luma[7] = 0;
2329 }
2330
2331 {
2332 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
2333 u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
2334 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2335 src_strd,
2336 pu1_src_left_luma,
2337 pu1_src_top_luma,
2338 pu1_sao_src_top_left_luma_curr_ctb,
2339 au1_src_top_right,
2340 &u1_sao_src_top_left_luma_bot_left,
2341 au1_avail_luma,
2342 ai1_offset_y,
2343 sao_wd_luma,
2344 sao_ht_luma);
2345 }
2346
2347 }
2348 }
2349 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2350 {
2351 /* Update left, top and top-left */
2352 for(row = 0; row < sao_ht_luma; row++)
2353 {
2354 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2355 }
2356 /*Update in next location*/
2357 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2358
2359 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2360 }
2361 }
2362
2363 if(0 != sao_ht_chroma)
2364 {
2365 if(ps_slice_hdr_left->i1_slice_sao_chroma_flag)
2366 {
2367 if(0 == ps_sao->b3_cb_type_idx)
2368 {
2369 for(row = 0; row < sao_ht_chroma; row++)
2370 {
2371 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2372 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2373 }
2374 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2375 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2376
2377 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2378 }
2379
2380 else if(1 == ps_sao->b3_cb_type_idx)
2381 {
2382 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2383 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2384 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2385 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2386
2387 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2388 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2389 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2390 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2391
2392 if(chroma_yuv420sp_vu)
2393 {
2394 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2395 src_strd,
2396 pu1_src_left_chroma,
2397 pu1_src_top_chroma,
2398 pu1_sao_src_top_left_chroma_curr_ctb,
2399 ps_sao->b5_cr_band_pos,
2400 ps_sao->b5_cb_band_pos,
2401 ai1_offset_cr,
2402 ai1_offset_cb,
2403 sao_wd_chroma,
2404 sao_ht_chroma
2405 );
2406 }
2407 else
2408 {
2409 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2410 src_strd,
2411 pu1_src_left_chroma,
2412 pu1_src_top_chroma,
2413 pu1_sao_src_top_left_chroma_curr_ctb,
2414 ps_sao->b5_cb_band_pos,
2415 ps_sao->b5_cr_band_pos,
2416 ai1_offset_cb,
2417 ai1_offset_cr,
2418 sao_wd_chroma,
2419 sao_ht_chroma
2420 );
2421 }
2422 }
2423
2424 else // if(2 <= ps_sao->b3_cb_type_idx)
2425 {
2426 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2427 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2428 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2429 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2430
2431 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2432 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2433 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2434 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2435
2436 for(i = 0; i < 8; i++)
2437 {
2438 au1_avail_chroma[i] = 255;
2439 au1_tile_slice_boundary[i] = 0;
2440 au4_idx_l[i] = 0;
2441 au4_ilf_across_tile_slice_enable[i] = 1;
2442 }
2443 /*In case of slices or tiles*/
2444 {
2445 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2446 {
2447 ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2448 ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2449
2450 ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2451 ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2452
2453 ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2454 ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2455
2456 ctbx_l_d = ps_sao_ctxt->i4_ctb_x - 1;
2457 ctby_l_d = ps_sao_ctxt->i4_ctb_y;
2458
2459 ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2460 ctby_l = ps_sao_ctxt->i4_ctb_y;
2461
2462 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2463 {
2464 if(0 == ps_sao_ctxt->i4_ctb_y)
2465 {
2466 au4_idx_l[2] = -1;
2467 au4_idx_l[4] = -1;
2468 au4_idx_l[5] = -1;
2469 }
2470 else
2471 {
2472 au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2473 au4_idx_l[5] = pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2474 }
2475 idx_l = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2476 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2477 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2478 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2479
2480 /*Verify that the neighbor ctbs don't cross pic boundary.*/
2481 if(0 == ps_sao_ctxt->i4_ctb_y)
2482 {
2483 au4_ilf_across_tile_slice_enable[2] = 0;
2484 au4_ilf_across_tile_slice_enable[4] = 0;
2485 au4_ilf_across_tile_slice_enable[5] = 0;
2486 }
2487 else
2488 {
2489 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2490 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2491 }
2492
2493 if(au4_idx_l[5] > idx_l)
2494 {
2495 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2496 }
2497
2498 // au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2499 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2500 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2501 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2502 /*
2503 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2504 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2505 */
2506 for(i = 0; i < 8; i++)
2507 {
2508 /*Sets the edges that lie on the slice/tile boundary*/
2509 if(au4_idx_l[i] != idx_l)
2510 {
2511 au1_tile_slice_boundary[i] = 1;
2512 }
2513 else
2514 {
2515 au4_ilf_across_tile_slice_enable[i] = 1;
2516 }
2517 }
2518 /*Reset indices*/
2519 for(i = 0; i < 8; i++)
2520 {
2521 au4_idx_l[i] = 0;
2522 }
2523 }
2524 if(ps_pps->i1_tiles_enabled_flag)
2525 {
2526 /* Calculate availability flags at tile boundary */
2527 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2528 {
2529 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2530 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2531 {
2532 if(0 == ps_sao_ctxt->i4_ctb_y)
2533 {
2534 au4_idx_l[2] = -1;
2535 au4_idx_l[4] = -1;
2536 au4_idx_l[5] = -1;
2537 }
2538 else
2539 {
2540 au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2541 au4_idx_l[5] = pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2542 }
2543
2544 idx_l = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2545 au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2546 au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2547 au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2548
2549 for(i = 0; i < 8; i++)
2550 {
2551 /*Sets the edges that lie on the slice/tile boundary*/
2552 if(au4_idx_l[i] != idx_l)
2553 {
2554 au1_tile_slice_boundary[i] |= 1;
2555 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2556 }
2557 }
2558 }
2559 }
2560 }
2561 for(i = 0; i < 8; i++)
2562 {
2563 /*Sets the edges that lie on the slice/tile boundary*/
2564 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2565 {
2566 au1_avail_chroma[i] = 0;
2567 }
2568 }
2569 }
2570 }
2571 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
2572 {
2573 au1_avail_chroma[0] = 0;
2574 au1_avail_chroma[4] = 0;
2575 au1_avail_chroma[6] = 0;
2576 }
2577
2578 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2579 {
2580 au1_avail_chroma[1] = 0;
2581 au1_avail_chroma[5] = 0;
2582 au1_avail_chroma[7] = 0;
2583 }
2584
2585 if(0 == ps_sao_ctxt->i4_ctb_y)
2586 {
2587 au1_avail_chroma[2] = 0;
2588 au1_avail_chroma[4] = 0;
2589 au1_avail_chroma[5] = 0;
2590 }
2591
2592 if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) <= sao_ht_chroma)
2593 {
2594 au1_avail_chroma[3] = 0;
2595 au1_avail_chroma[6] = 0;
2596 au1_avail_chroma[7] = 0;
2597 }
2598
2599 {
2600 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2601 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2602 au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2603 au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2604 //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2605 //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2606 if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2607 {
2608 au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
2609 au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
2610 }
2611
2612
2613 if(chroma_yuv420sp_vu)
2614 {
2615 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2616 src_strd,
2617 pu1_src_left_chroma,
2618 pu1_src_top_chroma,
2619 pu1_sao_src_top_left_chroma_curr_ctb,
2620 au1_src_top_right,
2621 au1_src_bot_left,
2622 au1_avail_chroma,
2623 ai1_offset_cr,
2624 ai1_offset_cb,
2625 sao_wd_chroma,
2626 sao_ht_chroma);
2627 }
2628 else
2629 {
2630 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2631 src_strd,
2632 pu1_src_left_chroma,
2633 pu1_src_top_chroma,
2634 pu1_sao_src_top_left_chroma_curr_ctb,
2635 au1_src_top_right,
2636 au1_src_bot_left,
2637 au1_avail_chroma,
2638 ai1_offset_cb,
2639 ai1_offset_cr,
2640 sao_wd_chroma,
2641 sao_ht_chroma);
2642 }
2643 }
2644
2645 }
2646 }
2647 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2648 {
2649 for(row = 0; row < sao_ht_chroma; row++)
2650 {
2651 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2652 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2653 }
2654 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2655 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2656
2657 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2658 }
2659
2660 }
2661 pu1_src_luma += sao_wd_luma;
2662 pu1_src_chroma += sao_wd_chroma;
2663 ps_sao += 1;
2664 }
2665
2666
2667 /* Current CTB */
2668 {
2669 WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2670 WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
2671 WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2672 WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2673 WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2674 WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2675 WORD32 au4_idx_c[8], idx_c;
2676
2677 WORD32 remaining_rows;
2678 WORD32 remaining_cols;
2679
2680 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2681 if(remaining_cols <= SAO_SHIFT_CTB)
2682 {
2683 sao_wd_luma += remaining_cols;
2684 }
2685 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
2686 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2687 {
2688 sao_wd_chroma += remaining_cols;
2689 }
2690
2691 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2692 if(remaining_rows <= SAO_SHIFT_CTB)
2693 {
2694 sao_ht_luma += remaining_rows;
2695 }
2696 remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2697 if(remaining_rows <= SAO_SHIFT_CTB)
2698 {
2699 sao_ht_chroma += remaining_rows;
2700 }
2701
2702 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2703 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2704 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2705 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2706
2707 if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2708 {
2709 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2710 {
2711 if(0 == ps_sao->b3_y_type_idx)
2712 {
2713 /* Update left, top and top-left */
2714 for(row = 0; row < sao_ht_luma; row++)
2715 {
2716 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2717 }
2718 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2719
2720 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2721
2722 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2723
2724 }
2725
2726 else if(1 == ps_sao->b3_y_type_idx)
2727 {
2728 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2729 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2730 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2731 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2732
2733 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2734 src_strd,
2735 pu1_src_left_luma,
2736 pu1_src_top_luma,
2737 pu1_sao_src_top_left_luma_curr_ctb,
2738 ps_sao->b5_y_band_pos,
2739 ai1_offset_y,
2740 sao_wd_luma,
2741 sao_ht_luma
2742 );
2743 }
2744
2745 else // if(2 <= ps_sao->b3_y_type_idx)
2746 {
2747 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2748 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2749 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2750 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2751
2752 for(i = 0; i < 8; i++)
2753 {
2754 au1_avail_luma[i] = 255;
2755 au1_tile_slice_boundary[i] = 0;
2756 au4_idx_c[i] = 0;
2757 au4_ilf_across_tile_slice_enable[i] = 1;
2758 }
2759 /******************************************************************
2760 * Derive the Top-left CTB's neighbour pixel's slice indices.
2761 *
2762 *
2763 * ____________
2764 * | | |
2765 * | | C_T |
2766 * |____|_______|____
2767 * | | | |
2768 * | C_L| C | C_R|
2769 * |____|_______| |
2770 * | C_D |
2771 * | |
2772 * |____________|
2773 *
2774 *****************************************************************/
2775
2776 /*In case of slices*/
2777 {
2778 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2779 {
2780 ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2781 ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2782
2783 ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2784 ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2785
2786 ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2787 ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2788
2789 ctbx_c_d = ps_sao_ctxt->i4_ctb_x;
2790 ctby_c_d = ps_sao_ctxt->i4_ctb_y;
2791
2792 ctbx_c = ps_sao_ctxt->i4_ctb_x;
2793 ctby_c = ps_sao_ctxt->i4_ctb_y;
2794
2795 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2796 {
2797 if(0 == ps_sao_ctxt->i4_ctb_x)
2798 {
2799 au4_idx_c[6] = -1;
2800 au4_idx_c[0] = -1;
2801 au4_idx_c[4] = -1;
2802 }
2803 else
2804 {
2805 au4_idx_c[0] = au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2806 }
2807
2808 if(0 == ps_sao_ctxt->i4_ctb_y)
2809 {
2810 au4_idx_c[2] = -1;
2811 au4_idx_c[5] = -1;
2812 au4_idx_c[4] = -1;
2813 }
2814 else
2815 {
2816 au4_idx_c[4] = pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2817 au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2818 }
2819 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2820 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2821 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2822
2823 if(0 == ps_sao_ctxt->i4_ctb_x)
2824 {
2825 au4_ilf_across_tile_slice_enable[6] = 0;
2826 au4_ilf_across_tile_slice_enable[0] = 0;
2827 au4_ilf_across_tile_slice_enable[4] = 0;
2828 }
2829 else
2830 {
2831 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2832 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2833 }
2834 if(0 == ps_sao_ctxt->i4_ctb_y)
2835 {
2836 au4_ilf_across_tile_slice_enable[2] = 0;
2837 au4_ilf_across_tile_slice_enable[4] = 0;
2838 au4_ilf_across_tile_slice_enable[5] = 0;
2839 }
2840 else
2841 {
2842 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2843 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2844 }
2845 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2846 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2847 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2848
2849 if(au4_idx_c[6] < idx_c)
2850 {
2851 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2852 }
2853
2854 /*
2855 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2856 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2857 */
2858 for(i = 0; i < 8; i++)
2859 {
2860 /*Sets the edges that lie on the slice/tile boundary*/
2861 if(au4_idx_c[i] != idx_c)
2862 {
2863 au1_tile_slice_boundary[i] = 1;
2864 }
2865 else
2866 {
2867 au4_ilf_across_tile_slice_enable[i] = 1;
2868 }
2869 }
2870 /*Reset indices*/
2871 for(i = 0; i < 8; i++)
2872 {
2873 au4_idx_c[i] = 0;
2874 }
2875 }
2876
2877 if(ps_pps->i1_tiles_enabled_flag)
2878 {
2879 /* Calculate availability flags at slice boundary */
2880 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2881 {
2882 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2883 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2884 {
2885 if(0 == ps_sao_ctxt->i4_ctb_x)
2886 {
2887 au4_idx_c[6] = -1;
2888 au4_idx_c[0] = -1;
2889 au4_idx_c[4] = -1;
2890 }
2891 else
2892 {
2893 au4_idx_c[0] = au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2894 }
2895
2896 if(0 == ps_sao_ctxt->i4_ctb_y)
2897 {
2898 au4_idx_c[2] = -1;
2899 au4_idx_c[5] = -1;
2900 au4_idx_c[4] = -1;
2901 }
2902 else
2903 {
2904 au4_idx_c[4] = pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2905 au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2906 }
2907 idx_c = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2908 au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2909 au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2910
2911 for(i = 0; i < 8; i++)
2912 {
2913 /*Sets the edges that lie on the slice/tile boundary*/
2914 if(au4_idx_c[i] != idx_c)
2915 {
2916 au1_tile_slice_boundary[i] |= 1;
2917 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2918 }
2919 }
2920 }
2921 }
2922 }
2923
2924 for(i = 0; i < 8; i++)
2925 {
2926 /*Sets the edges that lie on the slice/tile boundary*/
2927 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2928 {
2929 au1_avail_luma[i] = 0;
2930 }
2931 }
2932
2933 }
2934 }
2935 if(0 == ps_sao_ctxt->i4_ctb_x)
2936 {
2937 au1_avail_luma[0] = 0;
2938 au1_avail_luma[4] = 0;
2939 au1_avail_luma[6] = 0;
2940 }
2941
2942 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2943 {
2944 au1_avail_luma[1] = 0;
2945 au1_avail_luma[5] = 0;
2946 au1_avail_luma[7] = 0;
2947 }
2948
2949 if(0 == ps_sao_ctxt->i4_ctb_y)
2950 {
2951 au1_avail_luma[2] = 0;
2952 au1_avail_luma[4] = 0;
2953 au1_avail_luma[5] = 0;
2954 }
2955
2956 if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) <= sao_ht_luma)
2957 {
2958 au1_avail_luma[3] = 0;
2959 au1_avail_luma[6] = 0;
2960 au1_avail_luma[7] = 0;
2961 }
2962
2963 {
2964 au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2965 u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2966
2967 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2968 src_strd,
2969 pu1_src_left_luma,
2970 pu1_src_top_luma,
2971 pu1_sao_src_top_left_luma_curr_ctb,
2972 au1_src_top_right,
2973 &u1_sao_src_top_left_luma_bot_left,
2974 au1_avail_luma,
2975 ai1_offset_y,
2976 sao_wd_luma,
2977 sao_ht_luma);
2978 }
2979 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2980 pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2981 }
2982 }
2983 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2984 {
2985 /* Update left, top and top-left */
2986 for(row = 0; row < sao_ht_luma; row++)
2987 {
2988 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2989 }
2990 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2991
2992 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2993
2994 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2995 }
2996 }
2997
2998 if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
2999 {
3000 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
3001 {
3002 if(0 == ps_sao->b3_cb_type_idx)
3003 {
3004 for(row = 0; row < sao_ht_chroma; row++)
3005 {
3006 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3007 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3008 }
3009 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3010 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3011
3012 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3013
3014 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3015 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3016 }
3017
3018 else if(1 == ps_sao->b3_cb_type_idx)
3019 {
3020 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3021 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3022 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3023 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3024
3025 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3026 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3027 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3028 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3029
3030 if(chroma_yuv420sp_vu)
3031 {
3032 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3033 src_strd,
3034 pu1_src_left_chroma,
3035 pu1_src_top_chroma,
3036 pu1_sao_src_top_left_chroma_curr_ctb,
3037 ps_sao->b5_cr_band_pos,
3038 ps_sao->b5_cb_band_pos,
3039 ai1_offset_cr,
3040 ai1_offset_cb,
3041 sao_wd_chroma,
3042 sao_ht_chroma
3043 );
3044 }
3045 else
3046 {
3047 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3048 src_strd,
3049 pu1_src_left_chroma,
3050 pu1_src_top_chroma,
3051 pu1_sao_src_top_left_chroma_curr_ctb,
3052 ps_sao->b5_cb_band_pos,
3053 ps_sao->b5_cr_band_pos,
3054 ai1_offset_cb,
3055 ai1_offset_cr,
3056 sao_wd_chroma,
3057 sao_ht_chroma
3058 );
3059 }
3060 }
3061
3062 else // if(2 <= ps_sao->b3_cb_type_idx)
3063 {
3064 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3065 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3066 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3067 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3068
3069 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3070 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3071 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3072 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3073
3074 for(i = 0; i < 8; i++)
3075 {
3076 au1_avail_chroma[i] = 255;
3077 au1_tile_slice_boundary[i] = 0;
3078 au4_idx_c[i] = 0;
3079 au4_ilf_across_tile_slice_enable[i] = 1;
3080 }
3081 {
3082 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3083 {
3084 ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
3085 ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
3086
3087 ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
3088 ctby_c_l = ps_sao_ctxt->i4_ctb_y;
3089
3090 ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
3091 ctby_c_r = ps_sao_ctxt->i4_ctb_y;
3092
3093 ctbx_c_d = ps_sao_ctxt->i4_ctb_x;
3094 ctby_c_d = ps_sao_ctxt->i4_ctb_y;
3095
3096 ctbx_c = ps_sao_ctxt->i4_ctb_x;
3097 ctby_c = ps_sao_ctxt->i4_ctb_y;
3098
3099 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
3100 {
3101 if(0 == ps_sao_ctxt->i4_ctb_x)
3102 {
3103 au4_idx_c[0] = -1;
3104 au4_idx_c[4] = -1;
3105 au4_idx_c[6] = -1;
3106 }
3107 else
3108 {
3109 au4_idx_c[0] = au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3110 }
3111
3112 if(0 == ps_sao_ctxt->i4_ctb_y)
3113 {
3114 au4_idx_c[2] = -1;
3115 au4_idx_c[4] = -1;
3116 au4_idx_c[5] = -1;
3117 }
3118 else
3119 {
3120 au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3121 au4_idx_c[4] = pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3122 }
3123 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3124 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3125 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3126
3127 if(0 == ps_sao_ctxt->i4_ctb_x)
3128 {
3129 au4_ilf_across_tile_slice_enable[0] = 0;
3130 au4_ilf_across_tile_slice_enable[4] = 0;
3131 au4_ilf_across_tile_slice_enable[6] = 0;
3132 }
3133 else
3134 {
3135 au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
3136 au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3137 }
3138
3139 if(0 == ps_sao_ctxt->i4_ctb_y)
3140 {
3141 au4_ilf_across_tile_slice_enable[2] = 0;
3142 au4_ilf_across_tile_slice_enable[4] = 0;
3143 au4_ilf_across_tile_slice_enable[5] = 0;
3144 }
3145 else
3146 {
3147 au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3148 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
3149 }
3150
3151 au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
3152 au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
3153 au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
3154
3155 if(idx_c > au4_idx_c[6])
3156 {
3157 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3158 }
3159
3160 /*
3161 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
3162 * of the pixel having a greater address is checked. Accordingly, set the availability flags
3163 */
3164 for(i = 0; i < 8; i++)
3165 {
3166 /*Sets the edges that lie on the slice/tile boundary*/
3167 if(au4_idx_c[i] != idx_c)
3168 {
3169 au1_tile_slice_boundary[i] = 1;
3170 }
3171 else
3172 {
3173 au4_ilf_across_tile_slice_enable[i] = 1;
3174 }
3175 }
3176 /*Reset indices*/
3177 for(i = 0; i < 8; i++)
3178 {
3179 au4_idx_c[i] = 0;
3180 }
3181 }
3182
3183 if(ps_pps->i1_tiles_enabled_flag)
3184 {
3185 /* Calculate availability flags at slice boundary */
3186 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
3187 {
3188 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
3189 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
3190 {
3191 if(0 == ps_sao_ctxt->i4_ctb_x)
3192 {
3193 au4_idx_c[6] = -1;
3194 au4_idx_c[0] = -1;
3195 au4_idx_c[4] = -1;
3196 }
3197 else
3198 {
3199 au4_idx_c[0] = au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3200 }
3201
3202 if(0 == ps_sao_ctxt->i4_ctb_y)
3203 {
3204 au4_idx_c[2] = -1;
3205 au4_idx_c[5] = -1;
3206 au4_idx_c[4] = -1;
3207 }
3208 else
3209 {
3210 au4_idx_c[4] = pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3211 au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3212 }
3213 idx_c = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3214 au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3215 au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3216
3217 for(i = 0; i < 8; i++)
3218 {
3219 /*Sets the edges that lie on the slice/tile boundary*/
3220 if(au4_idx_c[i] != idx_c)
3221 {
3222 au1_tile_slice_boundary[i] |= 1;
3223 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
3224 }
3225 }
3226 }
3227 }
3228 }
3229
3230 for(i = 0; i < 8; i++)
3231 {
3232 /*Sets the edges that lie on the slice/tile boundary*/
3233 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
3234 {
3235 au1_avail_chroma[i] = 0;
3236 }
3237 }
3238 }
3239 }
3240
3241 if(0 == ps_sao_ctxt->i4_ctb_x)
3242 {
3243 au1_avail_chroma[0] = 0;
3244 au1_avail_chroma[4] = 0;
3245 au1_avail_chroma[6] = 0;
3246 }
3247
3248 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
3249 {
3250 au1_avail_chroma[1] = 0;
3251 au1_avail_chroma[5] = 0;
3252 au1_avail_chroma[7] = 0;
3253 }
3254
3255 if(0 == ps_sao_ctxt->i4_ctb_y)
3256 {
3257 au1_avail_chroma[2] = 0;
3258 au1_avail_chroma[4] = 0;
3259 au1_avail_chroma[5] = 0;
3260 }
3261
3262 if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) <= sao_ht_chroma)
3263 {
3264 au1_avail_chroma[3] = 0;
3265 au1_avail_chroma[6] = 0;
3266 au1_avail_chroma[7] = 0;
3267 }
3268
3269 {
3270 au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
3271 au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
3272
3273 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
3274 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
3275
3276 if(chroma_yuv420sp_vu)
3277 {
3278 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3279 src_strd,
3280 pu1_src_left_chroma,
3281 pu1_src_top_chroma,
3282 pu1_sao_src_top_left_chroma_curr_ctb,
3283 au1_src_top_right,
3284 au1_sao_src_top_left_chroma_bot_left,
3285 au1_avail_chroma,
3286 ai1_offset_cr,
3287 ai1_offset_cb,
3288 sao_wd_chroma,
3289 sao_ht_chroma);
3290 }
3291 else
3292 {
3293 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3294 src_strd,
3295 pu1_src_left_chroma,
3296 pu1_src_top_chroma,
3297 pu1_sao_src_top_left_chroma_curr_ctb,
3298 au1_src_top_right,
3299 au1_sao_src_top_left_chroma_bot_left,
3300 au1_avail_chroma,
3301 ai1_offset_cb,
3302 ai1_offset_cr,
3303 sao_wd_chroma,
3304 sao_ht_chroma);
3305 }
3306 }
3307
3308 }
3309 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3310 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3311
3312 pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
3313 pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
3314 }
3315 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3316 {
3317 for(row = 0; row < sao_ht_chroma; row++)
3318 {
3319 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3320 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3321 }
3322 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3323 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3324
3325 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3326
3327 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3328 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3329 }
3330
3331 }
3332 }
3333
3334
3335
3336
3337 /* If no loop filter is enabled copy the backed up values */
3338 {
3339 /* Luma */
3340 if(no_loop_filter_enabled_luma)
3341 {
3342 UWORD32 u4_no_loop_filter_flag;
3343 WORD32 loop_filter_bit_pos;
3344 WORD32 log2_min_cu = 3;
3345 WORD32 min_cu = (1 << log2_min_cu);
3346 UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
3347 WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
3348 WORD32 sao_blk_wd = ctb_size;
3349 WORD32 remaining_rows;
3350 WORD32 remaining_cols;
3351
3352 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3353 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3354 if(remaining_rows <= SAO_SHIFT_CTB)
3355 sao_blk_ht += remaining_rows;
3356 if(remaining_cols <= SAO_SHIFT_CTB)
3357 sao_blk_wd += remaining_cols;
3358
3359 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
3360 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3361
3362 pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
3363
3364 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3365 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3366 if(ps_sao_ctxt->i4_ctb_x > 0)
3367 loop_filter_bit_pos -= 1;
3368
3369 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3370 (loop_filter_bit_pos >> 3);
3371
3372 for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
3373 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3374 {
3375 WORD32 tmp_wd = sao_blk_wd;
3376
3377 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3378 (loop_filter_bit_pos & 7);
3379 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3380
3381 if(u4_no_loop_filter_flag)
3382 {
3383 while(tmp_wd > 0)
3384 {
3385 if(CTZ(u4_no_loop_filter_flag))
3386 {
3387 pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3388 pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3389 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3390 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
3391 }
3392 else
3393 {
3394 for(row = 0; row < min_cu; row++)
3395 {
3396 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3397 {
3398 pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
3399 }
3400 }
3401 pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3402 pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3403 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3404 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
3405 }
3406 }
3407
3408 pu1_src_tmp_luma -= sao_blk_wd;
3409 pu1_src_backup_luma -= sao_blk_wd;
3410 }
3411
3412 pu1_src_tmp_luma += (src_strd << log2_min_cu);
3413 pu1_src_backup_luma += (backup_strd << log2_min_cu);
3414 }
3415 }
3416
3417 /* Chroma */
3418 if(no_loop_filter_enabled_chroma)
3419 {
3420 UWORD32 u4_no_loop_filter_flag;
3421 WORD32 loop_filter_bit_pos;
3422 WORD32 log2_min_cu = 3;
3423 WORD32 min_cu = (1 << log2_min_cu);
3424 UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
3425 WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
3426 WORD32 sao_blk_wd = ctb_size;
3427 WORD32 remaining_rows;
3428 WORD32 remaining_cols;
3429
3430 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3431 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3432 if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3433 sao_blk_ht += remaining_rows;
3434 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3435 sao_blk_wd += remaining_cols;
3436
3437 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3438 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3439
3440 pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3441
3442 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3443 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3444 if(ps_sao_ctxt->i4_ctb_x > 0)
3445 loop_filter_bit_pos -= 2;
3446
3447 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3448 (loop_filter_bit_pos >> 3);
3449
3450 for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3451 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3452 {
3453 WORD32 tmp_wd = sao_blk_wd;
3454
3455 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3456 (loop_filter_bit_pos & 7);
3457 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3458
3459 if(u4_no_loop_filter_flag)
3460 {
3461 while(tmp_wd > 0)
3462 {
3463 if(CTZ(u4_no_loop_filter_flag))
3464 {
3465 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3466 pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3467 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3468 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
3469 }
3470 else
3471 {
3472 for(row = 0; row < min_cu / 2; row++)
3473 {
3474 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3475 {
3476 pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
3477 }
3478 }
3479
3480 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3481 pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3482 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3483 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
3484 }
3485 }
3486
3487 pu1_src_tmp_chroma -= sao_blk_wd;
3488 pu1_src_backup_chroma -= sao_blk_wd;
3489 }
3490
3491 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
3492 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
3493 }
3494 }
3495 }
3496
3497 }
3498
3499