1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 * ihevc_sao.c
22 *
23 * @brief
24 * Contains function definitions for sample adaptive offset process
25 *
26 * @author
27 * Srinivas T
28 *
29 * @par List of Functions:
30 *
31 * @remarks
32 * None
33 *
34 *******************************************************************************
35 */
36
37 #include <stdio.h>
38 #include <stddef.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <assert.h>
42
43 #include "ihevc_typedefs.h"
44 #include "iv.h"
45 #include "ivd.h"
46 #include "ihevcd_cxa.h"
47 #include "ithread.h"
48
49 #include "ihevc_defs.h"
50 #include "ihevc_debug.h"
51 #include "ihevc_defs.h"
52 #include "ihevc_structs.h"
53 #include "ihevc_macros.h"
54 #include "ihevc_platform_macros.h"
55 #include "ihevc_cabac_tables.h"
56 #include "ihevc_sao.h"
57 #include "ihevc_mem_fns.h"
58
59 #include "ihevc_error.h"
60 #include "ihevc_common_tables.h"
61
62 #include "ihevcd_trace.h"
63 #include "ihevcd_defs.h"
64 #include "ihevcd_function_selector.h"
65 #include "ihevcd_structs.h"
66 #include "ihevcd_error.h"
67 #include "ihevcd_nal.h"
68 #include "ihevcd_bitstream.h"
69 #include "ihevcd_job_queue.h"
70 #include "ihevcd_utils.h"
71
72 #include "ihevc_deblk.h"
73 #include "ihevc_deblk_tables.h"
74 #include "ihevcd_profile.h"
75 #include "ihevcd_sao.h"
76 #include "ihevcd_debug.h"
77
78 #define SAO_SHIFT_CTB 8
79
80 /**
81 * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions)
82 */
ihevcd_sao_ctb(sao_ctxt_t * ps_sao_ctxt)83 void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
84 {
85 codec_t *ps_codec = ps_sao_ctxt->ps_codec;
86 UWORD8 *pu1_src_luma;
87 UWORD8 *pu1_src_chroma;
88 WORD32 src_strd;
89 WORD32 ctb_size;
90 WORD32 log2_ctb_size;
91 sps_t *ps_sps;
92 sao_t *ps_sao;
93 WORD32 row, col;
94 UWORD8 au1_avail_luma[8];
95 UWORD8 au1_avail_chroma[8];
96 WORD32 i;
97 UWORD8 *pu1_src_top_luma;
98 UWORD8 *pu1_src_top_chroma;
99 UWORD8 *pu1_src_left_luma;
100 UWORD8 *pu1_src_left_chroma;
101 UWORD8 au1_src_top_right[2];
102 UWORD8 au1_src_bot_left[2];
103 UWORD8 *pu1_no_loop_filter_flag;
104 WORD32 loop_filter_strd;
105
106 WORD8 ai1_offset_y[5];
107 WORD8 ai1_offset_cb[5];
108 WORD8 ai1_offset_cr[5];
109
110 PROFILE_DISABLE_SAO();
111
112 ai1_offset_y[0] = 0;
113 ai1_offset_cb[0] = 0;
114 ai1_offset_cr[0] = 0;
115
116 ps_sps = ps_sao_ctxt->ps_sps;
117 log2_ctb_size = ps_sps->i1_log2_ctb_size;
118 ctb_size = (1 << log2_ctb_size);
119 src_strd = ps_sao_ctxt->ps_codec->i4_strd;
120 pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
121 pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
122
123 ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
124 loop_filter_strd = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
125
126 /* Current CTB */
127 {
128 WORD32 sao_wd_luma;
129 WORD32 sao_wd_chroma;
130 WORD32 sao_ht_luma;
131 WORD32 sao_ht_chroma;
132
133 WORD32 remaining_rows;
134 WORD32 remaining_cols;
135
136 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
137 sao_wd_luma = MIN(ctb_size, remaining_cols);
138 sao_wd_chroma = MIN(ctb_size, remaining_cols);
139
140 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
141 sao_ht_luma = MIN(ctb_size, remaining_rows);
142 sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
143
144 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
145 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
146 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
147 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
148
149 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
150 ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
151 ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
152
153 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
154 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
155 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
156 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
157
158 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
159 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
160 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
161 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
162
163 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
164 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
165 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
166 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
167
168 for(i = 0; i < 8; i++)
169 {
170 au1_avail_luma[i] = 255;
171 au1_avail_chroma[i] = 255;
172 }
173
174
175 if(0 == ps_sao_ctxt->i4_ctb_x)
176 {
177 au1_avail_luma[0] = 0;
178 au1_avail_luma[4] = 0;
179 au1_avail_luma[6] = 0;
180
181 au1_avail_chroma[0] = 0;
182 au1_avail_chroma[4] = 0;
183 au1_avail_chroma[6] = 0;
184 }
185
186 if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
187 {
188 au1_avail_luma[1] = 0;
189 au1_avail_luma[5] = 0;
190 au1_avail_luma[7] = 0;
191
192 au1_avail_chroma[1] = 0;
193 au1_avail_chroma[5] = 0;
194 au1_avail_chroma[7] = 0;
195 }
196
197 if(0 == ps_sao_ctxt->i4_ctb_y)
198 {
199 au1_avail_luma[2] = 0;
200 au1_avail_luma[4] = 0;
201 au1_avail_luma[5] = 0;
202
203 au1_avail_chroma[2] = 0;
204 au1_avail_chroma[4] = 0;
205 au1_avail_chroma[5] = 0;
206 }
207
208 if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
209 {
210 au1_avail_luma[3] = 0;
211 au1_avail_luma[6] = 0;
212 au1_avail_luma[7] = 0;
213
214 au1_avail_chroma[3] = 0;
215 au1_avail_chroma[6] = 0;
216 au1_avail_chroma[7] = 0;
217 }
218
219
220 if(0 == ps_sao->b3_y_type_idx)
221 {
222 /* Update left, top and top-left */
223 for(row = 0; row < sao_ht_luma; row++)
224 {
225 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
226 }
227 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
228
229 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
230
231 }
232 else
233 {
234 UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
235 UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
236 WORD32 tmp_strd = MAX_CTB_SIZE + 2;
237 WORD32 no_loop_filter_enabled = 0;
238
239 /* Check the loop filter flags and copy the original values for back up */
240 {
241 UWORD32 u4_no_loop_filter_flag;
242 WORD32 min_cu = 8;
243 UWORD8 *pu1_src_tmp = pu1_src_luma;
244
245 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
246 {
247 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
248 ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
249 u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
250
251 if(u4_no_loop_filter_flag)
252 {
253 WORD32 tmp_wd = sao_wd_luma;
254 no_loop_filter_enabled = 1;
255 while(tmp_wd > 0)
256 {
257 if(CTZ(u4_no_loop_filter_flag))
258 {
259 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
260 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
261 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
262 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
263 }
264 else
265 {
266 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
267 {
268 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
269 {
270 pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
271 }
272 }
273
274 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
275 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
276 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
277 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
278 }
279 }
280
281 pu1_src_tmp -= sao_wd_luma;
282 }
283
284 pu1_src_tmp += min_cu * src_strd;
285 pu1_src_copy += min_cu * tmp_strd;
286 }
287 }
288
289 if(1 == ps_sao->b3_y_type_idx)
290 {
291 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
292 src_strd,
293 pu1_src_left_luma,
294 pu1_src_top_luma,
295 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
296 ps_sao->b5_y_band_pos,
297 ai1_offset_y,
298 sao_wd_luma,
299 sao_ht_luma);
300 }
301 else // if(2 <= ps_sao->b3_y_type_idx)
302 {
303 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
304 au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
305 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
306 src_strd,
307 pu1_src_left_luma,
308 pu1_src_top_luma,
309 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
310 au1_src_top_right,
311 au1_src_bot_left,
312 au1_avail_luma,
313 ai1_offset_y,
314 sao_wd_luma,
315 sao_ht_luma);
316 }
317
318 /* Check the loop filter flags and copy the original values back if they are set */
319 if(no_loop_filter_enabled)
320 {
321 UWORD32 u4_no_loop_filter_flag;
322 WORD32 min_cu = 8;
323 UWORD8 *pu1_src_tmp = pu1_src_luma;
324
325 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
326 {
327 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
328 u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
329
330 if(u4_no_loop_filter_flag)
331 {
332 WORD32 tmp_wd = sao_wd_luma;
333 while(tmp_wd > 0)
334 {
335 if(CTZ(u4_no_loop_filter_flag))
336 {
337 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
338 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
339 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
340 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
341 }
342 else
343 {
344 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
345 {
346 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
347 {
348 pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
349 }
350 }
351
352 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
353 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
354 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
355 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
356 }
357 }
358
359 pu1_src_tmp -= sao_wd_luma;
360 }
361
362 pu1_src_tmp += min_cu * src_strd;
363 pu1_src_copy += min_cu * tmp_strd;
364 }
365 }
366
367 }
368
369 if(0 == ps_sao->b3_cb_type_idx)
370 {
371 for(row = 0; row < sao_ht_chroma; row++)
372 {
373 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
374 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
375 }
376 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
377 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
378
379 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
380 }
381 else
382 {
383 UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
384 UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
385 WORD32 tmp_strd = MAX_CTB_SIZE + 4;
386 WORD32 no_loop_filter_enabled = 0;
387
388 /* Check the loop filter flags and copy the original values for back up */
389 {
390 UWORD32 u4_no_loop_filter_flag;
391 WORD32 min_cu = 4;
392 UWORD8 *pu1_src_tmp = pu1_src_chroma;
393
394 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
395 {
396 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
397 u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
398
399 if(u4_no_loop_filter_flag)
400 {
401 WORD32 tmp_wd = sao_wd_chroma;
402 no_loop_filter_enabled = 1;
403 while(tmp_wd > 0)
404 {
405 if(CTZ(u4_no_loop_filter_flag))
406 {
407 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
408 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
409 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
410 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
411 }
412 else
413 {
414 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
415 {
416 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
417 {
418 pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
419 }
420 }
421
422 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
423 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
424 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
425 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
426 }
427 }
428
429 pu1_src_tmp -= sao_wd_chroma;
430 }
431
432 pu1_src_tmp += min_cu * src_strd;
433 pu1_src_copy += min_cu * tmp_strd;
434 }
435 }
436
437 if(1 == ps_sao->b3_cb_type_idx)
438 {
439 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
440 src_strd,
441 pu1_src_left_chroma,
442 pu1_src_top_chroma,
443 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
444 ps_sao->b5_cb_band_pos,
445 ps_sao->b5_cr_band_pos,
446 ai1_offset_cb,
447 ai1_offset_cr,
448 sao_wd_chroma,
449 sao_ht_chroma
450 );
451 }
452 else // if(2 <= ps_sao->b3_cb_type_idx)
453 {
454 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
455 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
456 au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
457 au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
458 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
459 src_strd,
460 pu1_src_left_chroma,
461 pu1_src_top_chroma,
462 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
463 au1_src_top_right,
464 au1_src_bot_left,
465 au1_avail_chroma,
466 ai1_offset_cb,
467 ai1_offset_cr,
468 sao_wd_chroma,
469 sao_ht_chroma);
470 }
471
472 /* Check the loop filter flags and copy the original values back if they are set */
473 if(no_loop_filter_enabled)
474 {
475 UWORD32 u4_no_loop_filter_flag;
476 WORD32 min_cu = 4;
477 UWORD8 *pu1_src_tmp = pu1_src_chroma;
478
479 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
480 {
481 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
482 u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
483
484 if(u4_no_loop_filter_flag)
485 {
486 WORD32 tmp_wd = sao_wd_chroma;
487 while(tmp_wd > 0)
488 {
489 if(CTZ(u4_no_loop_filter_flag))
490 {
491 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
492 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
493 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
494 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
495 }
496 else
497 {
498 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
499 {
500 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
501 {
502 pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
503 }
504 }
505
506 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
507 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
508 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
509 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
510 }
511 }
512
513 pu1_src_tmp -= sao_wd_chroma;
514 }
515
516 pu1_src_tmp += min_cu * src_strd;
517 pu1_src_copy += min_cu * tmp_strd;
518 }
519 }
520
521 }
522
523 }
524 }
525
ihevcd_sao_shift_ctb(sao_ctxt_t * ps_sao_ctxt)526 void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
527 {
528 codec_t *ps_codec = ps_sao_ctxt->ps_codec;
529 UWORD8 *pu1_src_luma;
530 UWORD8 *pu1_src_chroma;
531 WORD32 src_strd;
532 WORD32 ctb_size;
533 WORD32 log2_ctb_size;
534 sps_t *ps_sps;
535 sao_t *ps_sao;
536 pps_t *ps_pps;
537 slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
538 tile_t *ps_tile;
539 UWORD16 *pu1_slice_idx;
540 UWORD16 *pu1_tile_idx;
541 WORD32 row, col;
542 UWORD8 au1_avail_luma[8];
543 UWORD8 au1_avail_chroma[8];
544 UWORD8 au1_tile_slice_boundary[8];
545 UWORD8 au4_ilf_across_tile_slice_enable[8];
546 WORD32 i;
547 UWORD8 *pu1_src_top_luma;
548 UWORD8 *pu1_src_top_chroma;
549 UWORD8 *pu1_src_left_luma;
550 UWORD8 *pu1_src_left_chroma;
551 UWORD8 au1_src_top_right[2];
552 UWORD8 au1_src_bot_left[2];
553 UWORD8 *pu1_no_loop_filter_flag;
554 UWORD8 *pu1_src_backup_luma;
555 UWORD8 *pu1_src_backup_chroma;
556 WORD32 backup_strd;
557 WORD32 loop_filter_strd;
558
559 WORD32 no_loop_filter_enabled_luma = 0;
560 WORD32 no_loop_filter_enabled_chroma = 0;
561 UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
562 UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
563 UWORD8 *pu1_sao_src_luma_top_left_ctb;
564 UWORD8 *pu1_sao_src_chroma_top_left_ctb;
565 UWORD8 *pu1_sao_src_top_left_luma_top_right;
566 UWORD8 *pu1_sao_src_top_left_chroma_top_right;
567 UWORD8 u1_sao_src_top_left_luma_bot_left;
568 UWORD8 *pu1_sao_src_top_left_luma_bot_left;
569 UWORD8 *au1_sao_src_top_left_chroma_bot_left;
570 UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
571
572 WORD8 ai1_offset_y[5];
573 WORD8 ai1_offset_cb[5];
574 WORD8 ai1_offset_cr[5];
575 WORD32 chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
576
577 PROFILE_DISABLE_SAO();
578
579 ai1_offset_y[0] = 0;
580 ai1_offset_cb[0] = 0;
581 ai1_offset_cr[0] = 0;
582
583 ps_sps = ps_sao_ctxt->ps_sps;
584 ps_pps = ps_sao_ctxt->ps_pps;
585 ps_tile = ps_sao_ctxt->ps_tile;
586
587 log2_ctb_size = ps_sps->i1_log2_ctb_size;
588 ctb_size = (1 << log2_ctb_size);
589 src_strd = ps_sao_ctxt->ps_codec->i4_strd;
590 ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
591 ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
592
593 pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
594 pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
595 pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
596 pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
597
598 /*Stores the left value for each row ctbs- Needed for column tiles*/
599 pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
600 pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
601 pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
602 pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
603 u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
604 pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
605 au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
606 pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
607 pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
608 pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
609
610 ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
611 loop_filter_strd = (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
612 backup_strd = 2 * MAX_CTB_SIZE;
613
614 DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
615
616 {
617 /* Check the loop filter flags and copy the original values for back up */
618 /* Luma */
619
620 /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs
621 * can belong to different slice with their own sao_enable flag */
622 {
623 UWORD32 u4_no_loop_filter_flag;
624 WORD32 loop_filter_bit_pos;
625 WORD32 log2_min_cu = 3;
626 WORD32 min_cu = (1 << log2_min_cu);
627 UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
628 WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
629 WORD32 sao_blk_wd = ctb_size;
630 WORD32 remaining_rows;
631 WORD32 remaining_cols;
632
633 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
634 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
635 if(remaining_rows <= SAO_SHIFT_CTB)
636 sao_blk_ht += remaining_rows;
637 if(remaining_cols <= SAO_SHIFT_CTB)
638 sao_blk_wd += remaining_cols;
639
640 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
641 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
642
643 pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
644
645 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
646 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
647 if(ps_sao_ctxt->i4_ctb_x > 0)
648 loop_filter_bit_pos -= 1;
649
650 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
651 (loop_filter_bit_pos >> 3);
652
653 for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
654 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
655 {
656 WORD32 tmp_wd = sao_blk_wd;
657
658 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
659 (loop_filter_bit_pos & 7);
660 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
661
662 if(u4_no_loop_filter_flag)
663 {
664 no_loop_filter_enabled_luma = 1;
665 while(tmp_wd > 0)
666 {
667 if(CTZ(u4_no_loop_filter_flag))
668 {
669 pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
670 pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
671 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
672 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
673 }
674 else
675 {
676 for(row = 0; row < min_cu; row++)
677 {
678 for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
679 {
680 pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
681 }
682 }
683 pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
684 pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
685 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
686 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
687 }
688 }
689
690 pu1_src_tmp_luma -= sao_blk_wd;
691 pu1_src_backup_luma -= sao_blk_wd;
692 }
693
694 pu1_src_tmp_luma += (src_strd << log2_min_cu);
695 pu1_src_backup_luma += (backup_strd << log2_min_cu);
696 }
697 }
698
699 /* Chroma */
700
701 {
702 UWORD32 u4_no_loop_filter_flag;
703 WORD32 loop_filter_bit_pos;
704 WORD32 log2_min_cu = 3;
705 WORD32 min_cu = (1 << log2_min_cu);
706 UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
707 WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
708 WORD32 sao_blk_wd = ctb_size;
709 WORD32 remaining_rows;
710 WORD32 remaining_cols;
711
712 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
713 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
714 if(remaining_rows <= 2 * SAO_SHIFT_CTB)
715 sao_blk_ht += remaining_rows;
716 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
717 sao_blk_wd += remaining_cols;
718
719 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
720 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
721
722 pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
723
724 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
725 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
726 if(ps_sao_ctxt->i4_ctb_x > 0)
727 loop_filter_bit_pos -= 2;
728
729 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
730 (loop_filter_bit_pos >> 3);
731
732 for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
733 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
734 {
735 WORD32 tmp_wd = sao_blk_wd;
736
737 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
738 (loop_filter_bit_pos & 7);
739 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
740
741 if(u4_no_loop_filter_flag)
742 {
743 no_loop_filter_enabled_chroma = 1;
744 while(tmp_wd > 0)
745 {
746 if(CTZ(u4_no_loop_filter_flag))
747 {
748 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
749 pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
750 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
751 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
752 }
753 else
754 {
755 for(row = 0; row < min_cu / 2; row++)
756 {
757 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
758 {
759 pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
760 }
761 }
762
763 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
764 pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
765 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
766 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
767 }
768 }
769
770 pu1_src_tmp_chroma -= sao_blk_wd;
771 pu1_src_backup_chroma -= sao_blk_wd;
772 }
773
774 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
775 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
776 }
777 }
778 }
779
780 DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
781
782 /* Top-left CTB */
783 if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
784 {
785 WORD32 sao_wd_luma = SAO_SHIFT_CTB;
786 WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
787 WORD32 sao_ht_luma = SAO_SHIFT_CTB;
788 WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
789
790 WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
791 WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
792 WORD32 au4_idx_tl[8], idx_tl;
793
794 slice_header_t *ps_slice_hdr_top_left;
795 {
796 WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
797 (ps_sao_ctxt->i4_ctb_x - 1);
798 ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx];
799 }
800
801
802 pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
803 pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
804 ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
805 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
806 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
807 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
808 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
809
810 if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag)
811 {
812 if(0 == ps_sao->b3_y_type_idx)
813 {
814 /* Update left, top and top-left */
815 for(row = 0; row < sao_ht_luma; row++)
816 {
817 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
818 }
819 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
820
821 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
822
823
824 }
825
826 else if(1 == ps_sao->b3_y_type_idx)
827 {
828 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
829 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
830 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
831 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
832
833 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
834 src_strd,
835 pu1_src_left_luma,
836 pu1_src_top_luma,
837 pu1_sao_src_luma_top_left_ctb,
838 ps_sao->b5_y_band_pos,
839 ai1_offset_y,
840 sao_wd_luma,
841 sao_ht_luma
842 );
843 }
844
845 else // if(2 <= ps_sao->b3_y_type_idx)
846 {
847 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
848 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
849 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
850 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
851
852 for(i = 0; i < 8; i++)
853 {
854 au1_avail_luma[i] = 255;
855 au1_tile_slice_boundary[i] = 0;
856 au4_idx_tl[i] = 0;
857 au4_ilf_across_tile_slice_enable[i] = 1;
858 }
859
860 /******************************************************************
861 * Derive the Top-left CTB's neighbor pixel's slice indices.
862 *
863 * TL_T
864 * 4 _2__5________
865 * 0 | | |
866 * TL_L | TL | 1 TL_R|
867 * |____|_______|____
868 * 6|TL_D|7 | |
869 * | 3 | | |
870 * |____|_______| |
871 * | |
872 * | |
873 * |____________|
874 *
875 *****************************************************************/
876
877 /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
878 {
879 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
880 {
881 {
882 /*Assuming that sao shift is uniform along x and y directions*/
883 if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
884 {
885 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
886 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
887 }
888 else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
889 {
890 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
891 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
892 }
893 ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
894 ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
895
896 ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
897 ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
898
899 ctbx_tl_d = ps_sao_ctxt->i4_ctb_x - 1;
900 ctby_tl_d = ps_sao_ctxt->i4_ctb_y;
901
902 ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
903 ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
904 }
905
906 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
907 {
908 /*Calculate slice indices for neighbor pixels*/
909 idx_tl = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
910 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
911 au4_idx_tl[0] = pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
912 au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
913 au4_idx_tl[3] = au4_idx_tl[6] = pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
914 au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
915
916 if((0 == (1 << log2_ctb_size) - sao_wd_luma))
917 {
918 if(ps_sao_ctxt->i4_ctb_x == 1)
919 {
920 au4_idx_tl[6] = -1;
921 au4_idx_tl[4] = -1;
922 }
923 else
924 {
925 au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
926 }
927 if(ps_sao_ctxt->i4_ctb_y == 1)
928 {
929 au4_idx_tl[5] = -1;
930 au4_idx_tl[4] = -1;
931 }
932 else
933 {
934 au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
935 au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
936 }
937 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
938 }
939
940 /* Verify that the neighbor ctbs don't cross pic boundary.
941 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
942 * of the pixel having a greater address is checked. Accordingly, set the availability flags.
943 * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
944 * the respective pixel's flags are checked
945 */
946
947 if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
948 {
949 au4_ilf_across_tile_slice_enable[4] = 0;
950 au4_ilf_across_tile_slice_enable[6] = 0;
951 }
952 else
953 {
954 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
955 }
956 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
957 {
958 au4_ilf_across_tile_slice_enable[5] = 0;
959 au4_ilf_across_tile_slice_enable[4] = 0;
960 }
961 else
962 {
963 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
964 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
965 }
966 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
967 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
968 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
969 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
970 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
971
972 if(au4_idx_tl[5] > idx_tl)
973 {
974 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
975 }
976
977 /*
978 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
979 * of the pixel having a greater address is checked. Accordingly, set the availability flags.
980 * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
981 * the respective pixel's flags are checked
982 */
983 for(i = 0; i < 8; i++)
984 {
985 /*Sets the edges that lie on the slice/tile boundary*/
986 if(au4_idx_tl[i] != idx_tl)
987 {
988 au1_tile_slice_boundary[i] = 1;
989 }
990 else
991 {
992 au4_ilf_across_tile_slice_enable[i] = 1;
993 }
994 }
995
996 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
997 }
998
999 if(ps_pps->i1_tiles_enabled_flag)
1000 {
1001 /* Calculate availability flags at tile boundary */
1002 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1003 {
1004 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1005 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1006 {
1007 /*Set the boundary arrays*/
1008 /*Calculate tile indices for neighbor pixels*/
1009 idx_tl = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1010 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1011 au4_idx_tl[0] = pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1012 au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1013 au4_idx_tl[3] = au4_idx_tl[6] = pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1014 au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1015
1016 if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1017 {
1018 if(ps_sao_ctxt->i4_ctb_x == 1)
1019 {
1020 au4_idx_tl[6] = -1;
1021 au4_idx_tl[4] = -1;
1022 }
1023 else
1024 {
1025 au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1026 }
1027 if(ps_sao_ctxt->i4_ctb_y == 1)
1028 {
1029 au4_idx_tl[5] = -1;
1030 au4_idx_tl[4] = -1;
1031 }
1032 else
1033 {
1034 au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1035 au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1036 }
1037 au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1038 }
1039 for(i = 0; i < 8; i++)
1040 {
1041 /*Sets the edges that lie on the tile boundary*/
1042 if(au4_idx_tl[i] != idx_tl)
1043 {
1044 au1_tile_slice_boundary[i] |= 1;
1045 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1046 }
1047 }
1048 }
1049 }
1050 }
1051
1052
1053 /*Set availability flags based on tile and slice boundaries*/
1054 for(i = 0; i < 8; i++)
1055 {
1056 /*Mark a neighbor unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
1057 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1058 {
1059 au1_avail_luma[i] = 0;
1060 }
1061 }
1062 }
1063 }
1064
1065 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1066 {
1067 au1_avail_luma[0] = 0;
1068 au1_avail_luma[4] = 0;
1069 au1_avail_luma[6] = 0;
1070 }
1071
1072 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1073 {
1074 au1_avail_luma[1] = 0;
1075 au1_avail_luma[5] = 0;
1076 au1_avail_luma[7] = 0;
1077 }
1078 //y==1 case
1079 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
1080 {
1081 au1_avail_luma[2] = 0;
1082 au1_avail_luma[4] = 0;
1083 au1_avail_luma[5] = 0;
1084 }
1085 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1086 {
1087 au1_avail_luma[3] = 0;
1088 au1_avail_luma[6] = 0;
1089 au1_avail_luma[7] = 0;
1090 }
1091
1092 {
1093 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1094 u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
1095 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1096 src_strd,
1097 pu1_src_left_luma,
1098 pu1_src_top_luma,
1099 pu1_sao_src_luma_top_left_ctb,
1100 au1_src_top_right,
1101 &u1_sao_src_top_left_luma_bot_left,
1102 au1_avail_luma,
1103 ai1_offset_y,
1104 sao_wd_luma,
1105 sao_ht_luma);
1106 }
1107 }
1108
1109 }
1110 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1111 {
1112 /* Update left, top and top-left */
1113 for(row = 0; row < sao_ht_luma; row++)
1114 {
1115 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1116 }
1117 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1118
1119 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1120 }
1121
1122 if(ps_slice_hdr_top_left->i1_slice_sao_chroma_flag)
1123 {
1124 if(0 == ps_sao->b3_cb_type_idx)
1125 {
1126 for(row = 0; row < sao_ht_chroma; row++)
1127 {
1128 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1129 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1130 }
1131 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1132 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1133
1134 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1135
1136 }
1137
1138 else if(1 == ps_sao->b3_cb_type_idx)
1139 {
1140 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1141 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1142 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1143 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1144
1145 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1146 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1147 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1148 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1149
1150 if(chroma_yuv420sp_vu)
1151 {
1152 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1153 src_strd,
1154 pu1_src_left_chroma,
1155 pu1_src_top_chroma,
1156 pu1_sao_src_chroma_top_left_ctb,
1157 ps_sao->b5_cr_band_pos,
1158 ps_sao->b5_cb_band_pos,
1159 ai1_offset_cr,
1160 ai1_offset_cb,
1161 sao_wd_chroma,
1162 sao_ht_chroma
1163 );
1164 }
1165 else
1166 {
1167 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1168 src_strd,
1169 pu1_src_left_chroma,
1170 pu1_src_top_chroma,
1171 pu1_sao_src_chroma_top_left_ctb,
1172 ps_sao->b5_cb_band_pos,
1173 ps_sao->b5_cr_band_pos,
1174 ai1_offset_cb,
1175 ai1_offset_cr,
1176 sao_wd_chroma,
1177 sao_ht_chroma
1178 );
1179 }
1180 }
1181
1182 else // if(2 <= ps_sao->b3_cb_type_idx)
1183 {
1184 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1185 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1186 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1187 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1188
1189 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1190 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1191 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1192 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1193 for(i = 0; i < 8; i++)
1194 {
1195 au1_avail_chroma[i] = 255;
1196 au1_tile_slice_boundary[i] = 0;
1197 au4_idx_tl[i] = 0;
1198 au4_ilf_across_tile_slice_enable[i] = 1;
1199 }
1200 /*In case of slices*/
1201 {
1202 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1203 {
1204 if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
1205 {
1206 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
1207 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
1208 }
1209 else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
1210 {
1211 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
1212 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
1213 }
1214 ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
1215 ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
1216
1217 ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
1218 ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
1219
1220 ctbx_tl_d = ps_sao_ctxt->i4_ctb_x - 1;
1221 ctby_tl_d = ps_sao_ctxt->i4_ctb_y;
1222
1223 ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
1224 ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
1225
1226 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1227 {
1228
1229 idx_tl = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1230 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1231 au4_idx_tl[0] = pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1232 au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1233 au4_idx_tl[3] = au4_idx_tl[6] = pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1234 au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1235
1236 if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
1237 {
1238 if(ps_sao_ctxt->i4_ctb_x == 1)
1239 {
1240 au4_idx_tl[6] = -1;
1241 au4_idx_tl[4] = -1;
1242 }
1243 else
1244 {
1245 au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1246 }
1247 if(ps_sao_ctxt->i4_ctb_y == 1)
1248 {
1249 au4_idx_tl[5] = -1;
1250 au4_idx_tl[4] = -1;
1251 }
1252 else
1253 {
1254 au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1255 au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1256 }
1257 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1258 }
1259
1260 /* Verify that the neighbor ctbs don't cross pic boundary
1261 * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
1262 if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
1263 {
1264 au4_ilf_across_tile_slice_enable[4] = 0;
1265 au4_ilf_across_tile_slice_enable[6] = 0;
1266 }
1267 else
1268 {
1269 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1270 }
1271 if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
1272 {
1273 au4_ilf_across_tile_slice_enable[5] = 0;
1274 au4_ilf_across_tile_slice_enable[4] = 0;
1275 }
1276 else
1277 {
1278 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1279 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1280 }
1281 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1282 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1283 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1284 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1285 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1286 /*
1287 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1288 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1289 */
1290 for(i = 0; i < 8; i++)
1291 {
1292 /*Sets the edges that lie on the slice/tile boundary*/
1293 if(au4_idx_tl[i] != idx_tl)
1294 {
1295 au1_tile_slice_boundary[i] = 1;
1296 }
1297 else
1298 {
1299 au4_ilf_across_tile_slice_enable[i] = 1;
1300 }
1301 }
1302
1303 /*Reset indices*/
1304 for(i = 0; i < 8; i++)
1305 {
1306 au4_idx_tl[i] = 0;
1307 }
1308 }
1309 if(ps_pps->i1_tiles_enabled_flag)
1310 {
1311 /* Calculate availability flags at tile boundary */
1312 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1313 {
1314 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1315 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1316 {
1317 /*Set the boundary arrays*/
1318 /*Calculate tile indices for neighbor pixels*/
1319 idx_tl = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1320 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1321 au4_idx_tl[0] = pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1322 au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1323 au4_idx_tl[3] = au4_idx_tl[6] = pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1324 au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1325
1326 if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1327 {
1328 if(ps_sao_ctxt->i4_ctb_x == 1)
1329 {
1330 au4_idx_tl[6] = -1;
1331 au4_idx_tl[4] = -1;
1332 }
1333 else
1334 {
1335 au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1336 }
1337 if(ps_sao_ctxt->i4_ctb_y == 1)
1338 {
1339 au4_idx_tl[5] = -1;
1340 au4_idx_tl[4] = -1;
1341 }
1342 else
1343 {
1344 au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1345 au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1346 }
1347 au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1348 }
1349 for(i = 0; i < 8; i++)
1350 {
1351 /*Sets the edges that lie on the tile boundary*/
1352 if(au4_idx_tl[i] != idx_tl)
1353 {
1354 au1_tile_slice_boundary[i] |= 1;
1355 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1356 }
1357 }
1358 }
1359 }
1360 }
1361
1362 for(i = 0; i < 8; i++)
1363 {
1364 /*Mark a neighbor unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
1365 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1366 {
1367 au1_avail_chroma[i] = 0;
1368 }
1369 }
1370 }
1371 }
1372
1373 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
1374 {
1375 au1_avail_chroma[0] = 0;
1376 au1_avail_chroma[4] = 0;
1377 au1_avail_chroma[6] = 0;
1378 }
1379 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1380 {
1381 au1_avail_chroma[1] = 0;
1382 au1_avail_chroma[5] = 0;
1383 au1_avail_chroma[7] = 0;
1384 }
1385
1386 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1387 {
1388 au1_avail_chroma[2] = 0;
1389 au1_avail_chroma[4] = 0;
1390 au1_avail_chroma[5] = 0;
1391 }
1392 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1393 {
1394 au1_avail_chroma[3] = 0;
1395 au1_avail_chroma[6] = 0;
1396 au1_avail_chroma[7] = 0;
1397 }
1398
1399 {
1400 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
1401 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
1402 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
1403 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
1404 if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
1405 {
1406 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1407 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1408 }
1409
1410 if(chroma_yuv420sp_vu)
1411 {
1412 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1413 src_strd,
1414 pu1_src_left_chroma,
1415 pu1_src_top_chroma,
1416 pu1_sao_src_chroma_top_left_ctb,
1417 au1_src_top_right,
1418 au1_sao_src_top_left_chroma_bot_left,
1419 au1_avail_chroma,
1420 ai1_offset_cr,
1421 ai1_offset_cb,
1422 sao_wd_chroma,
1423 sao_ht_chroma);
1424 }
1425 else
1426 {
1427 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1428 src_strd,
1429 pu1_src_left_chroma,
1430 pu1_src_top_chroma,
1431 pu1_sao_src_chroma_top_left_ctb,
1432 au1_src_top_right,
1433 au1_sao_src_top_left_chroma_bot_left,
1434 au1_avail_chroma,
1435 ai1_offset_cb,
1436 ai1_offset_cr,
1437 sao_wd_chroma,
1438 sao_ht_chroma);
1439 }
1440 }
1441 }
1442 }
1443 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1444 {
1445 for(row = 0; row < sao_ht_chroma; row++)
1446 {
1447 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1448 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1449 }
1450 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1451 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1452
1453 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1454 }
1455
1456 pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1457 pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
1458 ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1459 }
1460
1461
1462 /* Top CTB */
1463 if((ps_sao_ctxt->i4_ctb_y > 0))
1464 {
1465 WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1466 WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
1467 WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1468 WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1469
1470 WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1471 WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1472 WORD32 au4_idx_t[8], idx_t;
1473
1474 WORD32 remaining_cols;
1475
1476 slice_header_t *ps_slice_hdr_top;
1477 {
1478 WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
1479 (ps_sao_ctxt->i4_ctb_x);
1480 ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx];
1481 }
1482
1483 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1484 if(remaining_cols <= SAO_SHIFT_CTB)
1485 {
1486 sao_wd_luma += remaining_cols;
1487 }
1488 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
1489 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1490 {
1491 sao_wd_chroma += remaining_cols;
1492 }
1493
1494 pu1_src_luma -= (sao_ht_luma * src_strd);
1495 pu1_src_chroma -= (sao_ht_chroma * src_strd);
1496 ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1497 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1498 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1499 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1500 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
1501
1502 if(0 != sao_wd_luma)
1503 {
1504 if(ps_slice_hdr_top->i1_slice_sao_luma_flag)
1505 {
1506 if(0 == ps_sao->b3_y_type_idx)
1507 {
1508 /* Update left, top and top-left */
1509 for(row = 0; row < sao_ht_luma; row++)
1510 {
1511 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1512 }
1513 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1514
1515 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1516
1517 }
1518
1519 else if(1 == ps_sao->b3_y_type_idx)
1520 {
1521 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1522 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1523 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1524 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1525
1526 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1527 src_strd,
1528 pu1_src_left_luma,
1529 pu1_src_top_luma,
1530 pu1_sao_src_luma_top_left_ctb,
1531 ps_sao->b5_y_band_pos,
1532 ai1_offset_y,
1533 sao_wd_luma,
1534 sao_ht_luma
1535 );
1536 }
1537
1538 else // if(2 <= ps_sao->b3_y_type_idx)
1539 {
1540 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1541 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1542 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1543 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1544
1545 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1546 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1547 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1548
1549 for(i = 0; i < 8; i++)
1550 {
1551
1552 au4_ilf_across_tile_slice_enable[i] = 1;
1553 }
1554 /******************************************************************
1555 * Derive the Top CTB's neighbor pixels' slice indices.
1556 *
1557 * T_T
1558 * ____________
1559 * | | |
1560 * | T_L| T |T_R
1561 * | | ______|____
1562 * | | T_D | |
1563 * | | | |
1564 * |____|_______| |
1565 * | |
1566 * | |
1567 * |____________|
1568 *
1569 *****************************************************************/
1570
1571 /*In case of slices*/
1572 {
1573 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1574 {
1575
1576 ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1577 ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1578
1579 ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1580 ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1581
1582 ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1583 ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1584
1585 ctbx_t_d = ps_sao_ctxt->i4_ctb_x;
1586 ctby_t_d = ps_sao_ctxt->i4_ctb_y;
1587
1588 ctbx_t = ps_sao_ctxt->i4_ctb_x;
1589 ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1590
1591 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1592 {
1593 /*Calculate neighbor ctb slice indices*/
1594 if(0 == ps_sao_ctxt->i4_ctb_x)
1595 {
1596 au4_idx_t[0] = -1;
1597 au4_idx_t[6] = -1;
1598 au4_idx_t[4] = -1;
1599 }
1600 else
1601 {
1602 au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1603 au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1604 }
1605 idx_t = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1606 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1607 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1608 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1609
1610 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1611 if(0 == ps_sao_ctxt->i4_ctb_x)
1612 {
1613 au4_ilf_across_tile_slice_enable[4] = 0;
1614 au4_ilf_across_tile_slice_enable[6] = 0;
1615 au4_ilf_across_tile_slice_enable[0] = 0;
1616 }
1617 else
1618 {
1619 au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1620 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1621 }
1622
1623
1624
1625 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1626 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1627 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1628 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1629 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1630
1631 if(au4_idx_t[6] < idx_t)
1632 {
1633 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1634 }
1635
1636 /*
1637 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1638 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1639 */
1640
1641 for(i = 0; i < 8; i++)
1642 {
1643 /*Sets the edges that lie on the slice/tile boundary*/
1644 if(au4_idx_t[i] != idx_t)
1645 {
1646 au1_tile_slice_boundary[i] = 1;
1647 /*Check for slice flag at such boundaries*/
1648 }
1649 else
1650 {
1651 au4_ilf_across_tile_slice_enable[i] = 1;
1652 }
1653 }
1654 /*Reset indices*/
1655 for(i = 0; i < 8; i++)
1656 {
1657 au4_idx_t[i] = 0;
1658 }
1659 }
1660
1661 if(ps_pps->i1_tiles_enabled_flag)
1662 {
1663 /* Calculate availability flags at tile boundary */
1664 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1665 {
1666 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1667 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1668 {
1669 /*Calculate neighbor ctb tile indices*/
1670 if(0 == ps_sao_ctxt->i4_ctb_x)
1671 {
1672 au4_idx_t[0] = -1;
1673 au4_idx_t[6] = -1;
1674 au4_idx_t[4] = -1;
1675 }
1676 else
1677 {
1678 au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1679 au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1680 }
1681 idx_t = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1682 au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1683 au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1684 au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1685
1686 for(i = 0; i < 8; i++)
1687 {
1688 /*Sets the edges that lie on the tile boundary*/
1689 if(au4_idx_t[i] != idx_t)
1690 {
1691 au1_tile_slice_boundary[i] |= 1;
1692 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1693 }
1694 }
1695 }
1696 }
1697 }
1698
1699 for(i = 0; i < 8; i++)
1700 {
1701 /*Mark a neighbor unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
1702 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1703 {
1704 au1_avail_luma[i] = 0;
1705 }
1706 }
1707 }
1708 }
1709
1710
1711 if(0 == ps_sao_ctxt->i4_ctb_x)
1712 {
1713 au1_avail_luma[0] = 0;
1714 au1_avail_luma[4] = 0;
1715 au1_avail_luma[6] = 0;
1716 }
1717
1718 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1719 {
1720 au1_avail_luma[1] = 0;
1721 au1_avail_luma[5] = 0;
1722 au1_avail_luma[7] = 0;
1723 }
1724
1725 if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1726 {
1727 au1_avail_luma[2] = 0;
1728 au1_avail_luma[4] = 0;
1729 au1_avail_luma[5] = 0;
1730 }
1731
1732 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1733 {
1734 au1_avail_luma[3] = 0;
1735 au1_avail_luma[6] = 0;
1736 au1_avail_luma[7] = 0;
1737 }
1738
1739 {
1740 au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1741 u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1742 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1743 src_strd,
1744 pu1_src_left_luma,
1745 pu1_src_top_luma,
1746 pu1_sao_src_luma_top_left_ctb,
1747 au1_src_top_right,
1748 &u1_sao_src_top_left_luma_bot_left,
1749 au1_avail_luma,
1750 ai1_offset_y,
1751 sao_wd_luma,
1752 sao_ht_luma);
1753 }
1754 }
1755 }
1756 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1757 {
1758 /* Update left, top and top-left */
1759 for(row = 0; row < sao_ht_luma; row++)
1760 {
1761 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1762 }
1763 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1764
1765 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1766 }
1767 }
1768
1769 if(0 != sao_wd_chroma)
1770 {
1771 if(ps_slice_hdr_top->i1_slice_sao_chroma_flag)
1772 {
1773 if(0 == ps_sao->b3_cb_type_idx)
1774 {
1775
1776 for(row = 0; row < sao_ht_chroma; row++)
1777 {
1778 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1779 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1780 }
1781 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1782 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1783
1784 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1785
1786 }
1787
1788 else if(1 == ps_sao->b3_cb_type_idx)
1789 {
1790 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1791 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1792 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1793 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1794
1795 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1796 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1797 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1798 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1799
1800 if(chroma_yuv420sp_vu)
1801 {
1802 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1803 src_strd,
1804 pu1_src_left_chroma,
1805 pu1_src_top_chroma,
1806 pu1_sao_src_chroma_top_left_ctb,
1807 ps_sao->b5_cr_band_pos,
1808 ps_sao->b5_cb_band_pos,
1809 ai1_offset_cr,
1810 ai1_offset_cb,
1811 sao_wd_chroma,
1812 sao_ht_chroma
1813 );
1814 }
1815 else
1816 {
1817 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1818 src_strd,
1819 pu1_src_left_chroma,
1820 pu1_src_top_chroma,
1821 pu1_sao_src_chroma_top_left_ctb,
1822 ps_sao->b5_cb_band_pos,
1823 ps_sao->b5_cr_band_pos,
1824 ai1_offset_cb,
1825 ai1_offset_cr,
1826 sao_wd_chroma,
1827 sao_ht_chroma
1828 );
1829 }
1830 }
1831 else // if(2 <= ps_sao->b3_cb_type_idx)
1832 {
1833 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1834 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1835 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1836 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1837
1838 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1839 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1840 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1841 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1842
1843 for(i = 0; i < 8; i++)
1844 {
1845 au1_avail_chroma[i] = 255;
1846 au1_tile_slice_boundary[i] = 0;
1847 au4_idx_t[i] = 0;
1848 au4_ilf_across_tile_slice_enable[i] = 1;
1849 }
1850
1851 {
1852 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1853 {
1854 ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1855 ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1856
1857 ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1858 ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1859
1860 ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1861 ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1862
1863 ctbx_t_d = ps_sao_ctxt->i4_ctb_x;
1864 ctby_t_d = ps_sao_ctxt->i4_ctb_y;
1865
1866 ctbx_t = ps_sao_ctxt->i4_ctb_x;
1867 ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1868
1869 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1870 {
1871 if(0 == ps_sao_ctxt->i4_ctb_x)
1872 {
1873 au4_idx_t[0] = -1;
1874 au4_idx_t[6] = -1;
1875 au4_idx_t[4] = -1;
1876 }
1877 else
1878 {
1879 au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1880 au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1881 }
1882 idx_t = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1883 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1884 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1885 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1886
1887 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1888
1889 if(0 == ps_sao_ctxt->i4_ctb_x)
1890 {
1891 au4_ilf_across_tile_slice_enable[4] = 0;
1892 au4_ilf_across_tile_slice_enable[6] = 0;
1893 au4_ilf_across_tile_slice_enable[0] = 0;
1894 }
1895 else
1896 {
1897 au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1898 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1899 }
1900
1901 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1902 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1903 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1904 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1905 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1906
1907 if(idx_t > au4_idx_t[6])
1908 {
1909 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1910 }
1911
1912 /*
1913 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1914 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1915 */
1916 for(i = 0; i < 8; i++)
1917 {
1918 /*Sets the edges that lie on the slice/tile boundary*/
1919 if(au4_idx_t[i] != idx_t)
1920 {
1921 au1_tile_slice_boundary[i] = 1;
1922 }
1923 else
1924 {
1925 /*Indicates that the neighbour belongs to same/dependent slice*/
1926 au4_ilf_across_tile_slice_enable[i] = 1;
1927 }
1928 }
1929 /*Reset indices*/
1930 for(i = 0; i < 8; i++)
1931 {
1932 au4_idx_t[i] = 0;
1933 }
1934 }
1935 if(ps_pps->i1_tiles_enabled_flag)
1936 {
1937 /* Calculate availability flags at tile boundary */
1938 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1939 {
1940 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1941 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1942 {
1943 /*Calculate neighbor ctb slice indices*/
1944 if(0 == ps_sao_ctxt->i4_ctb_x)
1945 {
1946 au4_idx_t[0] = -1;
1947 au4_idx_t[6] = -1;
1948 au4_idx_t[4] = -1;
1949 }
1950 else
1951 {
1952 au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1953 au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1954 }
1955 idx_t = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1956 au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1957 au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1958 au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1959
1960 for(i = 0; i < 8; i++)
1961 {
1962 /*Sets the edges that lie on the tile boundary*/
1963 if(au4_idx_t[i] != idx_t)
1964 {
1965 au1_tile_slice_boundary[i] |= 1;
1966 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1967 }
1968 }
1969 }
1970 }
1971 }
1972 for(i = 0; i < 8; i++)
1973 {
1974 /*Sets the edges that lie on the slice/tile boundary*/
1975 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1976 {
1977 au1_avail_chroma[i] = 0;
1978 }
1979 }
1980
1981 }
1982 }
1983 if(0 == ps_sao_ctxt->i4_ctb_x)
1984 {
1985 au1_avail_chroma[0] = 0;
1986 au1_avail_chroma[4] = 0;
1987 au1_avail_chroma[6] = 0;
1988 }
1989
1990 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
1991 {
1992 au1_avail_chroma[1] = 0;
1993 au1_avail_chroma[5] = 0;
1994 au1_avail_chroma[7] = 0;
1995 }
1996
1997 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1998 {
1999 au1_avail_chroma[2] = 0;
2000 au1_avail_chroma[4] = 0;
2001 au1_avail_chroma[5] = 0;
2002 }
2003
2004 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
2005 {
2006 au1_avail_chroma[3] = 0;
2007 au1_avail_chroma[6] = 0;
2008 au1_avail_chroma[7] = 0;
2009 }
2010
2011 {
2012 au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
2013 au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
2014 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2015 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2016
2017 if(chroma_yuv420sp_vu)
2018 {
2019 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2020 src_strd,
2021 pu1_src_left_chroma,
2022 pu1_src_top_chroma,
2023 pu1_sao_src_chroma_top_left_ctb,
2024 au1_src_top_right,
2025 au1_sao_src_top_left_chroma_bot_left,
2026 au1_avail_chroma,
2027 ai1_offset_cr,
2028 ai1_offset_cb,
2029 sao_wd_chroma,
2030 sao_ht_chroma);
2031 }
2032 else
2033 {
2034 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2035 src_strd,
2036 pu1_src_left_chroma,
2037 pu1_src_top_chroma,
2038 pu1_sao_src_chroma_top_left_ctb,
2039 au1_src_top_right,
2040 au1_sao_src_top_left_chroma_bot_left,
2041 au1_avail_chroma,
2042 ai1_offset_cb,
2043 ai1_offset_cr,
2044 sao_wd_chroma,
2045 sao_ht_chroma);
2046 }
2047 }
2048
2049 }
2050 }
2051 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2052 {
2053 for(row = 0; row < sao_ht_chroma; row++)
2054 {
2055 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2056 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2057 }
2058 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2059 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2060
2061 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2062 }
2063 }
2064
2065 pu1_src_luma += sao_ht_luma * src_strd;
2066 pu1_src_chroma += sao_ht_chroma * src_strd;
2067 ps_sao += (ps_sps->i2_pic_wd_in_ctb);
2068 }
2069
2070 /* Left CTB */
2071 if(ps_sao_ctxt->i4_ctb_x > 0)
2072 {
2073 WORD32 sao_wd_luma = SAO_SHIFT_CTB;
2074 WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
2075 WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2076 WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2077
2078 WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
2079 WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
2080 WORD32 au4_idx_l[8], idx_l;
2081
2082 WORD32 remaining_rows;
2083 slice_header_t *ps_slice_hdr_left;
2084 {
2085 WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb +
2086 (ps_sao_ctxt->i4_ctb_x - 1);
2087 ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx];
2088 }
2089
2090 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2091 if(remaining_rows <= SAO_SHIFT_CTB)
2092 {
2093 sao_ht_luma += remaining_rows;
2094 }
2095 remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2096 if(remaining_rows <= SAO_SHIFT_CTB)
2097 {
2098 sao_ht_chroma += remaining_rows;
2099 }
2100
2101 pu1_src_luma -= sao_wd_luma;
2102 pu1_src_chroma -= sao_wd_chroma;
2103 ps_sao -= 1;
2104 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
2105 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
2106 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2107 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2108
2109
2110 if(0 != sao_ht_luma)
2111 {
2112 if(ps_slice_hdr_left->i1_slice_sao_luma_flag)
2113 {
2114 if(0 == ps_sao->b3_y_type_idx)
2115 {
2116 /* Update left, top and top-left */
2117 for(row = 0; row < sao_ht_luma; row++)
2118 {
2119 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2120 }
2121 /*Update in next location*/
2122 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2123
2124 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2125
2126 }
2127
2128 else if(1 == ps_sao->b3_y_type_idx)
2129 {
2130 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2131 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2132 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2133 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2134
2135 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2136 src_strd,
2137 pu1_src_left_luma,
2138 pu1_src_top_luma,
2139 pu1_sao_src_top_left_luma_curr_ctb,
2140 ps_sao->b5_y_band_pos,
2141 ai1_offset_y,
2142 sao_wd_luma,
2143 sao_ht_luma
2144 );
2145 }
2146
2147 else // if(2 <= ps_sao->b3_y_type_idx)
2148 {
2149 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2150 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2151 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2152 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2153
2154 for(i = 0; i < 8; i++)
2155 {
2156 au1_avail_luma[i] = 255;
2157 au1_tile_slice_boundary[i] = 0;
2158 au4_idx_l[i] = 0;
2159 au4_ilf_across_tile_slice_enable[i] = 1;
2160 }
2161 /******************************************************************
2162 * Derive the Left CTB's neighbour pixel's slice indices.
2163 *
2164 *
2165 * ____________
2166 * | | |
2167 * | L_T| |
2168 * |____|_______|____
2169 * | | | |
2170 * L_L | L | L_R | |
2171 * |____|_______| |
2172 * | |
2173 * L_D | |
2174 * |____________|
2175 *
2176 *****************************************************************/
2177
2178 /*In case of slices or tiles*/
2179 {
2180 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2181 {
2182 ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2183 ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2184
2185 ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2186 ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2187
2188 ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2189 ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2190
2191 ctbx_l_d = ps_sao_ctxt->i4_ctb_x - 1;
2192 ctby_l_d = ps_sao_ctxt->i4_ctb_y;
2193
2194 ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2195 ctby_l = ps_sao_ctxt->i4_ctb_y;
2196
2197 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2198 {
2199 if(0 == ps_sao_ctxt->i4_ctb_y)
2200 {
2201 au4_idx_l[2] = -1;
2202 au4_idx_l[4] = -1;
2203 au4_idx_l[5] = -1;
2204 }
2205 else
2206 {
2207 au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2208 au4_idx_l[5] = pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2209 }
2210 idx_l = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2211 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2212 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2213 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2214
2215 /*Verify that the neighbor ctbs don't cross pic boundary.*/
2216 if(0 == ps_sao_ctxt->i4_ctb_y)
2217 {
2218 au4_ilf_across_tile_slice_enable[2] = 0;
2219 au4_ilf_across_tile_slice_enable[4] = 0;
2220 au4_ilf_across_tile_slice_enable[5] = 0;
2221 }
2222 else
2223 {
2224 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2225 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2226
2227 }
2228 //TODO: ILF flag checks for [0] and [6] are missing.
2229 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2230 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2231 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2232
2233 if(idx_l < au4_idx_l[5])
2234 {
2235 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2236 }
2237
2238 /*
2239 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2240 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2241 */
2242 for(i = 0; i < 8; i++)
2243 {
2244 /*Sets the edges that lie on the slice/tile boundary*/
2245 if(au4_idx_l[i] != idx_l)
2246 {
2247 au1_tile_slice_boundary[i] = 1;
2248 }
2249 else
2250 {
2251 au4_ilf_across_tile_slice_enable[i] = 1;
2252 }
2253 }
2254 /*Reset indices*/
2255 for(i = 0; i < 8; i++)
2256 {
2257 au4_idx_l[i] = 0;
2258 }
2259 }
2260
2261 if(ps_pps->i1_tiles_enabled_flag)
2262 {
2263 /* Calculate availability flags at tile boundary */
2264 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2265 {
2266 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2267 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2268 {
2269 if(0 == ps_sao_ctxt->i4_ctb_y)
2270 {
2271 au4_idx_l[2] = -1;
2272 au4_idx_l[4] = -1;
2273 au4_idx_l[5] = -1;
2274 }
2275 else
2276 {
2277 au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2278 au4_idx_l[5] = pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2279 }
2280
2281 idx_l = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2282 au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2283 au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2284 au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2285
2286 for(i = 0; i < 8; i++)
2287 {
2288 /*Sets the edges that lie on the slice/tile boundary*/
2289 if(au4_idx_l[i] != idx_l)
2290 {
2291 au1_tile_slice_boundary[i] |= 1;
2292 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
2293 }
2294 }
2295 }
2296 }
2297 }
2298
2299 for(i = 0; i < 8; i++)
2300 {
2301 /*Sets the edges that lie on the slice/tile boundary*/
2302 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2303 {
2304 au1_avail_luma[i] = 0;
2305 }
2306 }
2307 }
2308 }
2309 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
2310 {
2311 au1_avail_luma[0] = 0;
2312 au1_avail_luma[4] = 0;
2313 au1_avail_luma[6] = 0;
2314 }
2315 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2316 {
2317 au1_avail_luma[1] = 0;
2318 au1_avail_luma[5] = 0;
2319 au1_avail_luma[7] = 0;
2320 }
2321
2322 if(0 == ps_sao_ctxt->i4_ctb_y)
2323 {
2324 au1_avail_luma[2] = 0;
2325 au1_avail_luma[4] = 0;
2326 au1_avail_luma[5] = 0;
2327 }
2328
2329 if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) <= sao_ht_luma)
2330 {
2331 au1_avail_luma[3] = 0;
2332 au1_avail_luma[6] = 0;
2333 au1_avail_luma[7] = 0;
2334 }
2335
2336 {
2337 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
2338 u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
2339 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2340 src_strd,
2341 pu1_src_left_luma,
2342 pu1_src_top_luma,
2343 pu1_sao_src_top_left_luma_curr_ctb,
2344 au1_src_top_right,
2345 &u1_sao_src_top_left_luma_bot_left,
2346 au1_avail_luma,
2347 ai1_offset_y,
2348 sao_wd_luma,
2349 sao_ht_luma);
2350 }
2351
2352 }
2353 }
2354 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2355 {
2356 /* Update left, top and top-left */
2357 for(row = 0; row < sao_ht_luma; row++)
2358 {
2359 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2360 }
2361 /*Update in next location*/
2362 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2363
2364 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2365 }
2366 }
2367
2368 if(0 != sao_ht_chroma)
2369 {
2370 if(ps_slice_hdr_left->i1_slice_sao_chroma_flag)
2371 {
2372 if(0 == ps_sao->b3_cb_type_idx)
2373 {
2374 for(row = 0; row < sao_ht_chroma; row++)
2375 {
2376 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2377 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2378 }
2379 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2380 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2381
2382 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2383 }
2384
2385 else if(1 == ps_sao->b3_cb_type_idx)
2386 {
2387 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2388 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2389 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2390 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2391
2392 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2393 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2394 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2395 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2396
2397 if(chroma_yuv420sp_vu)
2398 {
2399 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2400 src_strd,
2401 pu1_src_left_chroma,
2402 pu1_src_top_chroma,
2403 pu1_sao_src_top_left_chroma_curr_ctb,
2404 ps_sao->b5_cr_band_pos,
2405 ps_sao->b5_cb_band_pos,
2406 ai1_offset_cr,
2407 ai1_offset_cb,
2408 sao_wd_chroma,
2409 sao_ht_chroma
2410 );
2411 }
2412 else
2413 {
2414 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2415 src_strd,
2416 pu1_src_left_chroma,
2417 pu1_src_top_chroma,
2418 pu1_sao_src_top_left_chroma_curr_ctb,
2419 ps_sao->b5_cb_band_pos,
2420 ps_sao->b5_cr_band_pos,
2421 ai1_offset_cb,
2422 ai1_offset_cr,
2423 sao_wd_chroma,
2424 sao_ht_chroma
2425 );
2426 }
2427 }
2428
2429 else // if(2 <= ps_sao->b3_cb_type_idx)
2430 {
2431 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2432 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2433 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2434 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2435
2436 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2437 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2438 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2439 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2440
2441 for(i = 0; i < 8; i++)
2442 {
2443 au1_avail_chroma[i] = 255;
2444 au1_tile_slice_boundary[i] = 0;
2445 au4_idx_l[i] = 0;
2446 au4_ilf_across_tile_slice_enable[i] = 1;
2447 }
2448 /*In case of slices or tiles*/
2449 {
2450 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2451 {
2452 ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2453 ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2454
2455 ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2456 ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2457
2458 ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2459 ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2460
2461 ctbx_l_d = ps_sao_ctxt->i4_ctb_x - 1;
2462 ctby_l_d = ps_sao_ctxt->i4_ctb_y;
2463
2464 ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2465 ctby_l = ps_sao_ctxt->i4_ctb_y;
2466
2467 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2468 {
2469 if(0 == ps_sao_ctxt->i4_ctb_y)
2470 {
2471 au4_idx_l[2] = -1;
2472 au4_idx_l[4] = -1;
2473 au4_idx_l[5] = -1;
2474 }
2475 else
2476 {
2477 au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2478 au4_idx_l[5] = pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2479 }
2480 idx_l = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2481 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2482 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2483 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2484
2485 /*Verify that the neighbour ctbs don't cross pic boundary.*/
2486 if(0 == ps_sao_ctxt->i4_ctb_y)
2487 {
2488 au4_ilf_across_tile_slice_enable[2] = 0;
2489 au4_ilf_across_tile_slice_enable[4] = 0;
2490 au4_ilf_across_tile_slice_enable[5] = 0;
2491 }
2492 else
2493 {
2494 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2495 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2496 }
2497
2498 if(au4_idx_l[5] > idx_l)
2499 {
2500 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2501 }
2502
2503 // au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2504 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2505 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2506 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2507 /*
2508 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2509 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2510 */
2511 for(i = 0; i < 8; i++)
2512 {
2513 /*Sets the edges that lie on the slice/tile boundary*/
2514 if(au4_idx_l[i] != idx_l)
2515 {
2516 au1_tile_slice_boundary[i] = 1;
2517 }
2518 else
2519 {
2520 au4_ilf_across_tile_slice_enable[i] = 1;
2521 }
2522 }
2523 /*Reset indices*/
2524 for(i = 0; i < 8; i++)
2525 {
2526 au4_idx_l[i] = 0;
2527 }
2528 }
2529 if(ps_pps->i1_tiles_enabled_flag)
2530 {
2531 /* Calculate availability flags at tile boundary */
2532 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2533 {
2534 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2535 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2536 {
2537 if(0 == ps_sao_ctxt->i4_ctb_y)
2538 {
2539 au4_idx_l[2] = -1;
2540 au4_idx_l[4] = -1;
2541 au4_idx_l[5] = -1;
2542 }
2543 else
2544 {
2545 au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2546 au4_idx_l[5] = pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2547 }
2548
2549 idx_l = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2550 au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2551 au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2552 au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2553
2554 for(i = 0; i < 8; i++)
2555 {
2556 /*Sets the edges that lie on the slice/tile boundary*/
2557 if(au4_idx_l[i] != idx_l)
2558 {
2559 au1_tile_slice_boundary[i] |= 1;
2560 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2561 }
2562 }
2563 }
2564 }
2565 }
2566 for(i = 0; i < 8; i++)
2567 {
2568 /*Sets the edges that lie on the slice/tile boundary*/
2569 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2570 {
2571 au1_avail_chroma[i] = 0;
2572 }
2573 }
2574 }
2575 }
2576 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
2577 {
2578 au1_avail_chroma[0] = 0;
2579 au1_avail_chroma[4] = 0;
2580 au1_avail_chroma[6] = 0;
2581 }
2582
2583 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2584 {
2585 au1_avail_chroma[1] = 0;
2586 au1_avail_chroma[5] = 0;
2587 au1_avail_chroma[7] = 0;
2588 }
2589
2590 if(0 == ps_sao_ctxt->i4_ctb_y)
2591 {
2592 au1_avail_chroma[2] = 0;
2593 au1_avail_chroma[4] = 0;
2594 au1_avail_chroma[5] = 0;
2595 }
2596
2597 if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) <= sao_ht_chroma)
2598 {
2599 au1_avail_chroma[3] = 0;
2600 au1_avail_chroma[6] = 0;
2601 au1_avail_chroma[7] = 0;
2602 }
2603
2604 {
2605 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2606 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2607 au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2608 au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2609 //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2610 //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2611 if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2612 {
2613 au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
2614 au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
2615 }
2616
2617
2618 if(chroma_yuv420sp_vu)
2619 {
2620 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2621 src_strd,
2622 pu1_src_left_chroma,
2623 pu1_src_top_chroma,
2624 pu1_sao_src_top_left_chroma_curr_ctb,
2625 au1_src_top_right,
2626 au1_src_bot_left,
2627 au1_avail_chroma,
2628 ai1_offset_cr,
2629 ai1_offset_cb,
2630 sao_wd_chroma,
2631 sao_ht_chroma);
2632 }
2633 else
2634 {
2635 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2636 src_strd,
2637 pu1_src_left_chroma,
2638 pu1_src_top_chroma,
2639 pu1_sao_src_top_left_chroma_curr_ctb,
2640 au1_src_top_right,
2641 au1_src_bot_left,
2642 au1_avail_chroma,
2643 ai1_offset_cb,
2644 ai1_offset_cr,
2645 sao_wd_chroma,
2646 sao_ht_chroma);
2647 }
2648 }
2649
2650 }
2651 }
2652 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2653 {
2654 for(row = 0; row < sao_ht_chroma; row++)
2655 {
2656 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2657 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2658 }
2659 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2660 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2661
2662 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2663 }
2664
2665 }
2666 pu1_src_luma += sao_wd_luma;
2667 pu1_src_chroma += sao_wd_chroma;
2668 ps_sao += 1;
2669 }
2670
2671
2672 /* Current CTB */
2673 {
2674 WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2675 WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
2676 WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2677 WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2678 WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2679 WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2680 WORD32 au4_idx_c[8], idx_c;
2681
2682 WORD32 remaining_rows;
2683 WORD32 remaining_cols;
2684
2685 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2686 if(remaining_cols <= SAO_SHIFT_CTB)
2687 {
2688 sao_wd_luma += remaining_cols;
2689 }
2690 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
2691 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2692 {
2693 sao_wd_chroma += remaining_cols;
2694 }
2695
2696 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2697 if(remaining_rows <= SAO_SHIFT_CTB)
2698 {
2699 sao_ht_luma += remaining_rows;
2700 }
2701 remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2702 if(remaining_rows <= SAO_SHIFT_CTB)
2703 {
2704 sao_ht_chroma += remaining_rows;
2705 }
2706
2707 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2708 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2709 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2710 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2711
2712 if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2713 {
2714 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2715 {
2716 if(0 == ps_sao->b3_y_type_idx)
2717 {
2718 /* Update left, top and top-left */
2719 for(row = 0; row < sao_ht_luma; row++)
2720 {
2721 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2722 }
2723 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2724
2725 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2726
2727 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2728
2729 }
2730
2731 else if(1 == ps_sao->b3_y_type_idx)
2732 {
2733 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2734 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2735 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2736 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2737
2738 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2739 src_strd,
2740 pu1_src_left_luma,
2741 pu1_src_top_luma,
2742 pu1_sao_src_top_left_luma_curr_ctb,
2743 ps_sao->b5_y_band_pos,
2744 ai1_offset_y,
2745 sao_wd_luma,
2746 sao_ht_luma
2747 );
2748 }
2749
2750 else // if(2 <= ps_sao->b3_y_type_idx)
2751 {
2752 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2753 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2754 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2755 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2756
2757 for(i = 0; i < 8; i++)
2758 {
2759 au1_avail_luma[i] = 255;
2760 au1_tile_slice_boundary[i] = 0;
2761 au4_idx_c[i] = 0;
2762 au4_ilf_across_tile_slice_enable[i] = 1;
2763 }
2764 /******************************************************************
2765 * Derive the Top-left CTB's neighbour pixel's slice indices.
2766 *
2767 *
2768 * ____________
2769 * | | |
2770 * | | C_T |
2771 * |____|_______|____
2772 * | | | |
2773 * | C_L| C | C_R|
2774 * |____|_______| |
2775 * | C_D |
2776 * | |
2777 * |____________|
2778 *
2779 *****************************************************************/
2780
2781 /*In case of slices*/
2782 {
2783 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2784 {
2785 ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2786 ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2787
2788 ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2789 ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2790
2791 ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2792 ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2793
2794 ctbx_c_d = ps_sao_ctxt->i4_ctb_x;
2795 ctby_c_d = ps_sao_ctxt->i4_ctb_y;
2796
2797 ctbx_c = ps_sao_ctxt->i4_ctb_x;
2798 ctby_c = ps_sao_ctxt->i4_ctb_y;
2799
2800 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2801 {
2802 if(0 == ps_sao_ctxt->i4_ctb_x)
2803 {
2804 au4_idx_c[6] = -1;
2805 au4_idx_c[0] = -1;
2806 au4_idx_c[4] = -1;
2807 }
2808 else
2809 {
2810 au4_idx_c[0] = au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2811 }
2812
2813 if(0 == ps_sao_ctxt->i4_ctb_y)
2814 {
2815 au4_idx_c[2] = -1;
2816 au4_idx_c[5] = -1;
2817 au4_idx_c[4] = -1;
2818 }
2819 else
2820 {
2821 au4_idx_c[4] = pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2822 au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2823 }
2824 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2825 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2826 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2827
2828 if(0 == ps_sao_ctxt->i4_ctb_x)
2829 {
2830 au4_ilf_across_tile_slice_enable[6] = 0;
2831 au4_ilf_across_tile_slice_enable[0] = 0;
2832 au4_ilf_across_tile_slice_enable[4] = 0;
2833 }
2834 else
2835 {
2836 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2837 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2838 }
2839 if(0 == ps_sao_ctxt->i4_ctb_y)
2840 {
2841 au4_ilf_across_tile_slice_enable[2] = 0;
2842 au4_ilf_across_tile_slice_enable[4] = 0;
2843 au4_ilf_across_tile_slice_enable[5] = 0;
2844 }
2845 else
2846 {
2847 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2848 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2849 }
2850 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2851 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2852 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2853
2854 if(au4_idx_c[6] < idx_c)
2855 {
2856 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2857 }
2858
2859 /*
2860 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2861 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2862 */
2863 for(i = 0; i < 8; i++)
2864 {
2865 /*Sets the edges that lie on the slice/tile boundary*/
2866 if(au4_idx_c[i] != idx_c)
2867 {
2868 au1_tile_slice_boundary[i] = 1;
2869 }
2870 else
2871 {
2872 au4_ilf_across_tile_slice_enable[i] = 1;
2873 }
2874 }
2875 /*Reset indices*/
2876 for(i = 0; i < 8; i++)
2877 {
2878 au4_idx_c[i] = 0;
2879 }
2880 }
2881
2882 if(ps_pps->i1_tiles_enabled_flag)
2883 {
2884 /* Calculate availability flags at tile boundary */
2885 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2886 {
2887 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2888 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2889 {
2890 if(0 == ps_sao_ctxt->i4_ctb_x)
2891 {
2892 au4_idx_c[6] = -1;
2893 au4_idx_c[0] = -1;
2894 au4_idx_c[4] = -1;
2895 }
2896 else
2897 {
2898 au4_idx_c[0] = au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2899 }
2900
2901 if(0 == ps_sao_ctxt->i4_ctb_y)
2902 {
2903 au4_idx_c[2] = -1;
2904 au4_idx_c[5] = -1;
2905 au4_idx_c[4] = -1;
2906 }
2907 else
2908 {
2909 au4_idx_c[4] = pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2910 au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2911 }
2912 idx_c = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2913 au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2914 au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2915
2916 for(i = 0; i < 8; i++)
2917 {
2918 /*Sets the edges that lie on the slice/tile boundary*/
2919 if(au4_idx_c[i] != idx_c)
2920 {
2921 au1_tile_slice_boundary[i] |= 1;
2922 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2923 }
2924 }
2925 }
2926 }
2927 }
2928
2929 for(i = 0; i < 8; i++)
2930 {
2931 /* Mark a neighbour unavailable when it lies across a slice/tile boundary and in-loop filtering across that boundary is disabled */
2932 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2933 {
2934 au1_avail_luma[i] = 0;
2935 }
2936 }
2937
2938 }
2939 }
2940 if(0 == ps_sao_ctxt->i4_ctb_x)
2941 {
2942 au1_avail_luma[0] = 0;
2943 au1_avail_luma[4] = 0;
2944 au1_avail_luma[6] = 0;
2945 }
2946
2947 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2948 {
2949 au1_avail_luma[1] = 0;
2950 au1_avail_luma[5] = 0;
2951 au1_avail_luma[7] = 0;
2952 }
2953
2954 if(0 == ps_sao_ctxt->i4_ctb_y)
2955 {
2956 au1_avail_luma[2] = 0;
2957 au1_avail_luma[4] = 0;
2958 au1_avail_luma[5] = 0;
2959 }
2960
2961 if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) <= sao_ht_luma)
2962 {
2963 au1_avail_luma[3] = 0;
2964 au1_avail_luma[6] = 0;
2965 au1_avail_luma[7] = 0;
2966 }
2967
2968 {
2969 au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2970 u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2971
2972 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2973 src_strd,
2974 pu1_src_left_luma,
2975 pu1_src_top_luma,
2976 pu1_sao_src_top_left_luma_curr_ctb,
2977 au1_src_top_right,
2978 &u1_sao_src_top_left_luma_bot_left,
2979 au1_avail_luma,
2980 ai1_offset_y,
2981 sao_wd_luma,
2982 sao_ht_luma);
2983 }
2984 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2985 pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2986 }
2987 }
2988 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2989 {
2990 /* Update left, top and top-left */
2991 for(row = 0; row < sao_ht_luma; row++)
2992 {
2993 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2994 }
2995 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2996
2997 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2998
2999 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
3000 }
3001 }
3002
3003 if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
3004 {
3005 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
3006 {
3007 if(0 == ps_sao->b3_cb_type_idx)
3008 {
3009 for(row = 0; row < sao_ht_chroma; row++)
3010 {
3011 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3012 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3013 }
3014 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3015 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3016
3017 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3018
3019 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3020 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3021 }
3022
3023 else if(1 == ps_sao->b3_cb_type_idx)
3024 {
3025 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3026 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3027 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3028 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3029
3030 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3031 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3032 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3033 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3034
3035 if(chroma_yuv420sp_vu)
3036 {
3037 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3038 src_strd,
3039 pu1_src_left_chroma,
3040 pu1_src_top_chroma,
3041 pu1_sao_src_top_left_chroma_curr_ctb,
3042 ps_sao->b5_cr_band_pos,
3043 ps_sao->b5_cb_band_pos,
3044 ai1_offset_cr,
3045 ai1_offset_cb,
3046 sao_wd_chroma,
3047 sao_ht_chroma
3048 );
3049 }
3050 else
3051 {
3052 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3053 src_strd,
3054 pu1_src_left_chroma,
3055 pu1_src_top_chroma,
3056 pu1_sao_src_top_left_chroma_curr_ctb,
3057 ps_sao->b5_cb_band_pos,
3058 ps_sao->b5_cr_band_pos,
3059 ai1_offset_cb,
3060 ai1_offset_cr,
3061 sao_wd_chroma,
3062 sao_ht_chroma
3063 );
3064 }
3065 }
3066
3067 else // if(2 <= ps_sao->b3_cb_type_idx)
3068 {
3069 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3070 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3071 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3072 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3073
3074 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3075 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3076 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3077 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3078
3079 for(i = 0; i < 8; i++)
3080 {
3081 au1_avail_chroma[i] = 255;
3082 au1_tile_slice_boundary[i] = 0;
3083 au4_idx_c[i] = 0;
3084 au4_ilf_across_tile_slice_enable[i] = 1;
3085 }
3086 {
3087 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3088 {
3089 ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
3090 ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
3091
3092 ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
3093 ctby_c_l = ps_sao_ctxt->i4_ctb_y;
3094
3095 ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
3096 ctby_c_r = ps_sao_ctxt->i4_ctb_y;
3097
3098 ctbx_c_d = ps_sao_ctxt->i4_ctb_x;
3099 ctby_c_d = ps_sao_ctxt->i4_ctb_y;
3100
3101 ctbx_c = ps_sao_ctxt->i4_ctb_x;
3102 ctby_c = ps_sao_ctxt->i4_ctb_y;
3103
3104 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
3105 {
3106 if(0 == ps_sao_ctxt->i4_ctb_x)
3107 {
3108 au4_idx_c[0] = -1;
3109 au4_idx_c[4] = -1;
3110 au4_idx_c[6] = -1;
3111 }
3112 else
3113 {
3114 au4_idx_c[0] = au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3115 }
3116
3117 if(0 == ps_sao_ctxt->i4_ctb_y)
3118 {
3119 au4_idx_c[2] = -1;
3120 au4_idx_c[4] = -1;
3121 au4_idx_c[5] = -1;
3122 }
3123 else
3124 {
3125 au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3126 au4_idx_c[4] = pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3127 }
3128 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3129 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3130 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3131
3132 if(0 == ps_sao_ctxt->i4_ctb_x)
3133 {
3134 au4_ilf_across_tile_slice_enable[0] = 0;
3135 au4_ilf_across_tile_slice_enable[4] = 0;
3136 au4_ilf_across_tile_slice_enable[6] = 0;
3137 }
3138 else
3139 {
3140 au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
3141 au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3142 }
3143
3144 if(0 == ps_sao_ctxt->i4_ctb_y)
3145 {
3146 au4_ilf_across_tile_slice_enable[2] = 0;
3147 au4_ilf_across_tile_slice_enable[4] = 0;
3148 au4_ilf_across_tile_slice_enable[5] = 0;
3149 }
3150 else
3151 {
3152 au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3153 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
3154 }
3155
3156 au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
3157 au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
3158 au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
3159
3160 if(idx_c > au4_idx_c[6])
3161 {
3162 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3163 }
3164
3165 /*
3166 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
3167 * of the pixel having a greater address is checked. Accordingly, set the availability flags
3168 */
3169 for(i = 0; i < 8; i++)
3170 {
3171 /*Sets the edges that lie on the slice/tile boundary*/
3172 if(au4_idx_c[i] != idx_c)
3173 {
3174 au1_tile_slice_boundary[i] = 1;
3175 }
3176 else
3177 {
3178 au4_ilf_across_tile_slice_enable[i] = 1;
3179 }
3180 }
3181 /*Reset indices*/
3182 for(i = 0; i < 8; i++)
3183 {
3184 au4_idx_c[i] = 0;
3185 }
3186 }
3187
3188 if(ps_pps->i1_tiles_enabled_flag)
3189 {
3190 /* Calculate availability flags at tile boundary */
3191 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
3192 {
3193 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
3194 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
3195 {
3196 if(0 == ps_sao_ctxt->i4_ctb_x)
3197 {
3198 au4_idx_c[6] = -1;
3199 au4_idx_c[0] = -1;
3200 au4_idx_c[4] = -1;
3201 }
3202 else
3203 {
3204 au4_idx_c[0] = au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3205 }
3206
3207 if(0 == ps_sao_ctxt->i4_ctb_y)
3208 {
3209 au4_idx_c[2] = -1;
3210 au4_idx_c[5] = -1;
3211 au4_idx_c[4] = -1;
3212 }
3213 else
3214 {
3215 au4_idx_c[4] = pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3216 au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3217 }
3218 idx_c = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3219 au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3220 au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3221
3222 for(i = 0; i < 8; i++)
3223 {
3224 /*Sets the edges that lie on the slice/tile boundary*/
3225 if(au4_idx_c[i] != idx_c)
3226 {
3227 au1_tile_slice_boundary[i] |= 1;
3228 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
3229 }
3230 }
3231 }
3232 }
3233 }
3234
3235 for(i = 0; i < 8; i++)
3236 {
3237 /* Mark a neighbour unavailable when it lies across a slice/tile boundary and in-loop filtering across that boundary is disabled */
3238 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
3239 {
3240 au1_avail_chroma[i] = 0;
3241 }
3242 }
3243 }
3244 }
3245
3246 if(0 == ps_sao_ctxt->i4_ctb_x)
3247 {
3248 au1_avail_chroma[0] = 0;
3249 au1_avail_chroma[4] = 0;
3250 au1_avail_chroma[6] = 0;
3251 }
3252
3253 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
3254 {
3255 au1_avail_chroma[1] = 0;
3256 au1_avail_chroma[5] = 0;
3257 au1_avail_chroma[7] = 0;
3258 }
3259
3260 if(0 == ps_sao_ctxt->i4_ctb_y)
3261 {
3262 au1_avail_chroma[2] = 0;
3263 au1_avail_chroma[4] = 0;
3264 au1_avail_chroma[5] = 0;
3265 }
3266
3267 if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) <= sao_ht_chroma)
3268 {
3269 au1_avail_chroma[3] = 0;
3270 au1_avail_chroma[6] = 0;
3271 au1_avail_chroma[7] = 0;
3272 }
3273
3274 {
3275 au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
3276 au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
3277
3278 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
3279 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
3280
3281 if(chroma_yuv420sp_vu)
3282 {
3283 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3284 src_strd,
3285 pu1_src_left_chroma,
3286 pu1_src_top_chroma,
3287 pu1_sao_src_top_left_chroma_curr_ctb,
3288 au1_src_top_right,
3289 au1_sao_src_top_left_chroma_bot_left,
3290 au1_avail_chroma,
3291 ai1_offset_cr,
3292 ai1_offset_cb,
3293 sao_wd_chroma,
3294 sao_ht_chroma);
3295 }
3296 else
3297 {
3298 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3299 src_strd,
3300 pu1_src_left_chroma,
3301 pu1_src_top_chroma,
3302 pu1_sao_src_top_left_chroma_curr_ctb,
3303 au1_src_top_right,
3304 au1_sao_src_top_left_chroma_bot_left,
3305 au1_avail_chroma,
3306 ai1_offset_cb,
3307 ai1_offset_cr,
3308 sao_wd_chroma,
3309 sao_ht_chroma);
3310 }
3311 }
3312
3313 }
3314 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3315 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3316
3317 pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
3318 pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
3319 }
3320 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3321 {
3322 for(row = 0; row < sao_ht_chroma; row++)
3323 {
3324 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3325 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3326 }
3327 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3328 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3329
3330 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3331
3332 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3333 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3334 }
3335
3336 }
3337 }
3338
3339
3340
3341
3342 /* If no loop filter is enabled copy the backed up values */
3343 {
3344 /* Luma */
3345 if(no_loop_filter_enabled_luma)
3346 {
3347 UWORD32 u4_no_loop_filter_flag;
3348 WORD32 loop_filter_bit_pos;
3349 WORD32 log2_min_cu = 3;
3350 WORD32 min_cu = (1 << log2_min_cu);
3351 UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
3352 WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
3353 WORD32 sao_blk_wd = ctb_size;
3354 WORD32 remaining_rows;
3355 WORD32 remaining_cols;
3356
3357 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3358 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3359 if(remaining_rows <= SAO_SHIFT_CTB)
3360 sao_blk_ht += remaining_rows;
3361 if(remaining_cols <= SAO_SHIFT_CTB)
3362 sao_blk_wd += remaining_cols;
3363
3364 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
3365 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3366
3367 pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
3368
3369 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3370 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3371 if(ps_sao_ctxt->i4_ctb_x > 0)
3372 loop_filter_bit_pos -= 1;
3373
3374 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3375 (loop_filter_bit_pos >> 3);
3376
3377 for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
3378 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3379 {
3380 WORD32 tmp_wd = sao_blk_wd;
3381
3382 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3383 (loop_filter_bit_pos & 7);
3384 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3385
3386 if(u4_no_loop_filter_flag)
3387 {
3388 while(tmp_wd > 0)
3389 {
3390 if(CTZ(u4_no_loop_filter_flag))
3391 {
3392 pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3393 pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3394 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
3395 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
3396 }
3397 else
3398 {
3399 for(row = 0; row < min_cu; row++)
3400 {
3401 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3402 {
3403 pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
3404 }
3405 }
3406 pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3407 pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3408 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
3409 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
3410 }
3411 }
3412
3413 pu1_src_tmp_luma -= sao_blk_wd;
3414 pu1_src_backup_luma -= sao_blk_wd;
3415 }
3416
3417 pu1_src_tmp_luma += (src_strd << log2_min_cu);
3418 pu1_src_backup_luma += (backup_strd << log2_min_cu);
3419 }
3420 }
3421
3422 /* Chroma */
3423 if(no_loop_filter_enabled_chroma)
3424 {
3425 UWORD32 u4_no_loop_filter_flag;
3426 WORD32 loop_filter_bit_pos;
3427 WORD32 log2_min_cu = 3;
3428 WORD32 min_cu = (1 << log2_min_cu);
3429 UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
3430 WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
3431 WORD32 sao_blk_wd = ctb_size;
3432 WORD32 remaining_rows;
3433 WORD32 remaining_cols;
3434
3435 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3436 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3437 if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3438 sao_blk_ht += remaining_rows;
3439 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3440 sao_blk_wd += remaining_cols;
3441
3442 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3443 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3444
3445 pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3446
3447 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3448 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3449 if(ps_sao_ctxt->i4_ctb_x > 0)
3450 loop_filter_bit_pos -= 2;
3451
3452 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3453 (loop_filter_bit_pos >> 3);
3454
3455 for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3456 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3457 {
3458 WORD32 tmp_wd = sao_blk_wd;
3459
3460 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3461 (loop_filter_bit_pos & 7);
3462 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3463
3464 if(u4_no_loop_filter_flag)
3465 {
3466 while(tmp_wd > 0)
3467 {
3468 if(CTZ(u4_no_loop_filter_flag))
3469 {
3470 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3471 pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3472 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
3473 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
3474 }
3475 else
3476 {
3477 for(row = 0; row < min_cu / 2; row++)
3478 {
3479 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3480 {
3481 pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
3482 }
3483 }
3484
3485 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3486 pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3487 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
3488 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
3489 }
3490 }
3491
3492 pu1_src_tmp_chroma -= sao_blk_wd;
3493 pu1_src_backup_chroma -= sao_blk_wd;
3494 }
3495
3496 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
3497 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
3498 }
3499 }
3500 }
3501
3502 }
3503
3504