1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 *******************************************************************************
22 * @file
23 * ihevce_sao.c
24 *
25 * @brief
26 * Contains definition for the ctb level sao function
27 *
28 * @author
29 * Ittiam
30 *
31 * @par List of Functions:
32 * ihevce_sao_set_avilability()
33 * ihevce_sao_ctb()
34 * ihevce_sao_analyse()
35 *
36 * @remarks
37 * None
38 *
39 *******************************************************************************
40 */
41
42 /*****************************************************************************/
43 /* File Includes */
44 /*****************************************************************************/
45 /* System include files */
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <assert.h>
50 #include <stdarg.h>
51 #include <math.h>
52
53 /* User include files */
54 #include "ihevc_typedefs.h"
55 #include "itt_video_api.h"
56 #include "ihevce_api.h"
57
58 #include "rc_cntrl_param.h"
59 #include "rc_frame_info_collector.h"
60 #include "rc_look_ahead_params.h"
61
62 #include "ihevc_defs.h"
63 #include "ihevc_structs.h"
64 #include "ihevc_platform_macros.h"
65 #include "ihevc_deblk.h"
66 #include "ihevc_itrans_recon.h"
67 #include "ihevc_chroma_itrans_recon.h"
68 #include "ihevc_chroma_intra_pred.h"
69 #include "ihevc_intra_pred.h"
70 #include "ihevc_inter_pred.h"
71 #include "ihevc_mem_fns.h"
72 #include "ihevc_padding.h"
73 #include "ihevc_weighted_pred.h"
74 #include "ihevc_sao.h"
75 #include "ihevc_resi_trans.h"
76 #include "ihevc_quant_iquant_ssd.h"
77 #include "ihevc_cabac_tables.h"
78
79 #include "ihevce_defs.h"
80 #include "ihevce_lap_enc_structs.h"
81 #include "ihevce_multi_thrd_structs.h"
82 #include "ihevce_me_common_defs.h"
83 #include "ihevce_had_satd.h"
84 #include "ihevce_error_codes.h"
85 #include "ihevce_bitstream.h"
86 #include "ihevce_cabac.h"
87 #include "ihevce_rdoq_macros.h"
88 #include "ihevce_function_selector.h"
89 #include "ihevce_enc_structs.h"
90 #include "ihevce_entropy_structs.h"
91 #include "ihevce_cmn_utils_instr_set_router.h"
92 #include "ihevce_enc_loop_structs.h"
93 #include "ihevce_cabac_rdo.h"
94 #include "ihevce_sao.h"
95
96 /*****************************************************************************/
97 /* Function Definitions */
98 /*****************************************************************************/
99
100 /**
101 *******************************************************************************
102 *
103 * @brief
104 * ihevce_sao_set_avilability
105 *
106 * @par Description:
107 * Sets the availability flag for SAO.
108 *
109 * @param[in]
110 * ps_sao_ctxt: Pointer to SAO context
111 * @returns
112 *
113 * @remarks
114 * None
115 *
116 *******************************************************************************
117 */
ihevce_sao_set_avilability(UWORD8 * pu1_avail,sao_ctxt_t * ps_sao_ctxt,ihevce_tile_params_t * ps_tile_params)118 void ihevce_sao_set_avilability(
119 UWORD8 *pu1_avail, sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params)
120 {
121 WORD32 i;
122
123 WORD32 ctb_x_pos = ps_sao_ctxt->i4_ctb_x;
124 WORD32 ctb_y_pos = ps_sao_ctxt->i4_ctb_y;
125
126 for(i = 0; i < 8; i++)
127 {
128 pu1_avail[i] = 255;
129 }
130
131 /* SAO_note_01: If the CTB lies on a tile or a slice boundary and
132 in-loop filtering is enabled at tile and slice boundary, then SAO must
133 be performed at tile/slice boundaries also.
134 Hence the boundary checks should be based on frame position of CTB
135 rather than s_ctb_nbr_avail_flags.u1_left_avail flags.
136 Search for <SAO_note_01> in workspace to know more */
137 /* Availaibility flags for first col*/
138 if(ctb_x_pos == ps_tile_params->i4_first_ctb_x)
139 {
140 pu1_avail[0] = 0;
141 pu1_avail[4] = 0;
142 pu1_avail[6] = 0;
143 }
144
145 /* Availaibility flags for last col*/
146 if((ctb_x_pos + 1) ==
147 (ps_tile_params->i4_first_ctb_x + ps_tile_params->i4_curr_tile_wd_in_ctb_unit))
148 {
149 pu1_avail[1] = 0;
150 pu1_avail[5] = 0;
151 pu1_avail[7] = 0;
152 }
153
154 /* Availaibility flags for first row*/
155 if(ctb_y_pos == ps_tile_params->i4_first_ctb_y)
156 {
157 pu1_avail[2] = 0;
158 pu1_avail[4] = 0;
159 pu1_avail[5] = 0;
160 }
161
162 /* Availaibility flags for last row*/
163 if((ctb_y_pos + 1) ==
164 (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit))
165 {
166 pu1_avail[3] = 0;
167 pu1_avail[6] = 0;
168 pu1_avail[7] = 0;
169 }
170 }
171
172 /**
173 *******************************************************************************
174 *
175 * @brief
176 * Sao CTB level function.
177 *
178 * @par Description:
179 * For a given CTB, sao is done. Both the luma and chroma
180 * blocks are processed
181 *
182 * @param[in]
183 * ps_sao_ctxt: Pointer to SAO context
184 *
185 * @returns
186 *
187 * @remarks
188 * None
189 *
190 *******************************************************************************
191 */
ihevce_sao_ctb(sao_ctxt_t * ps_sao_ctxt,ihevce_tile_params_t * ps_tile_params)192 void ihevce_sao_ctb(sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params)
193 {
194 sao_enc_t *ps_sao;
195 UWORD8 u1_src_top_left_luma, u1_src_top_left_chroma[2];
196 UWORD8 *pu1_src_left_luma_buf, *pu1_src_top_luma_buf;
197 UWORD8 *pu1_src_left_chroma_buf, *pu1_src_top_chroma_buf;
198 UWORD8 *pu1_src_luma, *pu1_src_chroma;
199 WORD32 luma_src_stride, ctb_size;
200 WORD32 chroma_src_stride;
201 UWORD8 au1_avail_luma[8], au1_avail_chroma[8];
202 WORD32 sao_blk_wd, sao_blk_ht, sao_wd_chroma, sao_ht_chroma;
203 UWORD8 *pu1_top_left_luma, *pu1_top_left_chroma;
204 UWORD8 *pu1_src_bot_left_luma, *pu1_src_top_right_luma;
205 UWORD8 *pu1_src_bot_left_chroma, *pu1_src_top_right_chroma;
206 UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2);
207
208 ps_sao = ps_sao_ctxt->ps_sao;
209
210 ASSERT(
211 (abs(ps_sao->u1_y_offset[1]) <= 7) && (abs(ps_sao->u1_y_offset[2]) <= 7) &&
212 (abs(ps_sao->u1_y_offset[3]) <= 7) && (abs(ps_sao->u1_y_offset[4]) <= 7));
213 ASSERT(
214 (abs(ps_sao->u1_cb_offset[1]) <= 7) && (abs(ps_sao->u1_cb_offset[2]) <= 7) &&
215 (abs(ps_sao->u1_cb_offset[3]) <= 7) && (abs(ps_sao->u1_cb_offset[4]) <= 7));
216 ASSERT(
217 (abs(ps_sao->u1_cr_offset[1]) <= 7) && (abs(ps_sao->u1_cr_offset[2]) <= 7) &&
218 (abs(ps_sao->u1_cr_offset[3]) <= 7) && (abs(ps_sao->u1_cr_offset[4]) <= 7));
219 ASSERT(
220 (ps_sao->b5_y_band_pos <= 28) && (ps_sao->b5_cb_band_pos <= 28) &&
221 (ps_sao->b5_cr_band_pos <= 28));
222
223 if(ps_sao_ctxt->i1_slice_sao_luma_flag)
224 {
225 /*initialize the src pointer to current row*/
226 luma_src_stride = ps_sao_ctxt->i4_cur_luma_recon_stride;
227
228 ctb_size = ps_sao_ctxt->i4_ctb_size;
229
230 /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/
231 ps_sao->u1_y_offset[0] = 0; /* 0th element is not being used */
232 sao_blk_wd = ps_sao_ctxt->i4_sao_blk_wd;
233 sao_blk_ht = ps_sao_ctxt->i4_sao_blk_ht;
234
235 pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf;
236 /* Pointer to the top luma buffer corresponding to the current ctb row*/
237 pu1_src_top_luma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_luma;
238
239 /* Pointer to left luma buffer corresponding to the current ctb row*/
240 pu1_src_left_luma_buf = ps_sao_ctxt->au1_left_luma_scratch;
241
242 /* Pointer to the top right luma buffer corresponding to the current ctb row*/
243 pu1_src_top_right_luma = pu1_src_top_luma_buf /*- top_buf_stide*/ + sao_blk_wd;
244
245 /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/
246 pu1_src_bot_left_luma =
247 ps_sao_ctxt->pu1_frm_luma_recon_buf + ctb_size * ps_sao_ctxt->i4_frm_luma_recon_stride -
248 1 + (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
249 (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/
250
251 /* Back up the top left pixel for (x+1, y+1)th ctb*/
252 u1_src_top_left_luma = *(pu1_src_top_luma_buf + sao_blk_wd - 1);
253 pu1_top_left_luma = pu1_src_top_luma_buf - 1;
254
255 if(SAO_BAND == ps_sao->b3_y_type_idx)
256 {
257 ihevc_sao_band_offset_luma(
258 pu1_src_luma,
259 luma_src_stride,
260 pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
261 pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
262 pu1_src_top_luma_buf - 1, /* Top left*/
263 ps_sao->b5_y_band_pos,
264 ps_sao->u1_y_offset,
265 sao_blk_wd,
266 sao_blk_ht);
267
268 if((ps_sao_ctxt->i4_ctb_y > 0))
269 {
270 *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma;
271 }
272 }
273 else if(ps_sao->b3_y_type_idx >= SAO_EDGE_0_DEG)
274 {
275 /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets
276 * corresponding to EO category 1 and 2 which should be always positive
277 * And 3rd and 4th offsets are always inferred as offsets corresponding to
278 * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx)
279 */
280 // clang-format off
281 ASSERT((ps_sao->u1_y_offset[1] >= 0) && (ps_sao->u1_y_offset[2] >= 0));
282 ASSERT((ps_sao->u1_y_offset[3] <= 0) && (ps_sao->u1_y_offset[4] <= 0));
283 // clang-format on
284
285 ihevce_sao_set_avilability(au1_avail_luma, ps_sao_ctxt, ps_tile_params);
286
287 ps_sao_ctxt->apf_sao_luma[ps_sao->b3_y_type_idx - 2](
288 pu1_src_luma,
289 luma_src_stride,
290 pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
291 pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
292 pu1_top_left_luma, /* Top left*/
293 pu1_src_top_right_luma, /* Top right*/
294 pu1_src_bot_left_luma, /* Bottom left*/
295 au1_avail_luma,
296 ps_sao->u1_y_offset,
297 sao_blk_wd,
298 sao_blk_ht);
299
300 if((ps_sao_ctxt->i4_ctb_y > 0))
301 {
302 *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma;
303 }
304 }
305 }
306
307 if(ps_sao_ctxt->i1_slice_sao_chroma_flag)
308 {
309 /*initialize the src pointer to current row*/
310 chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride;
311 ctb_size = ps_sao_ctxt->i4_ctb_size;
312
313 /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/
314 //top_buf_stide = ps_sao_ctxt->u4_ctb_aligned_wd + 2;
315 ps_sao->u1_cb_offset[0] = 0; /* 0th element is not used */
316 ps_sao->u1_cr_offset[0] = 0;
317 sao_wd_chroma = ps_sao_ctxt->i4_sao_blk_wd;
318 sao_ht_chroma = ps_sao_ctxt->i4_sao_blk_ht / (!u1_is_422 + 1);
319
320 pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf;
321 /* Pointer to the top luma buffer corresponding to the current ctb row*/
322 pu1_src_top_chroma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_chroma;
323 // clang-format off
324 /* Pointer to left luma buffer corresponding to the current ctb row*/
325 pu1_src_left_chroma_buf = ps_sao_ctxt->au1_left_chroma_scratch; //ps_sao_ctxt->au1_sao_src_left_chroma;
326 // clang-format on
327 /* Pointer to the top right chroma buffer corresponding to the current ctb row*/
328 pu1_src_top_right_chroma = pu1_src_top_chroma_buf /*- top_buf_stide*/ + sao_wd_chroma;
329
330 /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/
331 pu1_src_bot_left_chroma =
332 ps_sao_ctxt->pu1_frm_chroma_recon_buf +
333 (ctb_size >> !u1_is_422) * ps_sao_ctxt->i4_frm_chroma_recon_stride - 2 +
334 (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
335 (ctb_size >> !u1_is_422)) +
336 (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/
337
338 /* Back up the top left pixel for (x+1, y+1)th ctb*/
339 u1_src_top_left_chroma[0] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 2);
340 u1_src_top_left_chroma[1] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 1);
341 pu1_top_left_chroma = pu1_src_top_chroma_buf - 2;
342
343 if(SAO_BAND == ps_sao->b3_cb_type_idx)
344 {
345 ihevc_sao_band_offset_chroma(
346 pu1_src_chroma,
347 chroma_src_stride,
348 pu1_src_left_chroma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
349 pu1_src_top_chroma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
350 pu1_top_left_chroma, /* Top left*/
351 ps_sao->b5_cb_band_pos,
352 ps_sao->b5_cr_band_pos,
353 ps_sao->u1_cb_offset,
354 ps_sao->u1_cr_offset,
355 sao_wd_chroma,
356 sao_ht_chroma);
357
358 if((ps_sao_ctxt->i4_ctb_y > 0))
359 {
360 *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0];
361 *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1];
362 }
363 }
364 else if(ps_sao->b3_cb_type_idx >= SAO_EDGE_0_DEG)
365 {
366 /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets
367 * corresponding to EO category 1 and 2 which should be always positive
368 * And 3rd and 4th offsets are always inferred as offsets corresponding to
369 * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx)
370 */
371 ASSERT((ps_sao->u1_cb_offset[1] >= 0) && (ps_sao->u1_cb_offset[2] >= 0));
372 ASSERT((ps_sao->u1_cb_offset[3] <= 0) && (ps_sao->u1_cb_offset[4] <= 0));
373
374 ASSERT((ps_sao->u1_cr_offset[1] >= 0) && (ps_sao->u1_cr_offset[2] >= 0));
375 ASSERT((ps_sao->u1_cr_offset[3] <= 0) && (ps_sao->u1_cr_offset[4] <= 0));
376
377 ihevce_sao_set_avilability(au1_avail_chroma, ps_sao_ctxt, ps_tile_params);
378
379 ps_sao_ctxt->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](
380 pu1_src_chroma,
381 chroma_src_stride,
382 pu1_src_left_chroma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
383 pu1_src_top_chroma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
384 pu1_top_left_chroma, /* Top left*/
385 pu1_src_top_right_chroma, /* Top right*/
386 pu1_src_bot_left_chroma, /* Bottom left*/
387 au1_avail_chroma,
388 ps_sao->u1_cb_offset,
389 ps_sao->u1_cr_offset,
390 sao_wd_chroma,
391 sao_ht_chroma);
392
393 if((ps_sao_ctxt->i4_ctb_y > 0))
394 {
395 *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0];
396 *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1];
397 }
398 }
399 }
400 }
401
402 /**
403 *******************************************************************************
404 *
405 * @brief
406 * CTB level function to do SAO analysis.
407 *
408 * @par Description:
409 * For a given CTB, sao analysis is done for both luma and chroma.
410 *
411 *
412 * @param[in]
413 * ps_sao_ctxt: Pointer to SAO context
414 * ps_ctb_enc_loop_out : pointer to ctb level output structure from enc loop
415 *
416 * @returns
417 *
418 * @remarks
419 * None
420 *
421 * @Assumptions:
* 1) The initial CABAC state for the current ctb to be sao'ed (i.e. the (x-1,y-1)th ctb) is
*    assumed to be almost the same as the CABAC state of the (x,y)th ctb.
* 2) Distortion is calculated in the spatial domain, but the lambda used to calculate the
*    cost is in the frequency domain.
426 *******************************************************************************
427 */
ihevce_sao_analyse(sao_ctxt_t * ps_sao_ctxt,ctb_enc_loop_out_t * ps_ctb_enc_loop_out,UWORD32 * pu4_frame_rdopt_header_bits,ihevce_tile_params_t * ps_tile_params)428 void ihevce_sao_analyse(
429 sao_ctxt_t *ps_sao_ctxt,
430 ctb_enc_loop_out_t *ps_ctb_enc_loop_out,
431 UWORD32 *pu4_frame_rdopt_header_bits,
432 ihevce_tile_params_t *ps_tile_params)
433 {
434 UWORD8 *pu1_luma_scratch_buf;
435 UWORD8 *pu1_chroma_scratch_buf;
436 UWORD8 *pu1_src_luma, *pu1_recon_luma;
437 UWORD8 *pu1_src_chroma, *pu1_recon_chroma;
438 WORD32 luma_src_stride, luma_recon_stride, ctb_size, ctb_wd, ctb_ht;
439 WORD32 chroma_src_stride, chroma_recon_stride;
440 WORD32 i4_luma_scratch_buf_stride;
441 WORD32 i4_chroma_scratch_buf_stride;
442 sao_ctxt_t s_sao_ctxt;
443 UWORD32 ctb_bits = 0, distortion = 0, curr_cost = 0, best_cost = 0;
444 LWORD64 i8_cl_ssd_lambda_qf, i8_cl_ssd_lambda_chroma_qf;
445 WORD32 rdo_cand, num_luma_rdo_cand = 0, num_rdo_cand = 0;
446 WORD32 curr_buf_idx, best_buf_idx, best_cand_idx;
447 WORD32 row;
448 WORD32 edgeidx;
449 WORD32 acc_error_category[5] = { 0, 0, 0, 0, 0 }, category_count[5] = { 0, 0, 0, 0, 0 };
450 sao_enc_t s_best_luma_chroma_cand;
451 WORD32 best_ctb_sao_bits = 0;
452 #if DISABLE_SAO_WHEN_NOISY && !defined(ENC_VER_v2)
453 UWORD8 u1_force_no_offset =
454 ps_sao_ctxt
455 ->ps_ctb_data
456 [ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_data_stride * ps_sao_ctxt->i4_ctb_y]
457 .s_ctb_noise_params.i4_noise_present;
458 #endif
459 UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2);
460
461 *pu4_frame_rdopt_header_bits = 0;
462
463 ctb_size = ps_sao_ctxt->i4_ctb_size;
464 ctb_wd = ps_sao_ctxt->i4_sao_blk_wd;
465 ctb_ht = ps_sao_ctxt->i4_sao_blk_ht;
466
467 s_sao_ctxt = ps_sao_ctxt[0];
468
469 /* Memset the best luma_chroma_cand structure to avoid asserts in debug mode*/
470 memset(&s_best_luma_chroma_cand, 0, sizeof(sao_enc_t));
471
472 /* Initialize the pointer and strides for luma buffers*/
473 pu1_recon_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf;
474 luma_recon_stride = ps_sao_ctxt->i4_cur_luma_recon_stride;
475
476 pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_src_buf;
477 luma_src_stride = ps_sao_ctxt->i4_cur_luma_src_stride;
478 i4_luma_scratch_buf_stride = SCRATCH_BUF_STRIDE;
479
480 /* Initialize the pointer and strides for luma buffers*/
481 pu1_recon_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf;
482 chroma_recon_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride;
483
484 pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_src_buf;
485 chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_src_stride;
486 i4_chroma_scratch_buf_stride = SCRATCH_BUF_STRIDE;
487
488 i8_cl_ssd_lambda_qf = ps_sao_ctxt->i8_cl_ssd_lambda_qf;
489 i8_cl_ssd_lambda_chroma_qf = ps_sao_ctxt->i8_cl_ssd_lambda_chroma_qf;
490
491 /*****************************************************/
492 /********************RDO FOR LUMA CAND****************/
493 /*****************************************************/
494
495 #if !DISABLE_SAO_WHEN_NOISY
496 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
497 #else
498 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag && !u1_force_no_offset)
499 #endif
500 {
501 /* Candidate for Edge offset SAO*/
502 /* Following is the convention for curr pixel and
503 * two neighbouring pixels for 0 deg, 90 deg, 135 deg and 45 deg */
504 /*
505 * 0 deg : a c b 90 deg: a 135 deg: a 45 deg: a
506 * c c c
507 * b b b
508 */
509
510 /* 0 deg SAO CAND*/
511 /* Reset the error and edge count*/
512 for(edgeidx = 0; edgeidx < 5; edgeidx++)
513 {
514 acc_error_category[edgeidx] = 0;
515 category_count[edgeidx] = 0;
516 }
517
518 /* Call the funciton to populate the EO parameter for this ctb for 0 deg EO class*/
519 // clang-format off
520 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_0_DEG,
521 acc_error_category, category_count);
522 // clang-format on
523 // clang-format off
524 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_0_DEG;
525 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
526 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
527 : 0;
528 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
529 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
530 : 0;
531 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
532 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
533 : 0;
534 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] =category_count[4]
535 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
536 : 0;
537 // clang-format on
538 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
539 // clang-format off
540 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
541 // clang-format on
542 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
543 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
544 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
545 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
546 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
547
548 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
549 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
550 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
551 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
552 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
553 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
554 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
555 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
556
557 num_luma_rdo_cand++;
558
559 /* 90 degree SAO CAND*/
560 for(edgeidx = 0; edgeidx < 5; edgeidx++)
561 {
562 acc_error_category[edgeidx] = 0;
563 category_count[edgeidx] = 0;
564 }
565
566 /* Call the funciton to populate the EO parameter for this ctb for 90 deg EO class*/
567 // clang-format off
568 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_90_DEG,
569 acc_error_category, category_count);
570
571 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_90_DEG;
572 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
573 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
574 : 0;
575 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
576 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
577 : 0;
578 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
579 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
580 : 0;
581 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
582 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
583 : 0;
584 // clang-format on
585 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
586
587 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
588 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
589 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
590 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
591 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
592 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
593
594 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
595 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
596 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
597 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
598 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
599 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
600 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
601 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
602
603 num_luma_rdo_cand++;
604
605 /* 135 degree SAO CAND*/
606 for(edgeidx = 0; edgeidx < 5; edgeidx++)
607 {
608 acc_error_category[edgeidx] = 0;
609 category_count[edgeidx] = 0;
610 }
611
612 /* Call the funciton to populate the EO parameter for this ctb for 135 deg EO class*/
613 // clang-format off
614 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_135_DEG,
615 acc_error_category, category_count);
616
617 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_135_DEG;
618 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
619 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
620 : 0;
621 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
622 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
623 : 0;
624 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
625 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
626 : 0;
627 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
628 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
629 : 0;
630 // clang-format on
631 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
632
633 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
634 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
635 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
636 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
637 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
638 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
639
640 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
641 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
642 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
643 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
644 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
645 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
646 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
647 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
648
649 num_luma_rdo_cand++;
650
651 /* 45 degree SAO CAND*/
652 for(edgeidx = 0; edgeidx < 5; edgeidx++)
653 {
654 acc_error_category[edgeidx] = 0;
655 category_count[edgeidx] = 0;
656 }
657
658 /* Call the funciton to populate the EO parameter for this ctb for 45 deg EO class*/
659 // clang-format off
660 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_45_DEG,
661 acc_error_category, category_count);
662
663 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_45_DEG;
664 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
665 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
666 : 0;
667 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
668 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
669 : 0;
670 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
671 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
672 : 0;
673 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
674 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
675 : 0;
676 // clang-format on
677 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
678
679 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
680 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
681 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
682 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
683 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
684 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
685
686 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
687 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
688 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
689 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
690 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
691 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
692 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
693 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
694
695 num_luma_rdo_cand++;
696
697 /* First cand will be best cand after 1st iteration*/
698 curr_buf_idx = 0;
699 best_buf_idx = 1;
700 best_cost = 0xFFFFFFFF;
701 best_cand_idx = 0;
702
703 /*Back up the top pixels for (x,y+1)th ctb*/
704 if(!ps_sao_ctxt->i4_is_last_ctb_row)
705 {
706 memcpy(
707 ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride,
708 pu1_recon_luma + luma_recon_stride * (ctb_size - 1),
709 ps_sao_ctxt->i4_sao_blk_wd);
710 }
711
712 for(rdo_cand = 0; rdo_cand < num_luma_rdo_cand; rdo_cand++)
713 {
714 s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand];
715
716 /* This memcpy is required because cabac uses parameters from this structure
717 * to evaluate bits and this structure ptr is sent to cabac through
718 * "ihevce_cabac_rdo_encode_sao" function
719 */
720 memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t));
721
722 /* Copy the left pixels to the scratch buffer for evry rdo cand because its
723 overwritten by the sao leaf level function for next ctb*/
724 memcpy(
725 s_sao_ctxt.au1_left_luma_scratch,
726 ps_sao_ctxt->au1_sao_src_left_luma,
727 ps_sao_ctxt->i4_sao_blk_ht);
728
729 /* Copy the top and top left pixels to the scratch buffer for evry rdo cand because its
730 overwritten by the sao leaf level function for next ctb*/
731 memcpy(
732 s_sao_ctxt.au1_top_luma_scratch,
733 ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1,
734 ps_sao_ctxt->i4_sao_blk_wd + 2);
735 s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1;
736
737 pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx];
738
739 ASSERT(
740 (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) &&
741 (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) &&
742 (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) &&
743 (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7));
744 ASSERT(
745 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) &&
746 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) &&
747 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) &&
748 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7));
749 ASSERT(
750 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) &&
751 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) &&
752 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) &&
753 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7));
754 ASSERT(
755 (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) &&
756 (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) &&
757 (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28));
758
759 /* Copy the deblocked recon data to scratch buffer to do sao*/
760
761 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
762 pu1_luma_scratch_buf,
763 i4_luma_scratch_buf_stride,
764 pu1_recon_luma,
765 luma_recon_stride,
766 SCRATCH_BUF_STRIDE,
767 ctb_ht + 1);
768
769 s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf;
770 s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride;
771
772 s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag;
773 s_sao_ctxt.i1_slice_sao_chroma_flag = 0;
774
775 ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params);
776
777 /* Calculate the distortion between sao'ed ctb and original src ctb*/
778 // clang-format off
779 distortion =
780 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
781 s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
782 s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, ctb_ht, NULL_PLANE);
783 // clang-format on
784
785 ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx;
786 ctb_bits = ihevce_cabac_rdo_encode_sao(
787 ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out);
788
789 /* Calculate the cost as D + (lambda) * R */
790 curr_cost = distortion +
791 COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
792
793 if(curr_cost < best_cost)
794 {
795 best_cost = curr_cost;
796 best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
797 best_cand_idx = rdo_cand;
798 curr_buf_idx = !curr_buf_idx;
799 }
800 }
801
802 /* Copy the sao parameters of the best luma cand into the luma_chroma cand structure for the next stage of RDO
803 * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand
804 */
805 s_best_luma_chroma_cand.b3_y_type_idx =
806 ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b3_y_type_idx;
807 s_best_luma_chroma_cand.u1_y_offset[1] =
808 ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[1];
809 s_best_luma_chroma_cand.u1_y_offset[2] =
810 ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[2];
811 s_best_luma_chroma_cand.u1_y_offset[3] =
812 ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[3];
813 s_best_luma_chroma_cand.u1_y_offset[4] =
814 ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[4];
815 s_best_luma_chroma_cand.b5_y_band_pos =
816 ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b5_y_band_pos;
817 }
818 else
819 {
820 /*Back up the top pixels for (x,y+1)th ctb*/
821 if(!ps_sao_ctxt->i4_is_last_ctb_row)
822 {
823 memcpy(
824 ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride,
825 pu1_recon_luma + luma_recon_stride * (ctb_size - 1),
826 ps_sao_ctxt->i4_sao_blk_wd);
827 }
828
829 s_best_luma_chroma_cand.b3_y_type_idx = SAO_NONE;
830 s_best_luma_chroma_cand.u1_y_offset[1] = 0;
831 s_best_luma_chroma_cand.u1_y_offset[2] = 0;
832 s_best_luma_chroma_cand.u1_y_offset[3] = 0;
833 s_best_luma_chroma_cand.u1_y_offset[4] = 0;
834 s_best_luma_chroma_cand.b5_y_band_pos = 0;
835 s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
836 s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
837
838 s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE;
839 s_best_luma_chroma_cand.u1_cb_offset[1] = 0;
840 s_best_luma_chroma_cand.u1_cb_offset[2] = 0;
841 s_best_luma_chroma_cand.u1_cb_offset[3] = 0;
842 s_best_luma_chroma_cand.u1_cb_offset[4] = 0;
843 s_best_luma_chroma_cand.b5_cb_band_pos = 0;
844
845 s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE;
846 s_best_luma_chroma_cand.u1_cr_offset[1] = 0;
847 s_best_luma_chroma_cand.u1_cr_offset[2] = 0;
848 s_best_luma_chroma_cand.u1_cr_offset[3] = 0;
849 s_best_luma_chroma_cand.u1_cr_offset[4] = 0;
850 s_best_luma_chroma_cand.b5_cr_band_pos = 0;
851 }
852 /*****************************************************/
853 /********************RDO FOR CHROMA CAND**************/
854 /*****************************************************/
855 #if !DISABLE_SAO_WHEN_NOISY
856 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
857 #else
858 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag && !u1_force_no_offset)
859 #endif
860 {
861 /*Back up the top pixels for (x,y+1)th ctb*/
862 if(!ps_sao_ctxt->i4_is_last_ctb_row)
863 {
864 memcpy(
865 ps_sao_ctxt->pu1_curr_sao_src_top_chroma +
866 ps_sao_ctxt->i4_frm_top_chroma_buf_stride,
867 pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1),
868 ps_sao_ctxt->i4_sao_blk_wd);
869 }
870
871 /* Reset the error and edge count*/
872 for(edgeidx = 0; edgeidx < 5; edgeidx++)
873 {
874 acc_error_category[edgeidx] = 0;
875 category_count[edgeidx] = 0;
876 }
877 // clang-format off
878 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_chroma_eo_sao_params(ps_sao_ctxt,
879 s_best_luma_chroma_cand.b3_y_type_idx, acc_error_category,
880 category_count);
881 // clang-format on
882
883 /* Copy the sao parameters of the best luma cand into the luma_chroma cand structure for the next stage of RDO
884 * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand
885 */
886 // clang-format off
887 s_best_luma_chroma_cand.b3_cb_type_idx = s_best_luma_chroma_cand.b3_y_type_idx;
888 s_best_luma_chroma_cand.u1_cb_offset[1] = category_count[0]
889 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
890 : 0;
891 s_best_luma_chroma_cand.u1_cb_offset[2] = category_count[1]
892 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
893 : 0;
894 s_best_luma_chroma_cand.u1_cb_offset[3] = category_count[3]
895 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
896 : 0;
897 s_best_luma_chroma_cand.u1_cb_offset[4] = category_count[4]
898 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
899 : 0;
900 s_best_luma_chroma_cand.b5_cb_band_pos = 0;
901
902 s_best_luma_chroma_cand.b3_cr_type_idx = s_best_luma_chroma_cand.b3_y_type_idx;
903 s_best_luma_chroma_cand.u1_cr_offset[1] = category_count[0]
904 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
905 : 0;
906 s_best_luma_chroma_cand.u1_cr_offset[2] = category_count[1]
907 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
908 : 0;
909 s_best_luma_chroma_cand.u1_cr_offset[3] = category_count[3]
910 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
911 : 0;
912 s_best_luma_chroma_cand.u1_cr_offset[4] = category_count[4]
913 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
914 : 0;
915 // clang-format on
916 s_best_luma_chroma_cand.b5_cr_band_pos = 0;
917 }
918 else
919 {
920 /*Back up the top pixels for (x,y+1)th ctb*/
921 if(!ps_sao_ctxt->i4_is_last_ctb_row)
922 {
923 memcpy(
924 ps_sao_ctxt->pu1_curr_sao_src_top_chroma +
925 ps_sao_ctxt->i4_frm_top_chroma_buf_stride,
926 pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1),
927 ps_sao_ctxt->i4_sao_blk_wd);
928 }
929
930 s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE;
931 s_best_luma_chroma_cand.u1_cb_offset[1] = 0;
932 s_best_luma_chroma_cand.u1_cb_offset[2] = 0;
933 s_best_luma_chroma_cand.u1_cb_offset[3] = 0;
934 s_best_luma_chroma_cand.u1_cb_offset[4] = 0;
935 s_best_luma_chroma_cand.b5_cb_band_pos = 0;
936
937 s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE;
938 s_best_luma_chroma_cand.u1_cr_offset[1] = 0;
939 s_best_luma_chroma_cand.u1_cr_offset[2] = 0;
940 s_best_luma_chroma_cand.u1_cr_offset[3] = 0;
941 s_best_luma_chroma_cand.u1_cr_offset[4] = 0;
942 s_best_luma_chroma_cand.b5_cr_band_pos = 0;
943
944 s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
945 s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
946 }
947
948 s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
949 s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
950
951 /*****************************************************/
952 /**RDO for Best Luma - Chroma combined, No SAO,*******/
953 /*************Left merge and Top merge****************/
954 /*****************************************************/
955
956 /* No SAO cand*/
957 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
958 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
959
960 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_y_type_idx = SAO_NONE;
961 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[1] = 0;
962 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[2] = 0;
963 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[3] = 0;
964 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[4] = 0;
965 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_y_band_pos = 0;
966
967 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cb_type_idx = SAO_NONE;
968 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[1] = 0;
969 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[2] = 0;
970 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[3] = 0;
971 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[4] = 0;
972 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cb_band_pos = 0;
973
974 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cr_type_idx = SAO_NONE;
975 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[1] = 0;
976 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[2] = 0;
977 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[3] = 0;
978 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[4] = 0;
979 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cr_band_pos = 0;
980 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
981 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
982
983 num_rdo_cand++;
984
985 /* SAO_note_01: If the CTB lies on a tile or a slice boundary, then
986 the standard mandates that the merge candidates must be set to unavailable.
987 Hence, check for tile boundary condition by reading
988 s_ctb_nbr_avail_flags.u1_left_avail rather than frame position of CTB.
989 A special case: Merge-candidates should be available at dependent-slices boundaries.
990 Search for <SAO_note_01> in workspace to know more */
991
992 #if !DISABLE_SAO_WHEN_NOISY
993 if(1)
994 #else
995 if(!u1_force_no_offset)
996 #endif
997 {
998 /* Merge left cand*/
999 if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_left_avail)
1000 {
1001 memcpy(
1002 &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
1003 &ps_sao_ctxt->s_left_ctb_sao,
1004 sizeof(sao_enc_t));
1005 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 1;
1006 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
1007 num_rdo_cand++;
1008 }
1009
1010 /* Merge top cand*/
1011 if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_top_avail)
1012 {
1013 memcpy(
1014 &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
1015 (ps_sao_ctxt->ps_top_ctb_sao - ps_sao_ctxt->u4_num_ctbs_horz),
1016 sizeof(sao_enc_t));
1017 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
1018 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 1;
1019 num_rdo_cand++;
1020 }
1021
1022 /* Best luma-chroma candidate*/
1023 memcpy(
1024 &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
1025 &s_best_luma_chroma_cand,
1026 sizeof(sao_enc_t));
1027 num_rdo_cand++;
1028 }
1029
1030 {
1031 UWORD32 luma_distortion = 0, chroma_distortion = 0;
1032 /* First cand will be best cand after 1st iteration*/
1033 curr_buf_idx = 0;
1034 best_buf_idx = 1;
1035 best_cost = 0xFFFFFFFF;
1036 best_cand_idx = 0;
1037
1038 for(rdo_cand = 0; rdo_cand < num_rdo_cand; rdo_cand++)
1039 {
1040 s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand];
1041
1042 distortion = 0;
1043
1044 /* This memcpy is required because cabac uses parameters from this structure
1045 * to evaluate bits and this structure ptr is sent to cabac through
1046 * "ihevce_cabac_rdo_encode_sao" function
1047 */
1048 memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t));
1049
1050 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
1051 {
1052 /* Copy the left pixels to the scratch buffer for every rdo cand because it is
1053 overwritten by the sao leaf level function for next ctb*/
1054 memcpy(
1055 s_sao_ctxt.au1_left_luma_scratch,
1056 ps_sao_ctxt->au1_sao_src_left_luma,
1057 ps_sao_ctxt->i4_sao_blk_ht);
1058
1059 /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is
1060 overwritten by the sao leaf level function for next ctb*/
1061 memcpy(
1062 s_sao_ctxt.au1_top_luma_scratch,
1063 ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1,
1064 ps_sao_ctxt->i4_sao_blk_wd + 2);
1065 s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1;
1066
1067 pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx];
1068
1069 /* Copy the deblocked recon data to scratch buffer to do sao*/
1070
1071 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1072 pu1_luma_scratch_buf,
1073 i4_luma_scratch_buf_stride,
1074 pu1_recon_luma,
1075 luma_recon_stride,
1076 SCRATCH_BUF_STRIDE,
1077 ctb_ht + 1);
1078 s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf;
1079 s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride;
1080
1081 ASSERT(
1082 (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) &&
1083 (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) &&
1084 (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) &&
1085 (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7));
1086 }
1087 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1088 {
1089 /* Copy the left pixels to the scratch buffer for every rdo cand because it is
1090 overwritten by the sao leaf level function for next ctb*/
1091 memcpy(
1092 s_sao_ctxt.au1_left_chroma_scratch,
1093 ps_sao_ctxt->au1_sao_src_left_chroma,
1094 (ps_sao_ctxt->i4_sao_blk_ht >> !u1_is_422) * 2);
1095
1096 /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is
1097 overwritten by the sao leaf level function for next ctb*/
1098 memcpy(
1099 s_sao_ctxt.au1_top_chroma_scratch,
1100 ps_sao_ctxt->pu1_curr_sao_src_top_chroma - 2,
1101 ps_sao_ctxt->i4_sao_blk_wd + 4);
1102
1103 s_sao_ctxt.pu1_curr_sao_src_top_chroma = s_sao_ctxt.au1_top_chroma_scratch + 2;
1104
1105 pu1_chroma_scratch_buf = ps_sao_ctxt->au1_sao_chroma_scratch[curr_buf_idx];
1106
1107 /* Copy the deblocked recon data to scratch buffer to do sao*/
1108
1109 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1110 pu1_chroma_scratch_buf,
1111 i4_chroma_scratch_buf_stride,
1112 pu1_recon_chroma,
1113 chroma_recon_stride,
1114 SCRATCH_BUF_STRIDE,
1115 (ctb_ht >> !u1_is_422) + 1);
1116
1117 s_sao_ctxt.pu1_cur_chroma_recon_buf = pu1_chroma_scratch_buf;
1118 s_sao_ctxt.i4_cur_chroma_recon_stride = i4_chroma_scratch_buf_stride;
1119
1120 ASSERT(
1121 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) &&
1122 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) &&
1123 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) &&
1124 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7));
1125 ASSERT(
1126 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) &&
1127 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) &&
1128 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) &&
1129 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7));
1130 }
1131
1132 ASSERT(
1133 (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) &&
1134 (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) &&
1135 (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28));
1136
1137 s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag;
1138 s_sao_ctxt.i1_slice_sao_chroma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_chroma_flag;
1139
1140 ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params);
1141
1142 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
1143 { // clang-format off
1144 luma_distortion =
1145 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
1146 s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
1147 s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd,
1148 ctb_ht,
1149 NULL_PLANE);
1150 } // clang-format on
1151
1152 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1153 { // clang-format off
1154 chroma_distortion =
1155 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_chroma,
1156 s_sao_ctxt.pu1_cur_chroma_recon_buf,
1157 chroma_src_stride,
1158 s_sao_ctxt.i4_cur_chroma_recon_stride, ctb_wd,
1159 (ctb_ht >> !u1_is_422),
1160 NULL_PLANE);
1161 } // clang-format on
1162
1163 /*chroma distortion is added after correction because of lambda difference*/
1164 distortion =
1165 luma_distortion +
1166 (UWORD32)(chroma_distortion * (i8_cl_ssd_lambda_qf / i8_cl_ssd_lambda_chroma_qf));
1167
1168 ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx;
1169 ctb_bits = ihevce_cabac_rdo_encode_sao(
1170 ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out);
1171
1172 /* Calculate the cost as D + (lambda) * R */
1173 curr_cost = distortion +
1174 COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
1175
1176 if(curr_cost < best_cost)
1177 {
1178 best_ctb_sao_bits = ctb_bits;
1179 best_cost = curr_cost;
1180 best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
1181 best_cand_idx = rdo_cand;
1182 curr_buf_idx = !curr_buf_idx;
1183 }
1184 }
1185 /*Adding sao bits to header bits*/
1186 *pu4_frame_rdopt_header_bits = best_ctb_sao_bits;
1187
1188 ihevce_update_best_sao_cabac_state(ps_sao_ctxt->ps_rdopt_entropy_ctxt, best_buf_idx);
1189
1190 /* store the sao parameters of curr ctb for top merge and left merge*/
1191 memcpy(
1192 ps_sao_ctxt->ps_top_ctb_sao,
1193 &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
1194 sizeof(sao_enc_t));
1195 memcpy(
1196 &ps_sao_ctxt->s_left_ctb_sao,
1197 &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
1198 sizeof(sao_enc_t));
1199
1200 /* Copy the sao parameters of winning candidate into the structure which will be sent to entropy thread*/
1201 memcpy(
1202 &ps_ctb_enc_loop_out->s_sao,
1203 &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
1204 sizeof(sao_enc_t));
1205
1206 if(!ps_sao_ctxt->i4_is_last_ctb_col)
1207 {
1208 /* Update left luma buffer for next ctb */
1209 for(row = 0; row < ps_sao_ctxt->i4_sao_blk_ht; row++)
1210 {
1211 ps_sao_ctxt->au1_sao_src_left_luma[row] =
1212 ps_sao_ctxt->pu1_cur_luma_recon_buf
1213 [row * ps_sao_ctxt->i4_cur_luma_recon_stride +
1214 (ps_sao_ctxt->i4_sao_blk_wd - 1)];
1215 }
1216 }
1217
1218 if(!ps_sao_ctxt->i4_is_last_ctb_col)
1219 {
1220 /* Update left chroma buffer for next ctb */
1221 for(row = 0; row < (ps_sao_ctxt->i4_sao_blk_ht >> 1); row++)
1222 {
1223 *(UWORD16 *)(ps_sao_ctxt->au1_sao_src_left_chroma + row * 2) =
1224 *(UWORD16 *)(ps_sao_ctxt->pu1_cur_chroma_recon_buf +
1225 row * ps_sao_ctxt->i4_cur_chroma_recon_stride +
1226 (ps_sao_ctxt->i4_sao_blk_wd - 2));
1227 }
1228 }
1229
1230 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
1231 {
1232 /* Copy the sao'ed output of the best candidate to the recon buffer*/
1233
1234 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1235 ps_sao_ctxt->pu1_cur_luma_recon_buf,
1236 ps_sao_ctxt->i4_cur_luma_recon_stride,
1237 ps_sao_ctxt->au1_sao_luma_scratch[best_buf_idx],
1238 i4_luma_scratch_buf_stride,
1239 ctb_wd,
1240 ctb_ht);
1241 }
1242 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1243 {
1244 /* Copy the sao'ed output of the best candidate to the chroma recon buffer*/
1245
1246 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1247 ps_sao_ctxt->pu1_cur_chroma_recon_buf,
1248 ps_sao_ctxt->i4_cur_chroma_recon_stride,
1249 ps_sao_ctxt->au1_sao_chroma_scratch[best_buf_idx],
1250 i4_chroma_scratch_buf_stride,
1251 ctb_wd,
1252 ctb_ht >> !u1_is_422);
1253 }
1254 }
1255 }
1256